Paper page - Mobile-Agent-v3.5: Multi-platform Fundamental GUI Agents
https://github.com/X-PLUG/MobileAgent.\n","updatedAt":"2026-02-20T03:40:42.887Z","author":{"_id":"645b10e80c73ea27d13f7aca","avatarUrl":"/avatars/95e565306472a15067440b5b43e07a6f.svg","fullname":"xuhaiyang","name":"xhyandwyy","type":"user","isPro":false,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":6,"isUserFollowing":false}},"numEdits":0,"identifiedLanguage":{"language":"en","probability":0.818566083908081},"editors":["xhyandwyy"],"editorAvatarUrls":["/avatars/95e565306472a15067440b5b43e07a6f.svg"],"reactions":[],"isReport":false}},{"id":"6998ac127d7ae20f45f5d3c0","author":{"_id":"65243980050781c16f234f1f","avatarUrl":"/avatars/743a009681d5d554c27e04300db9f267.svg","fullname":"Avi","name":"avahal","type":"user","isPro":false,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":3,"isUserFollowing":false},"createdAt":"2026-02-20T18:46:42.000Z","type":"comment","data":{"edited":false,"hidden":false,"latest":{"raw":"arXivLens breakdown of this paper 👉 https://arxivlens.com/PaperView/Details/mobile-agent-v3-5-multi-platform-fundamental-gui-agents-214-ba5f1762\n- Executive Summary\n- Detailed Breakdown\n- Practical Applications","html":"
\n","updatedAt":"2026-02-20T18:46:42.775Z","author":{"_id":"65243980050781c16f234f1f","avatarUrl":"/avatars/743a009681d5d554c27e04300db9f267.svg","fullname":"Avi","name":"avahal","type":"user","isPro":false,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":3,"isUserFollowing":false}},"numEdits":0,"identifiedLanguage":{"language":"en","probability":0.5919207334518433},"editors":["avahal"],"editorAvatarUrls":["/avatars/743a009681d5d554c27e04300db9f267.svg"],"reactions":[],"isReport":false}}],"primaryEmailConfirmed":false,"paper":{"id":"2602.16855","authors":[{"_id":"6997d6467a658569d5a101b7","name":"Haiyang Xu","hidden":false},{"_id":"6997d6467a658569d5a101b8","name":"Xi Zhang","hidden":false},{"_id":"6997d6467a658569d5a101b9","name":"Haowei Liu","hidden":false},{"_id":"6997d6467a658569d5a101ba","name":"Junyang Wang","hidden":false},{"_id":"6997d6467a658569d5a101bb","name":"Zhaozai Zhu","hidden":false},{"_id":"6997d6467a658569d5a101bc","user":{"_id":"6847d15573f604b8d2b9f738","avatarUrl":"/avatars/4e25ca44685afd597b7d4f5f7cb2aae4.svg","isPro":false,"fullname":"Shengjie Zhou","user":"ZSJ123","type":"user"},"name":"Shengjie Zhou","status":"admin_assigned","statusLastChangedAt":"2026-02-20T09:01:53.423Z","hidden":false},{"_id":"6997d6467a658569d5a101bd","user":{"_id":"6372813520a58a5e14c596a3","avatarUrl":"/avatars/9135151259db3e5b9c8969e1d00c949d.svg","isPro":false,"fullname":"XuHao Hu","user":"Foreshhh","type":"user"},"name":"Xuhao Hu","status":"claimed_verified","statusLastChangedAt":"2026-02-20T08:36:56.334Z","hidden":false},{"_id":"6997d6467a658569d5a101be","name":"Feiyu Gao","hidden":false},{"_id":"6997d6467a658569d5a101bf","user":{"_id":"659a39b95f7a6d40f75404ec","avatarUrl":"/avatars/791a6e8f31f0d0eead06374898a0ded7.svg","isPro":false,"fullname":"Junjie Cao","user":"flyingtom","type":"user"},"name":"Junjie Cao","status":"admin_assigned","statusLastChangedAt":"2026-02-20T09:01:46.813Z","hidden":false},{"_id":"6997d6467a658569d5a101c0","name":"Zihua 
Wang","hidden":false},{"_id":"6997d6467a658569d5a101c1","name":"Zhiyuan Chen","hidden":false},{"_id":"6997d6467a658569d5a101c2","name":"Jitong Liao","hidden":false},{"_id":"6997d6467a658569d5a101c3","name":"Qi Zheng","hidden":false},{"_id":"6997d6467a658569d5a101c4","name":"Jiahui Zeng","hidden":false},{"_id":"6997d6467a658569d5a101c5","name":"Ze Xu","hidden":false},{"_id":"6997d6467a658569d5a101c6","name":"Shuai Bai","hidden":false},{"_id":"6997d6467a658569d5a101c7","user":{"_id":"620760a26e3b7210c2ff1943","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/VC-rKqimF6yxGESNVlPoR.jpeg","isPro":false,"fullname":"Junyang Lin","user":"JustinLin610","type":"user"},"name":"Junyang Lin","status":"admin_assigned","statusLastChangedAt":"2026-02-20T09:01:28.321Z","hidden":false},{"_id":"6997d6467a658569d5a101c8","name":"Jingren Zhou","hidden":false},{"_id":"6997d6467a658569d5a101c9","name":"Ming Yan","hidden":false}],"mediaUrls":["https://cdn-uploads.huggingface.co/production/uploads/645b10e80c73ea27d13f7aca/EQN85w7A2VvZ34VP_BUF2.jpeg","https://cdn-uploads.huggingface.co/production/uploads/645b10e80c73ea27d13f7aca/tuSteuSqp79bypJBdU-eR.jpeg"],"publishedAt":"2026-02-15T01:52:19.000Z","submittedOnDailyAt":"2026-02-20T01:10:42.877Z","title":"Mobile-Agent-v3.5: Multi-platform Fundamental GUI Agents","submittedOnDailyBy":{"_id":"645b10e80c73ea27d13f7aca","avatarUrl":"/avatars/95e565306472a15067440b5b43e07a6f.svg","isPro":false,"fullname":"xuhaiyang","user":"xhyandwyy","type":"user"},"summary":"The paper introduces GUI-Owl-1.5, the latest native GUI agent model that features instruct/thinking variants in multiple sizes (2B/4B/8B/32B/235B) and supports a range of platforms (desktop, mobile, browser, and more) to enable cloud-edge collaboration and real-time interaction. 
GUI-Owl-1.5 achieves state-of-the-art results among open-source models on more than 20 GUI benchmarks: (1) on GUI automation tasks, it obtains 56.5 on OSWorld, 71.6 on AndroidWorld, and 48.4 on WebArena; (2) on grounding tasks, it obtains 80.3 on ScreenSpotPro; (3) on tool-calling tasks, it obtains 47.6 on OSWorld-MCP and 46.8 on MobileWorld; (4) on memory and knowledge tasks, it obtains 75.5 on GUI-Knowledge Bench. GUI-Owl-1.5 incorporates several key innovations: (1) Hybrid Data Flywheel: we construct the data pipeline for UI understanding and trajectory generation based on a combination of simulated environments and cloud-based sandbox environments, in order to improve the efficiency and quality of data collection; (2) Unified Enhancement of Agent Capabilities: we use a unified thought-synthesis pipeline to enhance the model's reasoning capabilities, while placing particular emphasis on improving key agent abilities, including Tool/MCP use, memory, and multi-agent adaptation; (3) Multi-platform Environment RL Scaling: we propose a new environment RL algorithm, MRPO, to address the challenges of multi-platform conflicts and the low training efficiency of long-horizon tasks. 
The GUI-Owl-1.5 models are open-sourced, and an online cloud-sandbox demo is available at https://github.com/X-PLUG/MobileAgent.","upvotes":19,"discussionId":"6997d6467a658569d5a101ca","projectPage":"https://github.com/X-PLUG/MobileAgent/tree/main/Mobile-Agent-v3.5","ai_summary":"GUI-Owl-1.5 is a multi-platform GUI agent model with varying sizes that achieves superior performance across GUI automation, grounding, tool-calling, and memory tasks through innovative data pipelines, unified capability enhancement, and multi-platform reinforcement learning.","ai_keywords":["GUI agent model","UI understanding","trajectory generation","simulated environments","cloud-based sandbox environments","thought-synthesis pipeline","tool-calling","multi-agent adaptation","reinforcement learning","MRPO"],"organization":{"_id":"67d15cca6e2cf0e062dbfb54","name":"AlibabaTongyiLab","fullname":"TongyiLab","avatar":"https://cdn-uploads.huggingface.co/production/uploads/67d1502bfabfe9974d1f77bb/XdUSVf6HqBzE7zFBfSDQP.png"}},"canReadDatabase":false,"canManagePapers":false,"canSubmit":false,"hasHfLevelAccess":false,"upvoted":false,"upvoters":[{"_id":"645b10e80c73ea27d13f7aca","avatarUrl":"/avatars/95e565306472a15067440b5b43e07a6f.svg","isPro":false,"fullname":"xuhaiyang","user":"xhyandwyy","type":"user"},{"_id":"695f29280f33f5a8ce43ac9a","avatarUrl":"/avatars/19ed164ffca78bcbf41437452fb8e65c.svg","isPro":false,"fullname":"Hao Yang","user":"yanghao1126","type":"user"},{"_id":"67b7f649c692888d545401ae","avatarUrl":"/avatars/1ee316ae3eaf41ce693b91d6240e7f60.svg","isPro":false,"fullname":"zhangxi","user":"flyingmelon97","type":"user"},{"_id":"66a2067ada490fbd6918db0a","avatarUrl":"/avatars/376f694c5e45ebac5932cb09dc4f7105.svg","isPro":false,"fullname":"zhaoqing zhu","user":"IzIy","type":"user"},{"_id":"6372813520a58a5e14c596a3","avatarUrl":"/avatars/9135151259db3e5b9c8969e1d00c949d.svg","isPro":false,"fullname":"XuHao 
Hu","user":"Foreshhh","type":"user"},{"_id":"65309a1d657ae56cdb65e0e7","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/no-auth/lHQI9RNjfz8E5v1uyCGeV.png","isPro":false,"fullname":"Zhi-Yuan Chen","user":"JaxChen","type":"user"},{"_id":"64e701c64a408888f9e10cb1","avatarUrl":"/avatars/91fb39d27d2bf0eedba3be6dc31ce7f2.svg","isPro":false,"fullname":"Gotz_X","user":"nikkukun","type":"user"},{"_id":"6438f6415aa69077ffb16942","avatarUrl":"/avatars/c83dbd3e10e88db97c2a86092bad5917.svg","isPro":false,"fullname":"Junyang Wang","user":"junyangwang0410","type":"user"},{"_id":"64b2f97434a92b848c7e941e","avatarUrl":"/avatars/c699c50f3b43cd1641469521127753bb.svg","isPro":false,"fullname":"Nagori","user":"MohammedNaeem","type":"user"},{"_id":"64e27b2d7e7b923a5321cd87","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/64e27b2d7e7b923a5321cd87/rYEB2sg7HIxA3eNJbGo-0.jpeg","isPro":false,"fullname":"Garry Osborne","user":"garryo","type":"user"},{"_id":"6433b6784b34368fdbfebce8","avatarUrl":"/avatars/00fc60e2ed57eb84a4a0eff386357b8c.svg","isPro":false,"fullname":"Star Bottle","user":"StarBottle","type":"user"},{"_id":"63e3da5219b1b962a4421c50","avatarUrl":"/avatars/cc1058d7631ea99d4b8d0ff423d7d4b1.svg","isPro":false,"fullname":"Qiang HE","user":"qianghe97","type":"user"}],"acceptLanguages":["*"],"dailyPaperRank":3,"organization":{"_id":"67d15cca6e2cf0e062dbfb54","name":"AlibabaTongyiLab","fullname":"TongyiLab","avatar":"https://cdn-uploads.huggingface.co/production/uploads/67d1502bfabfe9974d1f77bb/XdUSVf6HqBzE7zFBfSDQP.png"}}">
GUI-Owl-1.5 is a multi-platform GUI agent model with varying sizes that achieves superior performance across GUI automation, grounding, tool-calling, and memory tasks through innovative data pipelines, unified capability enhancement, and multi-platform reinforcement learning.
AI-generated summary
The paper introduces GUI-Owl-1.5, the latest native GUI agent model that features instruct/thinking variants in multiple sizes (2B/4B/8B/32B/235B) and supports a range of platforms (desktop, mobile, browser, and more) to enable cloud-edge collaboration and real-time interaction. GUI-Owl-1.5 achieves state-of-the-art results among open-source models on more than 20 GUI benchmarks: (1) on GUI automation tasks, it obtains 56.5 on OSWorld, 71.6 on AndroidWorld, and 48.4 on WebArena; (2) on grounding tasks, it obtains 80.3 on ScreenSpotPro; (3) on tool-calling tasks, it obtains 47.6 on OSWorld-MCP and 46.8 on MobileWorld; (4) on memory and knowledge tasks, it obtains 75.5 on GUI-Knowledge Bench. GUI-Owl-1.5 incorporates several key innovations: (1) Hybrid Data Flywheel: we construct the data pipeline for UI understanding and trajectory generation based on a combination of simulated environments and cloud-based sandbox environments, in order to improve the efficiency and quality of data collection; (2) Unified Enhancement of Agent Capabilities: we use a unified thought-synthesis pipeline to enhance the model's reasoning capabilities, while placing particular emphasis on improving key agent abilities, including Tool/MCP use, memory, and multi-agent adaptation; (3) Multi-platform Environment RL Scaling: we propose a new environment RL algorithm, MRPO, to address the challenges of multi-platform conflicts and the low training efficiency of long-horizon tasks. The GUI-Owl-1.5 models are open-sourced, and an online cloud-sandbox demo is available at https://github.com/X-PLUG/MobileAgent.
The paper introduces GUI-Owl-1.5, the latest native GUI agent model that features instruct/thinking variants in multiple sizes (2B/4B/8B/32B/235B) and supports a range of platforms (desktop, mobile, browser, and more) to enable cloud-edge collaboration and real-time interaction. GUI-Owl-1.5 achieves state-of-the-art results among open-source models on more than 20 GUI benchmarks: (1) on GUI automation tasks, it obtains 56.5 on OSWorld, 71.6 on AndroidWorld, and 48.4 on WebArena; (2) on grounding tasks, it obtains 80.3 on ScreenSpotPro; (3) on tool-calling tasks, it obtains 47.6 on OSWorld-MCP and 46.8 on MobileWorld; (4) on memory and knowledge tasks, it obtains 75.5 on GUI-Knowledge Bench. GUI-Owl-1.5 incorporates several key innovations: (1) \textbf{Hybrid Data Flywheel}: we construct the data pipeline for UI understanding and trajectory generation based on a combination of simulated environments and cloud-based sandbox environments, in order to improve the efficiency and quality of data collection; (2) \textbf{Unified Enhancement of Agent Capabilities}: we use a unified thought-synthesis pipeline to enhance the model's reasoning capabilities, while placing particular emphasis on improving key agent abilities, including Tool/MCP use, memory, and multi-agent adaptation; (3) \textbf{Multi-platform Environment RL Scaling}: we propose a new environment RL algorithm, MRPO, to address the challenges of multi-platform conflicts and the low training efficiency of long-horizon tasks. The GUI-Owl-1.5 models are open-sourced, and an online cloud-sandbox demo is available at https://github.com/X-PLUG/MobileAgent.