INTELLECT-3: Technical Report
\n","updatedAt":"2025-12-26T22:46:39.953Z","author":{"_id":"65243980050781c16f234f1f","avatarUrl":"/avatars/743a009681d5d554c27e04300db9f267.svg","fullname":"Avi","name":"avahal","type":"user","isPro":false,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":3,"isUserFollowing":false}},"numEdits":0,"identifiedLanguage":{"language":"en","probability":0.6173005700111389},"editors":["avahal"],"editorAvatarUrls":["/avatars/743a009681d5d554c27e04300db9f267.svg"],"reactions":[],"isReport":false}}],"primaryEmailConfirmed":false,"paper":{"id":"2512.16144","authors":[{"_id":"694b7f94746a34b55dd53e1d","name":"Prime Intellect Team","hidden":false},{"_id":"694b7f94746a34b55dd53e1e","name":"Mika Senghaas","hidden":false},{"_id":"694b7f94746a34b55dd53e1f","name":"Fares Obeid","hidden":false},{"_id":"694b7f94746a34b55dd53e20","name":"Sami Jaghouar","hidden":false},{"_id":"694b7f94746a34b55dd53e21","name":"William Brown","hidden":false},{"_id":"694b7f94746a34b55dd53e22","name":"Jack Min Ong","hidden":false},{"_id":"694b7f94746a34b55dd53e23","name":"Daniel Auras","hidden":false},{"_id":"694b7f94746a34b55dd53e24","name":"Matej Sirovatka","hidden":false},{"_id":"694b7f94746a34b55dd53e25","name":"Jannik Straube","hidden":false},{"_id":"694b7f94746a34b55dd53e26","name":"Andrew Baker","hidden":false},{"_id":"694b7f94746a34b55dd53e27","name":"Sebastian Müller","hidden":false},{"_id":"694b7f94746a34b55dd53e28","name":"Justus Mattern","hidden":false},{"_id":"694b7f94746a34b55dd53e29","name":"Manveer Basra","hidden":false},{"_id":"694b7f94746a34b55dd53e2a","name":"Aiman Ismail","hidden":false},{"_id":"694b7f94746a34b55dd53e2b","name":"Dominik Scherm","hidden":false},{"_id":"694b7f94746a34b55dd53e2c","name":"Cooper Miller","hidden":false},{"_id":"694b7f94746a34b55dd53e2d","name":"Ameen Patel","hidden":false},{"_id":"694b7f94746a34b55dd53e2e","name":"Simon Kirsten","hidden":false},{"_id":"694b7f94746a34b55dd53e2f","name":"Mario Sieg","hidden":false},{"_id":"694b7f94746a34b55dd53e30","name":"Christian Reetz","hidden":false},{"_id":"694b7f94746a34b55dd53e31","name":"Kemal Erdem","hidden":false},{"_id":"694b7f94746a34b55dd53e32","name":"Vincent Weisser","hidden":false},{"_id":"694b7f94746a34b55dd53e33","user":{"_id":"606ae9ab0392350f35a22e37","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/606ae9ab0392350f35a22e37/7642Z9xUwSNf5oqFpGzyd.png","isPro":false,"fullname":"Johannes Hagemann","user":"Johannes","type":"user"},"name":"Johannes Hagemann","status":"claimed_verified","statusLastChangedAt":"2025-12-25T20:45:17.737Z","hidden":false}],"publishedAt":"2025-12-18T03:57:01.000Z","submittedOnDailyAt":"2025-12-24T03:23:51.710Z","title":"INTELLECT-3: Technical Report","submittedOnDailyBy":{"_id":"631ce4b244503b72277fc89f","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1677431596830-631ce4b244503b72277fc89f.jpeg","isPro":true,"fullname":"Quentin Gallouédec","user":"qgallouedec","type":"user"},"summary":"We present INTELLECT-3, a 106B-parameter Mixture-of-Experts model (12B active) trained with large-scale reinforcement learning on our end-to-end RL infrastructure stack. INTELLECT-3 achieves state of the art performance for its size across math, code, science and reasoning benchmarks, outperforming many larger frontier models. 
We open-source the model together with the full infrastructure stack used to create it, including RL frameworks, complete recipe, and a wide collection of environments, built with the verifiers library, for training and evaluation from our Environments Hub community platform. Built for this effort, we introduce prime-rl, an open framework for large-scale asynchronous reinforcement learning, which scales seamlessly from a single node to thousands of GPUs, and is tailored for agentic RL with first-class support for multi-turn interactions and tool use. Using this stack, we run both SFT and RL training on top of the GLM-4.5-Air-Base model, scaling RL training up to 512 H200s with high training efficiency.","upvotes":20,"discussionId":"694b7f94746a34b55dd53e34","ai_summary":"INTELLECT-3, a large Mixture-of-Experts model trained with reinforcement learning, achieves top performance across various benchmarks and is supported by an open-source RL infrastructure framework.","ai_keywords":["Mixture-of-Experts","reinforcement learning","RL infrastructure","prime-rl","asynchronous reinforcement learning","GLM-4.5-Air-Base","SFT","H200s"],"organization":{"_id":"656ec1d908bd4deb79a0ba70","name":"PrimeIntellect","fullname":"Prime Intellect","avatar":"https://cdn-uploads.huggingface.co/production/uploads/61e020e4a343274bb132e138/H2mcdPRWtl4iKLd-OYYBc.jpeg"}},"canReadDatabase":false,"canManagePapers":false,"canSubmit":false,"hasHfLevelAccess":false,"upvoted":false,"upvoters":[{"_id":"631ce4b244503b72277fc89f","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1677431596830-631ce4b244503b72277fc89f.jpeg","isPro":true,"fullname":"Quentin Gallouédec","user":"qgallouedec","type":"user"},{"_id":"655e4c26d5c0d3db535cdd66","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/655e4c26d5c0d3db535cdd66/7gUJ8urq7mEZ4OE4ppQCj.png","isPro":false,"fullname":"Lincoln","user":"Presidentlin","type":"user"},{"_id":"65bb837dbfb878f46c77de4c","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/65bb837dbfb878f46c77de4c/23gZ_lBEwyoqjexFy9QLD.jpeg","isPro":true,"fullname":"Prithiv Sakthi","user":"prithivMLmods","type":"user"},{"_id":"620783f24e28382272337ba4","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/620783f24e28382272337ba4/zkUveQPNiDfYjgGhuFErj.jpeg","isPro":false,"fullname":"GuoLiangTang","user":"Tommy930","type":"user"},{"_id":"64137e2150358a805203cbac","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/64137e2150358a805203cbac/vU4OjyvOlu2g5PEOOik-t.jpeg","isPro":false,"fullname":"euclaise","user":"euclaise","type":"user"},{"_id":"636e71b2b0ebc04888157b71","avatarUrl":"/avatars/957ba705d470e3a01792741d7f0ff038.svg","isPro":false,"fullname":"Alexey Malakhov","user":"ZeL1k7","type":"user"},{"_id":"684d57f26e04c265777ead3f","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/no-auth/cuOj-bQqukSZreXgUJlfm.png","isPro":false,"fullname":"Joakim Lee","user":"Reinforcement4All","type":"user"},{"_id":"66499e917ab33d141da24d3d","avatarUrl":"/avatars/1150d8ae8616b0ad72ee7cf70bbad3f2.svg","isPro":false,"fullname":"Sarvar Murad","user":"sarvarmurad","type":"user"},{"_id":"688c6ec8b6a6fc13e28eb399","avatarUrl":"/avatars/e7737aa59b757a541a5d33bd81376cbd.svg","isPro":false,"fullname":"Katherine 
Tieu","user":"kthrn22","type":"user"},{"_id":"651c240a37fecec1fe96c60b","avatarUrl":"/avatars/5af52af97b7907e138efecac0f20799b.svg","isPro":false,"fullname":"S.F.","user":"search-facility","type":"user"},{"_id":"652a656d1a3250bbfe3bb92d","avatarUrl":"/avatars/a1c25150d55c493edd9a7f81287fc449.svg","isPro":false,"fullname":"Alejandro Cuadron","user":"AlexCuadron","type":"user"},{"_id":"684e06c775ffb92ec6b5083c","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/no-auth/DN253Ki3X08I_5aPWmsUx.png","isPro":false,"fullname":"Michael","user":"MikeSan9","type":"user"}],"acceptLanguages":["*"],"dailyPaperRank":0,"organization":{"_id":"656ec1d908bd4deb79a0ba70","name":"PrimeIntellect","fullname":"Prime Intellect","avatar":"https://cdn-uploads.huggingface.co/production/uploads/61e020e4a343274bb132e138/H2mcdPRWtl4iKLd-OYYBc.jpeg"}}">
AI-generated summary

INTELLECT-3, a large Mixture-of-Experts model trained with reinforcement learning, achieves top performance across various benchmarks and is supported by an open-source RL infrastructure framework.
We present INTELLECT-3, a 106B-parameter Mixture-of-Experts model (12B active) trained with large-scale reinforcement learning on our end-to-end RL infrastructure stack. INTELLECT-3 achieves state-of-the-art performance for its size across math, code, science, and reasoning benchmarks, outperforming many larger frontier models. We open-source the model together with the full infrastructure stack used to create it, including our RL framework, the complete training recipe, and a wide collection of training and evaluation environments built with the verifiers library and available on our Environments Hub community platform. Built for this effort, we introduce prime-rl, an open framework for large-scale asynchronous reinforcement learning that scales seamlessly from a single node to thousands of GPUs and is tailored for agentic RL, with first-class support for multi-turn interactions and tool use. Using this stack, we run both SFT and RL training on top of the GLM-4.5-Air-Base model, scaling RL training up to 512 H200 GPUs with high training efficiency.
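As a concrete illustration of the environment side of this stack, the sketch below shows how a verifiers-style environment might be loaded and used to evaluate a model behind an OpenAI-compatible endpoint. This is a minimal sketch assuming the public verifiers API (`load_environment`, `Environment.evaluate`); the environment id, model name, and server URL are placeholders rather than values from the report.

```python
# Minimal sketch: evaluating a model on a verifiers environment.
# Assumes the public verifiers API (load_environment / Environment.evaluate)
# and an OpenAI-compatible inference server; the environment id, model name,
# and base URL below are placeholders, not values from the report.
from openai import OpenAI

import verifiers as vf

# Load an environment package, e.g. one installed from the Environments Hub.
env = vf.load_environment("math-python")  # placeholder environment id

# Point an OpenAI-compatible client at the model being evaluated,
# e.g. a local inference server hosting a model checkpoint.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

# Run a small evaluation: the environment drives the rollouts (including
# multi-turn interaction and tool use where the environment defines them)
# and scores each completion with its rubric.
results = env.evaluate(client=client, model="my-model", num_examples=8)
print(results.reward)  # per-rollout rewards assigned by the environment's rubric
```

In RL training, the same environment abstraction supplies rollouts and rewards to the trainer, which is what allows one environment definition to be reused for both evaluation and large-scale asynchronous training.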