Deprecated: The each() function is deprecated. This message will be suppressed on further calls in /home/zhenxiangba/zhenxiangba.com/public_html/phproxy-improved-master/index.php on line 456
OctoThinker (OctoThinker)
[go: Go Back, main page]

GAIR

\n

🎯 Our Goal: To reshape the pre-training trajectory so models scale better under RL.

\n

Check our technical report for more details!

\n

\"image/png\"

\n","classNames":"hf-sanitized hf-sanitized-4-00CtbPxBicffh9dCn8X"},"users":[{"_id":"628f6e5ab90dde28ef57d293","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/628f6e5ab90dde28ef57d293/AxNzR2nvrND6Rf3RPkYMk.jpeg","isPro":false,"fullname":"Fan Zhou","user":"koalazf99","type":"user"},{"_id":"62cbeb2d72dfd24b86bdf977","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/62cbeb2d72dfd24b86bdf977/UcGYYSBNrCvPM5K9v-sro.png","isPro":false,"fullname":"Zengzhi Wang","user":"SinclairWang","type":"user"},{"_id":"64e3562342d8e2c1c69f64b3","avatarUrl":"/avatars/1ccf22d60deec213bbe069d30811efbe.svg","isPro":false,"fullname":"xuefengli","user":"xuefengli","type":"user"},{"_id":"6144a0c4ff1146bbd84d9865","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1661715958139-6144a0c4ff1146bbd84d9865.png","isPro":false,"fullname":"Pengfei Liu","user":"Pengfei","type":"user"}],"userCount":4,"collections":[{"slug":"OctoThinker/mid-training-analysis-checkpoints-llama-32-3b-68668360896c599facee2b95","title":"Mid-training Analysis Checkpoints (Llama-3.2-3B)","description":"What makes a base language model suitable for RL? 
Through controlled experiments, we identify key factors then leverage them to scale up mid-training.","gating":false,"lastUpdated":"2025-07-07T12:48:42.325Z","owner":{"_id":"68079bb29b00fc93aa0c3692","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/628f6e5ab90dde28ef57d293/Wur4X_UXCpne0ruDCfy7M.png","fullname":"OctoThinker","name":"OctoThinker","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":33,"isUserFollowing":false},"items":[{"_id":"68668c5c74055d6a313d14d7","position":0,"type":"model","author":"OctoThinker","authorData":{"_id":"68079bb29b00fc93aa0c3692","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/628f6e5ab90dde28ef57d293/Wur4X_UXCpne0ruDCfy7M.png","fullname":"OctoThinker","name":"OctoThinker","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":33,"isUserFollowing":false},"downloads":0,"gated":false,"id":"OctoThinker/Llama_32_3B_finemath_4p_bs4M_seq8k_20B","availableInferenceProviders":[],"lastModified":"2025-07-07T13:41:55.000Z","likes":0,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"_id":"68668c47fad31bfb85afe188","position":1,"type":"model","author":"OctoThinker","authorData":{"_id":"68079bb29b00fc93aa0c3692","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/628f6e5ab90dde28ef57d293/Wur4X_UXCpne0ruDCfy7M.png","fullname":"OctoThinker","name":"OctoThinker","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":33,"isUserFollowing":false},"downloads":0,"gated":false,"id":"OctoThinker/Llama_32_3B_megamath_web_pro_bs4M_seq8k_20B","availableInferenceProviders":[],"lastModified":"2025-07-07T13:43:07.000Z","likes":0,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"_id":"68668c4b11f532de825c57e2","position":2,"type":"model","author":"OctoThinker","authorData":{"_id":"68079bb29b00fc93aa0c3692","avatarUrl":
"https://cdn-avatars.huggingface.co/v1/production/uploads/628f6e5ab90dde28ef57d293/Wur4X_UXCpne0ruDCfy7M.png","fullname":"OctoThinker","name":"OctoThinker","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":33,"isUserFollowing":false},"downloads":0,"gated":false,"id":"OctoThinker/Llama_32_3B_megamath_web_pro_max_bs4M_seq8k_20B","availableInferenceProviders":[],"lastModified":"2025-07-07T13:43:38.000Z","likes":0,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"_id":"68668c43896c599facef95c9","position":3,"type":"model","author":"OctoThinker","authorData":{"_id":"68079bb29b00fc93aa0c3692","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/628f6e5ab90dde28ef57d293/Wur4X_UXCpne0ruDCfy7M.png","fullname":"OctoThinker","name":"OctoThinker","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":33,"isUserFollowing":false},"downloads":0,"gated":false,"id":"OctoThinker/Llama_32_3B_megamath_web_pro_megamath_synth_qa_31_bs4M_seq8k_20B","availableInferenceProviders":[],"lastModified":"2025-07-03T16:08:04.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false}],"position":0,"theme":"purple","private":false,"shareUrl":"https://hf.co/collections/OctoThinker/mid-training-analysis-checkpoints-llama-32-3b","upvotes":1,"isUpvotedByUser":false},{"slug":"OctoThinker/octothinker-llama-8b-family-685bf784afcd39a067eb410d","title":"OctoThinker-Llama-8B Family","description":"What makes a base language model suitable for RL? 
Through controlled experiments, we identify key factors then leverage them to scale up mid-training.","gating":false,"lastUpdated":"2025-07-06T12:28:41.937Z","owner":{"_id":"68079bb29b00fc93aa0c3692","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/628f6e5ab90dde28ef57d293/Wur4X_UXCpne0ruDCfy7M.png","fullname":"OctoThinker","name":"OctoThinker","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":33,"isUserFollowing":false},"items":[{"_id":"685bf7bf003fcfe85eca02fc","position":0,"type":"model","author":"OctoThinker","authorData":{"_id":"68079bb29b00fc93aa0c3692","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/628f6e5ab90dde28ef57d293/Wur4X_UXCpne0ruDCfy7M.png","fullname":"OctoThinker","name":"OctoThinker","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":33,"isUserFollowing":false},"downloads":11,"gated":false,"id":"OctoThinker/OctoThinker-8B-Long-Base","availableInferenceProviders":[],"lastModified":"2025-07-06T12:45:24.000Z","likes":0,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[],"numParameters":8030261248},{"_id":"685bf7c525274d9fa6cc8038","position":1,"type":"model","author":"OctoThinker","authorData":{"_id":"68079bb29b00fc93aa0c3692","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/628f6e5ab90dde28ef57d293/Wur4X_UXCpne0ruDCfy7M.png","fullname":"OctoThinker","name":"OctoThinker","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":33,"isUserFollowing":false},"downloads":73,"gated":false,"id":"OctoThinker/OctoThinker-8B-Hybrid-Base","availableInferenceProviders":[],"lastModified":"2025-07-06T12:53:50.000Z","likes":2,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[],"numParameters":8030261248},{"_id":"685bf7cdcca1de4baeff1c1f","position":2,"type":"model","author":"OctoThinker","authorData":{"_id":"68079bb29b00f
c93aa0c3692","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/628f6e5ab90dde28ef57d293/Wur4X_UXCpne0ruDCfy7M.png","fullname":"OctoThinker","name":"OctoThinker","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":33,"isUserFollowing":false},"downloads":4,"gated":false,"id":"OctoThinker/OctoThinker-8B-Short-Base","availableInferenceProviders":[],"lastModified":"2025-07-06T12:55:18.000Z","likes":1,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[],"numParameters":8030261248}],"position":1,"theme":"purple","private":false,"shareUrl":"https://hf.co/collections/OctoThinker/octothinker-llama-8b-family","upvotes":3,"isUpvotedByUser":false},{"slug":"OctoThinker/octothinker-llama-3b-family-685bf7135ff601f024115169","title":"OctoThinker-Llama-3B Family","description":"What makes a base language model suitable for RL? Through controlled experiments, we identify key factors then leverage them to scale up 
mid-training.","gating":false,"lastUpdated":"2025-07-06T12:28:46.394Z","owner":{"_id":"68079bb29b00fc93aa0c3692","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/628f6e5ab90dde28ef57d293/Wur4X_UXCpne0ruDCfy7M.png","fullname":"OctoThinker","name":"OctoThinker","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":33,"isUserFollowing":false},"items":[{"_id":"685bf7319a2d7bc32b644608","position":0,"type":"model","author":"OctoThinker","authorData":{"_id":"68079bb29b00fc93aa0c3692","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/628f6e5ab90dde28ef57d293/Wur4X_UXCpne0ruDCfy7M.png","fullname":"OctoThinker","name":"OctoThinker","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":33,"isUserFollowing":false},"downloads":6,"gated":false,"id":"OctoThinker/OctoThinker-3B-Long-Base","availableInferenceProviders":[],"lastModified":"2025-07-06T13:18:32.000Z","likes":0,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[],"numParameters":3212749824},{"_id":"685bf73d18f8993345c2f60f","position":1,"type":"model","author":"OctoThinker","authorData":{"_id":"68079bb29b00fc93aa0c3692","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/628f6e5ab90dde28ef57d293/Wur4X_UXCpne0ruDCfy7M.png","fullname":"OctoThinker","name":"OctoThinker","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":33,"isUserFollowing":false},"downloads":395,"gated":false,"id":"OctoThinker/OctoThinker-3B-Hybrid-Base","availableInferenceProviders":[],"lastModified":"2025-07-12T11:25:56.000Z","likes":1,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[],"numParameters":3212749824},{"_id":"685bf7499edea6e9b486501d","position":2,"type":"model","author":"OctoThinker","authorData":{"_id":"68079bb29b00fc93aa0c3692","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/628f
6e5ab90dde28ef57d293/Wur4X_UXCpne0ruDCfy7M.png","fullname":"OctoThinker","name":"OctoThinker","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":33,"isUserFollowing":false},"downloads":138,"gated":false,"id":"OctoThinker/OctoThinker-3B-Short-Base","availableInferenceProviders":[],"lastModified":"2025-07-12T11:20:27.000Z","likes":0,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[],"numParameters":3212749824},{"_id":"685bf7539a2d7bc32b644fdc","position":3,"type":"model","author":"OctoThinker","authorData":{"_id":"68079bb29b00fc93aa0c3692","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/628f6e5ab90dde28ef57d293/Wur4X_UXCpne0ruDCfy7M.png","fullname":"OctoThinker","name":"OctoThinker","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":33,"isUserFollowing":false},"downloads":7,"gated":false,"id":"OctoThinker/OctoThinker-3B-Long-Zero","availableInferenceProviders":[],"lastModified":"2025-07-06T14:09:51.000Z","likes":0,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[],"numParameters":3606752256}],"position":2,"theme":"green","private":false,"shareUrl":"https://hf.co/collections/OctoThinker/octothinker-llama-3b-family","upvotes":2,"isUpvotedByUser":false},{"slug":"OctoThinker/octothinker-llama-1b-family-685bf57dae7ab76844e4cff2","title":"OctoThinker-Llama-1B Family","description":"What makes a base language model suitable for RL? 
Through controlled experiments, we identify key factors then leverage them to scale up mid-training.","gating":false,"lastUpdated":"2025-07-06T12:28:50.143Z","owner":{"_id":"68079bb29b00fc93aa0c3692","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/628f6e5ab90dde28ef57d293/Wur4X_UXCpne0ruDCfy7M.png","fullname":"OctoThinker","name":"OctoThinker","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":33,"isUserFollowing":false},"items":[{"_id":"685bf6363e650327633e9c9e","position":1,"type":"model","author":"OctoThinker","authorData":{"_id":"68079bb29b00fc93aa0c3692","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/628f6e5ab90dde28ef57d293/Wur4X_UXCpne0ruDCfy7M.png","fullname":"OctoThinker","name":"OctoThinker","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":33,"isUserFollowing":false},"downloads":5,"gated":false,"id":"OctoThinker/OctoThinker-1B-Long-Base","availableInferenceProviders":[],"lastModified":"2025-07-06T13:02:08.000Z","likes":0,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[],"numParameters":1235814400},{"_id":"685bf6420ccf94769ff62808","position":2,"type":"model","author":"OctoThinker","authorData":{"_id":"68079bb29b00fc93aa0c3692","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/628f6e5ab90dde28ef57d293/Wur4X_UXCpne0ruDCfy7M.png","fullname":"OctoThinker","name":"OctoThinker","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":33,"isUserFollowing":false},"downloads":1,"gated":false,"id":"OctoThinker/OctoThinker-1B-Hybrid-Base","availableInferenceProviders":[],"lastModified":"2025-07-06T13:07:13.000Z","likes":0,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[],"numParameters":1235814400},{"_id":"685bf662c4ede31faba1af84","position":3,"type":"model","author":"OctoThinker","authorData":{"_id":"68079bb29b00fc9
3aa0c3692","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/628f6e5ab90dde28ef57d293/Wur4X_UXCpne0ruDCfy7M.png","fullname":"OctoThinker","name":"OctoThinker","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":33,"isUserFollowing":false},"downloads":1,"gated":false,"id":"OctoThinker/OctoThinker-1B-Short-Base","availableInferenceProviders":[],"lastModified":"2025-07-06T13:13:45.000Z","likes":0,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[],"numParameters":1235814400},{"_id":"685bf66f0ec8fba4f6c218cc","position":4,"type":"model","author":"OctoThinker","authorData":{"_id":"68079bb29b00fc93aa0c3692","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/628f6e5ab90dde28ef57d293/Wur4X_UXCpne0ruDCfy7M.png","fullname":"OctoThinker","name":"OctoThinker","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":33,"isUserFollowing":false},"downloads":6,"gated":false,"id":"OctoThinker/OctoThinker-1B-Long-Zero","availableInferenceProviders":[],"lastModified":"2025-07-06T14:01:11.000Z","likes":0,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[],"numParameters":1498482688}],"position":3,"theme":"purple","private":false,"shareUrl":"https://hf.co/collections/OctoThinker/octothinker-llama-1b-family","upvotes":2,"isUpvotedByUser":false}],"datasets":[{"author":"OctoThinker","downloads":2570,"gated":false,"id":"OctoThinker/MegaMath-Web-Pro-Max","lastModified":"2025-07-06T12:08:45.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":69180548,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":36,"isLikedByUser":false,"isBenchmark":false}],"models":[{"author":"OctoThinker","authorData":{"_id":"68079bb29b00fc93aa0c3692","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploa
ds/628f6e5ab90dde28ef57d293/Wur4X_UXCpne0ruDCfy7M.png","fullname":"OctoThinker","name":"OctoThinker","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":33,"isUserFollowing":false},"downloads":44,"gated":false,"id":"OctoThinker/OctoThinker-3B-Hybrid-Zero","availableInferenceProviders":[],"lastModified":"2025-07-12T11:26:29.000Z","likes":1,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[],"numParameters":3606752256},{"author":"OctoThinker","authorData":{"_id":"68079bb29b00fc93aa0c3692","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/628f6e5ab90dde28ef57d293/Wur4X_UXCpne0ruDCfy7M.png","fullname":"OctoThinker","name":"OctoThinker","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":33,"isUserFollowing":false},"downloads":395,"gated":false,"id":"OctoThinker/OctoThinker-3B-Hybrid-Base","availableInferenceProviders":[],"lastModified":"2025-07-12T11:25:56.000Z","likes":1,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[],"numParameters":3212749824},{"author":"OctoThinker","authorData":{"_id":"68079bb29b00fc93aa0c3692","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/628f6e5ab90dde28ef57d293/Wur4X_UXCpne0ruDCfy7M.png","fullname":"OctoThinker","name":"OctoThinker","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":33,"isUserFollowing":false},"downloads":4,"gated":false,"id":"OctoThinker/OctoThinker-3B-Short-Zero","availableInferenceProviders":[],"lastModified":"2025-07-12T11:21:55.000Z","likes":1,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[],"numParameters":3606752256},{"author":"OctoThinker","authorData":{"_id":"68079bb29b00fc93aa0c3692","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/628f6e5ab90dde28ef57d293/Wur4X_UXCpne0ruDCfy7M.png","fullname":"OctoThinker
","name":"OctoThinker","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":33,"isUserFollowing":false},"downloads":138,"gated":false,"id":"OctoThinker/OctoThinker-3B-Short-Base","availableInferenceProviders":[],"lastModified":"2025-07-12T11:20:27.000Z","likes":0,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[],"numParameters":3212749824},{"author":"OctoThinker","authorData":{"_id":"68079bb29b00fc93aa0c3692","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/628f6e5ab90dde28ef57d293/Wur4X_UXCpne0ruDCfy7M.png","fullname":"OctoThinker","name":"OctoThinker","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":33,"isUserFollowing":false},"downloads":0,"gated":false,"id":"OctoThinker/Llama_32_3B_megamath_web_pro_max_bs4M_seq8k_100B","availableInferenceProviders":[],"lastModified":"2025-07-07T13:52:49.000Z","likes":0,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"author":"OctoThinker","authorData":{"_id":"68079bb29b00fc93aa0c3692","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/628f6e5ab90dde28ef57d293/Wur4X_UXCpne0ruDCfy7M.png","fullname":"OctoThinker","name":"OctoThinker","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":33,"isUserFollowing":false},"downloads":0,"gated":false,"id":"OctoThinker/Llama_32_3B_megamath_web_pro_open_r1_longcot_general_ins_89_10_1_bs4M_seq8k_20B","availableInferenceProviders":[],"lastModified":"2025-07-07T13:49:52.000Z","likes":0,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"author":"OctoThinker","authorData":{"_id":"68079bb29b00fc93aa0c3692","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/628f6e5ab90dde28ef57d293/Wur4X_UXCpne0ruDCfy7M.png","fullname":"OctoThinker","name":"OctoThinker","type":"org","isHf":false,"isHfAdmi
n":false,"isMod":false,"followerCount":33,"isUserFollowing":false},"downloads":0,"gated":false,"id":"OctoThinker/Llama_32_3B_megamath_web_pro_open_r1_longcot_91_bs4M_seq8k_20B","availableInferenceProviders":[],"lastModified":"2025-07-07T13:48:20.000Z","likes":0,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"author":"OctoThinker","authorData":{"_id":"68079bb29b00fc93aa0c3692","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/628f6e5ab90dde28ef57d293/Wur4X_UXCpne0ruDCfy7M.png","fullname":"OctoThinker","name":"OctoThinker","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":33,"isUserFollowing":false},"downloads":0,"gated":false,"id":"OctoThinker/Llama_32_3B_megamath_web_pro_megamath_synth_qa_general_ins_89_10_1_bs4M_seq8k_20B","availableInferenceProviders":[],"lastModified":"2025-07-07T13:46:46.000Z","likes":0,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"author":"OctoThinker","authorData":{"_id":"68079bb29b00fc93aa0c3692","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/628f6e5ab90dde28ef57d293/Wur4X_UXCpne0ruDCfy7M.png","fullname":"OctoThinker","name":"OctoThinker","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":33,"isUserFollowing":false},"downloads":0,"gated":false,"id":"OctoThinker/Llama_32_3B_megamath_web_pro_megamath_synth_qa_91_bs4M_seq8k_20B","availableInferenceProviders":[],"lastModified":"2025-07-07T13:44:17.000Z","likes":0,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"author":"OctoThinker","authorData":{"_id":"68079bb29b00fc93aa0c3692","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/628f6e5ab90dde28ef57d293/Wur4X_UXCpne0ruDCfy7M.png","fullname":"OctoThinker","name":"OctoThinker","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"follower
Count":33,"isUserFollowing":false},"downloads":0,"gated":false,"id":"OctoThinker/Llama_32_3B_megamath_web_pro_max_bs4M_seq8k_20B","availableInferenceProviders":[],"lastModified":"2025-07-07T13:43:38.000Z","likes":0,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]}],"paperPreviews":[],"spaces":[],"buckets":[],"numBuckets":0,"numDatasets":1,"numModels":26,"numSpaces":1,"lastOrgActivities":[{"time":"2026-01-19T09:25:57.059Z","user":"Pengfei","userAvatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1661715958139-6144a0c4ff1146bbd84d9865.png","type":"paper","paper":{"id":"2601.03111","title":"One Sample to Rule Them All: Extreme Data Efficiency in RL Scaling","publishedAt":"2026-01-06T15:41:35.000Z","upvotes":10,"isUpvotedByUser":true}},{"time":"2025-07-23T08:36:51.132Z","user":"SinclairWang","userAvatarUrl":"","type":"paper","paper":{"id":"2507.16812","title":"MegaScience: Pushing the Frontiers of Post-Training Datasets for Science\n 
Reasoning","publishedAt":"2025-07-22T17:59:03.000Z","upvotes":63,"isUpvotedByUser":true}},{"time":"2025-07-12T11:26:30.720Z","user":"SinclairWang","userAvatarUrl":"","orgAvatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/628f6e5ab90dde28ef57d293/Wur4X_UXCpne0ruDCfy7M.png","type":"update","repoData":{"author":"OctoThinker","authorData":{"_id":"68079bb29b00fc93aa0c3692","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/628f6e5ab90dde28ef57d293/Wur4X_UXCpne0ruDCfy7M.png","fullname":"OctoThinker","name":"OctoThinker","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":33,"isUserFollowing":false},"downloads":44,"gated":false,"id":"OctoThinker/OctoThinker-3B-Hybrid-Zero","availableInferenceProviders":[],"lastModified":"2025-07-12T11:26:29.000Z","likes":1,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[],"numParameters":3606752256},"repoId":"OctoThinker/OctoThinker-3B-Hybrid-Zero","repoType":"model","org":"OctoThinker"}],"acceptLanguages":["*"],"canReadRepos":false,"canReadSpaces":false,"blogPosts":[],"currentRepoPage":0,"filters":{},"paperView":false}">

AI & ML interests

None defined yet.

Recent Activity

šŸ™ OctoThinker is led by GAIR

🎯 Our Goal: To reshape the pre-training trajectory so models scale better under RL.

Check our technical report for more details!

image/png