TRL library.\n","classNames":"hf-sanitized hf-sanitized-Z2B34odV7xeqWwOcojXJb"},"users":[{"_id":"5e48005437cb5b49818287a5","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e48005437cb5b49818287a5/4uCXGGui-9QifAT4qelxU.png","isPro":false,"fullname":"Leandro von Werra","user":"lvwerra","type":"user"},{"_id":"6200d0a443eb0913fa2df7cc","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1644220542819-noauth.jpeg","isPro":true,"fullname":"Edward Beeching","user":"edbeeching","type":"user"},{"_id":"629f3b18ee05727ce328ccbe","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1669189789447-629f3b18ee05727ce328ccbe.jpeg","isPro":false,"fullname":"Kashif Rasul","user":"kashif","type":"user"},{"_id":"5f0c746619cb630495b814fd","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1594651707950-noauth.jpeg","isPro":true,"fullname":"Lewis Tunstall","user":"lewtun","type":"user"},{"_id":"60a551a34ecc5d054c8ad93e","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/60a551a34ecc5d054c8ad93e/dhcBFtwNLcKqqASxniyVw.jpeg","isPro":false,"fullname":"Mishig Davaadorj","user":"mishig","type":"user"},{"_id":"5dd96eb166059660ed1ee413","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5dd96eb166059660ed1ee413/NQtzmrDdbG0H8qkZvRyGk.jpeg","isPro":true,"fullname":"Julien Chaumond","user":"julien-c","type":"user"},{"_id":"631ce4b244503b72277fc89f","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1677431596830-631ce4b244503b72277fc89f.jpeg","isPro":true,"fullname":"Quentin Gallouédec","user":"qgallouedec","type":"user"},{"_id":"61929226ded356549e20c5da","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61929226ded356549e20c5da/ONUjP2S5fUWd07BiFXm0i.jpeg","isPro":true,"fullname":"Sergio Paniego","user":"sergiopaniego","type":"user"},{"_id":"683ddb3724742a21487cf626","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/683ddb3724742a21487cf626/nLJ6NF7GDxKj2lVBtxvoy.jpeg","isPro":false,"fullname":"Mert Toslali","user":"toslali-ibm","type":"user"},{"_id":"5fbfd09ee366524fe8e97cd3","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1606406298765-noauth.jpeg","isPro":false,"fullname":"Albert Villanova del Moral","user":"albertvillanova","type":"user"}],"userCount":10,"collections":[{"slug":"trl-lib/preference-datasets-677e99b581018fcad9abd82c","title":"Preference datasets","description":"","gating":false,"lastUpdated":"2025-01-08T16:08:14.839Z","owner":{"_id":"641d6af17b08b761a4bf1d7a","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e48005437cb5b49818287a5/zG5_UiVpP1hkuRQOD73de.png","fullname":"TRL","name":"trl-lib","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":198,"isUserFollowing":false},"items":[{"_id":"677e99db1b7f08c4ff04571a","position":0,"type":"dataset","author":"trl-lib","downloads":1098,"gated":false,"id":"trl-lib/hh-rlhf-helpful-base","lastModified":"2025-01-08T16:01:34.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":46189,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":3,"isLikedByUser":false,"isBenchmark":false},{"_id":"677e9a434ca62d6e7c3e62b1","position":1,"type":"dataset","author":"trl-lib","downloads":32,"gated":false,"id":"trl-lib/lm-human-preferences-descriptiveness","lastModified":"2025-01-08T16:01:48.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":6259,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":1,"isLikedByUser":false,"isBenchmark":false},{"_id":"677ea196b3faa45cd37c86ae","position":2,"type":"dataset","author":"trl-lib","downloads":150,"gated":false,"id":"trl-lib/lm-human-preferences-sentiment","lastModified":"2025-01-08T16:02:09.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":6264,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false,"isBenchmark":false},{"_id":"677ea1a867c81d831e4532de","position":3,"type":"dataset","author":"trl-lib","downloads":94,"gated":false,"id":"trl-lib/rlaif-v","lastModified":"2025-01-08T16:20:44.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":83132,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["image","text"]},"private":false,"repoType":"dataset","likes":3,"isLikedByUser":false,"isBenchmark":false}],"position":0,"theme":"pink","private":false,"shareUrl":"https://hf.co/collections/trl-lib/preference-datasets","upvotes":2,"isUpvotedByUser":false},{"slug":"trl-lib/stepwise-supervision-datasets-677ea27fd4c5941beed7a96e","title":"Stepwise supervision datasets","description":"","gating":false,"lastUpdated":"2025-01-08T16:08:15.011Z","owner":{"_id":"641d6af17b08b761a4bf1d7a","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e48005437cb5b49818287a5/zG5_UiVpP1hkuRQOD73de.png","fullname":"TRL","name":"trl-lib","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":198,"isUserFollowing":false},"items":[{"_id":"677ea28663712fd2bf0c6d6d","position":0,"type":"dataset","author":"trl-lib","downloads":2101,"gated":false,"id":"trl-lib/math_shepherd","lastModified":"2025-01-08T16:03:33.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":444655,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":11,"isLikedByUser":false,"isBenchmark":false},{"_id":"677ea28d6b3a0512558e495c","position":1,"type":"dataset","author":"trl-lib","downloads":360,"gated":false,"id":"trl-lib/prm800k","lastModified":"2025-01-08T16:03:40.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":41177,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":2,"isLikedByUser":false,"isBenchmark":false}],"position":1,"theme":"pink","private":false,"shareUrl":"https://hf.co/collections/trl-lib/stepwise-supervision-datasets","upvotes":0,"isUpvotedByUser":false},{"slug":"trl-lib/prompt-completion-datasets-677ea2bb20bbb6bdccada216","title":"Prompt-completion datasets","description":"","gating":false,"lastUpdated":"2025-06-22T12:56:11.426Z","owner":{"_id":"641d6af17b08b761a4bf1d7a","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e48005437cb5b49818287a5/zG5_UiVpP1hkuRQOD73de.png","fullname":"TRL","name":"trl-lib","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":198,"isUserFollowing":false},"items":[{"_id":"677ea2c32f1cc272c73c7292","position":0,"type":"dataset","author":"trl-lib","downloads":2716,"gated":false,"id":"trl-lib/tldr","lastModified":"2025-01-08T16:18:59.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":129722,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":30,"isLikedByUser":false,"isBenchmark":false},{"_id":"6857fd6bac3449231f9ce093","position":1,"type":"dataset","author":"trl-lib","downloads":316,"gated":false,"id":"trl-lib/OpenMathReasoning","lastModified":"2025-04-26T21:03:33.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":3201061,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false,"isBenchmark":false}],"position":2,"theme":"pink","private":false,"shareUrl":"https://hf.co/collections/trl-lib/prompt-completion-datasets","upvotes":0,"isUpvotedByUser":false},{"slug":"trl-lib/prompt-only-datasets-677ea25245d20252cea00368","title":"Prompt-only datasets","description":"","gating":false,"lastUpdated":"2025-11-13T23:52:46.152Z","owner":{"_id":"641d6af17b08b761a4bf1d7a","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e48005437cb5b49818287a5/zG5_UiVpP1hkuRQOD73de.png","fullname":"TRL","name":"trl-lib","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":198,"isUserFollowing":false},"items":[{"_id":"677ea260339f17f7cd3bfc1c","position":0,"type":"dataset","author":"trl-lib","downloads":320,"gated":false,"id":"trl-lib/ultrafeedback-prompt","lastModified":"2025-01-08T16:19:56.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":39764,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":9,"isLikedByUser":false,"isBenchmark":false},{"_id":"69166f4eb691bca0e7d163cd","position":1,"type":"dataset","author":"trl-lib","downloads":4059,"gated":false,"id":"trl-lib/DeepMath-103K","lastModified":"2025-11-14T00:34:10.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":103022,"libraries":["datasets","pandas","polars","mlcroissant"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":5,"isLikedByUser":false,"isBenchmark":false}],"position":3,"theme":"pink","private":false,"shareUrl":"https://hf.co/collections/trl-lib/prompt-only-datasets","upvotes":0,"isUpvotedByUser":false},{"slug":"trl-lib/unpaired-preference-datasets-677ea22bf5f528c125b0bcdf","title":"Unpaired preference datasets","description":"","gating":false,"lastUpdated":"2025-01-08T16:12:35.453Z","owner":{"_id":"641d6af17b08b761a4bf1d7a","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e48005437cb5b49818287a5/zG5_UiVpP1hkuRQOD73de.png","fullname":"TRL","name":"trl-lib","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":198,"isUserFollowing":false},"items":[{"_id":"677ea23733993c58b91e3fde","position":0,"type":"dataset","author":"trl-lib","downloads":172,"gated":false,"id":"trl-lib/ultrafeedback-gpt-3.5-turbo-helpfulness","lastModified":"2025-01-08T16:20:42.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":16561,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":4,"isLikedByUser":false,"isBenchmark":false},{"_id":"677ea3f3b3faa45cd37d1c14","position":1,"type":"dataset","author":"trl-lib","downloads":349,"gated":false,"id":"trl-lib/kto-mix-14k","lastModified":"2024-03-25T14:53:24.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":15000,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":9,"isLikedByUser":false,"isBenchmark":false}],"position":4,"theme":"pink","private":false,"shareUrl":"https://hf.co/collections/trl-lib/unpaired-preference-datasets","upvotes":1,"isUpvotedByUser":false},{"slug":"trl-lib/comparing-dpo-with-ipo-and-kto-6582f76eb5a0b8ec75fbe20e","title":"Comparing DPO with IPO and KTO","description":"A collection of chat models to explore the differences between three alignment techniques: DPO, IPO, and KTO. ","gating":false,"lastUpdated":"2025-01-08T16:08:14.845Z","owner":{"_id":"641d6af17b08b761a4bf1d7a","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e48005437cb5b49818287a5/zG5_UiVpP1hkuRQOD73de.png","fullname":"TRL","name":"trl-lib","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":198,"isUserFollowing":false},"items":[{"_id":"6583f454a76333af9f5c0f6c","position":0,"type":"model","note":{"html":"The chat model we optimized with DPO, IPO, and KTO.","text":"The chat model we optimized with DPO, IPO, and KTO."},"author":"teknium","authorData":{"_id":"6317aade83d8d2fd903192d9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/6317aade83d8d2fd903192d9/erOwgMXc_CZih3uMoyTAp.jpeg","fullname":"Teknium","name":"teknium","type":"user","isPro":false,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":6789,"isUserFollowing":false},"downloads":134729,"gated":false,"id":"teknium/OpenHermes-2.5-Mistral-7B","availableInferenceProviders":[],"lastModified":"2024-02-19T17:53:06.000Z","likes":887,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"_id":"658301b010c971bcac5ade23","position":1,"type":"dataset","note":{"html":"The AI feedback dataset we used to fine-tune OpenHermes-2.5 with DPO, IPO, and KTO.","text":"The AI feedback dataset we used to fine-tune OpenHermes-2.5 with DPO, IPO, and KTO."},"author":"Intel","downloads":1855,"gated":false,"id":"Intel/orca_dpo_pairs","lastModified":"2023-11-29T14:11:17.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":12859,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["json"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":319,"isLikedByUser":false,"isBenchmark":false},{"_id":"6582f85ac8b8bb2e977cba23","position":2,"type":"model","author":"trl-lib","authorData":{"_id":"641d6af17b08b761a4bf1d7a","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e48005437cb5b49818287a5/zG5_UiVpP1hkuRQOD73de.png","fullname":"TRL","name":"trl-lib","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":198,"isUserFollowing":false},"downloads":0,"gated":false,"id":"trl-lib/OpenHermes-2-Mistral-7B-ipo-beta-0.1-steps-200","availableInferenceProviders":[],"lastModified":"2023-12-20T14:16:30.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"_id":"6582fc332655d67da397c6eb","position":3,"type":"model","author":"trl-lib","authorData":{"_id":"641d6af17b08b761a4bf1d7a","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e48005437cb5b49818287a5/zG5_UiVpP1hkuRQOD73de.png","fullname":"TRL","name":"trl-lib","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":198,"isUserFollowing":false},"downloads":0,"gated":false,"id":"trl-lib/OpenHermes-2-Mistral-7B-ipo-beta-0.2-steps-200","availableInferenceProviders":[],"lastModified":"2023-12-20T14:29:26.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false}],"position":5,"theme":"purple","private":false,"shareUrl":"https://hf.co/collections/trl-lib/comparing-dpo-with-ipo-and-kto","upvotes":32,"isUpvotedByUser":false},{"slug":"trl-lib/online-dpo-66acd3fa38a331a9cd457b07","title":"Online-DPO","gating":false,"lastUpdated":"2025-01-08T16:08:24.777Z","owner":{"_id":"641d6af17b08b761a4bf1d7a","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e48005437cb5b49818287a5/zG5_UiVpP1hkuRQOD73de.png","fullname":"TRL","name":"trl-lib","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":198,"isUserFollowing":false},"items":[{"_id":"66acd3fa38a331a9cd457b08","position":0,"type":"model","author":"trl-lib","authorData":{"_id":"641d6af17b08b761a4bf1d7a","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e48005437cb5b49818287a5/zG5_UiVpP1hkuRQOD73de.png","fullname":"TRL","name":"trl-lib","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":198,"isUserFollowing":false},"downloads":0,"gated":false,"id":"trl-lib/pythia-1b-deduped-tldr-online-dpo","availableInferenceProviders":[],"lastModified":"2024-08-02T12:31:06.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false,"numParameters":1011781632},{"_id":"66acd41c3e16a762ffdc1eb7","position":1,"type":"model","author":"trl-lib","authorData":{"_id":"641d6af17b08b761a4bf1d7a","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e48005437cb5b49818287a5/zG5_UiVpP1hkuRQOD73de.png","fullname":"TRL","name":"trl-lib","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":198,"isUserFollowing":false},"downloads":3673,"gated":false,"id":"trl-lib/pythia-1b-deduped-tldr-sft","availableInferenceProviders":[],"lastModified":"2024-08-02T12:28:39.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false,"numParameters":1011781632},{"_id":"66acd52414119091005855b0","position":3,"type":"model","author":"trl-lib","authorData":{"_id":"641d6af17b08b761a4bf1d7a","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e48005437cb5b49818287a5/zG5_UiVpP1hkuRQOD73de.png","fullname":"TRL","name":"trl-lib","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":198,"isUserFollowing":false},"downloads":0,"gated":false,"id":"trl-lib/pythia-6.9b-deduped-tldr-online-dpo","availableInferenceProviders":[],"lastModified":"2024-08-02T12:44:53.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false,"numParameters":6857302016},{"_id":"66acf0c73b8686aca1f184df","position":4,"type":"model","author":"trl-lib","authorData":{"_id":"641d6af17b08b761a4bf1d7a","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e48005437cb5b49818287a5/zG5_UiVpP1hkuRQOD73de.png","fullname":"TRL","name":"trl-lib","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":198,"isUserFollowing":false},"downloads":0,"gated":false,"id":"trl-lib/pythia-2.8b-deduped-tldr-sft","availableInferenceProviders":[],"lastModified":"2024-08-02T13:38:59.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false}],"position":6,"theme":"orange","private":false,"shareUrl":"https://hf.co/collections/trl-lib/online-dpo","upvotes":1,"isUpvotedByUser":false}],"datasets":[{"author":"trl-lib","downloads":19780,"gated":false,"id":"trl-lib/trackio-dataset","lastModified":"2026-02-19T19:39:19.000Z","private":false,"repoType":"dataset","likes":0,"isLikedByUser":false,"isBenchmark":false},{"author":"trl-lib","downloads":60350,"gated":false,"id":"trl-lib/documentation-images","lastModified":"2026-02-13T08:58:09.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":11,"libraries":["datasets","mlcroissant"],"formats":["imagefolder"],"modalities":["image"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false,"isBenchmark":false},{"author":"trl-lib","downloads":4059,"gated":false,"id":"trl-lib/DeepMath-103K","lastModified":"2025-11-14T00:34:10.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":103022,"libraries":["datasets","pandas","polars","mlcroissant"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":5,"isLikedByUser":false,"isBenchmark":false},{"author":"trl-lib","downloads":1452,"gated":false,"id":"trl-lib/llava-instruct-mix","lastModified":"2025-08-16T00:09:00.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":227603,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["image","text"]},"private":false,"repoType":"dataset","likes":2,"isLikedByUser":false,"isBenchmark":false},{"author":"trl-lib","downloads":316,"gated":false,"id":"trl-lib/OpenMathReasoning","lastModified":"2025-04-26T21:03:33.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":3201061,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false,"isBenchmark":false},{"author":"trl-lib","downloads":288,"gated":false,"id":"trl-lib/chatbot_arena_completions","lastModified":"2025-04-25T12:51:26.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":32980,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":1,"isLikedByUser":false,"isBenchmark":false},{"author":"trl-lib","downloads":94,"gated":false,"id":"trl-lib/rlaif-v","lastModified":"2025-01-08T16:20:44.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":83132,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["image","text"]},"private":false,"repoType":"dataset","likes":3,"isLikedByUser":false,"isBenchmark":false},{"author":"trl-lib","downloads":172,"gated":false,"id":"trl-lib/ultrafeedback-gpt-3.5-turbo-helpfulness","lastModified":"2025-01-08T16:20:42.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":16561,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":4,"isLikedByUser":false,"isBenchmark":false},{"author":"trl-lib","downloads":320,"gated":false,"id":"trl-lib/ultrafeedback-prompt","lastModified":"2025-01-08T16:19:56.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":39764,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":9,"isLikedByUser":false,"isBenchmark":false},{"author":"trl-lib","downloads":1070,"gated":false,"id":"trl-lib/tldr-preference","lastModified":"2025-01-08T16:19:28.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":178944,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":3,"isLikedByUser":false,"isBenchmark":false}],"models":[{"author":"trl-lib","authorData":{"_id":"641d6af17b08b761a4bf1d7a","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e48005437cb5b49818287a5/zG5_UiVpP1hkuRQOD73de.png","fullname":"TRL","name":"trl-lib","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":198,"isUserFollowing":false},"downloads":3,"gated":false,"id":"trl-lib/rloo_tldr","availableInferenceProviders":[],"lastModified":"2026-02-10T13:06:28.000Z","likes":0,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[],"numParameters":1011781632},{"author":"trl-lib","authorData":{"_id":"641d6af17b08b761a4bf1d7a","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e48005437cb5b49818287a5/zG5_UiVpP1hkuRQOD73de.png","fullname":"TRL","name":"trl-lib","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":198,"isUserFollowing":false},"downloads":21,"gated":false,"id":"trl-lib/ppo_tldr","availableInferenceProviders":[],"lastModified":"2026-02-10T09:51:26.000Z","likes":0,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[],"numParameters":1011781632},{"author":"trl-lib","authorData":{"_id":"641d6af17b08b761a4bf1d7a","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e48005437cb5b49818287a5/zG5_UiVpP1hkuRQOD73de.png","fullname":"TRL","name":"trl-lib","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":198,"isUserFollowing":false},"downloads":0,"gated":false,"id":"trl-lib/Qwen3-4B-LoRA","availableInferenceProviders":[],"lastModified":"2025-07-28T02:56:32.000Z","likes":1,"private":false,"repoType":"model","isLikedByUser":false},{"author":"trl-lib","authorData":{"_id":"641d6af17b08b761a4bf1d7a","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e48005437cb5b49818287a5/zG5_UiVpP1hkuRQOD73de.png","fullname":"TRL","name":"trl-lib","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":198,"isUserFollowing":false},"downloads":15,"gated":false,"id":"trl-lib/Qwen2-0.5B-Reward-Math-Sheperd","availableInferenceProviders":[],"lastModified":"2024-12-09T23:10:12.000Z","likes":1,"pipeline_tag":"token-classification","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[],"numParameters":494034562},{"author":"trl-lib","authorData":{"_id":"641d6af17b08b761a4bf1d7a","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e48005437cb5b49818287a5/zG5_UiVpP1hkuRQOD73de.png","fullname":"TRL","name":"trl-lib","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":198,"isUserFollowing":false},"downloads":11,"gated":false,"id":"trl-lib/Qwen2-0.5B-XPO","availableInferenceProviders":[{"provider":"featherless-ai","modelStatus":"live","providerStatus":"live","providerId":"trl-lib/Qwen2-0.5B-XPO","task":"conversational","isCheapestPricingOutput":false,"isFastestThroughput":false,"isModelAuthor":false}],"lastModified":"2024-10-24T14:03:44.000Z","likes":0,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[],"numParameters":494032768},{"author":"trl-lib","authorData":{"_id":"641d6af17b08b761a4bf1d7a","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e48005437cb5b49818287a5/zG5_UiVpP1hkuRQOD73de.png","fullname":"TRL","name":"trl-lib","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":198,"isUserFollowing":false},"downloads":16,"gated":false,"id":"trl-lib/Qwen2-0.5B-OnlineDPO","availableInferenceProviders":[{"provider":"featherless-ai","modelStatus":"live","providerStatus":"live","providerId":"trl-lib/Qwen2-0.5B-OnlineDPO","task":"conversational","isCheapestPricingOutput":false,"isFastestThroughput":false,"isModelAuthor":false}],"lastModified":"2024-10-23T17:46:55.000Z","likes":1,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[],"numParameters":494032768},{"author":"trl-lib","authorData":{"_id":"641d6af17b08b761a4bf1d7a","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e48005437cb5b49818287a5/zG5_UiVpP1hkuRQOD73de.png","fullname":"TRL","name":"trl-lib","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":198,"isUserFollowing":false},"downloads":3,"gated":false,"id":"trl-lib/Qwen2-0.5B-KTO","availableInferenceProviders":[],"lastModified":"2024-10-18T16:13:49.000Z","likes":0,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[],"numParameters":494032768},{"author":"trl-lib","authorData":{"_id":"641d6af17b08b761a4bf1d7a","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e48005437cb5b49818287a5/zG5_UiVpP1hkuRQOD73de.png","fullname":"TRL","name":"trl-lib","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":198,"isUserFollowing":false},"downloads":22,"gated":false,"id":"trl-lib/Qwen2-0.5B-ORPO","availableInferenceProviders":[],"lastModified":"2024-10-11T11:51:38.000Z","likes":2,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[],"numParameters":494032768},{"author":"trl-lib","authorData":{"_id":"641d6af17b08b761a4bf1d7a","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e48005437cb5b49818287a5/zG5_UiVpP1hkuRQOD73de.png","fullname":"TRL","name":"trl-lib","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":198,"isUserFollowing":false},"downloads":22,"gated":false,"id":"trl-lib/Qwen2-0.5B-DPO","availableInferenceProviders":[],"lastModified":"2024-09-27T15:54:37.000Z","likes":4,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[],"numParameters":494032768},{"author":"trl-lib","authorData":{"_id":"641d6af17b08b761a4bf1d7a","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e48005437cb5b49818287a5/zG5_UiVpP1hkuRQOD73de.png","fullname":"TRL","name":"trl-lib","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":198,"isUserFollowing":false},"downloads":56,"gated":false,"id":"trl-lib/Qwen2-0.5B-Reward","availableInferenceProviders":[],"lastModified":"2024-09-05T17:07:45.000Z","likes":1,"pipeline_tag":"text-classification","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[],"numParameters":494033664}],"paperPreviews":[],"spaces":[{"author":"trl-lib","authorData":{"_id":"641d6af17b08b761a4bf1d7a","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e48005437cb5b49818287a5/zG5_UiVpP1hkuRQOD73de.png","fullname":"TRL","name":"trl-lib","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":198,"isUserFollowing":false},"colorFrom":"gray","colorTo":"gray","createdAt":"2025-08-17T20:20:51.000Z","emoji":"🚀","id":"trl-lib/trackio","lastModified":"2026-01-27T01:23:22.000Z","likes":4,"pinned":false,"private":false,"sdk":"gradio","repoType":"space","runtime":{"stage":"RUNNING","hardware":{"current":"cpu-basic","requested":"cpu-basic"},"storage":null,"gcTimeout":172800,"replicas":{"current":1,"requested":1},"devMode":false,"domains":[{"domain":"trl-lib-trackio.hf.space","stage":"READY"}],"sha":"2d18d8e75c39aae1b58d7b0173b3432c38f3e550"},"title":"Trackio","isLikedByUser":false,"ai_short_description":"Track and visualize data streams in real-time","ai_category":"Data Visualization","trendingScore":0,"tags":["gradio","trackio","region:us"],"featured":false},{"author":"trl-lib","authorData":{"_id":"641d6af17b08b761a4bf1d7a","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e48005437cb5b49818287a5/zG5_UiVpP1hkuRQOD73de.png","fullname":"TRL","name":"trl-lib","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":198,"isUserFollowing":false},"colorFrom":"blue","colorTo":"blue","createdAt":"2025-06-13T14:10:07.000Z","emoji":"😻","id":"trl-lib/recommend-vllm-memory","lastModified":"2025-08-17T20:21:15.000Z","likes":2,"pinned":false,"private":false,"sdk":"gradio","repoType":"space","runtime":{"stage":"RUNNING","hardware":{"current":"cpu-basic","requested":"cpu-basic"},"storage":null,"gcTimeout":172800,"replicas":{"current":1,"requested":1},"devMode":false,"domains":[{"domain":"trl-lib-recommend-vllm-memory.hf.space","stage":"READY"}],"sha":"a4ebc316313e67e08e18b86102a3045d7e6351f8"},"title":"Recommend vLLM Memory","isLikedByUser":false,"ai_short_description":"Estimate GPU memory usage for model training","ai_category":"Financial Analysis","trendingScore":0,"tags":["gradio","region:us"],"featured":false},{"author":"trl-lib","authorData":{"_id":"641d6af17b08b761a4bf1d7a","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e48005437cb5b49818287a5/zG5_UiVpP1hkuRQOD73de.png","fullname":"TRL","name":"trl-lib","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":198,"isUserFollowing":false},"colorFrom":"red","colorTo":"indigo","createdAt":"2025-06-22T20:55:14.000Z","emoji":"👁","id":"trl-lib/dataset-length-profiler","lastModified":"2025-07-28T22:21:25.000Z","likes":6,"pinned":false,"private":false,"sdk":"gradio","repoType":"space","runtime":{"stage":"RUNNING","hardware":{"current":"cpu-basic","requested":"cpu-basic"},"storage":null,"gcTimeout":172800,"replicas":{"current":1,"requested":1},"devMode":false,"domains":[{"domain":"trl-lib-dataset-length-profiler.hf.space","stage":"READY"}],"sha":"6f37dd9669ea77ec23b2e1ff90ac200e61a6ed63"},"title":"Dataset Length Profiler","isLikedByUser":false,"ai_short_description":"Estimate optimal max_length for SFT training","ai_category":"Model Benchmarking","trendingScore":0,"tags":["gradio","region:us"],"featured":false},{"author":"trl-lib","authorData":{"_id":"641d6af17b08b761a4bf1d7a","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e48005437cb5b49818287a5/zG5_UiVpP1hkuRQOD73de.png","fullname":"TRL","name":"trl-lib","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":198,"isUserFollowing":false},"colorFrom":"gray","colorTo":"gray","createdAt":"2023-08-21T17:50:59.000Z","emoji":"⚒️","id":"trl-lib/trl-text-environment","lastModified":"2023-08-30T15:49:41.000Z","likes":9,"pinned":false,"private":false,"sdk":"gradio","repoType":"space","runtime":{"stage":"SLEEPING","hardware":{"current":null,"requested":"cpu-basic"},"storage":null,"gcTimeout":86400,"replicas":{"requested":1},"devMode":false,"domains":[{"domain":"trl-lib-trl-text-environment.hf.space","stage":"READY"}]},"title":"TextEnvironments","isLikedByUser":false,"originRepo":{"name":"bigcode/bigcode-playground","author":{"_id":"62ce8f4248fbe688600093a0","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1659521200179-5e48005437cb5b49818287a5.png","fullname":"BigCode","name":"bigcode","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"plan":"team","followerCount":1884,"isUserFollowing":false}},"trendingScore":0,"tags":["gradio","region:us"],"featured":false},{"author":"trl-lib","authorData":{"_id":"641d6af17b08b761a4bf1d7a","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e48005437cb5b49818287a5/zG5_UiVpP1hkuRQOD73de.png","fullname":"TRL","name":"trl-lib","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":198,"isUserFollowing":false},"colorFrom":"red","colorTo":"yellow","createdAt":"2023-04-04T12:49:16.000Z","emoji":"🦙","id":"trl-lib/stack-llama","lastModified":"2023-08-08T14:25:29.000Z","likes":212,"pinned":false,"private":false,"sdk":"gradio","repoType":"space","runtime":{"stage":"RUNTIME_ERROR","hardware":{"current":null,"requested":"cpu-basic"},"storage":null,"gcTimeout":86400,"errorMessage":"Container run error: context deadline exceeded, node: ip-10-28-237-110.ec2.internal","replicas":{"requested":1},"devMode":false,"domains":[{"domain":"trl-lib-stack-llama.hf.space","stage":"READY"}]},"title":"StackLLaMa","isLikedByUser":false,"originRepo":{"name":"philschmid/igel-playground","author":{"_id":"5ff5d596f244529b3ec0fb89","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1624629516652-5ff5d596f244529b3ec0fb89.png","fullname":"Philipp Schmid","name":"philschmid","type":"user","isPro":false,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":967,"isUserFollowing":false}},"trendingScore":0,"tags":["gradio","region:us"],"featured":true}],"buckets":[],"numBuckets":0,"numDatasets":23,"numModels":84,"numSpaces":6,"lastOrgActivities":[{"time":"2026-02-13T21:24:27.469Z","user":"lewtun","userAvatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1594651707950-noauth.jpeg","type":"paper-daily","paper":{"id":"2602.12176","title":"Single-minus gluon tree amplitudes are nonzero","thumbnailUrl":"https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2602.12176.png","upvotes":7,"publishedAt":"2026-02-12T17:09:06.000Z","isUpvotedByUser":true}},{"time":"2026-02-13T08:58:09.977Z","user":"sergiopaniego","userAvatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61929226ded356549e20c5da/ONUjP2S5fUWd07BiFXm0i.jpeg","org":"trl-lib","orgAvatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e48005437cb5b49818287a5/zG5_UiVpP1hkuRQOD73de.png","type":"discussion","discussionData":{"num":4,"author":{"_id":"698b8af50e8fdebdbc0f5454","avatarUrl":"/avatars/7e35527d556534e445356b486fc7a1a1.svg","fullname":"christian munley","name":"cmunley1","type":"user","isPro":false,"isHf":false,"isHfAdmin":false,"isMod":false,"isUserFollowing":false},"repo":{"name":"trl-lib/documentation-images","type":"dataset"},"title":"Upload 2 files","status":"merged","createdAt":"2026-02-12T19:20:11.000Z","isPullRequest":true,"numComments":1,"topReactions":[],"numReactionUsers":0,"pinned":false,"repoOwner":{"name":"trl-lib","isParticipating":false,"type":"org","isDiscussionAuthor":false}},"repoId":"trl-lib/documentation-images","repoType":"dataset","eventId":"698ee7a10b67152e3850390a"},{"time":"2026-02-12T15:51:44.252Z","user":"lewtun","userAvatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1594651707950-noauth.jpeg","type":"paper-daily","paper":{"id":"2602.03773","title":"Reasoning Cache: Continual Improvement Over Long Horizons via Short-Horizon RL","thumbnailUrl":"https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2602.03773.png","upvotes":9,"publishedAt":"2026-02-03T17:34:04.000Z","isUpvotedByUser":true}}],"acceptLanguages":["*"],"canReadRepos":false,"canReadSpaces":false,"blogPosts":[],"currentRepoPage":0,"filters":{},"paperView":false}">
models
84
datasets
23
AI & ML interests
None defined yet.
Recent Activity
View all activity
Organization Card
This is the organization grouping all the models and datasets used in the TRL library.
models
84
trl-lib/rloo_tldr
Text Generation
•
1B
•
Updated
•
3
trl-lib/ppo_tldr
Text Generation
•
1B
•
Updated
•
21
trl-lib/Qwen3-4B-LoRA
Updated
•
1
trl-lib/Qwen2-0.5B-Reward-Math-Sheperd
Token Classification
•
0.5B
•
Updated
•
15
•
1
trl-lib/Qwen2-0.5B-XPO
Text Generation
•
0.5B
•
Updated
•
11
•
trl-lib/Qwen2-0.5B-OnlineDPO
Text Generation
•
0.5B
•
Updated
•
16
•
•
1
trl-lib/Qwen2-0.5B-KTO
Text Generation
•
0.5B
•
Updated
•
3
trl-lib/Qwen2-0.5B-ORPO
Text Generation
•
0.5B
•
Updated
•
22
•
2
trl-lib/Qwen2-0.5B-DPO
Text Generation
•
0.5B
•
Updated
•
22
•
4
trl-lib/Qwen2-0.5B-Reward
Text Classification
•
0.5B
•
Updated
•
56
•
1
datasets
23
trl-lib/trackio-dataset
Updated
•
19.8k
trl-lib/documentation-images
Viewer
•
Updated
•
11
•
60.4k
trl-lib/DeepMath-103K
Viewer
•
Updated
•
103k
•
4.06k
•
5
trl-lib/llava-instruct-mix
Viewer
•
Updated
•
228k
•
1.45k
•
2
trl-lib/OpenMathReasoning
Viewer
•
Updated
•
3.2M
•
316
trl-lib/chatbot_arena_completions
Viewer
•
Updated
•
33k
•
288
•
1
trl-lib/rlaif-v
Viewer
•
Updated
•
83.1k
•
94
•
3
trl-lib/ultrafeedback-gpt-3.5-turbo-helpfulness
Viewer
•
Updated
•
16.6k
•
172
•
4
trl-lib/ultrafeedback-prompt
Viewer
•
Updated
•
39.8k
•
320
•
9
trl-lib/tldr-preference
Viewer
•
Updated
•
179k
•
1.07k
•
3