Deprecated: The each() function is deprecated. This message will be suppressed on further calls in /home/zhenxiangba/zhenxiangba.com/public_html/phproxy-improved-master/index.php on line 456
stefan-it (Stefan Schweter)
[go: Go Back, main page]

https://github.com/microsoft/unilm/tree/master/kosmos-2.5","text":"Subfolder in unilm repo is empty: https://github.com/microsoft/unilm/tree/master/kosmos-2.5"},"id":"2309.11419","title":"Kosmos-2.5: A Multimodal Literate Model","thumbnailUrl":"https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2309.11419.png","upvotes":56,"publishedAt":"2023-09-20T15:50:08.000Z","isUpvotedByUser":false},{"_id":"65534fee7a643d04598bb4bd","position":3,"type":"paper","note":{"html":"Subfolder in unilm repo is empty: https://github.com/microsoft/unilm/tree/master/unimim","text":"Subfolder in unilm repo is empty: https://github.com/microsoft/unilm/tree/master/unimim"},"id":"2210.10615","title":"A Unified View of Masked Image Modeling","thumbnailUrl":"https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2210.10615.png","upvotes":0,"publishedAt":"2022-10-19T14:59:18.000Z","isUpvotedByUser":false}],"position":9,"theme":"green","private":false,"shareUrl":"https://hf.co/collections/stefan-it/microsoft-papers-with-no-code-data-release","upvotes":0,"isUpvotedByUser":false},{"slug":"stefan-it/georgian-ner-models-6556bd33dd1c096392074791","title":"🇬🇪 Georgian NER Models","description":"My fine-tuned NER models for Georgian","gating":false,"lastUpdated":"2023-11-17T01:10:11.818Z","owner":{"_id":"5e6a3d4ea9afd5125d9ec064","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1584020801691-noauth.jpeg","fullname":"Stefan Schweter","name":"stefan-it","type":"user","isPro":true,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":3687,"isUserFollowing":false},"items":[{"_id":"6556bd623541e760345cf169","position":0,"type":"model","author":"stefan-it","authorData":{"_id":"5e6a3d4ea9afd5125d9ec064","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1584020801691-noauth.jpeg","fullname":"Stefan Schweter","name":"stefan-it","type":"user","isPro":true,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":3687,"isUserFollowing":false},"downloads":19,"gated":false,"id":"stefan-it/autotrain-flair-georgian-ner-xlm_r_large-bs4-e10-lr5e-06-1","availableInferenceProviders":[],"lastModified":"2023-11-17T00:51:51.000Z","likes":0,"pipeline_tag":"token-classification","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"_id":"6556bd6660cb377db03d2eba","position":1,"type":"model","author":"stefan-it","authorData":{"_id":"5e6a3d4ea9afd5125d9ec064","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1584020801691-noauth.jpeg","fullname":"Stefan Schweter","name":"stefan-it","type":"user","isPro":true,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":3687,"isUserFollowing":false},"downloads":0,"gated":false,"id":"stefan-it/autotrain-flair-georgian-ner-xlm_r_large-bs4-e10-lr5e-06-2","availableInferenceProviders":[],"lastModified":"2023-11-17T00:52:08.000Z","likes":0,"pipeline_tag":"token-classification","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"_id":"6556bd6bdf3c800e011e34b5","position":2,"type":"model","author":"stefan-it","authorData":{"_id":"5e6a3d4ea9afd5125d9ec064","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1584020801691-noauth.jpeg","fullname":"Stefan Schweter","name":"stefan-it","type":"user","isPro":true,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":3687,"isUserFollowing":false},"downloads":0,"gated":false,"id":"stefan-it/autotrain-flair-georgian-ner-xlm_r_large-bs4-e10-lr5e-06-3","availableInferenceProviders":[],"lastModified":"2023-11-17T00:52:17.000Z","likes":0,"pipeline_tag":"token-classification","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"_id":"6556bd6f66423b57b2e05019","position":3,"type":"model","author":"stefan-it","authorData":{"_id":"5e6a3d4ea9afd5125d9ec064","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1584020801691-noauth.jpeg","fullname":"Stefan Schweter","name":"stefan-it","type":"user","isPro":true,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":3687,"isUserFollowing":false},"downloads":3,"gated":false,"id":"stefan-it/autotrain-flair-georgian-ner-xlm_r_large-bs4-e10-lr5e-06-4","availableInferenceProviders":[],"lastModified":"2023-11-17T00:52:26.000Z","likes":0,"pipeline_tag":"token-classification","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]}],"position":10,"theme":"orange","private":false,"shareUrl":"https://hf.co/collections/stefan-it/georgian-ner-models","upvotes":0,"isUpvotedByUser":false},{"slug":"stefan-it/fine-tuned-co-funer-models-66058539530368090082214f","title":"💼 Fine-Tuned CO-Funer Models","description":"My fine-tuned Flair models on CO-FUN NER Dataset","gating":false,"lastUpdated":"2024-03-28T15:01:20.539Z","owner":{"_id":"5e6a3d4ea9afd5125d9ec064","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1584020801691-noauth.jpeg","fullname":"Stefan Schweter","name":"stefan-it","type":"user","isPro":true,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":3687,"isUserFollowing":false},"items":[{"_id":"66058587033a66298d5f855f","position":0,"type":"model","note":{"html":"F1-Score on Development set: 95.17%, 91.63% on Test set.","text":"F1-Score on Development set: 95.17%, 91.63% on Test set."},"author":"stefan-it","authorData":{"_id":"5e6a3d4ea9afd5125d9ec064","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1584020801691-noauth.jpeg","fullname":"Stefan Schweter","name":"stefan-it","type":"user","isPro":true,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":3687,"isUserFollowing":false},"downloads":2,"gated":false,"id":"stefan-it/flair-co-funer-gbert_base-bs8-e10-lr5e-05-3","availableInferenceProviders":[],"lastModified":"2024-03-28T15:13:52.000Z","likes":0,"pipeline_tag":"token-classification","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"_id":"660585e2e60ec53dd4848bcc","position":1,"type":"model","note":{"html":"F1-Score on Development set: 93.78%, 91.34% on Test set.","text":"F1-Score on Development set: 93.78%, 91.34% on Test set."},"author":"stefan-it","authorData":{"_id":"5e6a3d4ea9afd5125d9ec064","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1584020801691-noauth.jpeg","fullname":"Stefan Schweter","name":"stefan-it","type":"user","isPro":true,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":3687,"isUserFollowing":false},"downloads":1,"gated":false,"id":"stefan-it/flair-co-funer-german_dbmdz_bert_base-bs8-e10-lr5e-05-1","availableInferenceProviders":[],"lastModified":"2024-03-28T15:12:12.000Z","likes":0,"pipeline_tag":"token-classification","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"_id":"6605861825a3bd84789cd4a1","position":2,"type":"model","note":{"html":"F1-Score on Development set: 93.88%, 91.59% on Test set.","text":"F1-Score on Development set: 93.88%, 91.59% on Test set."},"author":"stefan-it","authorData":{"_id":"5e6a3d4ea9afd5125d9ec064","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1584020801691-noauth.jpeg","fullname":"Stefan Schweter","name":"stefan-it","type":"user","isPro":true,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":3687,"isUserFollowing":false},"downloads":0,"gated":false,"id":"stefan-it/flair-co-funer-german_bert_base-bs8-e10-lr5e-05-2","availableInferenceProviders":[],"lastModified":"2024-03-28T15:09:26.000Z","likes":0,"pipeline_tag":"token-classification","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]}],"position":11,"theme":"purple","private":false,"shareUrl":"https://hf.co/collections/stefan-it/fine-tuned-co-funer-models","upvotes":0,"isUpvotedByUser":false},{"slug":"stefan-it/fine-tuned-cleanconll-models-6685d2e4852db86b9ca90dba","title":"🧹 Fine-Tuned CleanCoNLL Models","description":"My fine-tuned Flair NER models on CleanCoNLL dataset (with different seeds)","gating":false,"lastUpdated":"2024-07-04T21:38:19.238Z","owner":{"_id":"5e6a3d4ea9afd5125d9ec064","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1584020801691-noauth.jpeg","fullname":"Stefan Schweter","name":"stefan-it","type":"user","isPro":true,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":3687,"isUserFollowing":false},"items":[{"_id":"6685d313636d8845cd1532c6","position":0,"type":"model","note":{"html":"Model achieving 97.43 on development set and 97.00 on test set.","text":"Model achieving 97.43 on development set and 97.00 on test set."},"author":"stefan-it","authorData":{"_id":"5e6a3d4ea9afd5125d9ec064","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1584020801691-noauth.jpeg","fullname":"Stefan Schweter","name":"stefan-it","type":"user","isPro":true,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":3687,"isUserFollowing":false},"downloads":3,"gated":false,"id":"stefan-it/flair-clean-conll-1","availableInferenceProviders":[],"lastModified":"2024-07-07T19:24:37.000Z","likes":0,"pipeline_tag":"token-classification","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"_id":"6685d320b6a5b2d0642410db","position":1,"type":"model","note":{"html":"Model achieving 97.26 on development set and 96.90 on test set.","text":"Model achieving 97.26 on development set and 96.90 on test set."},"author":"stefan-it","authorData":{"_id":"5e6a3d4ea9afd5125d9ec064","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1584020801691-noauth.jpeg","fullname":"Stefan Schweter","name":"stefan-it","type":"user","isPro":true,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":3687,"isUserFollowing":false},"downloads":5,"gated":false,"id":"stefan-it/flair-clean-conll-2","availableInferenceProviders":[],"lastModified":"2024-07-07T19:24:57.000Z","likes":0,"pipeline_tag":"token-classification","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"_id":"6685d32ac22e4833a611c0e0","position":2,"type":"model","note":{"html":"Model achieving 97.66 on development set and 97.02 on test set.","text":"Model achieving 97.66 on development set and 97.02 on test set."},"author":"stefan-it","authorData":{"_id":"5e6a3d4ea9afd5125d9ec064","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1584020801691-noauth.jpeg","fullname":"Stefan Schweter","name":"stefan-it","type":"user","isPro":true,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":3687,"isUserFollowing":false},"downloads":4,"gated":false,"id":"stefan-it/flair-clean-conll-3","availableInferenceProviders":[],"lastModified":"2024-07-07T19:25:10.000Z","likes":0,"pipeline_tag":"token-classification","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"_id":"6685d331ccb9539da8227564","position":3,"type":"model","note":{"html":"Model achieving 97.42 on development set and 96.96 on test set.","text":"Model achieving 97.42 on development set and 96.96 on test set."},"author":"stefan-it","authorData":{"_id":"5e6a3d4ea9afd5125d9ec064","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1584020801691-noauth.jpeg","fullname":"Stefan Schweter","name":"stefan-it","type":"user","isPro":true,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":3687,"isUserFollowing":false},"downloads":6,"gated":false,"id":"stefan-it/flair-clean-conll-4","availableInferenceProviders":[],"lastModified":"2024-07-07T19:25:29.000Z","likes":0,"pipeline_tag":"token-classification","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]}],"position":12,"theme":"indigo","private":false,"shareUrl":"https://hf.co/collections/stefan-it/fine-tuned-cleanconll-models","upvotes":0,"isUpvotedByUser":false},{"slug":"stefan-it/xlstm-language-models-66d0ee69f55513d1bf570f70","title":"🔧 xLSTM Language Models","description":"My trained xLSTM LMs (under development)","gating":false,"lastUpdated":"2024-08-29T21:56:56.850Z","owner":{"_id":"5e6a3d4ea9afd5125d9ec064","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1584020801691-noauth.jpeg","fullname":"Stefan Schweter","name":"stefan-it","type":"user","isPro":true,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":3687,"isUserFollowing":false},"items":[{"_id":"66d0ee8b55c3f7d512e98157","position":0,"type":"model","author":"stefan-it","authorData":{"_id":"5e6a3d4ea9afd5125d9ec064","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1584020801691-noauth.jpeg","fullname":"Stefan Schweter","name":"stefan-it","type":"user","isPro":true,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":3687,"isUserFollowing":false},"downloads":9,"gated":false,"id":"stefan-it/xlstm-german-wikipedia","availableInferenceProviders":[],"lastModified":"2024-09-26T11:58:58.000Z","likes":10,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"numParameters":111115200}],"position":13,"theme":"green","private":false,"shareUrl":"https://hf.co/collections/stefan-it/xlstm-language-models","upvotes":0,"isUpvotedByUser":false}],"datasets":[{"author":"stefan-it","downloads":14,"gated":false,"id":"stefan-it/xlstm-transformers-bug-data","lastModified":"2025-11-08T14:12:36.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":62464,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false,"isBenchmark":false},{"author":"stefan-it","downloads":29,"gated":false,"id":"stefan-it/grokipedia-urls","lastModified":"2025-10-28T11:59:16.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":885279,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["json"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":2,"isLikedByUser":false,"isBenchmark":false},{"author":"stefan-it","downloads":11,"gated":false,"id":"stefan-it/nanochat-german-city-populations","lastModified":"2025-10-26T17:41:59.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":706,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["json"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false,"isBenchmark":false},{"author":"stefan-it","downloads":38,"gated":false,"id":"stefan-it/nanochat-german-wordlist","lastModified":"2025-10-25T18:15:58.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":9064192,"libraries":["datasets","mlcroissant"],"formats":["text"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false,"isBenchmark":false},{"author":"stefan-it","downloads":14,"gated":false,"id":"stefan-it/nanochat-german-openhermes","lastModified":"2025-10-25T09:33:23.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":238658,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["json"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false,"isBenchmark":false},{"author":"stefan-it","downloads":14,"gated":false,"id":"stefan-it/nanochat-german-alpaca","lastModified":"2025-10-25T09:17:48.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":50469,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["json"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false,"isBenchmark":false},{"author":"stefan-it","downloads":567,"gated":false,"id":"stefan-it/nanochat-german-data","lastModified":"2025-10-23T12:54:16.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":51173376,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false,"isBenchmark":false},{"author":"stefan-it","downloads":19,"gated":false,"id":"stefan-it/nanochat-german-eval-data","lastModified":"2025-10-21T23:20:05.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":7,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["csv"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false,"isBenchmark":false},{"author":"stefan-it","downloads":350,"gated":false,"id":"stefan-it/awesome-tagesschau","lastModified":"2025-06-26T22:39:35.000Z","private":false,"repoType":"dataset","likes":2,"isLikedByUser":false,"isBenchmark":false},{"author":"stefan-it","downloads":105,"gated":false,"id":"stefan-it/turblimp-evaluations","lastModified":"2025-06-23T10:40:59.000Z","private":false,"repoType":"dataset","likes":0,"isLikedByUser":false,"isBenchmark":false}],"models":[{"author":"stefan-it","authorData":{"_id":"5e6a3d4ea9afd5125d9ec064","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1584020801691-noauth.jpeg","fullname":"Stefan Schweter","name":"stefan-it","type":"user","isPro":true,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":3687,"isUserFollowing":false},"downloads":2,"gated":false,"id":"stefan-it/xlstm-transformers-bug-triton","availableInferenceProviders":[],"lastModified":"2025-11-08T14:12:04.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"stefan-it","authorData":{"_id":"5e6a3d4ea9afd5125d9ec064","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1584020801691-noauth.jpeg","fullname":"Stefan Schweter","name":"stefan-it","type":"user","isPro":true,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":3687,"isUserFollowing":false},"downloads":4,"gated":false,"id":"stefan-it/xlstm-transformers-bug-native","availableInferenceProviders":[],"lastModified":"2025-11-08T14:11:39.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"stefan-it","authorData":{"_id":"5e6a3d4ea9afd5125d9ec064","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1584020801691-noauth.jpeg","fullname":"Stefan Schweter","name":"stefan-it","type":"user","isPro":true,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":3687,"isUserFollowing":false},"downloads":8,"gated":false,"id":"stefan-it/nanochat-german-v1","availableInferenceProviders":[],"lastModified":"2025-10-28T00:26:57.000Z","likes":1,"private":false,"repoType":"model","isLikedByUser":false,"numParameters":560988160},{"author":"stefan-it","authorData":{"_id":"5e6a3d4ea9afd5125d9ec064","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1584020801691-noauth.jpeg","fullname":"Stefan Schweter","name":"stefan-it","type":"user","isPro":true,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":3687,"isUserFollowing":false},"downloads":0,"gated":false,"id":"stefan-it/nanochat-german-base-checkpoint","availableInferenceProviders":[],"lastModified":"2025-10-25T19:13:38.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"stefan-it","authorData":{"_id":"5e6a3d4ea9afd5125d9ec064","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1584020801691-noauth.jpeg","fullname":"Stefan Schweter","name":"stefan-it","type":"user","isPro":true,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":3687,"isUserFollowing":false},"downloads":11,"gated":false,"id":"stefan-it/nanochat-german-base","availableInferenceProviders":[],"lastModified":"2025-10-24T21:51:30.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false,"numParameters":560988160},{"author":"stefan-it","authorData":{"_id":"5e6a3d4ea9afd5125d9ec064","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1584020801691-noauth.jpeg","fullname":"Stefan Schweter","name":"stefan-it","type":"user","isPro":true,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":3687,"isUserFollowing":false},"downloads":1,"gated":false,"id":"stefan-it/nanochat-german-tokenizer","availableInferenceProviders":[],"lastModified":"2025-10-24T20:12:09.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"stefan-it","authorData":{"_id":"5e6a3d4ea9afd5125d9ec064","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1584020801691-noauth.jpeg","fullname":"Stefan Schweter","name":"stefan-it","type":"user","isPro":true,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":3687,"isUserFollowing":false},"downloads":3,"gated":false,"id":"stefan-it/ettin-encoder-400m-tokenizer-fix","availableInferenceProviders":[],"lastModified":"2025-07-20T21:44:55.000Z","likes":0,"pipeline_tag":"fill-mask","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[],"numParameters":395881664},{"author":"stefan-it","authorData":{"_id":"5e6a3d4ea9afd5125d9ec064","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1584020801691-noauth.jpeg","fullname":"Stefan Schweter","name":"stefan-it","type":"user","isPro":true,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":3687,"isUserFollowing":false},"downloads":0,"gated":false,"id":"stefan-it/flair-ettin-400m-ner-conll03","availableInferenceProviders":[],"lastModified":"2025-07-17T08:30:38.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"stefan-it","authorData":{"_id":"5e6a3d4ea9afd5125d9ec064","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1584020801691-noauth.jpeg","fullname":"Stefan Schweter","name":"stefan-it","type":"user","isPro":true,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":3687,"isUserFollowing":false},"downloads":19,"gated":false,"id":"stefan-it/ModernBERT-large-tokenizer-fix","availableInferenceProviders":[],"lastModified":"2025-07-16T16:20:57.000Z","likes":2,"pipeline_tag":"fill-mask","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[],"numParameters":395881664},{"author":"stefan-it","authorData":{"_id":"5e6a3d4ea9afd5125d9ec064","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1584020801691-noauth.jpeg","fullname":"Stefan Schweter","name":"stefan-it","type":"user","isPro":true,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":3687,"isUserFollowing":false},"downloads":0,"gated":false,"id":"stefan-it/flair-modernbert-large-ner-conll03","availableInferenceProviders":[],"lastModified":"2025-05-09T22:33:08.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false}],"buckets":[],"numBuckets":0,"numberLikes":156,"papers":[{"id":"2510.21364","title":"SindBERT, the Sailor: Charting the Seas of Turkish NLP","thumbnailUrl":"https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2510.21364.png","upvotes":1,"publishedAt":"2025-10-24T11:48:49.000Z","isUpvotedByUser":false},{"id":"2510.13996","title":"The German Commons - 154 Billion Tokens of Openly Licensed Text for\n German Language Models","thumbnailUrl":"https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2510.13996.png","upvotes":9,"publishedAt":"2025-10-15T18:24:26.000Z","isUpvotedByUser":false},{"id":"2509.05668","title":"Llama-GENBA-10B: A Trilingual Large Language Model for German, English\n and Bavarian","thumbnailUrl":"https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2509.05668.png","upvotes":6,"publishedAt":"2025-09-06T10:12:52.000Z","isUpvotedByUser":false},{"id":"2211.05100","title":"BLOOM: A 176B-Parameter Open-Access Multilingual Language Model","thumbnailUrl":"https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2211.05100.png","upvotes":37,"publishedAt":"2022-11-09T18:48:09.000Z","isUpvotedByUser":false},{"id":"2205.15575","title":"hmBERT: Historical Multilingual Language Models for Named Entity\n Recognition","thumbnailUrl":"https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2205.15575.png","upvotes":0,"publishedAt":"2022-05-31T07:30:33.000Z","isUpvotedByUser":false},{"id":"2204.05211","title":"Entities, Dates, and Languages: Zero-Shot on Historical Texts with T0","thumbnailUrl":"https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2204.05211.png","upvotes":0,"publishedAt":"2022-04-11T15:56:13.000Z","isUpvotedByUser":false},{"id":"2107.00927","title":"Data Centric Domain Adaptation for Historical Text with OCR Errors","thumbnailUrl":"https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2107.00927.png","upvotes":0,"publishedAt":"2021-07-02T09:37:15.000Z","isUpvotedByUser":false},{"id":"2011.06993","title":"FLERT: Document-Level Features for Named Entity Recognition","thumbnailUrl":"https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2011.06993.png","upvotes":0,"publishedAt":"2020-11-13T16:13:59.000Z","isUpvotedByUser":false},{"id":"2010.10906","title":"German's Next Language Model","thumbnailUrl":"https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2010.10906.png","upvotes":0,"publishedAt":"2020-10-21T11:28:23.000Z","isUpvotedByUser":false},{"id":"1906.07592","title":"Towards Robust Named Entity Recognition for Historic German","thumbnailUrl":"https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/1906.07592.png","upvotes":0,"publishedAt":"2019-06-18T14:06:40.000Z","isUpvotedByUser":false}],"posts":[{"slug":"513898057053383","content":[{"type":"text","value":"Wohoo 🥳 I have finished my 2025 GPU workstation build and I am very excited to train new awesome open source models on it.","raw":"Wohoo 🥳 I have finished my 2025 GPU workstation build and I am very excited to train new awesome open source models on it."},{"type":"new_line","raw":"\n"},{"type":"new_line","raw":"\n"},{"type":"text","value":"I built my last GPU workstation 5 years ago featuring an AMD Ryzen 5900X, 64GB of G.SKILL Trident Z RGB on an ASRock X570 Taichi cooled by an Alphacool Eisbär 420. GPU was a Zotac RTX 3090 AMP Extreme. Unfortunately, I was never satisfied with the case - some Fractal Define 7, as it is definitely too small, airflow is not optimal as I had to open the front door all the time and it also arrived with a partly damaged side panel.","raw":"I built my last GPU workstation 5 years ago featuring an AMD Ryzen 5900X, 64GB of G.SKILL Trident Z RGB on an ASRock X570 Taichi cooled by an Alphacool Eisbär 420. GPU was a Zotac RTX 3090 AMP Extreme. Unfortunately, I was never satisfied with the case - some Fractal Define 7, as it is definitely too small, airflow is not optimal as I had to open the front door all the time and it also arrived with a partly damaged side panel."},{"type":"new_line","raw":"\n"},{"type":"new_line","raw":"\n"},{"type":"text","value":"For my new build, I've used the following components: an outstanding new AMD Ryzen 9950X3D with 64GB of Corsair Dominator Titanium (what a name). As a huge Noctua fan - warm greetings to my Austrian neighbors - I am using the brand new Noctua NH-D15 G2 on an ASRock X870E Taichi in an amazing Lian Li LANCOOL III chassis. One joke that only NVIDIA Blackwell users will understand: you definitely need a tempered glass panel to check if your GPU cables/connectors start melting 😂 And the best is yet to come: I returned my previously bought Zotac RTX 5090 Solid to the eBay seller (because of... missing ROPs, only NVIDIA Blackwell users will again understand) and bought a Zotac 5090 AMP Extreme INFINITY (yes, the long name indicates that this is the flagship model from Zotac) from a more trustworthy source (NBB in Germany).","raw":"For my new build, I've used the following components: an outstanding new AMD Ryzen 9950X3D with 64GB of Corsair Dominator Titanium (what a name). As a huge Noctua fan - warm greetings to my Austrian neighbors - I am using the brand new Noctua NH-D15 G2 on an ASRock X870E Taichi in an amazing Lian Li LANCOOL III chassis. One joke that only NVIDIA Blackwell users will understand: you definitely need a tempered glass panel to check if your GPU cables/connectors start melting 😂 And the best is yet to come: I returned my previously bought Zotac RTX 5090 Solid to the eBay seller (because of... missing ROPs, only NVIDIA Blackwell users will again understand) and bought a Zotac 5090 AMP Extreme INFINITY (yes, the long name indicates that this is the flagship model from Zotac) from a more trustworthy source (NBB in Germany)."},{"type":"new_line","raw":"\n"},{"type":"new_line","raw":"\n"},{"type":"text","value":"I am so happy to start training and fine-tuning new open source models - stay tuned!!!","raw":"I am so happy to start training and fine-tuning new open source models - stay tuned!!!"}],"rawContent":"Wohoo 🥳 I have finished my 2025 GPU workstation build and I am very excited to train new awesome open source models on it.\n\nI built my last GPU workstation 5 years ago featuring an AMD Ryzen 5900X, 64GB of G.SKILL Trident Z RGB on an ASRock X570 Taichi cooled by an Alphacool Eisbär 420. GPU was a Zotac RTX 3090 AMP Extreme. Unfortunately, I was never satisfied with the case - some Fractal Define 7, as it is definitely too small, airflow is not optimal as I had to open the front door all the time and it also arrived with a partly damaged side panel.\n\nFor my new build, I've used the following components: an outstanding new AMD Ryzen 9950X3D with 64GB of Corsair Dominator Titanium (what a name). As a huge Noctua fan - warm greetings to my Austrian neighbors - I am using the brand new Noctua NH-D15 G2 on an ASRock X870E Taichi in an amazing Lian Li LANCOOL III chassis. One joke that only NVIDIA Blackwell users will understand: you definitely need a tempered glass panel to check if your GPU cables/connectors start melting 😂 And the best is yet to come: I returned my previously bought Zotac RTX 5090 Solid to the eBay seller (because of... missing ROPs, only NVIDIA Blackwell users will again understand) and bought a Zotac 5090 AMP Extreme INFINITY (yes, the long name indicates that this is the flagship model from Zotac) from a more trustworthy source (NBB in Germany).\n\nI am so happy to start training and fine-tuning new open source models - stay tuned!!!","author":{"_id":"5e6a3d4ea9afd5125d9ec064","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1584020801691-noauth.jpeg","fullname":"Stefan Schweter","name":"stefan-it","type":"user","isPro":true,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":3687,"isUserFollowing":false},"attachments":[{"type":"image","url":"https://cdn-uploads.huggingface.co/production/uploads/5e6a3d4ea9afd5125d9ec064/FWjMhNJnGqUqUcKHsKrJa.jpeg"}],"mentions":[],"reactions":[{"reaction":"🔥","users":["John6666","JLouisBiz","Anas-Mohiuddin-Syed","tommulder","madoss","johnlockejrr","catastropiyush","dantezxcd","Takugen","emre","Ah-lamloum","davanstrien"],"count":12},{"reaction":"😎","users":["dantezxcd","on3dj","davanstrien"],"count":3},{"reaction":"👍","users":["softwaredevmike2020"],"count":1}],"publishedAt":"2025-03-29T21:48:35.000Z","updatedAt":"2025-04-01T17:31:30.528Z","commentators":[{"_id":"6758a9850e3fff481964ca6d","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/no-auth/EolfJfjW25hC4Bt_hCPq8.png","fullname":"Jean Louis","name":"JLouisBiz","type":"user","isPro":false,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":90,"isUserFollowing":false},{"_id":"629cf64165165a0df479e3f2","avatarUrl":"/avatars/262c2adb3405b3218e751cc906aa2359.svg","fullname":"Mahamadi NIKIEMA","name":"madoss","type":"user","isPro":false,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":8,"isUserFollowing":false}],"url":"/posts/stefan-it/513898057053383","totalUniqueImpressions":4240,"identifiedLanguage":{"language":"en","probability":0.8979713320732117},"numComments":2},{"slug":"308944241065066","content":[{"type":"text","value":"🇹🇷 😍 I'm very happy to finally announce my new Turkish LM called \"BERT5urk\":","raw":"🇹🇷 😍 I'm very happy to finally announce my new Turkish LM called \"BERT5urk\":"},{"type":"new_line","raw":"\n"},{"type":"new_line","raw":"\n"},{"type":"resource","resource":{"type":"model","id":"stefan-it/bert5urk"},"url":"https://huggingface.co/stefan-it/bert5urk","raw":"https://huggingface.co/stefan-it/bert5urk"},{"type":"new_line","raw":"\n"},{"type":"new_line","raw":"\n"},{"type":"text","value":"It is a 1.42B T5-based model, trained with UL2 pretraining objective on the Turkish part of the awesome ","raw":"It is a 1.42B T5-based model, trained with UL2 pretraining objective on the Turkish part of the awesome "},{"type":"resource","resource":{"type":"dataset","id":"HuggingFaceFW/fineweb-2"},"url":"https://huggingface.co/datasets/HuggingFaceFW/fineweb-2","raw":"https://huggingface.co/datasets/HuggingFaceFW/fineweb-2"},{"type":"text","value":" dataset.","raw":" dataset."},{"type":"new_line","raw":"\n"},{"type":"new_line","raw":"\n"},{"type":"text","value":"Feel free to check it out!","raw":"Feel free to check it out!"}],"rawContent":"🇹🇷 😍 I'm very happy to finally announce my new Turkish LM called \"BERT5urk\":\n\nhttps://huggingface.co/stefan-it/bert5urk\n\nIt is a 1.42B T5-based model, trained with UL2 pretraining objective on the Turkish part of the awesome https://huggingface.co/datasets/HuggingFaceFW/fineweb-2 dataset.\n\nFeel free to check it out!","author":{"_id":"5e6a3d4ea9afd5125d9ec064","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1584020801691-noauth.jpeg","fullname":"Stefan Schweter","name":"stefan-it","type":"user","isPro":true,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":3687,"isUserFollowing":false},"attachments":[],"mentions":[],"reactions":[{"reaction":"👀","users":["John6666","dantezxcd"],"count":2}],"publishedAt":"2025-03-03T23:41:50.000Z","updatedAt":"2025-03-04T02:11:00.887Z","commentators":[{"_id":"5e6a3d4ea9afd5125d9ec064","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1584020801691-noauth.jpeg","fullname":"Stefan Schweter","name":"stefan-it","type":"user","isPro":true,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":3687,"isUserFollowing":false}],"url":"/posts/stefan-it/308944241065066","totalUniqueImpressions":1039,"identifiedLanguage":{"language":"en","probability":0.8543182015419006},"numComments":1}],"totalPosts":5,"spaces":[{"author":"stefan-it","authorData":{"_id":"5e6a3d4ea9afd5125d9ec064","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1584020801691-noauth.jpeg","fullname":"Stefan Schweter","name":"stefan-it","type":"user","isPro":true,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":3687,"isUserFollowing":false},"colorFrom":"blue","colorTo":"green","createdAt":"2025-10-26T11:41:10.000Z","emoji":"🌍","id":"stefan-it/nanochat-german-v1","lastModified":"2025-10-27T00:11:25.000Z","likes":1,"pinned":false,"private":false,"sdk":"gradio","repoType":"space","runtime":{"stage":"RUNNING","hardware":{"current":"zero-a10g","requested":"zero-a10g"},"storage":null,"gcTimeout":172800,"replicas":{"current":1,"requested":1},"devMode":false,"domains":[{"domain":"stefan-it-nanochat-german-v1.hf.space","stage":"READY"}],"sha":"63907b4c41257131fd7adaebdf231e5c33552666"},"title":"German nanochat Demo","isLikedByUser":false,"originRepo":{"name":"nanochat-students/chat-d20-demo","author":{"_id":"68edf9a10da63eef75514b89","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/62d648291fa3e4e7ae3fa6e8/6Uk_2esDQItKgBunvl_nr.png","fullname":"nanochat students","name":"nanochat-students","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":1213,"isUserFollowing":false}},"ai_short_description":"Generate German text responses based on user input","ai_category":"Text Generation","trendingScore":0,"tags":["gradio","region:us"],"featured":false},{"author":"stefan-it","authorData":{"_id":"5e6a3d4ea9afd5125d9ec064","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1584020801691-noauth.jpeg","fullname":"Stefan Schweter","name":"stefan-it","type":"user","isPro":true,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":3687,"isUserFollowing":false},"colorFrom":"indigo","colorTo":"yellow","createdAt":"2023-10-05T06:41:43.000Z","emoji":"🏆","id":"stefan-it/hmLeaderboard","lastModified":"2024-07-07T20:08:03.000Z","likes":1,"pinned":false,"private":false,"sdk":"gradio","repoType":"space","runtime":{"stage":"SLEEPING","hardware":{"current":null,"requested":"cpu-basic"},"storage":null,"gcTimeout":null,"replicas":{"requested":1},"devMode":false,"domains":[{"domain":"stefan-it-hmleaderboard.hf.space","stage":"READY"}]},"title":"hmLeaderboard","isLikedByUser":false,"trendingScore":0,"tags":["gradio","region:us"],"featured":false}],"u":{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1584020801691-noauth.jpeg","isPro":true,"fullname":"Stefan Schweter","user":"stefan-it","orgs":[{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1584060655026-noauth.jpeg","fullname":"Bayerische Staatsbibliothek","name":"dbmdz","userRole":"admin","type":"org","isHf":false,"details":"(Historical) Named Entity Recognition, LM Pretraining"},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1607338919026-5fce0b030931aa16b3c5dc94.png","fullname":"flair","name":"flair","userRole":"write","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1624969772076-5dfcb1aada6d0311fd3d5448.jpeg","fullname":"Flax Community","name":"flax-community","userRole":"admin","type":"org","isHf":false,"details":"JAX, Flax, TPU, 🤗"},{"avatarUrl":"https://www.gravatar.com/avatar/3ff609c0aef7057c2113471c88eb1dba?d=retro&size=100","fullname":"dumitrescustefan-org","name":"dumitrescustefan-org","userRole":"admin","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1643662551794-5ff5d596f244529b3ec0fb89.png","fullname":"GermanT5","name":"GermanT5","userRole":"write","type":"org","isHf":false,"details":"Creating a German T5 model"},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e70f6048ce3c604d78fe133/HH7VIuKus3-b-tddaWRWe.png","fullname":"BigScience: LMs for Historical Texts","name":"bigscience-historical-texts","userRole":"admin","type":"org","isHf":false,"details":"historical texts, named-entity recognition, big science"},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1657276137483-60107b385ac3e86b3ea4fc34.png","fullname":"BigLAM: BigScience Libraries, Archives and Museums","name":"biglam","userRole":"write","type":"org","isHf":false,"details":"🤗 Hugging Face x 🌸 BigScience initiative to create open source community resources for LAMs."},{"avatarUrl":"https://www.gravatar.com/avatar/eaac6d5b00cd8d215ab68357d57da1a6?d=retro&size=100","fullname":"Universal NER","name":"universalner","userRole":"admin","type":"org","isHf":false,"details":"Building a Gold Standard Multilingual NER dataset."},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5f188f8a925b9863e28ad578/wQp4kAAfH4iwKPa3ilPk3.jpeg","fullname":"Libre Euro Lingua-Alliance","name":"LEL-A","userRole":"admin","type":"org","isHf":false,"details":"nlp"},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1675369971012-62cb34eb0d72a9876be158ca.png","fullname":"Lang UK","name":"lang-uk","userRole":"write","type":"org","isHf":false,"details":"NLP"},{"avatarUrl":"https://www.gravatar.com/avatar/0a2cbbcd65a2a1edb5e3ae4bdd296bdd?d=retro&size=100","fullname":"BabyLM Challenge","name":"babylm-anon","userRole":"admin","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e6a3d4ea9afd5125d9ec064/b32qfA-xDPnzncA_UmnXn.jpeg","fullname":"hmByT5 Preliminary","name":"hmbyt5-preliminary","userRole":"admin","type":"org","isHf":false,"details":"ByT5, historic language models"},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e6a3d4ea9afd5125d9ec064/aJHQ2RwTII6Ncj4fv2xJV.jpeg","fullname":"hmByT5","name":"hmbyt5","userRole":"admin","type":"org","isHf":false,"details":"ByT5, historical language models"},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/60a551a34ecc5d054c8ad93e/Ku5nM2bKq-8ZF3Jid1ocw.png","fullname":"Blog-explorers","name":"blog-explorers","userRole":"read","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e6a3d4ea9afd5125d9ec064/u24M48A1LRt9-2HruJ2Bl.png","fullname":"German Wikipedia LMs","name":"gwlms","userRole":"admin","type":"org","isHf":false,"details":"language modeling"},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e6a3d4ea9afd5125d9ec064/w_Esvls-FWaJ6CW57JjGt.jpeg","fullname":"hmBERT","name":"hmbert","userRole":"admin","type":"org","isHf":false,"details":"Pretraining Historical Multilingual Language Models"},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e6a3d4ea9afd5125d9ec064/04uZdGgDkZzjZJKOGB_rt.jpeg","fullname":"hmTEAMS","name":"hmteams","userRole":"admin","type":"org","isHf":false,"details":"Pretraining Historical Multilingual Language Models"},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/noauth/b7qsFmNIlK0wMM5lg9M0R.png","fullname":"HIPE","name":"hipe","userRole":"write","type":"org","isHf":false,"details":"The HIPE HF organisation contains models and datasets related to the HIPE evaluation campaigns on the processing of named entity in historical documents. HIPE-eval is gradually evolving towards hosting models and datasets related the evaluation of information extraction systems on historical documents."},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e6a3d4ea9afd5125d9ec064/YFuaQxL6DYOF0Qxh0GQTV.png","fullname":"hmBERT Tiny","name":"hmbert-tiny","userRole":"admin","type":"org","isHf":false,"details":"Pretraining Historical Multilingual Language Models"},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e6a3d4ea9afd5125d9ec064/iwBslQR_TTRnmPd9col42.png","fullname":"hmBERT 64k","name":"hmbert-64k","userRole":"admin","type":"org","isHf":false,"details":"Pretraining Historical Multilingual Language Models"},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/653d43bfcbc8ae32a5de4042/RopNrWZS5KaJ5xdq2lIwA.jpeg","fullname":"LSV @ Saarland University","name":"UdS-LSV","userRole":"contributor","type":"org","isHf":false},{"avatarUrl":"https://www.gravatar.com/avatar/8a9c7ab86ba3bd8cf092a17e0a70cca6?d=retro&size=100","fullname":"GERMATRON","name":"germatron","userRole":"admin","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/64ce091a9e9ca8123d7a42b0/vCajdXlzRs0zAU-b_KJ_G.png","fullname":"PleIAs","name":"PleIAs","userRole":"write","type":"org","isHf":false,"details":"Open Science LLMs","plan":"team"},{"avatarUrl":"https://www.gravatar.com/avatar/c2dfb881f991a9402fcf138c8a61448d?d=retro&size=100","fullname":"German LLM Tokenizers","name":"german-llm-tokenizers","userRole":"admin","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/62fa1d95e8c9c532aa75331c/ipLoiytziTlwynohIQbNV.png","fullname":"Occiglot","name":"occiglot","userRole":"write","type":"org","isHf":false,"details":"Open Source Language Models for Europe"},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5f17f0a0925b9863e28ad517/nxmdd6m86cxu55UZBlQeg.jpeg","fullname":"Social Post Explorers","name":"social-post-explorers","userRole":"read","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e6a3d4ea9afd5125d9ec064/4aFBzIKe8wDM1aKuNfPya.jpeg","fullname":"GERTuraX","name":"gerturax","userRole":"admin","type":"org","isHf":false,"details":"New German LMs"},{"avatarUrl":"https://www.gravatar.com/avatar/bd265a25c766788c4c32d2ca70aa15a8?d=retro&size=100","fullname":"Stefmal","name":"stefmal","userRole":"admin","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/6340651b388c3fa40f9a5bc0/j6Vb_hutYuKRcQgMaDTAt.png","fullname":"Hugging Face Discord Community","name":"discord-community","userRole":"read","type":"org","isHf":false,"details":"Collaborating towards Good ML!"},{"avatarUrl":"https://www.gravatar.com/avatar/e71b99cd292dabf0c17912a39f3e2128?d=retro&size=100","fullname":"Project German LLM","name":"project-german-llm","userRole":"write","type":"org","isHf":false},{"avatarUrl":"https://www.gravatar.com/avatar/ec2bd83ba78d62c7d698677875bcc92f?d=retro&size=100","fullname":"ENGEBA","name":"engeba","userRole":"admin","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/620a77b7dbba8fc1fbb8bdb4/kYHDJufHPw5AtI08-N9Gb.png","fullname":"Nerdy Face","name":"nerdyface","userRole":"read","type":"org","isHf":false,"plan":"team"},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e6a3d4ea9afd5125d9ec064/LD0Zc-cz-VZpqYi3qvhQ8.png","fullname":"TensorFlow Model Garden LMs","name":"model-garden-lms","userRole":"admin","type":"org","isHf":false,"details":"Language Model Pretraining, TensorFlow Model Garden"},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/62d648291fa3e4e7ae3fa6e8/itgTDqMrnvgNfJZJ4YmCt.png","fullname":"Hugging Face MCP Course","name":"mcp-course","userRole":"read","type":"org","isHf":false,"details":"Model Context Protocol, AI Agents, Python, Typescript"},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e6a3d4ea9afd5125d9ec064/pLtfeCD0AaRrp_njKEaYy.png","fullname":"Bavarian NLP","name":"bavarian-nlp","userRole":"admin","type":"org","isHf":false,"details":"Bavarian NLP, NER, PoS Tagging"},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e6a3d4ea9afd5125d9ec064/NGjzeoe0ZUGKznGdvkcKD.png","fullname":"Baivaria","name":"baivaria","userRole":"admin","type":"org","isHf":false,"details":"Building strong encoder-only models for Bavarian (currently focussing on DeBERTA)"},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5fb5317b0f98667521e6f7f1/tvj_amZiNEFCC1Kk2osAC.png","fullname":"SindBERT","name":"SindBERT","userRole":"write","type":"org","isHf":false},{"avatarUrl":"https://www.gravatar.com/avatar/3824bece6bf64d9ed516358b1762b8fb?d=retro&size=100","fullname":"German Tokenizer Benchmark","name":"german-tokenizer-benchmark","userRole":"admin","type":"org","isHf":false,"details":"German, Tokenizer, Benchmark"}],"signup":{"github":"stefan-it","details":"Flair Library 💕, NER & PoS Tagging, LM Pretraining (mostly encoder-only & encoder-decoder), Historical Language Models, German Language Models, Bavarian NLP 🥨","homepage":"https://schweter.bayern","twitter":"","linkedin":"stefan-it","bluesky":""},"isHf":false,"isMod":false,"type":"user","theme":"light","status":"training"},"upvotes":326,"numFollowers":3687,"numFollowingUsers":313,"numFollowingOrgs":75,"numModels":1344,"numDatasets":22,"numSpaces":2,"isFollowing":false,"isFollower":false,"sampleFollowers":[{"user":"dexcompiler","fullname":"Dexter","type":"user","_id":"644459ed3c323e0918f978ed","isPro":false,"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/noauth/yeXUeX6NA-UFAsjMxtqIh.jpeg"},{"user":"Magic689","fullname":"Eric","type":"user","_id":"645cbce158f9ee3151439579","isPro":false,"avatarUrl":"/avatars/d0abc6084be9ea06dfb3e2c0115911e0.svg"},{"user":"strongant","fullname":"strongant.bai","type":"user","_id":"654ba4855255ee8671f002fc","isPro":false,"avatarUrl":"/avatars/1d622a07096c8d5782fc43346081c3f7.svg"},{"user":"tiendung","fullname":"Tien Dung","type":"user","_id":"60c953fa9cacafb192d805fd","isPro":false,"avatarUrl":"/avatars/5071c5b861341c0dcfcf6ac86327701f.svg"}],"isWatching":false,"hardwareItems":[],"isIgnored":false,"acceptLanguages":["*"],"filters":{},"currentRepoPage":0}">
Stefan Schweter's picture
In a Training Loop 🔄

Stefan Schweter PRO

stefan-it

AI & ML interests

Flair Library 💕, NER & PoS Tagging, LM Pretraining (mostly encoder-only & encoder-decoder), Historical Language Models, German Language Models, Bavarian NLP 🥨

Recent Activity

reacted to umarbutler's post with ❤️ about 10 hours ago
@abdurrahmanbutler and I just dropped Legal RAG Bench, the first benchmark for legal RAG systems to simultaneously evaluate hallucinations, retrieval failures, and reasoning errors. Our key takeaways are: 1. Embedding models, not generative models, are the primary driver of RAG accuracy. Switching from a general-purpose embedder like OpenAI's Text Embedding 3 Large to a legal domain embedder like Isaacus' Kanon 2 Embedder can raise accuracy by ~19 points. 2. Hallucinations are often triggered by retrieval failures. Fix your retrieval stack, and, in most cases, you end up fixing hallucinations. 3. Once you have a solid legal retrieval engine like Kanon 2 Embedder, it doesn’t matter as much what generative model you use; GPT-5.2 and Gemini 3.1 Pro perform relatively similarly, with Gemini 3.1 Pro achieving slightly better accuracy at the cost of more hallucinations. 4. Google's latest LLM, Gemini 3.1 Pro, is actually a bit worse than its predecessor at legal RAG, achieving 79.3% accuracy instead of 80.3%. These findings confirm what we already knew at Isaacus: that information retrieval sets the ceiling on the accuracy of legal RAG systems. It doesn’t matter how smart you are; you aren’t going to magically know what the penalty is for speeding in California without access to an up-to-date copy of the California Vehicle Code. Even still, to our knowledge, we’re the first to actually show this empirically. Unfortunately, as we highlight in our write-up, high-quality open legal benchmarks like Legal RAG Bench and our earlier MLEB are few and far between. In the interests of transparency, we have not only detailed exactly how we built Legal RAG Bench, but we’ve also released all of our data openly on Hugging Face. You can read our write up [here](https://isaacus.com/blog/legal-rag-bench), noting that we’ll soon be publishing it as a paper. Kudos to my brother @abdurrahmanbutler for serving as the lead author on this monumental release.
liked a dataset about 15 hours ago
Eurolingua/HPLT3_DE_0.9_Quantile_Adult_Filtered
View all activity

Organizations

Bayerische Staatsbibliothek's profile picture flair's profile picture Flax Community's profile picture dumitrescustefan-org's profile picture GermanT5's profile picture BigScience: LMs for Historical Texts's profile picture BigLAM: BigScience Libraries, Archives and Museums's profile picture Universal NER's profile picture Libre Euro Lingua-Alliance's profile picture Lang UK's profile picture BabyLM Challenge's profile picture hmByT5 Preliminary's profile picture hmByT5's profile picture Blog-explorers's profile picture German Wikipedia LMs's profile picture hmBERT's profile picture hmTEAMS's profile picture HIPE's profile picture hmBERT Tiny's profile picture hmBERT 64k's profile picture LSV @ Saarland University's profile picture GERMATRON's profile picture PleIAs's profile picture German LLM Tokenizers's profile picture Occiglot's profile picture Social Post Explorers's profile picture GERTuraX's profile picture Stefmal's profile picture Hugging Face Discord Community's profile picture Project German LLM's profile picture ENGEBA's profile picture Nerdy Face's profile picture TensorFlow Model Garden LMs's profile picture Hugging Face MCP Course's profile picture Bavarian NLP's profile picture Baivaria's profile picture SindBERT's profile picture German Tokenizer Benchmark's profile picture