Deprecated: The each() function is deprecated. This message will be suppressed on further calls in /home/zhenxiangba/zhenxiangba.com/public_html/phproxy-improved-master/index.php on line 456
espnet (ESPnet)
[go: Go Back, main page]

pytorch as a deep learning engine and also follows Kaldi style data processing, feature extraction/format, and recipes to provide a complete setup for various speech processing experiments.

\n
Citing ESPnet\n\n
@inproceedings{watanabe2018espnet,\n  author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson {Enrique Yalta Soplin} and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},\n  title={{ESPnet}: End-to-End Speech Processing Toolkit},\n  year={2018},\n  booktitle={Proceedings of Interspeech},\n  pages={2207--2211},\n  doi={10.21437/Interspeech.2018-1456},\n  url={http://dx.doi.org/10.21437/Interspeech.2018-1456}\n}\n\n@inproceedings{hayashi2020espnet,\n  title={{Espnet-TTS}: Unified, reproducible, and integratable open source end-to-end text-to-speech toolkit},\n  author={Hayashi, Tomoki and Yamamoto, Ryuichi and Inoue, Katsuki and Yoshimura, Takenori and Watanabe, Shinji and Toda, Tomoki and Takeda, Kazuya and Zhang, Yu and Tan, Xu},\n  booktitle={Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},\n  pages={7654--7658},\n  year={2020},\n  organization={IEEE}\n}\n\n@inproceedings{inaguma-etal-2020-espnet,\n    title = \"{ESP}net-{ST}: All-in-One Speech Translation Toolkit\",\n    author = \"Inaguma, Hirofumi  and\n      Kiyono, Shun  and\n      Duh, Kevin  and\n      Karita, Shigeki  and\n      Yalta, Nelson  and\n      Hayashi, Tomoki  and\n      Watanabe, Shinji\",\n    booktitle = \"Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: System Demonstrations\",\n    month = jul,\n    year = \"2020\",\n    address = \"Online\",\n    publisher = \"Association for Computational Linguistics\",\n    url = \"https://www.aclweb.org/anthology/2020.acl-demos.34\",\n    pages = \"302--311\",\n}\n\n@inproceedings{li2020espnet,\n  title={{ESPnet-SE}: End-to-End Speech Enhancement and Separation Toolkit Designed for {ASR} Integration},\n  author={Chenda Li and Jing Shi and Wangyou Zhang and Aswin Shanmugam Subramanian and Xuankai Chang and Naoyuki Kamo and Moto Hira and Tomoki Hayashi and 
Christoph Boeddeker and Zhuo Chen and Shinji Watanabe},\n  booktitle={Proceedings of IEEE Spoken Language Technology Workshop (SLT)},\n  pages={785--792},\n  year={2021},\n  organization={IEEE},\n}\n\n@article{arora2021espnet,\n  title={ESPnet-SLU: Advancing Spoken Language Understanding through ESPnet},\n  author={Arora, Siddhant and Dalmia, Siddharth and Denisov, Pavel and Chang, Xuankai and Ueda, Yushi and Peng, Yifan and Zhang, Yuekai and Kumar, Sujay and Ganesan, Karthik and Yan, Brian and others},\n  journal={arXiv preprint arXiv:2111.14706},\n  year={2021}\n}\n\n</details>\n
\n
","classNames":"hf-sanitized hf-sanitized-5UCq286Fl4Df_mOkjIwXS"},"users":[{"_id":"60d28bba010d938bba5c6ae9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625295625494-60d28bba010d938bba5c6ae9.jpeg","isPro":true,"fullname":"Nelson Yalta","user":"Fhrozen","type":"user"},{"_id":"60d26d0d35274b8c1e4309f3","avatarUrl":"/avatars/aa0810324729268eb4a9aca78c95a7f0.svg","isPro":false,"fullname":"Arora","user":"Siddhant","type":"user"},{"_id":"6136b5707c6d9b8e41469bec","avatarUrl":"/avatars/5b7f4ee0ba36bb6b87accda0ee27bb17.svg","isPro":false,"fullname":"Xuankai Chang","user":"simpleoier","type":"user"},{"_id":"614a9fe3fc5e37e07eb602d9","avatarUrl":"/avatars/19f25e884a2778bceabc0b82e5e4f37f.svg","isPro":false,"fullname":"Dan Berrebbi","user":"DanBerrebbi6","type":"user"},{"_id":"61731e0bf9b557975b56439a","avatarUrl":"/avatars/a3a9f576000ad7ea4c9525f66c865a5c.svg","isPro":false,"fullname":"Sujay","user":"sujayskumar","type":"user"},{"_id":"6177f7212a5804028ecc498a","avatarUrl":"/avatars/a2a8b88b818d5b9e6869c52a970f9a62.svg","isPro":false,"fullname":"Pengcheng Guo","user":"pcguo","type":"user"},{"_id":"61809f31a367a8f5351ef353","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61809f31a367a8f5351ef353/s5eQ00YeoirakzE_rJ0cy.jpeg","isPro":false,"fullname":"Yifan Peng","user":"pyf98","type":"user"},{"_id":"61b0624867af0db45397ac03","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1639022319124-61b0624867af0db45397ac03.jpeg","isPro":false,"fullname":"arvind ahirwar","user":"addy88","type":"user"},{"_id":"61a01c43a2343017014145df","avatarUrl":"/avatars/de7fff13f0c9245eaed9ed295ece98a8.svg","isPro":false,"fullname":"Karthik Ganesan","user":"karthik19967829","type":"user"},{"_id":"6136ec7a40e43b8f748a0832","avatarUrl":"/avatars/089afa82ecfe6432d2dbfe81048c6ffa.svg","isPro":false,"fullname":"Jiatong 
Shi","user":"ftshijt","type":"user"},{"_id":"611d144adcea95fcbbc5637c","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1641039498704-611d144adcea95fcbbc5637c.jpeg","isPro":false,"fullname":"Pavel Denisov","user":"akreal","type":"user"},{"_id":"6179f36a2a4e9edab3a95798","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/6179f36a2a4e9edab3a95798/0mmFY5lFzPC5k6_GSdmYQ.jpeg","isPro":false,"fullname":"Heng-Jui Chang","user":"vectominist","type":"user"},{"_id":"620298a162b2b0e46e799698","avatarUrl":"/avatars/ae7ecde8b706e7add741100270d7cbc8.svg","isPro":false,"fullname":"Brian Yan","user":"brianyan918","type":"user"},{"_id":"6200089db22c8e266a8af514","avatarUrl":"/avatars/f5840ba28ba40febe32fb0b66aab715a.svg","isPro":false,"fullname":"Dorsa Zeinali ","user":"dzeinali","type":"user"},{"_id":"6205e8ab6c4c35a0a2415c81","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1644558391919-6205e8ab6c4c35a0a2415c81.png","isPro":false,"fullname":"Wangyou Zhang","user":"wyz","type":"user"},{"_id":"61f949be3c1583dba38d9bea","avatarUrl":"/avatars/6b588c212d8dcb65822cc9c3d747bb7b.svg","isPro":false,"fullname":"Roshan Sharma","user":"roshansh","type":"user"},{"_id":"616f05fd10adca8df8f3b158","avatarUrl":"/avatars/798f4b7630e57b48a357357518b4a9ca.svg","isPro":false,"fullname":"Yushi Ueda","user":"YushiUeda","type":"user"},{"_id":"622a2e75905aaacc47447b25","avatarUrl":"/avatars/7bdeff73b4155fb5a609eb4548dc48fc.svg","isPro":false,"fullname":"Yosuke Kashiwagi","user":"kashikashi","type":"user"},{"_id":"6236464dcd8e1f47c865cb4d","avatarUrl":"/avatars/1c20cd4239d281aa61a409a1ab5fd310.svg","isPro":false,"fullname":"Preksha Patel","user":"prekshaupatel","type":"user"},{"_id":"62364ccd76c8a780323ac062","avatarUrl":"/avatars/5e6ce280c2947fd48d63af62d30b541c.svg","isPro":false,"fullname":"Bharani Ujjaini 
Kempaiah","user":"bharaniuk","type":"user"},{"_id":"62376cd37c45100513d7eb1a","avatarUrl":"/avatars/2dbc366eac4e29b653dd76a56c87b9e2.svg","isPro":false,"fullname":"Young Min Kim","user":"ymk","type":"user"},{"_id":"6237746c4f73a51ab018f994","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1647892352892-6237746c4f73a51ab018f994.png","isPro":false,"fullname":"Chen Cui","user":"cuichenx","type":"user"},{"_id":"6237b40ae397f926c3b7983d","avatarUrl":"/avatars/6c67e765c729c158f8b325945bca9825.svg","isPro":false,"fullname":"Malaika Vijay","user":"mvijay","type":"user"},{"_id":"6237b415e397f926c3b79897","avatarUrl":"/avatars/919486edc70284d159d96491a0c683df.svg","isPro":false,"fullname":"Su Park","user":"sunotsue","type":"user"},{"_id":"6237b4ac5ab9df625fb5c16c","avatarUrl":"/avatars/c38c61d5c4760e3d472beea45a82d045.svg","isPro":false,"fullname":"Ye Rin Han","user":"yerinh","type":"user"},{"_id":"5feb9dbc29bcc7d2abb1cb76","avatarUrl":"/avatars/908d392634377e674ba9e5a4318b101e.svg","isPro":false,"fullname":"Shinji Watanabe","user":"sw005320","type":"user"},{"_id":"6238f2f602d8c19e7b1b5840","avatarUrl":"/avatars/8e52215357349991e61bfe1de80b5a1b.svg","isPro":false,"fullname":"Chaitanya Narisetty","user":"chaitu619","type":"user"},{"_id":"5e1f3d5e5c2e2c73f4512188","avatarUrl":"/avatars/6fff2d6c2c566abd30a664a558310e38.svg","isPro":false,"fullname":"Yosuke Higuchi","user":"yosuke","type":"user"},{"_id":"619d76e61267e7fc28a900b4","avatarUrl":"/avatars/25681455e7f269c4341d135044e92bdf.svg","isPro":false,"fullname":"Nikhil Gupta","user":"nikhilgupta23","type":"user"},{"_id":"6239390fd723244b3ba856b8","avatarUrl":"/avatars/b78a9bd748b45fa2e6967f6be1d96fb3.svg","isPro":false,"fullname":"Ashley Wu","user":"ashwu","type":"user"},{"_id":"623a15da0ea1905322da89d8","avatarUrl":"/avatars/1e6fb8bd2912ed6eba9b32ba2cb05464.svg","isPro":false,"fullname":"Suraj 
Tripathi","user":"SurajTripathi","type":"user"},{"_id":"623a14acefdfe9ad34fc6c88","avatarUrl":"/avatars/12164f9f5af5f5404d549add75d708b4.svg","isPro":false,"fullname":"Sumit Agarwal","user":"sumit-agrwl","type":"user"},{"_id":"623a38822cf21827c368d239","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1647982638285-noauth.jpeg","isPro":false,"fullname":"Shubham Milind Phal","user":"shubhamphal","type":"user"},{"_id":"619d480a8ae9cafd72ab2098","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1648074722734-619d480a8ae9cafd72ab2098.png","isPro":false,"fullname":"Abhishek Srivastava","user":"abhesrivas","type":"user"},{"_id":"623ca309ca87989bbf78f278","avatarUrl":"/avatars/62d048848a1c8fa0b59577c8c8a32531.svg","isPro":false,"fullname":"Sameer Jain","user":"jainsameer","type":"user"},{"_id":"5fb83316c7affe28d37d62e8","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1605907196027-noauth.jpeg","isPro":false,"fullname":"Ngo Quang Huy","user":"ngoquanghuy","type":"user"},{"_id":"62137ce1cf7928035e81349b","avatarUrl":"/avatars/25fcf50c3a7961954923989612fb5070.svg","isPro":false,"fullname":"Gunnar Thor Örnólfsson","user":"GunnarThor","type":"user"},{"_id":"624b4f69dec6f365f4fe17cc","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1650564877074-624b4f69dec6f365f4fe17cc.png","isPro":false,"fullname":"Yoshiki Masuyama","user":"Yoshiki","type":"user"},{"_id":"6266bb05b436f5788ae12adb","avatarUrl":"/avatars/e2698ee31e7fa020bd2f4d527ddff5c5.svg","isPro":false,"fullname":"Yen-Ju Lu","user":"neillu","type":"user"},{"_id":"60638ffcc1b431dab68bf985","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1652289278612-60638ffcc1b431dab68bf985.jpeg","isPro":false,"fullname":"Rohola 
Zandie","user":"Roh","type":"user"},{"_id":"6224ca03f8738d5692d19293","avatarUrl":"/avatars/c6a3d17ef944383fb58b8af937a70421.svg","isPro":false,"fullname":"Taskali","user":"Yerzhaisang","type":"user"},{"_id":"627d628fba19b19eff67e79a","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1655326438071-627d628fba19b19eff67e79a.jpeg","isPro":false,"fullname":"Robert Huang","user":"raiseException","type":"user"},{"_id":"62a99d4f37a0abfd7feaaee0","avatarUrl":"/avatars/7f34254bd6d6c4f68bfedd4e9a1347ca.svg","isPro":false,"fullname":"xintongwang","user":"walston","type":"user"},{"_id":"62aaf4bdd48b4d8b04745ea8","avatarUrl":"/avatars/d72cbb31e81a8432c5c48aadf826e03c.svg","isPro":false,"fullname":"Jianchuan Tian","user":"jctian98","type":"user"},{"_id":"61571d3bb8d28825f709ffc2","avatarUrl":"/avatars/bff79620f3a247c902dfd30dca9cf2b8.svg","isPro":false,"fullname":"Emmanuel Schmidbauer","user":"eschmidbauer","type":"user"},{"_id":"62c0bb2143d4858f8221d300","avatarUrl":"/avatars/cb0e57d87428471e9ed4f1f239d758fa.svg","isPro":false,"fullname":"Maia Iyer","user":"maia-iyer","type":"user"},{"_id":"62d9562f51e8289052bf6ff5","avatarUrl":"/avatars/fae9a1fbf596392930e41a99526e1c79.svg","isPro":false,"fullname":"Jessica Huynh","user":"jessicah25","type":"user"},{"_id":"62c0b5203e7b8a5067d2c6d4","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1656797308493-62c0b5203e7b8a5067d2c6d4.jpeg","isPro":false,"fullname":"Ahnaf Mozib Samin","user":"ahnafsamin","type":"user"},{"_id":"630438615c70c21d0eae6613","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/630438615c70c21d0eae6613/KEHsB-znDIKHt7M0eyP8j.png","isPro":true,"fullname":"William Chen","user":"wanchichen","type":"user"},{"_id":"6311001864939fabc00bb4dc","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1678031945983-6311001864939fabc00bb4dc.jpeg","isPro":false,"fullname":"Shih-Lun 
Wu","user":"slseanwu","type":"user"},{"_id":"63133199b46fc4e2432f8201","avatarUrl":"/avatars/ea3a84249327a5e704c5242c3b897876.svg","isPro":false,"fullname":"鸢一折纸","user":"Methratton","type":"user"},{"_id":"631fec98c1a8269da39ac652","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/631fec98c1a8269da39ac652/fzEYU6xAhUDml3MMoTyAn.jpeg","isPro":false,"fullname":"Jiyang Tang","user":"tjysdsg","type":"user"},{"_id":"6331d6465102481d8dd2b55e","avatarUrl":"/avatars/6dc792023b896e526d636c9c5d6e6547.svg","isPro":false,"fullname":"Soumi Maiti","user":"soumi-maiti","type":"user"},{"_id":"631d44b1aa346997918353e3","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1662862504884-noauth.jpeg","isPro":false,"fullname":"Michael Lin","user":"michaellin52","type":"user"},{"_id":"6313f61cb46fc4e24335853c","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/6313f61cb46fc4e24335853c/OqZ81QsQVMTlvFYDdfo0t.jpeg","isPro":false,"fullname":"Masabumi Ishihara","user":"maty0505","type":"user"},{"_id":"636306482691058b19dfadb2","avatarUrl":"/avatars/f719b5968d38170bb7d616c0c90a7202.svg","isPro":false,"fullname":"Samarth Navali","user":"snavali567","type":"user"},{"_id":"63645d9eb1186f7362556895","avatarUrl":"/avatars/f4c14b13a6762f7308ba70fab99b2fd0.svg","isPro":false,"fullname":"Kenneth Zheng","user":"kenzheng99","type":"user"},{"_id":"6375cfeddee28348a9c63280","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1668883341867-6375cfeddee28348a9c63280.jpeg","isPro":false,"fullname":"Ziang Zhou","user":"realzza","type":"user"},{"_id":"61b0bcd7e40f2fb4697fee24","avatarUrl":"/avatars/ad6d18e48906e8fb5e4cd8d9f1f23dfe.svg","isPro":false,"fullname":"ASHWINI DASARE","user":"Dasare","type":"user"},{"_id":"62e8d9b3f7e720c6b134a2be","avatarUrl":"/avatars/172a288069ad65780e115db5961789da.svg","isPro":false,"fullname":"Arun Kumar 
A","user":"aruniitm","type":"user"},{"_id":"63b4e10c34df1f9a90a2d3ed","avatarUrl":"/avatars/e80a76fe14cfa4ccfb4f7ee4399ad408.svg","isPro":false,"fullname":"Koichi Miyazaki","user":"kmiyazaki","type":"user"},{"_id":"619e824057785be19f61e0a4","avatarUrl":"/avatars/9447a1e2ad821db9e4b92612fa6600b6.svg","isPro":false,"fullname":"Jonathan Mukiibi","user":"jmukiibi","type":"user"},{"_id":"63cff6f30b57fd08646c3f04","avatarUrl":"/avatars/812846a25e09dba568a149f2ad09ff5f.svg","isPro":false,"fullname":"Zhong-Qiu Wang","user":"zhongqiu","type":"user"},{"_id":"5f4a6b0e79c1ba4c353d1265","avatarUrl":"/avatars/6a55822733d846306832b347e5a8e6ba.svg","isPro":false,"fullname":"Ah Lam","user":"iamanigeeit","type":"user"},{"_id":"6304a8b0bad6ce7fc02631b2","avatarUrl":"/avatars/951b5fc25b8a3d668a74da457f1caa77.svg","isPro":false,"fullname":"Ben Milde","user":"milde","type":"user"},{"_id":"63e302cdc123238b189ea573","avatarUrl":"/avatars/514ef8350bf2e32efab6a9bdaa5cd071.svg","isPro":false,"fullname":"lyl","user":"Liyunlai","type":"user"},{"_id":"61caeda441f9432649f03ab6","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61caeda441f9432649f03ab6/IazJBCi7Cr34DgZXZeI4k.jpeg","isPro":true,"fullname":"s3nh","user":"s3nh","type":"user"},{"_id":"63eba2774dcaaf087638e3d6","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1676386919413-noauth.jpeg","isPro":false,"fullname":"Jiang","user":"Dongwei","type":"user"},{"_id":"61d2997c22fde1aa587e50f4","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1678522676915-61d2997c22fde1aa587e50f4.jpeg","isPro":false,"fullname":"Freddy Cheng","user":"freddy5566","type":"user"},{"_id":"63d954096b496a404a438a9c","avatarUrl":"/avatars/3690eb339055a3b49a4559e149bc3ad3.svg","isPro":false,"fullname":"Naveen Kumar B C","user":"naveenbc","type":"user"},{"_id":"63535541a023833e59d9c436","avatarUrl":"/avatars/f2bae1dc8645bd854252cc33fec88361.svg","isPro":false,"fullname":"R 
D","user":"RichD","type":"user"},{"_id":"6453c3a486171838061ecef8","avatarUrl":"/avatars/f4d45b716d60f8872e6601b234bbe93c.svg","isPro":false,"fullname":"Masao Someki","user":"ms180","type":"user"},{"_id":"617a7340f53050061a7294ed","avatarUrl":"/avatars/e502d1267c2fc50c9fd67d799ab9a839.svg","isPro":false,"fullname":"Yerb Khas","user":"khassanoff","type":"user"},{"_id":"62b1fe30b9bc778fe4e68c5d","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/62b1fe30b9bc778fe4e68c5d/3qVYitofCIBloSN0be7pp.jpeg","isPro":false,"fullname":"Kalvin Chang","user":"kalbin","type":"user"},{"_id":"64770ff6d7cf39f2e937b882","avatarUrl":"/avatars/7b729501856f5073cf9e484811012f2a.svg","isPro":true,"fullname":"Jinchuan Tian","user":"JinchuanTian","type":"user"},{"_id":"62d3bb4524a626ee94743132","avatarUrl":"/avatars/1a941f067799b2f8b367e9b7a7df526b.svg","isPro":false,"fullname":"Omer Lerinman","user":"Omerler","type":"user"},{"_id":"648cd0277f7821f063c16e9a","avatarUrl":"/avatars/cb3d7745f22010d8a3f1b6a28975a561.svg","isPro":false,"fullname":"Xinjian Li","user":"xinjianl","type":"user"},{"_id":"6022429d479c2f635564dd90","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/6022429d479c2f635564dd90/_NkUH29Za15SmMlw4Eh_M.jpeg","isPro":true,"fullname":"Debashish C","user":"d3bach","type":"user"},{"_id":"64ab544489aa67e4a2505eeb","avatarUrl":"/avatars/f1a9def3afbec2f8b89ef4450770d67e.svg","isPro":false,"fullname":"Guangzhi Sun","user":"BrianatCambridge","type":"user"},{"_id":"64ba7c68710f5e5476877218","avatarUrl":"/avatars/1e5ad96dd0b237db1bfee400aa8ed229.svg","isPro":false,"fullname":"Kwanghee Choi","user":"juice500","type":"user"},{"_id":"6215f0b546a52a84070e7033","avatarUrl":"/avatars/6848bc8804b0b01fbf9c9d1782cee341.svg","isPro":false,"fullname":"Chenda 
Li","user":"lichenda","type":"user"},{"_id":"6433a37c82ca403c44e04839","avatarUrl":"/avatars/736c726c1fd18a8d7a695242d0d8b406.svg","isPro":false,"fullname":"IISc","user":"RESPIN","type":"user"},{"_id":"636507e1f31ef76df4fabb59","avatarUrl":"/avatars/8bfda121c79804837066777595ec2ee9.svg","isPro":false,"fullname":"Sunreeta B","user":"SunreetaB","type":"user"},{"_id":"64df10afe437d02ce6b1a98b","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/64df10afe437d02ce6b1a98b/owQTV-CzfePfvWFLM6MIe.jpeg","isPro":false,"fullname":"Heyang Liu","user":"SandO114","type":"user"},{"_id":"650470daae3094e771965494","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/noauth/oa1HrIHk8SppV3iGALezt.jpeg","isPro":false,"fullname":"Yichen Lu","user":"yichenl5","type":"user"},{"_id":"65088a112c804aed59b06fca","avatarUrl":"/avatars/0d5d8ff2ad91c72ba7c3c57ec10aad14.svg","isPro":false,"fullname":"Yihan Wu","user":"Yihan2023","type":"user"},{"_id":"6433b0ab7b82474801061035","avatarUrl":"/avatars/6851c3ceff313eddf915507d85483d46.svg","isPro":false,"fullname":"Minsu Kim","user":"ms-dot-k","type":"user"},{"_id":"650ecfd353b1e2d59e0b186e","avatarUrl":"/avatars/ba01c9c5707d41e94608e423632f2a00.svg","isPro":false,"fullname":"Kohei Saijo","user":"kohei0209","type":"user"},{"_id":"630f3e4002ce39336c411048","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/630f3e4002ce39336c411048/Dbb855C55XdPd3rRH9auc.jpeg","isPro":false,"fullname":"alkinun","user":"AtAndDev","type":"user"},{"_id":"64d2412e40a5c53f3b13d30e","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/64d2412e40a5c53f3b13d30e/pZ15bPQetaG3_-x3T6hQV.png","isPro":false,"fullname":"Jethro Wang","user":"jethrowang","type":"user"},{"_id":"64211062b286e8c464f8c6fe","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/64211062b286e8c464f8c6fe/9hzmU-6nhZ8FwxXCbRftk.jpeg","isPro":false,"fullname":"Connor 
Henderson","user":"connor-henderson","type":"user"},{"_id":"61f91cf54a8e5a275b2b3e7c","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1653243468328-61f91cf54a8e5a275b2b3e7c.jpeg","isPro":false,"fullname":"Sanchit Gandhi","user":"sanchit-gandhi","type":"user"},{"_id":"621ff334fa5492893dc03d82","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/621ff334fa5492893dc03d82/EAIr-l3O4OeM10f1boLux.jpeg","isPro":false,"fullname":"Xabier de Zuazo","user":"zuazo","type":"user"},{"_id":"6226f40ee1cc4da2221557a0","avatarUrl":"/avatars/9654850f5aeba1d7c19ae017096c78d7.svg","isPro":false,"fullname":"Jee-weon Jung","user":"jungjee","type":"user"},{"_id":"654e5cd6a74fa49eafbb4a43","avatarUrl":"/avatars/ef0a2421cef6bc787aacaa8017467851.svg","isPro":true,"fullname":"Gerardo Correa","user":"correage","type":"user"},{"_id":"6559c7ec86fbe7506eed034b","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/noauth/tBTBHakOJ904eN3EsszHP.jpeg","isPro":false,"fullname":"Patrick Jeffery","user":"Loomstone","type":"user"},{"_id":"651cfd6f0cd82f89cc3e2647","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/noauth/wLODGpf5quCUMaoSazZPN.jpeg","isPro":false,"fullname":"dl","user":"dlmn","type":"user"},{"_id":"64c510e84399efa2fd91c496","avatarUrl":"/avatars/f30a8d922687fd549e834106fe08fc99.svg","isPro":false,"fullname":"SHIH-HENG WANG","user":"Stanwang1210","type":"user"}],"userCount":172,"collections":[{"slug":"espnet/openbeats-691a59c719c6c652977e1506","title":"OpenBEATs","description":"OpenBEATs: A Fully Open-Source General-Purpose Audio 
Encoder","gating":false,"lastUpdated":"2025-11-16T23:20:46.003Z","owner":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"items":[{"_id":"691a5ad619c6c652977e1e58","position":0,"type":"paper","id":"2507.14129","title":"OpenBEATs: A Fully Open-Source General-Purpose Audio Encoder","thumbnailUrl":"https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2507.14129.png","upvotes":11,"publishedAt":"2025-07-18T17:57:46.000Z","isUpvotedByUser":false},{"_id":"691a59ef473d4d4997eac814","position":1,"type":"model","author":"shikhar7ssu","authorData":{"_id":"66ccd2fcdba9f642125e3a55","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/66ccd2fcdba9f642125e3a55/hTKwQQahkbpNuGlaRhKp7.jpeg","fullname":"Shikhar Bharadwaj","name":"shikhar7ssu","type":"user","isPro":false,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":2,"isUserFollowing":false},"downloads":5,"gated":false,"id":"shikhar7ssu/OpenBEATs-Large-i2","availableInferenceProviders":[],"lastModified":"2025-07-21T22:43:47.000Z","likes":1,"private":false,"repoType":"model","isLikedByUser":false},{"_id":"691a5a1fb9d00158e245b63a","position":2,"type":"model","author":"shikhar7ssu","authorData":{"_id":"66ccd2fcdba9f642125e3a55","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/66ccd2fcdba9f642125e3a55/hTKwQQahkbpNuGlaRhKp7.jpeg","fullname":"Shikhar 
Bharadwaj","name":"shikhar7ssu","type":"user","isPro":false,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":2,"isUserFollowing":false},"downloads":6,"gated":false,"id":"shikhar7ssu/OpenBEATs-ICME","availableInferenceProviders":[],"lastModified":"2026-01-26T13:46:18.000Z","likes":0,"pipeline_tag":"audio-classification","private":false,"repoType":"model","isLikedByUser":false},{"_id":"691a5a1a852038da6d2452ef","position":3,"type":"model","author":"shikhar7ssu","authorData":{"_id":"66ccd2fcdba9f642125e3a55","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/66ccd2fcdba9f642125e3a55/hTKwQQahkbpNuGlaRhKp7.jpeg","fullname":"Shikhar Bharadwaj","name":"shikhar7ssu","type":"user","isPro":false,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":2,"isUserFollowing":false},"downloads":6,"gated":false,"id":"shikhar7ssu/OpenBEATs-ICME-SOUND","availableInferenceProviders":[],"lastModified":"2026-01-26T13:46:31.000Z","likes":0,"pipeline_tag":"audio-classification","private":false,"repoType":"model","isLikedByUser":false}],"position":0,"theme":"indigo","private":false,"shareUrl":"https://hf.co/collections/espnet/openbeats","upvotes":1,"isUpvotedByUser":false},{"slug":"espnet/arecho-series-684c5b1b6e51669439b37bd5","title":"ARECHO 
Series","description":"","gating":false,"lastUpdated":"2025-06-13T17:09:16.892Z","owner":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"items":[{"_id":"684c5b2a06a26a76ae4ef550","position":0,"type":"model","author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"downloads":4,"gated":false,"id":"espnet/arecho_base_v0","availableInferenceProviders":[],"lastModified":"2025-06-13T07:59:43.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"_id":"684c5b30f71281e1bb601a22","position":1,"type":"model","author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"downloads":1,"gated":false,"id":"espnet/arecho_scale_v0","availableInferenceProviders":[],"lastModified":"2025-06-13T08:00:43.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"_id":"684c5b39f516aad0afaa256d","position":2,"type":"model","author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"downloads":1,"gated":false,"id":"espnet/arecho_base_v0.1-large-decoder","
availableInferenceProviders":[],"lastModified":"2025-06-13T07:58:02.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"_id":"684c5b23444b1d8855c72464","position":3,"type":"model","author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"downloads":1,"gated":false,"id":"espnet/arecho_scale_v0.1-large-decoder","availableInferenceProviders":[],"lastModified":"2025-06-13T07:57:07.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false}],"position":1,"theme":"indigo","private":false,"shareUrl":"https://hf.co/collections/espnet/arecho-series","upvotes":0,"isUpvotedByUser":false},{"slug":"espnet/opuslm-68350ca5a3262d6b1e98a153","title":"OpusLM","description":"The OpusLM collections","gating":false,"lastUpdated":"2025-05-27T00:57:34.978Z","owner":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"items":[{"_id":"68350d068c6b65c050826519","position":0,"type":"model","author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"downloads":4,"gated":false,"id":"espnet/OpusLM_7B_Anneal","availableInferenceProviders":[],"lastModified":"2025-09-12T15:55:24.000Z","likes":2,"private":false,"repoType":"model","isLikedByUser":false},{"_id":"68350d0c5a7f669d6a0950f3","position":1,"type":"model","author"
:"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"downloads":70,"gated":false,"id":"espnet/OpusLM_1.7B_Anneal","availableInferenceProviders":[],"lastModified":"2025-09-12T15:56:56.000Z","likes":1,"private":false,"repoType":"model","isLikedByUser":false}],"position":2,"theme":"green","private":false,"shareUrl":"https://hf.co/collections/espnet/opuslm","upvotes":1,"isUpvotedByUser":false},{"slug":"espnet/universa-6834e7c0a28225bffb6e2526","title":"UniVERSA","description":"","gating":false,"lastUpdated":"2025-09-10T05:39:46.321Z","owner":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"items":[{"_id":"684c956817ae31ba664f6342","position":0,"type":"model","author":"vvwangvv","authorData":{"_id":"62a7de44f245503845c98dd8","avatarUrl":"/avatars/0b51404a3419a2975e80b7c3160a4792.svg","fullname":"wei 
wang","name":"vvwangvv","type":"user","isPro":false,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":1,"isUserFollowing":false},"downloads":47,"gated":false,"id":"vvwangvv/universa_ext-wavlm_base_urgent24_urgent25_multi-metric_noref","availableInferenceProviders":[],"lastModified":"2025-06-13T21:09:21.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"_id":"6834e7f11c0441efb553ba31","position":1,"type":"model","author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"downloads":114,"gated":false,"id":"espnet/universa-wavlm_base_urgent24_multi-metric_noref","availableInferenceProviders":[],"lastModified":"2025-05-28T22:14:38.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"_id":"6834e7d370d215849acc467a","position":2,"type":"model","author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"downloads":0,"gated":false,"id":"espnet/universa-wavlm_base_urgent24_multi-metric_audioref","availableInferenceProviders":[],"lastModified":"2025-05-30T03:18:34.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"_id":"6834e7fa6a599f6dbbb81890","position":3,"type":"model","author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},
"downloads":0,"gated":false,"id":"espnet/universa-wavlm_base_urgent24_multi-metric_textref","availableInferenceProviders":[],"lastModified":"2025-05-30T03:18:59.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false}],"position":3,"theme":"orange","private":false,"shareUrl":"https://hf.co/collections/espnet/universa","upvotes":0,"isUpvotedByUser":false},{"slug":"espnet/codec-survey-pre-trained-models-67ce8e09568b741d1c4483c8","title":"Codec Survey - Pre-trained Models","description":"","gating":false,"lastUpdated":"2025-03-10T07:02:48.780Z","owner":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"items":[{"_id":"67ce8e382881b5ff582d6eeb","position":0,"type":"model","author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"downloads":0,"gated":false,"id":"espnet/dac_16k_all_survey","availableInferenceProviders":[],"lastModified":"2025-01-02T10:03:48.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"_id":"67ce8e479742eb0905baef7a","position":1,"type":"model","author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"downloads":3,"gated":false,"id":"espnet/dac_16k_music_survey","availableInferenceProviders":[],"lastModified":"2025-01-07T08:17:48.000Z","likes":0,"private":false,"re
poType":"model","isLikedByUser":false},{"_id":"67ce8e4e547e3ec05ec28aae","position":2,"type":"model","author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"downloads":3,"gated":false,"id":"espnet/dac_44k_audio_single_survey","availableInferenceProviders":[],"lastModified":"2025-01-07T08:28:11.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"_id":"67ce8e58dda4f49361aa48f5","position":3,"type":"model","author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"downloads":3,"gated":false,"id":"espnet/dac_16k_music_single_survey","availableInferenceProviders":[],"lastModified":"2025-01-07T08:17:24.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false}],"position":4,"theme":"indigo","private":false,"shareUrl":"https://hf.co/collections/espnet/codec-survey-pre-trained-models","upvotes":0,"isUpvotedByUser":false},{"slug":"espnet/owsm-fully-open-speech-recognition-and-translation-models-67ab7954e79384a35530e79d","title":"OWSM: Fully Open Speech Recognition and Translation Models","description":"A collection of models related to the Open Whisper-style Speech Models (OWSM) project from CMU: 
https://www.wavlab.org/activities/2024/owsm/","gating":false,"lastUpdated":"2025-03-08T00:21:24.804Z","owner":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"items":[{"_id":"67abf925399eff1ac327991b","position":0,"type":"model","author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"downloads":104,"gated":false,"id":"espnet/owsm_ctc_v3.2_ft_1B","availableInferenceProviders":[],"lastModified":"2025-08-30T21:56:02.000Z","likes":5,"pipeline_tag":"automatic-speech-recognition","private":false,"repoType":"model","isLikedByUser":false},{"_id":"67ab79aa165cdcdf645520c9","position":1,"type":"model","author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"downloads":130,"gated":false,"id":"espnet/owsm_ctc_v3.1_1B","availableInferenceProviders":[],"lastModified":"2025-08-30T21:55:42.000Z","likes":14,"pipeline_tag":"automatic-speech-recognition","private":false,"repoType":"model","isLikedByUser":false},{"_id":"67abf91d401012b8103ec5ac","position":2,"type":"model","author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":f
alse,"followerCount":340,"isUserFollowing":false},"downloads":182,"gated":false,"id":"espnet/owsm_v3.1_ebf","availableInferenceProviders":[],"lastModified":"2025-08-30T21:54:56.000Z","likes":17,"pipeline_tag":"automatic-speech-recognition","private":false,"repoType":"model","isLikedByUser":false},{"_id":"67ab79b8b9fb317984f4c20b","position":3,"type":"model","author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"downloads":54,"gated":false,"id":"espnet/owsm_v3.1_ebf_small","availableInferenceProviders":[],"lastModified":"2025-08-30T21:55:09.000Z","likes":2,"pipeline_tag":"automatic-speech-recognition","private":false,"repoType":"model","isLikedByUser":false}],"position":5,"theme":"pink","private":false,"shareUrl":"https://hf.co/collections/espnet/owsm-fully-open-speech-recognition-and-translation-models","upvotes":3,"isUpvotedByUser":false},{"slug":"espnet/owls-scaling-laws-for-speech-recognition-and-translation-67ab7f991c194065f057ce8d","title":"OWLS: Scaling Laws for Speech Recognition and Translation ","description":"🦉 A suite of Whisper-style models from 250M to 18B parameters. Trained on up to 360K hours of data. 
16k sampling rate.","gating":false,"lastUpdated":"2025-05-03T01:22:38.918Z","owner":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"items":[{"_id":"67aea9a003b0a8a71d56905a","position":0,"type":"model","author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"downloads":1,"gated":false,"id":"espnet/owls_4B_180K","availableInferenceProviders":[],"lastModified":"2025-05-03T01:22:55.000Z","likes":5,"pipeline_tag":"automatic-speech-recognition","private":false,"repoType":"model","isLikedByUser":false},{"_id":"67aea9a75a0f889055610b4e","position":1,"type":"model","author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"downloads":2,"gated":false,"id":"espnet/owls_9B_180K","availableInferenceProviders":[],"lastModified":"2025-05-03T01:19:18.000Z","likes":0,"pipeline_tag":"automatic-speech-recognition","private":false,"repoType":"model","isLikedByUser":false},{"_id":"67bd4946597c068d32ed2fcc","position":2,"type":"model","author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing"
:false},"downloads":0,"gated":false,"id":"espnet/owls_05B_180K","availableInferenceProviders":[],"lastModified":"2025-05-03T01:23:08.000Z","likes":0,"pipeline_tag":"automatic-speech-recognition","private":false,"repoType":"model","isLikedByUser":false},{"_id":"67bd494a0417e7f92281b7d8","position":3,"type":"model","author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"downloads":1,"gated":false,"id":"espnet/owls_025B_180K","availableInferenceProviders":[],"lastModified":"2025-05-03T01:23:15.000Z","likes":0,"pipeline_tag":"automatic-speech-recognition","private":false,"repoType":"model","isLikedByUser":false}],"position":6,"theme":"blue","private":false,"shareUrl":"https://hf.co/collections/espnet/owls-scaling-laws-for-speech-recognition-and-translation","upvotes":7,"isUpvotedByUser":false},{"slug":"espnet/owsm-ctc-ultra-fast-speech-foundation-models-67ab7a9d91a82bab571ca42c","title":"OWSM-CTC: Ultra-Fast Speech Foundation Models","description":"CTC-based models from the OWSM project, designed for fast non-autoregressive inference: 
https://www.wavlab.org/activities/2024/owsm/","gating":false,"lastUpdated":"2025-03-08T00:21:24.804Z","owner":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"items":[{"_id":"67ab7aeb24ba1d5454c20f13","position":0,"type":"model","author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"downloads":104,"gated":false,"id":"espnet/owsm_ctc_v3.2_ft_1B","availableInferenceProviders":[],"lastModified":"2025-08-30T21:56:02.000Z","likes":5,"pipeline_tag":"automatic-speech-recognition","private":false,"repoType":"model","isLikedByUser":false},{"_id":"67ab7adf218b09252a958b59","position":1,"type":"model","author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"downloads":130,"gated":false,"id":"espnet/owsm_ctc_v3.1_1B","availableInferenceProviders":[],"lastModified":"2025-08-30T21:55:42.000Z","likes":14,"pipeline_tag":"automatic-speech-recognition","private":false,"repoType":"model","isLikedByUser":false}],"position":7,"theme":"purple","private":false,"shareUrl":"https://hf.co/collections/espnet/owsm-ctc-ultra-fast-speech-foundation-models","upvotes":1,"isUpvotedByUser":false},{"slug":"espnet/neural-codecs-67cb8c96859c53a6131a85ec","title":"Neural Codecs","description":"Collection of neural codecs trained in ESPnet for speech 
tokenization","gating":false,"lastUpdated":"2025-03-08T00:21:24.809Z","owner":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"items":[{"_id":"67cb8caa1a936a9bed72b2e7","position":0,"type":"model","author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"downloads":3,"gated":false,"id":"espnet/dac_16k_music_survey","availableInferenceProviders":[],"lastModified":"2025-01-07T08:17:48.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"_id":"67cb8cae15882735a4f5ab7f","position":1,"type":"model","author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"downloads":3,"gated":false,"id":"espnet/dac_44k_audio_single_survey","availableInferenceProviders":[],"lastModified":"2025-01-07T08:28:11.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"_id":"67cb8cb18a3f9443b8af2a40","position":2,"type":"model","author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"downloads":2,"gated":false,"id":"espnet/dac_16k_speech_single_surv
ey","availableInferenceProviders":[],"lastModified":"2025-01-02T10:14:15.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"_id":"67cb8cb54920ce9952792aef","position":3,"type":"model","author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"downloads":1,"gated":false,"id":"espnet/dac_16k_all_single_survey","availableInferenceProviders":[],"lastModified":"2025-01-02T10:28:03.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false}],"position":8,"theme":"green","private":false,"shareUrl":"https://hf.co/collections/espnet/neural-codecs","upvotes":0,"isUpvotedByUser":false},{"slug":"espnet/xeus-model-and-data-67cb8bccfc8c1e674a43c89a","title":"XEUS Model and Data","description":"Data and models used for EMNLP 2024 Best Paper \"Towards Robust Speech Representation Learning for Thousands of 
Languages\"","gating":false,"lastUpdated":"2025-03-08T00:21:24.808Z","owner":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"items":[{"_id":"67cb8bec3dcbd6ef9ee56967","position":0,"type":"dataset","author":"espnet","downloads":322,"gated":false,"id":"espnet/mms_ulab_v2","lastModified":"2025-02-04T21:50:02.000Z","datasetsServerInfo":{"viewer":"viewer-partial","numRows":20701,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["audio","text"]},"private":false,"repoType":"dataset","likes":25,"isLikedByUser":false,"isBenchmark":false},{"_id":"67cb8bf2eee0ee19a65d0f00","position":1,"type":"dataset","author":"espnet","downloads":64,"gated":false,"id":"espnet/wikitongues","lastModified":"2024-07-02T05:36:56.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":820,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["audio","text"]},"private":false,"repoType":"dataset","likes":4,"isLikedByUser":false,"isBenchmark":false},{"_id":"67cb8bf8a97c24d5fcd8fab3","position":2,"type":"model","author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"downloads":24,"gated":false,"id":"espnet/xeus","availableInferenceProviders":[],"lastModified":"2025-06-17T02:46:08.000Z","likes":144,"pipeline_tag":"automatic-speech-recognition","private":false,"repoType":"model","isLikedByUser":false},{"_id":"67cb8c002ec2ca72a2c04565","position":3,"type":"dataset","author":"espnet","downloads":75,"gated":false,"id":"espnet/jesus_dram
as","lastModified":"2024-07-02T05:36:29.000Z","datasetsServerInfo":{"viewer":"viewer-partial","numRows":397,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["audio","text"]},"private":false,"repoType":"dataset","likes":4,"isLikedByUser":false,"isBenchmark":false}],"position":9,"theme":"blue","private":false,"shareUrl":"https://hf.co/collections/espnet/xeus-model-and-data","upvotes":0,"isUpvotedByUser":false}],"datasets":[{"author":"espnet","downloads":124,"gated":false,"id":"espnet/data_part4","lastModified":"2026-02-22T20:55:31.000Z","private":false,"repoType":"dataset","likes":0,"isLikedByUser":false,"isBenchmark":false},{"author":"espnet","downloads":8,"gated":false,"id":"espnet/v2_data","lastModified":"2025-12-18T04:54:33.000Z","private":false,"repoType":"dataset","likes":0,"isLikedByUser":false,"isBenchmark":false},{"author":"espnet","downloads":4,"gated":false,"id":"espnet/librispeech_arkive","lastModified":"2025-11-13T01:48:28.000Z","private":false,"repoType":"dataset","likes":0,"isLikedByUser":false,"isBenchmark":false},{"author":"espnet","downloads":1070,"gated":false,"id":"espnet/yodas_owsmv4","lastModified":"2025-09-01T21:12:13.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":4,"libraries":["datasets","mlcroissant"],"formats":["imagefolder"],"modalities":["image","text"]},"private":false,"repoType":"dataset","likes":17,"isLikedByUser":false,"isBenchmark":false},{"author":"espnet","downloads":29309,"gated":false,"id":"espnet/yodas-granary","lastModified":"2025-08-08T15:48:18.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":67622750,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["audio","text"]},"private":false,"repoType":"dataset","likes":26,"isLikedByUser":false,"isBenchmark":false},{"author":"espnet","downloads":48,"gated":false,"id":"espnet/kising_score_segments","lastModified":"2025-07-09T20:53:01.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":8
33,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false,"isBenchmark":false},{"author":"espnet","downloads":53649,"gated":false,"id":"espnet/yodas2","lastModified":"2025-05-15T22:28:55.000Z","private":false,"repoType":"dataset","likes":47,"isLikedByUser":false,"isBenchmark":false},{"author":"espnet","downloads":65,"gated":false,"id":"espnet/DSUChallenge2024","lastModified":"2025-03-20T03:01:30.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":217683,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["audio","text"]},"private":false,"repoType":"dataset","likes":1,"isLikedByUser":false,"isBenchmark":false},{"author":"espnet","downloads":322,"gated":false,"id":"espnet/mms_ulab_v2","lastModified":"2025-02-04T21:50:02.000Z","datasetsServerInfo":{"viewer":"viewer-partial","numRows":20701,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["audio","text"]},"private":false,"repoType":"dataset","likes":25,"isLikedByUser":false,"isBenchmark":false},{"author":"espnet","downloads":176,"gated":false,"id":"espnet/ml_superb_hf","lastModified":"2024-12-28T14:30:45.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":153877,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["audio","text"]},"private":false,"repoType":"dataset","likes":5,"isLikedByUser":false,"isBenchmark":false}],"models":[{"author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"downloads":0,"gated":false,"id":"espnet/ci_tools","availableInferenceProviders":[],"lastModified":"2026-02-22T13:34:04.000Z","like
s":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"downloads":0,"gated":false,"id":"espnet/BSCodec","availableInferenceProviders":[],"lastModified":"2026-02-22T10:22:17.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"downloads":0,"gated":false,"id":"espnet/eurospeech_portugal_owsm_v4_medium_1B","availableInferenceProviders":[],"lastModified":"2026-02-21T00:02:49.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"downloads":7,"gated":false,"id":"espnet/ms_snsd_tfgridnet","availableInferenceProviders":[],"lastModified":"2026-02-16T23:50:26.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"downloads":37,"gated":false,"id":"espnet/
powsm_ctc","availableInferenceProviders":[],"lastModified":"2026-01-21T18:59:15.000Z","likes":2,"pipeline_tag":"automatic-speech-recognition","private":false,"repoType":"model","isLikedByUser":false},{"author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"downloads":196,"gated":false,"id":"espnet/powsm","availableInferenceProviders":[],"lastModified":"2026-01-21T18:57:18.000Z","likes":9,"pipeline_tag":"automatic-speech-recognition","private":false,"repoType":"model","isLikedByUser":false},{"author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"downloads":1,"gated":false,"id":"espnet/xun_tal_zh_adult_teach_branchformer","availableInferenceProviders":[],"lastModified":"2025-12-17T22:09:08.000Z","likes":0,"pipeline_tag":"automatic-speech-recognition","private":false,"repoType":"model","isLikedByUser":false},{"author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"downloads":0,"gated":false,"id":"espnet/xeus_ckpts","availableInferenceProviders":[],"lastModified":"2025-12-06T19:15:36.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/16252
24006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"downloads":3,"gated":false,"id":"espnet/mixdata_svs_visinger2_spkemb_lang_pretrained","availableInferenceProviders":[],"lastModified":"2025-11-28T00:13:51.000Z","likes":1,"pipeline_tag":"text-to-audio","private":false,"repoType":"model","isLikedByUser":false},{"author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"downloads":9,"gated":false,"id":"espnet/aceopencpop_svs_visinger2_40singer_pretrain","availableInferenceProviders":[],"lastModified":"2025-11-28T00:13:41.000Z","likes":0,"pipeline_tag":"text-to-audio","private":false,"repoType":"model","isLikedByUser":false}],"paperPreviews":[],"spaces":[{"author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"colorFrom":"green","colorTo":"indigo","createdAt":"2025-10-31T21:46:06.000Z","emoji":"🥇","id":"espnet/TheESPnetLeaderBoard","lastModified":"2025-12-31T10:43:43.000Z","likes":0,"pinned":true,"private":false,"sdk":"gradio","repoType":"space","runtime":{"stage":"RUNNING","hardware":{"current":"cpu-basic","requested":"cpu-basic"},"storage":null,"gcTimeout":172800,"replicas":{"current":1,"requested":1},"devMode":false,"domains":[{"domain":"espnet-theespnetleaderboard.hf.space","stage":"READY"}],"sha":"4012180015890821937138ed707ac7279512c76c"},"shortDescription":"ESPnet 
Leaderboard","title":"TheESPnetLeaderBoard","isLikedByUser":false,"ai_short_description":"View and submit language model evaluations","ai_category":"Model Benchmarking","trendingScore":0,"tags":["gradio","leaderboard","region:us"],"featured":false},{"author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"colorFrom":"blue","colorTo":"pink","createdAt":"2026-02-09T04:16:49.000Z","emoji":"📊","id":"espnet/Voice_Assistant_Demo","lastModified":"2026-01-20T20:34:44.000Z","likes":0,"pinned":false,"private":false,"sdk":"gradio","repoType":"space","runtime":{"stage":"RUNTIME_ERROR","hardware":{"current":null,"requested":"cpu-basic"},"storage":null,"gcTimeout":172800,"errorMessage":"Exit code: 1. Reason: rgs...)` is deprecated. Please use `torch.amp.autocast('cuda', args...)` instead.\n @torch.cuda.amp.autocast(enabled=False)\n/usr/local/lib/python3.9/site-packages/espnet2/enh/layers/bsrnn.py:331: FutureWarning: `torch.cuda.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cuda', args...)` instead.\n @torch.cuda.amp.autocast(enabled=False)\n/usr/local/lib/python3.9/site-packages/espnet2/enh/separator/tfgridnetv3_separator.py:369: FutureWarning: `torch.cuda.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cuda', args...)` instead.\n @torch.cuda.amp.autocast(enabled=False)\n/usr/local/lib/python3.9/site-packages/espnet2/enh/layers/uses.py:392: FutureWarning: `torch.cuda.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cuda', args...)` instead.\n @torch.cuda.amp.autocast(enabled=False)\n/usr/local/lib/python3.9/site-packages/espnet2/enh/layers/uses.py:421: FutureWarning: `torch.cuda.amp.autocast(args...)` is deprecated. 
Please use `torch.amp.autocast('cuda', args...)` instead.\n @torch.cuda.amp.autocast(enabled=False)\n/usr/local/lib/python3.9/site-packages/espnet2/enh/decoder/stft_decoder.py:58: FutureWarning: `torch.cuda.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cuda', args...)` instead.\n @torch.cuda.amp.autocast(enabled=False)\n/usr/local/lib/python3.9/site-packages/espnet2/enh/encoder/stft_encoder.py:79: FutureWarning: `torch.cuda.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cuda', args...)` instead.\n @torch.cuda.amp.autocast(enabled=False)\nTraceback (most recent call last):\n File \"/app/app.py\", line 29, in \n dialogue_model = ESPnetSDSModelInterface(\n File \"/usr/local/lib/python3.9/site-packages/typeguard/__init__.py\", line 1032, in wrapper\n check_argument_types(memo)\n File \"/usr/local/lib/python3.9/site-packages/typeguard/__init__.py\", line 875, in check_argument_types\n raise TypeError(*exc.args) from None\nTypeError: type of argument \"access_token\" must be str; got NoneType instead\n","replicas":{"requested":1},"devMode":false,"domains":[{"domain":"espnet-voice-assistant-demo.hf.space","stage":"READY"}]},"title":"Voice Assistant Demo","isLikedByUser":false,"originRepo":{"name":"wanchichen/Voice_Assistant_Demo","author":{"_id":"630438615c70c21d0eae6613","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/630438615c70c21d0eae6613/KEHsB-znDIKHt7M0eyP8j.png","fullname":"William 
Chen","name":"wanchichen","type":"user","isPro":true,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":5,"isUserFollowing":false}},"trendingScore":0,"tags":["gradio","region:us"],"featured":false},{"author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"colorFrom":"pink","colorTo":"yellow","createdAt":"2025-07-04T21:06:38.000Z","emoji":"🎶","id":"espnet/SingingSDS","lastModified":"2025-11-25T04:16:04.000Z","likes":0,"pinned":false,"private":false,"sdk":"gradio","repoType":"space","runtime":{"stage":"SLEEPING","hardware":{"current":null,"requested":"cpu-basic"},"storage":null,"gcTimeout":172800,"replicas":{"requested":1},"devMode":false,"domains":[{"domain":"espnet-singingsds.hf.space","stage":"READY"}]},"title":"SingingSDS","isLikedByUser":false,"ai_short_description":"Generate text with a customizable interface","ai_category":"Text 
Generation","trendingScore":0,"tags":["gradio","region:us"],"featured":false},{"author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"colorFrom":"blue","colorTo":"pink","createdAt":"2025-08-27T16:31:48.000Z","emoji":"🌍","id":"espnet/OWSM_V4_Demo","lastModified":"2025-11-19T19:00:18.000Z","likes":10,"pinned":false,"private":false,"sdk":"gradio","repoType":"space","runtime":{"stage":"RUNNING","hardware":{"current":"zero-a10g","requested":"zero-a10g"},"storage":null,"gcTimeout":172800,"replicas":{"current":1,"requested":1},"devMode":false,"domains":[{"domain":"espnet-owsm-v4-demo.hf.space","stage":"READY"}],"sha":"0dc13537cdc7f567944ab8da6f9ee3a91fbfcc87"},"shortDescription":"This is a demo for OWSM-V4 CTC and medium model.","title":"OWSM V4 Demo","isLikedByUser":false,"ai_short_description":"Transcribe and translate speech into text","ai_category":"Speech 
Synthesis","trendingScore":0,"tags":["gradio","region:us"],"featured":false},{"author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"colorFrom":"pink","colorTo":"yellow","createdAt":"2024-10-27T06:45:38.000Z","emoji":"💻","id":"espnet/svs","lastModified":"2024-10-29T07:49:28.000Z","likes":1,"pinned":false,"private":false,"sdk":"gradio","repoType":"space","runtime":{"stage":"RUNNING","hardware":{"current":"cpu-basic","requested":"cpu-basic"},"storage":null,"gcTimeout":172800,"replicas":{"current":1,"requested":1},"devMode":false,"domains":[{"domain":"espnet-svs.hf.space","stage":"READY"}],"sha":"9dd50f58a97de658f63b0515ed22d0812e2ae9a8"},"title":"Svs","isLikedByUser":false,"ai_short_description":"Generate singing voice from lyrics, duration, and pitch","ai_category":"Singing Voice 
Synthesis","trendingScore":0,"tags":["gradio","region:us"],"featured":false},{"author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"colorFrom":"blue","colorTo":"pink","createdAt":"2023-04-06T14:07:47.000Z","emoji":"🌖","id":"espnet/TTS","lastModified":"2024-10-21T13:51:34.000Z","likes":1,"pinned":false,"private":false,"sdk":"gradio","repoType":"space","runtime":{"stage":"RUNNING","hardware":{"current":"cpu-basic","requested":"cpu-basic"},"storage":null,"gcTimeout":86400,"replicas":{"current":1,"requested":1},"devMode":false,"domains":[{"domain":"espnet-tts.hf.space","stage":"READY"}],"sha":"145cc8d8d75a9618a21153555ac5b9959311a1c6"},"title":"TTS","isLikedByUser":false,"ai_short_description":"Greet someone by name","ai_category":"Text 
Generation","trendingScore":0,"tags":["gradio","region:us"],"featured":false}],"buckets":[],"numBuckets":0,"numDatasets":23,"numModels":656,"numSpaces":8,"lastOrgActivities":[{"time":"2026-02-22T12:54:51.142Z","user":"Fhrozen","userAvatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625295625494-60d28bba010d938bba5c6ae9.jpeg","orgAvatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","type":"update","repoData":{"author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"downloads":0,"gated":false,"id":"espnet/ci_tools","availableInferenceProviders":[],"lastModified":"2026-02-22T13:34:04.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},"repoId":"espnet/ci_tools","repoType":"model","org":"espnet"},{"time":"2026-02-22T10:22:17.951Z","user":"whr-a","userAvatarUrl":"/avatars/26df8cd335daf07066d3a5fab14443de.svg","orgAvatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","type":"update","repoData":{"author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"downloads":0,"gated":false,"id":"espnet/BSCodec","availableInferenceProviders":[],"lastModified":"2026-02-22T10:22:17.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},"repoId":"espnet/BSCodec","repoType":"model","org":"espnet"},{"time":"2026-02-22T10:05:06.341Z","user":"whr-a","userAvatarUrl":"/avatars/26df8cd335daf07066d3a5fab144
43de.svg","orgAvatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","type":"publish","repoData":{"author":"espnet","authorData":{"_id":"60def300c8fcb7a9bbaf4cf9","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1625224006560-60d28bba010d938bba5c6ae9.png","fullname":"ESPnet","name":"espnet","type":"org","isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":340,"isUserFollowing":false},"downloads":0,"gated":false,"id":"espnet/BSCodec","availableInferenceProviders":[],"lastModified":"2026-02-22T10:22:17.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},"repoId":"espnet/BSCodec","repoType":"model","org":"espnet"}],"acceptLanguages":["*"],"canReadRepos":false,"canReadSpaces":false,"blogPosts":[],"currentRepoPage":0,"filters":{},"paperView":false}">

AI & ML interests

voice-conversion speech-separation speech-enhancement speech-translation speech-synthesis speech-recognition spoken-language-understanding

Recent Activity

Fhrozen  updated a model about 11 hours ago
espnet/ci_tools
whr-a  updated a model about 14 hours ago
espnet/BSCodec
whr-a  published a model about 14 hours ago
espnet/BSCodec
View all activity

ESPnet: end-to-end speech processing toolkit

ESPnet is an end-to-end speech processing toolkit covering end-to-end speech recognition, text-to-speech, speech translation, speech enhancement, speaker diarization, spoken language understanding, and so on. ESPnet uses PyTorch as a deep learning engine and also follows Kaldi-style data processing, feature extraction/format, and recipes to provide a complete setup for various speech processing experiments.

Citing ESPnet
@inproceedings{watanabe2018espnet,
  author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson {Enrique Yalta Soplin} and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
  title={{ESPnet}: End-to-End Speech Processing Toolkit},
  year={2018},
  booktitle={Proceedings of Interspeech},
  pages={2207--2211},
  doi={10.21437/Interspeech.2018-1456},
  url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
}

@inproceedings{hayashi2020espnet,
  title={{ESPnet-TTS}: Unified, reproducible, and integratable open source end-to-end text-to-speech toolkit},
  author={Hayashi, Tomoki and Yamamoto, Ryuichi and Inoue, Katsuki and Yoshimura, Takenori and Watanabe, Shinji and Toda, Tomoki and Takeda, Kazuya and Zhang, Yu and Tan, Xu},
  booktitle={Proceedings of IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
  pages={7654--7658},
  year={2020},
  organization={IEEE}
}

@inproceedings{inaguma-etal-2020-espnet,
    title = "{ESP}net-{ST}: All-in-One Speech Translation Toolkit",
    author = "Inaguma, Hirofumi  and
      Kiyono, Shun  and
      Duh, Kevin  and
      Karita, Shigeki  and
      Yalta, Nelson  and
      Hayashi, Tomoki  and
      Watanabe, Shinji",
    booktitle = "Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: System Demonstrations",
    month = jul,
    year = "2020",
    address = "Online",
    publisher = "Association for Computational Linguistics",
    url = "https://www.aclweb.org/anthology/2020.acl-demos.34",
    pages = "302--311",
}

@inproceedings{li2020espnet,
  title={{ESPnet-SE}: End-to-End Speech Enhancement and Separation Toolkit Designed for {ASR} Integration},
  author={Chenda Li and Jing Shi and Wangyou Zhang and Aswin Shanmugam Subramanian and Xuankai Chang and Naoyuki Kamo and Moto Hira and Tomoki Hayashi and Christoph Boeddeker and Zhuo Chen and Shinji Watanabe},
  booktitle={Proceedings of IEEE Spoken Language Technology Workshop (SLT)},
  pages={785--792},
  year={2021},
  organization={IEEE},
}

@article{arora2021espnet,
  title={{ESPnet-SLU}: Advancing Spoken Language Understanding through {ESPnet}},
  author={Arora, Siddhant and Dalmia, Siddharth and Denisov, Pavel and Chang, Xuankai and Ueda, Yushi and Peng, Yifan and Zhang, Yuekai and Kumar, Sujay and Ganesan, Karthik and Yan, Brian and others},
  journal={arXiv preprint arXiv:2111.14706},
  year={2021}
}

</details>