Deprecated: The each() function is deprecated. This message will be suppressed on further calls in /home/zhenxiangba/zhenxiangba.com/public_html/phproxy-improved-master/index.php on line 456 Paper page - Jina-ColBERT-v2: A General-Purpose Multilingual Late Interaction
Retriever
Please give a thumbs up to this comment if you found it helpful!
\n
If you want recommendations for any Paper on Hugging Face checkout this Space
\n
You can directly ask Librarian Bot for paper recommendations by tagging it in a comment: \n\n@librarian-bot\n\t recommend
\n","updatedAt":"2024-09-03T01:34:16.120Z","author":{"_id":"63d3e0e8ff1384ce6c5dd17d","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1674830754237-63d3e0e8ff1384ce6c5dd17d.jpeg","fullname":"Librarian Bot (Bot)","name":"librarian-bot","type":"user","isPro":false,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":318,"isUserFollowing":false}},"numEdits":0,"identifiedLanguage":{"language":"en","probability":0.7307335138320923},"editors":["librarian-bot"],"editorAvatarUrls":["https://cdn-avatars.huggingface.co/v1/production/uploads/1674830754237-63d3e0e8ff1384ce6c5dd17d.jpeg"],"reactions":[],"isReport":false}}],"primaryEmailConfirmed":false,"paper":{"id":"2408.16672","authors":[{"_id":"66d585b7092cc13d0f961d44","name":"Rohan Jha","hidden":false},{"_id":"66d585b7092cc13d0f961d45","user":{"_id":"63491dc83d8dc83a55cb749c","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/63491dc83d8dc83a55cb749c/IoqJrOIaEnYO_S7si4KGp.jpeg","isPro":false,"fullname":"Bo Wang","user":"bwang0911","type":"user"},"name":"Bo Wang","status":"extracted_confirmed","statusLastChangedAt":"2024-10-02T07:11:25.862Z","hidden":false},{"_id":"66d585b7092cc13d0f961d46","user":{"_id":"6476ff2699a5ce743ccea3fc","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/6476ff2699a5ce743ccea3fc/zmFmF8tXXDaAGcl8RYiRr.jpeg","isPro":false,"fullname":"Michael Günther","user":"michael-guenther","type":"user"},"name":"Michael Günther","status":"admin_assigned","statusLastChangedAt":"2024-09-02T09:35:08.111Z","hidden":false},{"_id":"66d585b7092cc13d0f961d47","user":{"_id":"64c23f6d569648a60737eddb","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/64c23f6d569648a60737eddb/iZq7bp-yYaGl5VBVoN5Dg.jpeg","isPro":false,"fullname":"Saba Sturua","user":"jupyterjazz","type":"user"},"name":"Saba Sturua","status":"admin_assigned","statusLastChangedAt":"2024-09-02T09:35:17.536Z","hidden":false},{"_id":"66d585b7092cc13d0f961d48","user":{"_id":"64d22f33032a420d1863b6ea","avatarUrl":"/avatars/ed3eaf4bab70dd6ab9a2b67b5928e4fb.svg","isPro":false,"fullname":"Mohammad Kalim Akram","user":"makram93","type":"user"},"name":"Mohammad Kalim Akram","status":"admin_assigned","statusLastChangedAt":"2024-09-02T09:35:23.144Z","hidden":false},{"_id":"66d585b7092cc13d0f961d49","user":{"_id":"603763514de52ff951d89793","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/603763514de52ff951d89793/n-QouGYg7oE5QeDaAb3Ns.png","isPro":false,"fullname":"Han Xiao","user":"hanxiao","type":"user"},"name":"Han Xiao","status":"admin_assigned","statusLastChangedAt":"2024-09-02T09:35:38.480Z","hidden":false}],"publishedAt":"2024-08-29T16:21:00.000Z","submittedOnDailyAt":"2024-09-02T08:00:33.428Z","title":"Jina-ColBERT-v2: A General-Purpose Multilingual Late Interaction\n Retriever","submittedOnDailyBy":{"_id":"62716952bcef985363db8485","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/62716952bcef985363db8485/zJPPo5xlwZRJdEuwYsYKp.jpeg","isPro":true,"fullname":"JB D.","user":"IAMJB","type":"user"},"summary":"Multi-vector dense models, such as ColBERT, have proven highly effective in\ninformation retrieval. ColBERT's late interaction scoring approximates the\njoint query-document attention seen in cross-encoders while maintaining\ninference efficiency closer to traditional dense retrieval models, thanks to\nits bi-encoder architecture and recent optimizations in indexing and search. In\nthis paper, we introduce several improvements to the ColBERT model architecture\nand training pipeline, leveraging techniques successful in the more established\nsingle-vector embedding model paradigm, particularly those suited for\nheterogeneous multilingual data. Our new model, Jina-ColBERT-v2, demonstrates\nstrong performance across a range of English and multilingual retrieval tasks,\nwhile also cutting storage requirements by up to 50% compared to previous\nmodels.","upvotes":9,"discussionId":"66d585b8092cc13d0f961d6e","ai_summary":"Jina-ColBERT-v2 enhances ColBERT's performance and reduces storage requirements through improvements to its architecture and training pipeline, benefiting multilingual retrieval tasks.","ai_keywords":["multi-vector dense models","ColBERT","late interaction scoring","joint query-document attention","cross-encoders","bi-encoder architecture","indexing","search","single-vector embedding","Jina-ColBERT-v2","heterogeneous multilingual data"],"organization":{"_id":"63563e0c2d14fcd7d83743cf","name":"jinaai","fullname":"Jina AI","avatar":"https://cdn-uploads.huggingface.co/production/uploads/603763514de52ff951d89793/wD54VbAHHyHop3uYlJKl4.png"}},"canReadDatabase":false,"canManagePapers":false,"canSubmit":false,"hasHfLevelAccess":false,"upvoted":false,"upvoters":[{"_id":"62716952bcef985363db8485","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/62716952bcef985363db8485/zJPPo5xlwZRJdEuwYsYKp.jpeg","isPro":true,"fullname":"JB D.","user":"IAMJB","type":"user"},{"_id":"648eb1eb59c4e5c87dc116e0","avatarUrl":"/avatars/c636cea39c2c0937f01398c94ead5dad.svg","isPro":false,"fullname":"fdsqefsgergd","user":"T-representer","type":"user"},{"_id":"620783f24e28382272337ba4","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/620783f24e28382272337ba4/zkUveQPNiDfYjgGhuFErj.jpeg","isPro":false,"fullname":"GuoLiangTang","user":"Tommy930","type":"user"},{"_id":"648e72a866dcba8b5aaecbdc","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/noauth/UKR1KF7s0OWCFQzsTrcuX.jpeg","isPro":false,"fullname":"Sergey Bratchikov","user":"hivaze","type":"user"},{"_id":"64c23f6d569648a60737eddb","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/64c23f6d569648a60737eddb/iZq7bp-yYaGl5VBVoN5Dg.jpeg","isPro":false,"fullname":"Saba Sturua","user":"jupyterjazz","type":"user"},{"_id":"639c2b60db7c5f35003f4168","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/639c2b60db7c5f35003f4168/wTIY98vCcpyPaR3zGz7hd.jpeg","isPro":false,"fullname":"李浩","user":"lihaocruiser","type":"user"},{"_id":"64a830cd6cc1a9a131f62619","avatarUrl":"/avatars/0c6ba301a66f2db73049c9fe0e97f2ef.svg","isPro":false,"fullname":"Isabelle Mohr","user":"isacat","type":"user"},{"_id":"663ccbff3a74a20189d4aa2e","avatarUrl":"/avatars/83a54455e0157480f65c498cd9057cf2.svg","isPro":false,"fullname":"Nguyen Van Thanh","user":"NguyenVanThanhHust","type":"user"},{"_id":"63107b18e87051f3e3e0f598","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/63107b18e87051f3e3e0f598/R9onir4Y0MZuq1jEWCZ2-.jpeg","isPro":false,"fullname":"Unchun Yang","user":"ucyang","type":"user"}],"acceptLanguages":["*"],"dailyPaperRank":0,"organization":{"_id":"63563e0c2d14fcd7d83743cf","name":"jinaai","fullname":"Jina AI","avatar":"https://cdn-uploads.huggingface.co/production/uploads/603763514de52ff951d89793/wD54VbAHHyHop3uYlJKl4.png"}}">
Jina-ColBERT-v2 enhances ColBERT's performance and reduces storage requirements through improvements to its architecture and training pipeline, benefiting multilingual retrieval tasks.