Deprecated: The each() function is deprecated. This message will be suppressed on further calls in /home/zhenxiangba/zhenxiangba.com/public_html/phproxy-improved-master/index.php on line 456
mohan007 (papasani)
[go: Go Back, main page]

https://huggingface.co/datasets/smolagents/aguvis-stage-1 & https://huggingface.co/datasets/smolagents/aguvis-stage-2
but below code function from https://github.com/huggingface/smol2operator/blob/main/recipe.ipynb
is causing the training to go for more than 10 days , is there any way to optimize the collate_fn function !!?
I am planning to release model in opensource after training
thanks !

\n

def create_collate_fn(processor, max_length: int):
\"\"\"Optimized collate function for VLM training that masks system prompt tokens.\"\"\"

\n
def collate_fn(examples: list[dict[str, list | str | Image.Image]]):\n    batch_messages: list[list[dict[str, list | str | Image.Image]]] = []\n    assistant_messages: list[list[str]] = []\n    all_image_inputs: list[list[Image.Image]] = []\n    for example in examples:\n        images: list[Image.Image] = example[\"images\"]\n        is_first_user = True\n        sample: list[dict[str, list | str | Image.Image]] = []\n        assistant: list[str] = []\n        for text in example[\"texts\"]:\n            if \"system\" in text.keys():\n                sample.append(\n                    {\n                        \"role\": \"system\",\n                        \"content\": [{\"type\": \"text\", \"text\": text[\"system\"]}],\n                    }\n                )\n\n            if is_first_user:\n                sample.append(\n                    {\n                        \"role\": \"user\",\n                        \"content\": [\n                            {\"type\": \"image\", \"image\": images[0]},\n                            {\"type\": \"text\", \"text\": text[\"user\"]},\n                        ],\n                    }\n                )\n                is_first_user = False\n            else:\n                sample.append(\n                    {\n                        \"role\": \"user\",\n                        \"content\": [\n                            {\"type\": \"text\", \"text\": text[\"user\"]},\n                        ],\n                    }\n                )\n\n            sample.append(\n                {\n                    \"role\": \"assistant\",\n                    \"content\": [{\"type\": \"text\", \"text\": \"\\n\" + text[\"assistant\"]}],\n                }\n            )\n            assistant.append(text[\"assistant\"])\n\n        batch_messages.append(sample)\n        assistant_messages.append(assistant)\n        all_image_inputs.append(images)\n\n    texts = [\n        processor.apply_chat_template(\n            messages, tokenize=False, add_generation_prompt=False\n        )\n        for messages in batch_messages\n    ]\n\n    batch = processor(\n        text=texts,\n        images=all_image_inputs if all_image_inputs else None,\n        max_length=max_length,\n        truncation=True,\n        padding=True,\n        return_tensors=\"pt\",\n    )\n\n    input_ids = batch[\"input_ids\"]\n    labels = input_ids.clone()\n\n    assistant_encodings = [\n        processor.tokenizer(\n            [msg + \"\" for msg in assistant_message],\n            add_special_tokens=False,\n            padding=False,\n        )[\"input_ids\"]\n        for assistant_message in assistant_messages\n    ]\n\n    # Mask out all except the assistant messages\n    for i, assistant_ids_list in enumerate(assistant_encodings):\n        seq = input_ids[i].tolist()\n        assistant_positions: list[int] = []\n        for ids in assistant_ids_list:\n            start_pos = 0\n            while start_pos < len(seq) - len(ids) + 1:\n                found = False\n                for j in range(start_pos, len(seq) - len(ids) + 1):\n                    if seq[j : j + len(ids)] == ids:\n                        assistant_positions.extend(range(j, j + len(ids)))\n                        start_pos = j + len(ids)\n                        found = True\n                        break\n                if not found:\n                    break\n\n        for pos in range(len(seq)):\n            if pos not in assistant_positions:\n                labels[i, pos] = -100\n\n    batch[\"labels\"] = labels\n    return batch\n\nreturn collate_fn\n
\n"}},{"time":"2026-01-05T03:17:35.357Z","user":"mohan007","userAvatarUrl":"/avatars/76f071524387aa3839252436f6e372c0.svg","type":"update","repoData":{"author":"mohan007","authorData":{"_id":"632450a960ff5fbf36e757d5","avatarUrl":"/avatars/76f071524387aa3839252436f6e372c0.svg","fullname":"papasani","name":"mohan007","type":"user","isPro":false,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":2,"isUserFollowing":false},"downloads":9,"gated":false,"id":"mohan007/SmolVLM2-256M-Instruct-Agentic-GUI","availableInferenceProviders":[],"lastModified":"2026-01-05T03:17:31.000Z","likes":0,"pipeline_tag":"image-text-to-text","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[],"numParameters":256484928},"repoId":"mohan007/SmolVLM2-256M-Instruct-Agentic-GUI","repoType":"model"},{"time":"2026-01-05T03:17:02.878Z","user":"mohan007","userAvatarUrl":"/avatars/76f071524387aa3839252436f6e372c0.svg","type":"publish","repoData":{"author":"mohan007","authorData":{"_id":"632450a960ff5fbf36e757d5","avatarUrl":"/avatars/76f071524387aa3839252436f6e372c0.svg","fullname":"papasani","name":"mohan007","type":"user","isPro":false,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":2,"isUserFollowing":false},"downloads":9,"gated":false,"id":"mohan007/SmolVLM2-256M-Instruct-Agentic-GUI","availableInferenceProviders":[],"lastModified":"2026-01-05T03:17:31.000Z","likes":0,"pipeline_tag":"image-text-to-text","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[],"numParameters":256484928},"repoId":"mohan007/SmolVLM2-256M-Instruct-Agentic-GUI","repoType":"model"}],"blogPosts":[],"totalBlogPosts":0,"canReadDatabase":false,"canManageEntities":false,"canReadEntities":false,"canImpersonate":false,"canManageBilling":false,"canReadRepos":false,"canReadSpaces":false,"communityScore":5,"collections":[],"datasets":[],"models":[{"author":"mohan007","authorData":{"_id":"632450a960ff5fbf36e757d5","avatarUrl":"/avatars/76f071524387aa3839252436f6e372c0.svg","fullname":"papasani","name":"mohan007","type":"user","isPro":false,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":2,"isUserFollowing":false},"downloads":9,"gated":false,"id":"mohan007/SmolVLM2-256M-Instruct-Agentic-GUI","availableInferenceProviders":[],"lastModified":"2026-01-05T03:17:31.000Z","likes":0,"pipeline_tag":"image-text-to-text","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[],"numParameters":256484928},{"author":"mohan007","authorData":{"_id":"632450a960ff5fbf36e757d5","avatarUrl":"/avatars/76f071524387aa3839252436f6e372c0.svg","fullname":"papasani","name":"mohan007","type":"user","isPro":false,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":2,"isUserFollowing":false},"downloads":3,"gated":false,"id":"mohan007/ALLaVA-3B-Longer","availableInferenceProviders":[],"lastModified":"2024-03-22T06:42:12.000Z","likes":0,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[],"numParameters":3092320256},{"author":"mohan007","authorData":{"_id":"632450a960ff5fbf36e757d5","avatarUrl":"/avatars/76f071524387aa3839252436f6e372c0.svg","fullname":"papasani","name":"mohan007","type":"user","isPro":false,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":2,"isUserFollowing":false},"downloads":3,"gated":false,"id":"mohan007/moondream1","availableInferenceProviders":[],"lastModified":"2024-02-09T01:33:43.000Z","likes":0,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[],"numParameters":1857482608},{"author":"mohan007","authorData":{"_id":"632450a960ff5fbf36e757d5","avatarUrl":"/avatars/76f071524387aa3839252436f6e372c0.svg","fullname":"papasani","name":"mohan007","type":"user","isPro":false,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":2,"isUserFollowing":false},"downloads":4,"gated":false,"id":"mohan007/Qwen-VL-Chat-Int4","availableInferenceProviders":[],"lastModified":"2023-11-13T10:15:02.000Z","likes":0,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[],"numParameters":9657901824},{"author":"mohan007","authorData":{"_id":"632450a960ff5fbf36e757d5","avatarUrl":"/avatars/76f071524387aa3839252436f6e372c0.svg","fullname":"papasani","name":"mohan007","type":"user","isPro":false,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":2,"isUserFollowing":false},"downloads":0,"gated":false,"id":"mohan007/trial-1","availableInferenceProviders":[],"lastModified":"2022-11-28T23:16:23.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"mohan007","authorData":{"_id":"632450a960ff5fbf36e757d5","avatarUrl":"/avatars/76f071524387aa3839252436f6e372c0.svg","fullname":"papasani","name":"mohan007","type":"user","isPro":false,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":2,"isUserFollowing":false},"downloads":0,"gated":false,"id":"mohan007/fire-detection-transformers-trial-1","availableInferenceProviders":[],"lastModified":"2022-11-28T21:15:39.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false}],"buckets":[],"numBuckets":0,"numberLikes":5,"papers":[],"posts":[],"totalPosts":0,"spaces":[{"author":"mohan007","authorData":{"_id":"632450a960ff5fbf36e757d5","avatarUrl":"/avatars/76f071524387aa3839252436f6e372c0.svg","fullname":"papasani","name":"mohan007","type":"user","isPro":false,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":2,"isUserFollowing":false},"colorFrom":"green","colorTo":"red","createdAt":"2025-02-25T17:37:58.000Z","emoji":"๐Ÿฆ€","id":"mohan007/linly-talker","lastModified":"2025-02-25T17:37:58.000Z","likes":0,"pinned":false,"private":false,"sdk":"docker","repoType":"space","runtime":{"stage":"NO_APP_FILE","hardware":{"current":null,"requested":"t4-medium"},"storage":null,"gcTimeout":3600,"errorMessage":null,"replicas":{"requested":1},"domains":[{"domain":"mohan007-linly-talker.hf.space","stage":"READY"}]},"title":"Linly Talker","isLikedByUser":false,"trendingScore":0,"tags":["docker","region:us"],"featured":false},{"author":"mohan007","authorData":{"_id":"632450a960ff5fbf36e757d5","avatarUrl":"/avatars/76f071524387aa3839252436f6e372c0.svg","fullname":"papasani","name":"mohan007","type":"user","isPro":false,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":2,"isUserFollowing":false},"colorFrom":"green","colorTo":"red","createdAt":"2025-01-29T06:09:02.000Z","emoji":"๐Ÿข","id":"mohan007/autism_exp_with_minicpm_o_2_6","lastModified":"2025-01-29T06:12:35.000Z","likes":1,"pinned":false,"private":false,"sdk":"gradio","repoType":"space","runtime":{"stage":"RUNTIME_ERROR","hardware":{"current":null,"requested":"cpu-basic"},"storage":null,"gcTimeout":172800,"errorMessage":"Exit code: 1. Reason: okens_map.json: 0%| | 0.00/5.35k [00:00\n from src.app.response import describe_image\n File \"/home/user/app/src/app/response.py\", line 23, in \n model, tokenizer, processor = load_model_tokenizer_and_processor(model_name, device)\n File \"/home/user/app/src/app/model.py\", line 46, in load_model_tokenizer_and_processor\n ).to('cuda')\n File \"/usr/local/lib/python3.10/site-packages/transformers/modeling_utils.py\", line 2905, in to\n return super().to(*args, **kwargs)\n File \"/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1152, in to\n return self._apply(convert)\n File \"/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 802, in _apply\n module._apply(fn)\n File \"/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 802, in _apply\n module._apply(fn)\n File \"/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 802, in _apply\n module._apply(fn)\n File \"/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 825, in _apply\n param_applied = fn(param)\n File \"/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py\", line 1150, in convert\n return t.to(device, dtype if t.is_floating_point() or t.is_complex() else None, non_blocking)\n File \"/usr/local/lib/python3.10/site-packages/torch/cuda/__init__.py\", line 302, in _lazy_init\n torch._C._cuda_init()\nRuntimeError: Found no NVIDIA driver on your system. Please check that you have an NVIDIA GPU and installed a driver from http://www.nvidia.com/Download/index.aspx\n","replicas":{"requested":1},"devMode":false,"domains":[{"domain":"mohan007-autism-exp-with-minicpm-o-2-6.hf.space","stage":"READY"}]},"shortDescription":"Demo for MiniCPM-V 2.6 to answer questions about images","title":"PicQ","isLikedByUser":false,"trendingScore":0,"tags":["gradio","region:us"],"featured":false},{"author":"mohan007","authorData":{"_id":"632450a960ff5fbf36e757d5","avatarUrl":"/avatars/76f071524387aa3839252436f6e372c0.svg","fullname":"papasani","name":"mohan007","type":"user","isPro":false,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":2,"isUserFollowing":false},"colorFrom":"gray","colorTo":"gray","createdAt":"2025-01-07T06:46:27.000Z","emoji":"๐Ÿ‘€","id":"mohan007/sales_audio_analysis","lastModified":"2025-01-13T18:47:00.000Z","likes":0,"pinned":false,"private":false,"sdk":"gradio","repoType":"space","runtime":{"stage":"BUILD_ERROR","hardware":{"current":null,"requested":"cpu-basic"},"storage":null,"gcTimeout":172800,"errorMessage":"Job timeout","replicas":{"requested":1},"devMode":false,"domains":[{"domain":"mohan007-sales-audio-analysis.hf.space","stage":"READY"}]},"title":"Sales Audio Analysis","isLikedByUser":false,"trendingScore":0,"tags":["gradio","region:us"],"featured":false},{"author":"mohan007","authorData":{"_id":"632450a960ff5fbf36e757d5","avatarUrl":"/avatars/76f071524387aa3839252436f6e372c0.svg","fullname":"papasani","name":"mohan007","type":"user","isPro":false,"isHf":false,"isHfAdmin":false,"isMod":false,"followerCount":2,"isUserFollowing":false},"colorFrom":"indigo","colorTo":"pink","createdAt":"2024-11-07T05:29:29.000Z","emoji":"๐Ÿ’ป","id":"mohan007/agent_over_rag_for_pdf","lastModified":"2024-11-07T07:42:14.000Z","likes":0,"pinned":false,"private":false,"sdk":"streamlit","repoType":"space","runtime":{"stage":"SLEEPING","hardware":{"current":null,"requested":"cpu-basic"},"storage":null,"gcTimeout":172800,"errorMessage":null,"replicas":{"requested":1},"devMode":false,"domains":[{"domain":"mohan007-agent-over-rag-for-pdf.hf.space","stage":"READY"}]},"shortDescription":"pdf agent over rag ","title":"Agent Over Rag For Pdf","isLikedByUser":false,"trendingScore":0,"tags":["streamlit","region:us"],"featured":false}],"u":{"avatarUrl":"/avatars/76f071524387aa3839252436f6e372c0.svg","isPro":false,"fullname":"papasani","user":"mohan007","orgs":[],"signup":{"github":"papasanimohansrinivas","twitter":"PMOHANSRINIVAS1","details":"cctv + computer_vision && robotics"},"isHf":false,"isMod":false,"type":"user","theme":"light"},"upvotes":0,"numFollowers":2,"numFollowingUsers":6,"numFollowingOrgs":4,"numModels":6,"numDatasets":0,"numSpaces":4,"isFollowing":false,"isFollower":false,"sampleFollowers":[{"user":"briancconnelly","fullname":"brian","type":"user","_id":"6778eac37279b5c5738d631d","isPro":false,"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/no-auth/uDK6Ac385GhpaNipvPMi9.png"},{"user":"21world","fullname":"www.minds.com/jelyazko/","type":"user","_id":"64548986cd09ceba0e1709cb","isPro":false,"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/64548986cd09ceba0e1709cb/muGiatjmPfzxYb3Rjcqas.jpeg"}],"isWatching":false,"isIgnored":false,"acceptLanguages":["*"],"filters":{},"currentRepoPage":0}">