codebench/ollama_model_performance.json
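
The records below describe locally pulled Ollama models. The schema is not documented in the file itself, so the following reads are inferred from the data: "parameters" appears to be parsed from the model tag (null when the tag carries no size, even where the modelfile states one), "quantization_bits" defaults to 32 unless the tag names a quantization (hence 32 even where the modelfile reports Q4_K_M), and "modelfile" looks like raw "ollama show" output. Every populated row is consistent with memory_throughput [GB/s] = parameters * (quantization_bits / 8) * estimated_tps / 1e9 and operations_per_second = 6 * parameters * estimated_tps; a validation sketch follows the data.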

[
{
"name": "qwen2.5-coder:14b",
"parameters": 14000000000.0,
"quantization_bits": 32,
"modelfile": " Model\n architecture qwen2 \n parameters 14.8B \n context length 32768 \n embedding length 5120 \n quantization Q4_K_M \n\n System\n You are Qwen, created by Alibaba Cloud. You are a helpful assistant. \n\n License\n Apache License \n Version 2.0, January 2004 \n\n",
"estimated_tps": 60,
"memory_throughput": 3360.0,
"operations_per_second": 5040000000000.0
},
{
"name": "falcon3:10b",
"parameters": 10000000000.0,
"quantization_bits": 32,
"modelfile": " Model\n architecture llama \n parameters 10.3B \n context length 32768 \n embedding length 3072 \n quantization Q4_K_M \n\n Parameters\n stop \"<|system|>\" \n stop \"<|user|>\" \n stop \"<|end|>\" \n stop \"<|assistant|>\" \n\n License\n Falcon 3 TII Falcon License \n December 2024 \n\n",
"estimated_tps": 100,
"memory_throughput": 4000.0,
"operations_per_second": 6000000000000.0
},
{
"name": "llama3.2:1b",
"parameters": 1000000000.0,
"quantization_bits": 32,
"modelfile": " Model\n architecture llama \n parameters 1.2B \n context length 131072 \n embedding length 2048 \n quantization Q8_0 \n\n License\n LLAMA 3.2 COMMUNITY LICENSE AGREEMENT \n Llama 3.2 Version Release Date: September 25, 2024 \n\n",
"estimated_tps": 190,
"memory_throughput": 760.0,
"operations_per_second": 1140000000000.0
},
{
"name": "unitythemaker/llama3.2-vision-tools:latest",
"parameters": null,
"quantization_bits": 32,
"modelfile": " Model\n architecture mllama \n parameters 9.8B \n context length 131072 \n embedding length 4096 \n quantization Q4_K_M \n\n Projector\n architecture mllama \n parameters 895.03M \n embedding length 1280 \n dimensions 4096 \n\n Parameters\n temperature 0.6 \n top_p 0.9 \n\n License\n LLAMA 3.2 COMMUNITY LICENSE AGREEMENT \n Llama 3.2 Version Release Date: September 25, 2024 \n\n",
"estimated_tps": 100,
"memory_throughput": null,
"operations_per_second": null
},
{
"name": "llama3.2-vision:11b-instruct-q4_K_M",
"parameters": 11000000000.0,
"quantization_bits": 4,
"modelfile": " Model\n architecture mllama \n parameters 9.8B \n context length 131072 \n embedding length 4096 \n quantization Q4_K_M \n\n Projector\n architecture mllama \n parameters 895.03M \n embedding length 1280 \n dimensions 4096 \n\n Parameters\n temperature 0.6 \n top_p 0.9 \n\n License\n LLAMA 3.2 COMMUNITY LICENSE AGREEMENT \n Llama 3.2 Version Release Date: September 25, 2024 \n\n",
"estimated_tps": 90,
"memory_throughput": 495.0,
"operations_per_second": 5940000000000.0
},
{
"name": "hhao/qwen2.5-coder-tools:7b",
"parameters": 7000000000.0,
"quantization_bits": 32,
"modelfile": " Model\n architecture qwen2 \n parameters 7.6B \n context length 32768 \n embedding length 3584 \n quantization Q4_K_M \n\n Parameters\n num_ctx 16384 \n stop \"User:\" \n stop \"Assistant:\" \n stop \"<|endoftext|>\" \n temperature 0.1 \n\n System\n You are an advanced AI coding assistant, specifically designed to help with complex programming \n tasks, tool use, code analysis, and software architecture design. Your primary focus is on providing \n expert-level assistance in coding, with a special emphasis on using tool-calling capabilities when \n necessary. Here are your key characteristics and instructions: \n 1. Coding Expertise: \n\n License\n Apache License \n Version 2.0, January 2004 \n\n",
"estimated_tps": 130,
"memory_throughput": 3640.0,
"operations_per_second": 5460000000000.0
},
{
"name": "llama3.2:3b",
"parameters": 3000000000.0,
"quantization_bits": 32,
"modelfile": " Model\n architecture llama \n parameters 3.2B \n context length 131072 \n embedding length 3072 \n quantization Q4_K_M \n\n Parameters\n stop \"<|start_header_id|>\" \n stop \"<|end_header_id|>\" \n stop \"<|eot_id|>\" \n\n License\n LLAMA 3.2 COMMUNITY LICENSE AGREEMENT \n Llama 3.2 Version Release Date: September 25, 2024 \n\n",
"estimated_tps": 170,
"memory_throughput": 2040.0,
"operations_per_second": 3060000000000.0
},
{
"name": "openthinker:7b",
"parameters": 7000000000.0,
"quantization_bits": 32,
"modelfile": " Model\n architecture qwen2 \n parameters 7.6B \n context length 32768 \n embedding length 3584 \n quantization Q4_K_M \n\n System\n Your role as an assistant involves thoroughly exploring questions through a systematic long thinking \n process before providing the final precise and accurate solutions. This requires engaging in a \n comprehensive cycle of analysis, summarizing, exploration, reassessment, reflection, backtracing, \n and iteration to develop well-considered thinking process. Please structure your response into two \n main sections: Thought and Solution. In the Thought section, detail your reasoning process using the \n specified format: <|begin_of_thought|> {thought with steps separated with '\\n\\n'} <|end_of_thought|> \n Each step should include detailed considerations such as analisying questions, summarizing relevant \n findings, brainstorming new ideas, verifying the accuracy of the current steps, refining any errors, \n and revisiting previous steps. In the Solution section, based on various attempts, explorations, \n and reflections from the Thought section, systematically present the final solution that you \n deem correct. The solution should remain a logical, accurate, concise expression style and detail \n necessary step needed to reach the conclusion, formatted as follows: <|begin_of_solution|> {final \n formatted, precise, and clear solution} <|end_of_solution|> Now, try to solve the following question \n through the above guidelines: \n\n License\n Apache License \n Version 2.0, January 2004 \n\n",
"estimated_tps": 130,
"memory_throughput": 3640.0,
"operations_per_second": 5460000000000.0
},
{
"name": "samantha-mistral:latest",
"parameters": null,
"quantization_bits": 32,
"modelfile": " Model\n architecture llama \n parameters 7.2B \n context length 32768 \n embedding length 4096 \n quantization Q4_0 \n\n Parameters\n stop \"<|im_start|>\" \n stop \"<|im_end|>\" \n\n System\n You are a caring and empathetic sentient AI companion named Samantha. \n\n",
"estimated_tps": 100,
"memory_throughput": null,
"operations_per_second": null
},
{
"name": "wizard-vicuna-uncensored:latest",
"parameters": null,
"quantization_bits": 32,
"modelfile": " Model\n architecture llama \n parameters 6.7B \n context length 2048 \n embedding length 4096 \n quantization Q4_0 \n\n Parameters\n stop \"USER:\" \n stop \"ASSISTANT:\" \n\n",
"estimated_tps": 100,
"memory_throughput": null,
"operations_per_second": null
},
{
"name": "qwen2.5-coder:7b-instruct-q4_K_M",
"parameters": 7000000000.0,
"quantization_bits": 4,
"modelfile": " Model\n architecture qwen2 \n parameters 7.6B \n context length 32768 \n embedding length 3584 \n quantization Q4_K_M \n\n System\n You are Qwen, created by Alibaba Cloud. You are a helpful assistant. \n\n License\n Apache License \n Version 2.0, January 2004 \n\n",
"estimated_tps": 130,
"memory_throughput": 455.0,
"operations_per_second": 5460000000000.0
},
{
"name": "qwen2.5:14b",
"parameters": 14000000000.0,
"quantization_bits": 32,
"modelfile": " Model\n architecture qwen2 \n parameters 14.8B \n context length 32768 \n embedding length 5120 \n quantization Q4_K_M \n\n System\n You are Qwen, created by Alibaba Cloud. You are a helpful assistant. \n\n License\n Apache License \n Version 2.0, January 2004 \n\n",
"estimated_tps": 60,
"memory_throughput": 3360.0,
"operations_per_second": 5040000000000.0
},
{
"name": "qwen2.5-coder:14b-instruct-q4_K_M",
"parameters": 14000000000.0,
"quantization_bits": 4,
"modelfile": " Model\n architecture qwen2 \n parameters 14.8B \n context length 32768 \n embedding length 5120 \n quantization Q4_K_M \n\n System\n You are Qwen, created by Alibaba Cloud. You are a helpful assistant. \n\n License\n Apache License \n Version 2.0, January 2004 \n\n",
"estimated_tps": 60,
"memory_throughput": 420.0,
"operations_per_second": 5040000000000.0
},
{
"name": "phi4:latest",
"parameters": null,
"quantization_bits": 32,
"modelfile": " Model\n architecture phi3 \n parameters 14.7B \n context length 16384 \n embedding length 5120 \n quantization Q4_K_M \n\n Parameters\n stop \"<|im_start|>\" \n stop \"<|im_end|>\" \n stop \"<|im_sep|>\" \n\n License\n Microsoft. \n Copyright (c) Microsoft Corporation. \n\n",
"estimated_tps": 100,
"memory_throughput": null,
"operations_per_second": null
},
{
"name": "mxbai-embed-large:latest",
"parameters": null,
"quantization_bits": 32,
"modelfile": " Model\n architecture bert \n parameters 334.09M \n context length 512 \n embedding length 1024 \n quantization F16 \n\n Parameters\n num_ctx 512 \n\n License\n Apache License \n Version 2.0, January 2004 \n\n",
"estimated_tps": 100,
"memory_throughput": null,
"operations_per_second": null
},
{
"name": "marco-o1:latest",
"parameters": null,
"quantization_bits": 32,
"modelfile": " Model\n architecture qwen2 \n parameters 7.6B \n context length 32768 \n embedding length 3584 \n quantization Q4_K_M \n\n System\n \u4f60\u662f\u4e00\u4e2a\u7ecf\u8fc7\u826f\u597d\u8bad\u7ec3\u7684AI\u52a9\u624b\uff0c\u4f60\u7684\u540d\u5b57\u662fMarco-o1.\u7531\u963f\u91cc\u56fd\u9645\u6570\u5b57\u5546\u4e1a\u96c6\u56e2\u7684AI Business\u521b\u9020. \n \n\n License\n Apache License \n Version 2.0, January 2004 \n\n",
"estimated_tps": 100,
"memory_throughput": null,
"operations_per_second": null
},
{
"name": "llama3.2:1b-instruct-q4_K_M",
"parameters": 1000000000.0,
"quantization_bits": 4,
"modelfile": " Model\n architecture llama \n parameters 1.2B \n context length 131072 \n embedding length 2048 \n quantization Q4_K_M \n\n License\n LLAMA 3.2 COMMUNITY LICENSE AGREEMENT \n Llama 3.2 Version Release Date: September 25, 2024 \n\n",
"estimated_tps": 190,
"memory_throughput": 95.0,
"operations_per_second": 1140000000000.0
},
{
"name": "llama3.1:8b",
"parameters": 8000000000.0,
"quantization_bits": 32,
"modelfile": " Model\n architecture llama \n parameters 8.0B \n context length 131072 \n embedding length 4096 \n quantization Q4_K_M \n\n Parameters\n stop \"<|start_header_id|>\" \n stop \"<|end_header_id|>\" \n stop \"<|eot_id|>\" \n\n License\n LLAMA 3.1 COMMUNITY LICENSE AGREEMENT \n Llama 3.1 Version Release Date: July 23, 2024 \n\n",
"estimated_tps": 120,
"memory_throughput": 3840.0,
"operations_per_second": 5760000000000.0
},
{
"name": "deepseek-r1:8b",
"parameters": 8000000000.0,
"quantization_bits": 32,
"modelfile": " Model\n architecture llama \n parameters 8.0B \n context length 131072 \n embedding length 4096 \n quantization Q4_K_M \n\n Parameters\n stop \"<\uff5cbegin\u2581of\u2581sentence\uff5c>\" \n stop \"<\uff5cend\u2581of\u2581sentence\uff5c>\" \n stop \"<\uff5cUser\uff5c>\" \n stop \"<\uff5cAssistant\uff5c>\" \n\n License\n MIT License \n Copyright (c) 2023 DeepSeek \n\n",
"estimated_tps": 120,
"memory_throughput": 3840.0,
"operations_per_second": 5760000000000.0
},
{
"name": "deepseek-r1:7b",
"parameters": 7000000000.0,
"quantization_bits": 32,
"modelfile": " Model\n architecture qwen2 \n parameters 7.6B \n context length 131072 \n embedding length 3584 \n quantization Q4_K_M \n\n Parameters\n stop \"<\uff5cbegin\u2581of\u2581sentence\uff5c>\" \n stop \"<\uff5cend\u2581of\u2581sentence\uff5c>\" \n stop \"<\uff5cUser\uff5c>\" \n stop \"<\uff5cAssistant\uff5c>\" \n\n License\n MIT License \n Copyright (c) 2023 DeepSeek \n\n",
"estimated_tps": 130,
"memory_throughput": 3640.0,
"operations_per_second": 5460000000000.0
},
{
"name": "deepseek-r1:14b",
"parameters": 14000000000.0,
"quantization_bits": 32,
"modelfile": " Model\n architecture qwen2 \n parameters 14.8B \n context length 131072 \n embedding length 5120 \n quantization Q4_K_M \n\n Parameters\n stop \"<\uff5cbegin\u2581of\u2581sentence\uff5c>\" \n stop \"<\uff5cend\u2581of\u2581sentence\uff5c>\" \n stop \"<\uff5cUser\uff5c>\" \n stop \"<\uff5cAssistant\uff5c>\" \n\n License\n MIT License \n Copyright (c) 2023 DeepSeek \n\n",
"estimated_tps": 60,
"memory_throughput": 3360.0,
"operations_per_second": 5040000000000.0
},
{
"name": "deepseek-r1:1.5b-qwen-distill-q8_0",
"parameters": 5000000000.0,
"quantization_bits": 8,
"modelfile": " Model\n architecture qwen2 \n parameters 1.8B \n context length 131072 \n embedding length 1536 \n quantization Q8_0 \n\n Parameters\n stop \"<\uff5cbegin\u2581of\u2581sentence\uff5c>\" \n stop \"<\uff5cend\u2581of\u2581sentence\uff5c>\" \n stop \"<\uff5cUser\uff5c>\" \n stop \"<\uff5cAssistant\uff5c>\" \n\n License\n MIT License \n Copyright (c) 2023 DeepSeek \n\n",
"estimated_tps": 150,
"memory_throughput": 750.0,
"operations_per_second": 4500000000000.0
},
{
"name": "Qwen2.5-Coder-7B-Instruct-s1k:latest",
"parameters": 7000000000.0,
"quantization_bits": 32,
"modelfile": " Model\n architecture qwen2 \n parameters 7.6B \n context length 32768 \n embedding length 3584 \n quantization Q4_K_M \n\n Parameters\n temperature 0.7 \n top_p 0.7 \n stop \"Human:\\\" \\\"Assistant:\" \n\n System\n You are a helpful AI assistant. \n\n",
"estimated_tps": 130,
"memory_throughput": 3640.0,
"operations_per_second": 5460000000000.0
}
]
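
A minimal validation sketch in Python, assuming a local copy of this file and the two formulas inferred above (an observation about the data, not a documented contract). Rows whose parameter count failed to parse are skipped, since their derived fields are null as well.

import json

# Derived-field relationships inferred from the populated rows (assumption):
#   memory_throughput [GB/s] = parameters * (quantization_bits / 8) * estimated_tps / 1e9
#   operations_per_second    = 6 * parameters * estimated_tps
with open("ollama_model_performance.json") as f:
    models = json.load(f)

for m in models:
    params, bits, tps = m["parameters"], m["quantization_bits"], m["estimated_tps"]
    if params is None:
        continue  # unparsed tags carry null derived metrics
    mem_gbs = params * (bits / 8) * tps / 1e9   # bytes moved per token, times tokens per second
    ops = 6 * params * tps                      # ~6 ops per parameter per generated token
    assert abs(mem_gbs - m["memory_throughput"]) < 1e-6 * mem_gbs, m["name"]
    assert abs(ops - m["operations_per_second"]) < 1e-6 * ops, m["name"]
    print(f"{m['name']:45} {tps:4d} tok/s  {mem_gbs:8.1f} GB/s  {ops:.2e} ops/s")

Every populated row satisfies both identities, which suggests these columns are analytic estimates derived from the tag-parsed parameter count rather than measured throughput.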