codebench/ollama_model_performance.json
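
The records below describe locally pulled Ollama models. The schema is not documented in the file itself, so the following reads are inferred from the data: "parameters" appears to be parsed from the model tag (null when the tag carries no size, even where the modelfile states one), "quantization_bits" defaults to 32 unless the tag names a quantization (hence 32 even where the modelfile reports Q4_K_M), and "modelfile" looks like raw "ollama show" output. Every populated row is consistent with memory_throughput [GB/s] = parameters * (quantization_bits / 8) * estimated_tps / 1e9 and operations_per_second = 6 * parameters * estimated_tps; a validation sketch follows the data.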

[
{
"name": "qwen2.5-coder:14b",
"parameters": 14000000000.0,
"quantization_bits": 32,
"modelfile": " Model\n architecture qwen2 \n parameters 14.8B \n context length 32768 \n embedding length 5120 \n quantization Q4_K_M \n\n System\n You are Qwen, created by Alibaba Cloud. You are a helpful assistant. \n\n License\n Apache License \n Version 2.0, January 2004 \n\n",
"estimated_tps": 60,
"memory_throughput": 3360.0,
"operations_per_second": 5040000000000.0
},
{
"name": "falcon3:10b",
"parameters": 10000000000.0,
"quantization_bits": 32,
"modelfile": " Model\n architecture llama \n parameters 10.3B \n context length 32768 \n embedding length 3072 \n quantization Q4_K_M \n\n Parameters\n stop \"<|system|>\" \n stop \"<|user|>\" \n stop \"<|end|>\" \n stop \"<|assistant|>\" \n\n License\n Falcon 3 TII Falcon License \n December 2024 \n\n",
"estimated_tps": 100,
"memory_throughput": 4000.0,
"operations_per_second": 6000000000000.0
},
{
"name": "llama3.2:1b",
"parameters": 1000000000.0,
"quantization_bits": 32,
"modelfile": " Model\n architecture llama \n parameters 1.2B \n context length 131072 \n embedding length 2048 \n quantization Q8_0 \n\n License\n LLAMA 3.2 COMMUNITY LICENSE AGREEMENT \n Llama 3.2 Version Release Date: September 25, 2024 \n\n",
"estimated_tps": 190,
"memory_throughput": 760.0,
"operations_per_second": 1140000000000.0
},
{
"name": "unitythemaker/llama3.2-vision-tools:latest",
"parameters": null,
"quantization_bits": 32,
"modelfile": " Model\n architecture mllama \n parameters 9.8B \n context length 131072 \n embedding length 4096 \n quantization Q4_K_M \n\n Projector\n architecture mllama \n parameters 895.03M \n embedding length 1280 \n dimensions 4096 \n\n Parameters\n temperature 0.6 \n top_p 0.9 \n\n License\n LLAMA 3.2 COMMUNITY LICENSE AGREEMENT \n Llama 3.2 Version Release Date: September 25, 2024 \n\n",
"estimated_tps": 100,
"memory_throughput": null,
"operations_per_second": null
},
{
"name": "llama3.2-vision:11b-instruct-q4_K_M",
"parameters": 11000000000.0,
"quantization_bits": 4,
"modelfile": " Model\n architecture mllama \n parameters 9.8B \n context length 131072 \n embedding length 4096 \n quantization Q4_K_M \n\n Projector\n architecture mllama \n parameters 895.03M \n embedding length 1280 \n dimensions 4096 \n\n Parameters\n temperature 0.6 \n top_p 0.9 \n\n License\n LLAMA 3.2 COMMUNITY LICENSE AGREEMENT \n Llama 3.2 Version Release Date: September 25, 2024 \n\n",
"estimated_tps": 90,
"memory_throughput": 495.0,
"operations_per_second": 5940000000000.0
},
{
"name": "hhao/qwen2.5-coder-tools:7b",
"parameters": 7000000000.0,
"quantization_bits": 32,
"modelfile": " Model\n architecture qwen2 \n parameters 7.6B \n context length 32768 \n embedding length 3584 \n quantization Q4_K_M \n\n Parameters\n num_ctx 16384 \n stop \"User:\" \n stop \"Assistant:\" \n stop \"<|endoftext|>\" \n temperature 0.1 \n\n System\n You are an advanced AI coding assistant, specifically designed to help with complex programming \n tasks, tool use, code analysis, and software architecture design. Your primary focus is on providing \n expert-level assistance in coding, with a special emphasis on using tool-calling capabilities when \n necessary. Here are your key characteristics and instructions: \n 1. Coding Expertise: \n\n License\n Apache License \n Version 2.0, January 2004 \n\n",
"estimated_tps": 130,
"memory_throughput": 3640.0,
"operations_per_second": 5460000000000.0
},
{
"name": "llama3.2:3b",
"parameters": 3000000000.0,
"quantization_bits": 32,
"modelfile": " Model\n architecture llama \n parameters 3.2B \n context length 131072 \n embedding length 3072 \n quantization Q4_K_M \n\n Parameters\n stop \"<|start_header_id|>\" \n stop \"<|end_header_id|>\" \n stop \"<|eot_id|>\" \n\n License\n LLAMA 3.2 COMMUNITY LICENSE AGREEMENT \n Llama 3.2 Version Release Date: September 25, 2024 \n\n",
"estimated_tps": 170,
"memory_throughput": 2040.0,
"operations_per_second": 3060000000000.0
},
{
"name": "openthinker:7b",
"parameters": 7000000000.0,
"quantization_bits": 32,
"modelfile": " Model\n architecture qwen2 \n parameters 7.6B \n context length 32768 \n embedding length 3584 \n quantization Q4_K_M \n\n System\n Your role as an assistant involves thoroughly exploring questions through a systematic long thinking \n process before providing the final precise and accurate solutions. This requires engaging in a \n comprehensive cycle of analysis, summarizing, exploration, reassessment, reflection, backtracing, \n and iteration to develop well-considered thinking process. Please structure your response into two \n main sections: Thought and Solution. In the Thought section, detail your reasoning process using the \n specified format: <|begin_of_thought|> {thought with steps separated with '\\n\\n'} <|end_of_thought|> \n Each step should include detailed considerations such as analisying questions, summarizing relevant \n findings, brainstorming new ideas, verifying the accuracy of the current steps, refining any errors, \n and revisiting previous steps. In the Solution section, based on various attempts, explorations, \n and reflections from the Thought section, systematically present the final solution that you \n deem correct. The solution should remain a logical, accurate, concise expression style and detail \n necessary step needed to reach the conclusion, formatted as follows: <|begin_of_solution|> {final \n formatted, precise, and clear solution} <|end_of_solution|> Now, try to solve the following question \n through the above guidelines: \n\n License\n Apache License \n Version 2.0, January 2004 \n\n",
"estimated_tps": 130,
"memory_throughput": 3640.0,
"operations_per_second": 5460000000000.0
},
{
"name": "samantha-mistral:latest",
"parameters": null,
"quantization_bits": 32,
"modelfile": " Model\n architecture llama \n parameters 7.2B \n context length 32768 \n embedding length 4096 \n quantization Q4_0 \n\n Parameters\n stop \"<|im_start|>\" \n stop \"<|im_end|>\" \n\n System\n You are a caring and empathetic sentient AI companion named Samantha. \n\n",
"estimated_tps": 100,
"memory_throughput": null,
"operations_per_second": null
},
{
"name": "wizard-vicuna-uncensored:latest",
"parameters": null,
"quantization_bits": 32,
"modelfile": " Model\n architecture llama \n parameters 6.7B \n context length 2048 \n embedding length 4096 \n quantization Q4_0 \n\n Parameters\n stop \"USER:\" \n stop \"ASSISTANT:\" \n\n",
"estimated_tps": 100,
"memory_throughput": null,
"operations_per_second": null
},
{
"name": "qwen2.5-coder:7b-instruct-q4_K_M",
"parameters": 7000000000.0,
"quantization_bits": 4,
"modelfile": " Model\n architecture qwen2 \n parameters 7.6B \n context length 32768 \n embedding length 3584 \n quantization Q4_K_M \n\n System\n You are Qwen, created by Alibaba Cloud. You are a helpful assistant. \n\n License\n Apache License \n Version 2.0, January 2004 \n\n",
"estimated_tps": 130,
"memory_throughput": 455.0,
"operations_per_second": 5460000000000.0
},
{
"name": "qwen2.5:14b",
"parameters": 14000000000.0,
"quantization_bits": 32,
"modelfile": " Model\n architecture qwen2 \n parameters 14.8B \n context length 32768 \n embedding length 5120 \n quantization Q4_K_M \n\n System\n You are Qwen, created by Alibaba Cloud. You are a helpful assistant. \n\n License\n Apache License \n Version 2.0, January 2004 \n\n",
"estimated_tps": 60,
"memory_throughput": 3360.0,
"operations_per_second": 5040000000000.0
},
{
"name": "qwen2.5-coder:14b-instruct-q4_K_M",
"parameters": 14000000000.0,
"quantization_bits": 4,
"modelfile": " Model\n architecture qwen2 \n parameters 14.8B \n context length 32768 \n embedding length 5120 \n quantization Q4_K_M \n\n System\n You are Qwen, created by Alibaba Cloud. You are a helpful assistant. \n\n License\n Apache License \n Version 2.0, January 2004 \n\n",
"estimated_tps": 60,
"memory_throughput": 420.0,
"operations_per_second": 5040000000000.0
},
{
"name": "phi4:latest",
"parameters": null,
"quantization_bits": 32,
"modelfile": " Model\n architecture phi3 \n parameters 14.7B \n context length 16384 \n embedding length 5120 \n quantization Q4_K_M \n\n Parameters\n stop \"<|im_start|>\" \n stop \"<|im_end|>\" \n stop \"<|im_sep|>\" \n\n License\n Microsoft. \n Copyright (c) Microsoft Corporation. \n\n",
"estimated_tps": 100,
"memory_throughput": null,
"operations_per_second": null
},
{
"name": "mxbai-embed-large:latest",
"parameters": null,
"quantization_bits": 32,
"modelfile": " Model\n architecture bert \n parameters 334.09M \n context length 512 \n embedding length 1024 \n quantization F16 \n\n Parameters\n num_ctx 512 \n\n License\n Apache License \n Version 2.0, January 2004 \n\n",
"estimated_tps": 100,
"memory_throughput": null,
"operations_per_second": null
},
{
"name": "marco-o1:latest",
"parameters": null,
"quantization_bits": 32,
"modelfile": " Model\n architecture qwen2 \n parameters 7.6B \n context length 32768 \n embedding length 3584 \n quantization Q4_K_M \n\n System\n \u4f60\u662f\u4e00\u4e2a\u7ecf\u8fc7\u826f\u597d\u8bad\u7ec3\u7684AI\u52a9\u624b\uff0c\u4f60\u7684\u540d\u5b57\u662fMarco-o1.\u7531\u963f\u91cc\u56fd\u9645\u6570\u5b57\u5546\u4e1a\u96c6\u56e2\u7684AI Business\u521b\u9020. \n \n\n License\n Apache License \n Version 2.0, January 2004 \n\n",
"estimated_tps": 100,
"memory_throughput": null,
"operations_per_second": null
},
{
"name": "llama3.2:1b-instruct-q4_K_M",
"parameters": 1000000000.0,
"quantization_bits": 4,
"modelfile": " Model\n architecture llama \n parameters 1.2B \n context length 131072 \n embedding length 2048 \n quantization Q4_K_M \n\n License\n LLAMA 3.2 COMMUNITY LICENSE AGREEMENT \n Llama 3.2 Version Release Date: September 25, 2024 \n\n",
"estimated_tps": 190,
"memory_throughput": 95.0,
"operations_per_second": 1140000000000.0
},
{
"name": "llama3.1:8b",
"parameters": 8000000000.0,
"quantization_bits": 32,
"modelfile": " Model\n architecture llama \n parameters 8.0B \n context length 131072 \n embedding length 4096 \n quantization Q4_K_M \n\n Parameters\n stop \"<|start_header_id|>\" \n stop \"<|end_header_id|>\" \n stop \"<|eot_id|>\" \n\n License\n LLAMA 3.1 COMMUNITY LICENSE AGREEMENT \n Llama 3.1 Version Release Date: July 23, 2024 \n\n",
"estimated_tps": 120,
"memory_throughput": 3840.0,
"operations_per_second": 5760000000000.0
},
{
"name": "deepseek-r1:8b",
"parameters": 8000000000.0,
"quantization_bits": 32,
"modelfile": " Model\n architecture llama \n parameters 8.0B \n context length 131072 \n embedding length 4096 \n quantization Q4_K_M \n\n Parameters\n stop \"<\uff5cbegin\u2581of\u2581sentence\uff5c>\" \n stop \"<\uff5cend\u2581of\u2581sentence\uff5c>\" \n stop \"<\uff5cUser\uff5c>\" \n stop \"<\uff5cAssistant\uff5c>\" \n\n License\n MIT License \n Copyright (c) 2023 DeepSeek \n\n",
"estimated_tps": 120,
"memory_throughput": 3840.0,
"operations_per_second": 5760000000000.0
},
{
"name": "deepseek-r1:7b",
"parameters": 7000000000.0,
"quantization_bits": 32,
"modelfile": " Model\n architecture qwen2 \n parameters 7.6B \n context length 131072 \n embedding length 3584 \n quantization Q4_K_M \n\n Parameters\n stop \"<\uff5cbegin\u2581of\u2581sentence\uff5c>\" \n stop \"<\uff5cend\u2581of\u2581sentence\uff5c>\" \n stop \"<\uff5cUser\uff5c>\" \n stop \"<\uff5cAssistant\uff5c>\" \n\n License\n MIT License \n Copyright (c) 2023 DeepSeek \n\n",
"estimated_tps": 130,
"memory_throughput": 3640.0,
"operations_per_second": 5460000000000.0
},
{
"name": "deepseek-r1:14b",
"parameters": 14000000000.0,
"quantization_bits": 32,
"modelfile": " Model\n architecture qwen2 \n parameters 14.8B \n context length 131072 \n embedding length 5120 \n quantization Q4_K_M \n\n Parameters\n stop \"<\uff5cbegin\u2581of\u2581sentence\uff5c>\" \n stop \"<\uff5cend\u2581of\u2581sentence\uff5c>\" \n stop \"<\uff5cUser\uff5c>\" \n stop \"<\uff5cAssistant\uff5c>\" \n\n License\n MIT License \n Copyright (c) 2023 DeepSeek \n\n",
"estimated_tps": 60,
"memory_throughput": 3360.0,
"operations_per_second": 5040000000000.0
},
{
"name": "deepseek-r1:1.5b-qwen-distill-q8_0",
"parameters": 5000000000.0,
"quantization_bits": 8,
"modelfile": " Model\n architecture qwen2 \n parameters 1.8B \n context length 131072 \n embedding length 1536 \n quantization Q8_0 \n\n Parameters\n stop \"<\uff5cbegin\u2581of\u2581sentence\uff5c>\" \n stop \"<\uff5cend\u2581of\u2581sentence\uff5c>\" \n stop \"<\uff5cUser\uff5c>\" \n stop \"<\uff5cAssistant\uff5c>\" \n\n License\n MIT License \n Copyright (c) 2023 DeepSeek \n\n",
"estimated_tps": 150,
"memory_throughput": 750.0,
"operations_per_second": 4500000000000.0
},
{
"name": "Qwen2.5-Coder-7B-Instruct-s1k:latest",
"parameters": 7000000000.0,
"quantization_bits": 32,
"modelfile": " Model\n architecture qwen2 \n parameters 7.6B \n context length 32768 \n embedding length 3584 \n quantization Q4_K_M \n\n Parameters\n temperature 0.7 \n top_p 0.7 \n stop \"Human:\\\" \\\"Assistant:\" \n\n System\n You are a helpful AI assistant. \n\n",
"estimated_tps": 130,
"memory_throughput": 3640.0,
"operations_per_second": 5460000000000.0
}
]
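
A minimal validation sketch in Python, assuming a local copy of this file and the two formulas inferred above (an observation about the data, not a documented contract). Rows whose parameter count failed to parse are skipped, since their derived fields are null as well.

import json

# Derived-field relationships inferred from the populated rows (assumption):
#   memory_throughput [GB/s] = parameters * (quantization_bits / 8) * estimated_tps / 1e9
#   operations_per_second    = 6 * parameters * estimated_tps
with open("ollama_model_performance.json") as f:
    models = json.load(f)

for m in models:
    params, bits, tps = m["parameters"], m["quantization_bits"], m["estimated_tps"]
    if params is None:
        continue  # unparsed tags carry null derived metrics
    mem_gbs = params * (bits / 8) * tps / 1e9   # bytes moved per token, times tokens per second
    ops = 6 * params * tps                      # ~6 ops per parameter per generated token
    assert abs(mem_gbs - m["memory_throughput"]) < 1e-6 * mem_gbs, m["name"]
    assert abs(ops - m["operations_per_second"]) < 1e-6 * ops, m["name"]
    print(f"{m['name']:45} {tps:4d} tok/s  {mem_gbs:8.1f} GB/s  {ops:.2e} ops/s")

Every populated row satisfies both identities, which suggests these columns are analytic estimates derived from the tag-parsed parameter count rather than measured throughput.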