codebench/benchmark_results/Apple_M1_Pro_localhost_11434.json
2025-03-15 01:35:25 +01:00

1250 lines
45 KiB
JSON

{
"server_url": "http://localhost:11434",
"benchmarks": [
{
"timestamp": "20250303_113430",
"results": [
{
"model": "wizard-vicuna-uncensored:latest",
"total_duration": 12.927267513666667,
"tokens_per_second": 23.964408789863477,
"test_results": {
"Fibonacci": {
"success_rate": 0.0,
"passed_cases": 0,
"total_cases": 18,
"success_cases_rate": 0.0,
"avg_duration": 12.927267513666667,
"avg_tokens_sec": 23.964408789863477
},
"Binary Search": {
"success_rate": 0.0,
"passed_cases": 0,
"total_cases": 18,
"success_cases_rate": 0.0,
"avg_duration": 6.512057569666667,
"avg_tokens_sec": 22.65644348402749
},
"Palindrome": {
"success_rate": 0.0,
"passed_cases": 0,
"total_cases": 18,
"success_cases_rate": 0.0,
"avg_duration": 6.231747597333333,
"avg_tokens_sec": 22.508357556533323
},
"Anagram Check": {
"success_rate": 0.0,
"passed_cases": 0,
"total_cases": 18,
"success_cases_rate": 0.0,
"avg_duration": 10.538350027666667,
"avg_tokens_sec": 23.50141235903869
}
},
"overall_success_rate": 0.0,
"overall_success_cases_rate": 0.0,
"min_avg_duration": 6.231747597333333,
"max_avg_duration": 12.927267513666667,
"min_tokens_per_second": 22.508357556533323,
"max_tokens_per_second": 23.964408789863477
},
{
"model": "qwen2.5:14b",
"total_duration": 40.78935244466666,
"tokens_per_second": 8.889333441477179,
"test_results": {
"Fibonacci": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 40.78935244466666,
"avg_tokens_sec": 8.889333441477179
},
"Binary Search": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 48.256167,
"avg_tokens_sec": 8.91781830131912
},
"Palindrome": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 37.049296861,
"avg_tokens_sec": 8.545959631111652
},
"Anagram Check": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 45.454861625,
"avg_tokens_sec": 8.529429794367523
}
},
"overall_success_rate": 100.0,
"overall_success_cases_rate": 1.0,
"min_avg_duration": 37.049296861,
"max_avg_duration": 48.256167,
"min_tokens_per_second": 8.529429794367523,
"max_tokens_per_second": 8.91781830131912
}
]
},
{
"timestamp": "20250303_174821",
"results": [
{
"model": "wizard-vicuna-uncensored:latest",
"total_duration": 9.061140611333332,
"tokens_per_second": 22.0077609712149,
"test_results": {
"Fibonacci": {
"success_rate": 0.0,
"passed_cases": 0,
"total_cases": 18,
"success_cases_rate": 0.0,
"avg_duration": 9.061140611333332,
"avg_tokens_sec": 22.0077609712149
},
"Binary Search": {
"success_rate": 0.0,
"passed_cases": 0,
"total_cases": 18,
"success_cases_rate": 0.0,
"avg_duration": 10.442756513666666,
"avg_tokens_sec": 24.41797865756476
},
"Palindrome": {
"success_rate": 33.33333333333333,
"passed_cases": 6,
"total_cases": 18,
"success_cases_rate": 0.3333333333333333,
"avg_duration": 5.603618597333333,
"avg_tokens_sec": 22.36799915531131
},
"Anagram Check": {
"success_rate": 5.555555555555555,
"passed_cases": 1,
"total_cases": 18,
"success_cases_rate": 0.05555555555555555,
"avg_duration": 11.451202444666666,
"avg_tokens_sec": 23.838978284868205
}
},
"overall_success_rate": 9.722222222222223,
"overall_success_cases_rate": 0.09722222222222222,
"min_avg_duration": 5.603618597333333,
"max_avg_duration": 11.451202444666666,
"min_tokens_per_second": 22.0077609712149,
"max_tokens_per_second": 24.41797865756476
},
{
"model": "qwen2.5-coder:7b-instruct-q4_K_M",
"total_duration": 17.315668139,
"tokens_per_second": 19.328937038685474,
"test_results": {
"Fibonacci": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 17.315668139,
"avg_tokens_sec": 19.328937038685474
},
"Binary Search": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 17.98539502766667,
"avg_tokens_sec": 19.57693023406052
},
"Palindrome": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 13.079861986,
"avg_tokens_sec": 19.205164969270385
},
"Anagram Check": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 8.672462666666666,
"avg_tokens_sec": 18.750156524185527
}
},
"overall_success_rate": 100.0,
"overall_success_cases_rate": 1.0,
"min_avg_duration": 8.672462666666666,
"max_avg_duration": 17.98539502766667,
"min_tokens_per_second": 18.750156524185527,
"max_tokens_per_second": 19.57693023406052
},
{
"model": "samantha-mistral:latest",
"total_duration": 12.211435388666667,
"tokens_per_second": 23.923938967067834,
"test_results": {
"Fibonacci": {
"success_rate": 44.44444444444444,
"passed_cases": 8,
"total_cases": 18,
"success_cases_rate": 0.4444444444444444,
"avg_duration": 12.211435388666667,
"avg_tokens_sec": 23.923938967067834
},
"Binary Search": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 7.592263986333333,
"avg_tokens_sec": 24.005256761630953
},
"Palindrome": {
"success_rate": 88.88888888888889,
"passed_cases": 16,
"total_cases": 18,
"success_cases_rate": 0.8888888888888888,
"avg_duration": 11.879134833,
"avg_tokens_sec": 24.79139939867722
},
"Anagram Check": {
"success_rate": 88.88888888888889,
"passed_cases": 16,
"total_cases": 18,
"success_cases_rate": 0.8888888888888888,
"avg_duration": 8.438326180666667,
"avg_tokens_sec": 23.909059284433855
}
},
"overall_success_rate": 80.55555555555556,
"overall_success_cases_rate": 0.8055555555555556,
"min_avg_duration": 7.592263986333333,
"max_avg_duration": 12.211435388666667,
"min_tokens_per_second": 23.909059284433855,
"max_tokens_per_second": 24.79139939867722
},
{
"model": "qwen2.5:14b",
"total_duration": 35.248588736,
"tokens_per_second": 9.780712468500422,
"test_results": {
"Fibonacci": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 35.248588736,
"avg_tokens_sec": 9.780712468500422
},
"Binary Search": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 30.093496236,
"avg_tokens_sec": 9.852073250185546
},
"Palindrome": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 31.317275361,
"avg_tokens_sec": 9.878912443287856
},
"Anagram Check": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 34.83164913866667,
"avg_tokens_sec": 9.864557197597287
}
},
"overall_success_rate": 100.0,
"overall_success_cases_rate": 1.0,
"min_avg_duration": 30.093496236,
"max_avg_duration": 35.248588736,
"min_tokens_per_second": 9.780712468500422,
"max_tokens_per_second": 9.878912443287856
},
{
"model": "qwen2.5-coder:14b-instruct-q4_K_M",
"total_duration": 37.17813055533333,
"tokens_per_second": 9.679934868977442,
"test_results": {
"Fibonacci": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 37.17813055533333,
"avg_tokens_sec": 9.679934868977442
},
"Binary Search": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 23.064999083333333,
"avg_tokens_sec": 9.768638447565982
},
"Palindrome": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 29.162273486,
"avg_tokens_sec": 9.648370262979713
},
"Anagram Check": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 34.809515569333335,
"avg_tokens_sec": 9.880958976996757
}
},
"overall_success_rate": 100.0,
"overall_success_cases_rate": 1.0,
"min_avg_duration": 23.064999083333333,
"max_avg_duration": 37.17813055533333,
"min_tokens_per_second": 9.648370262979713,
"max_tokens_per_second": 9.880958976996757
},
{
"model": "phi4:latest",
"total_duration": 23.437334625000002,
"tokens_per_second": 9.008754294006154,
"test_results": {
"Fibonacci": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 23.437334625000002,
"avg_tokens_sec": 9.008754294006154
},
"Binary Search": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 31.24377932,
"avg_tokens_sec": 8.95641765835511
},
"Palindrome": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 38.821998611333335,
"avg_tokens_sec": 9.323287626758741
},
"Anagram Check": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 27.483618916666668,
"avg_tokens_sec": 9.115156378168296
}
},
"overall_success_rate": 100.0,
"overall_success_cases_rate": 1.0,
"min_avg_duration": 23.437334625000002,
"max_avg_duration": 38.821998611333335,
"min_tokens_per_second": 8.95641765835511,
"max_tokens_per_second": 9.323287626758741
},
{
"model": "openthinker:7b",
"total_duration": 263.0048665136667,
"tokens_per_second": 18.161543236382954,
"test_results": {
"Fibonacci": {
"success_rate": 0.0,
"passed_cases": 0,
"total_cases": 18,
"success_cases_rate": 0.0,
"avg_duration": 263.0048665136667,
"avg_tokens_sec": 18.161543236382954
},
"Binary Search": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 168.908598889,
"avg_tokens_sec": 18.286959395282295
},
"Palindrome": {
"success_rate": 66.66666666666666,
"passed_cases": 12,
"total_cases": 18,
"success_cases_rate": 0.6666666666666666,
"avg_duration": 227.30224683333336,
"avg_tokens_sec": 18.19731478522653
},
"Anagram Check": {
"success_rate": 22.22222222222222,
"passed_cases": 4,
"total_cases": 18,
"success_cases_rate": 0.2222222222222222,
"avg_duration": 302.7885256943333,
"avg_tokens_sec": 17.98162671601628
}
},
"overall_success_rate": 47.22222222222222,
"overall_success_cases_rate": 0.4722222222222222,
"min_avg_duration": 168.908598889,
"max_avg_duration": 302.7885256943333,
"min_tokens_per_second": 17.98162671601628,
"max_tokens_per_second": 18.286959395282295
},
{
"model": "marco-o1:latest",
"total_duration": 41.135754569666666,
"tokens_per_second": 19.187625581350957,
"test_results": {
"Fibonacci": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 41.135754569666666,
"avg_tokens_sec": 19.187625581350957
},
"Binary Search": {
"success_rate": 33.33333333333333,
"passed_cases": 6,
"total_cases": 18,
"success_cases_rate": 0.3333333333333333,
"avg_duration": 51.497049624999995,
"avg_tokens_sec": 19.299717479390168
},
"Palindrome": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 33.27567922266667,
"avg_tokens_sec": 19.39344271268353
},
"Anagram Check": {
"success_rate": 88.88888888888889,
"passed_cases": 16,
"total_cases": 18,
"success_cases_rate": 0.8888888888888888,
"avg_duration": 47.74587681933333,
"avg_tokens_sec": 19.37606411458147
}
},
"overall_success_rate": 80.55555555555556,
"overall_success_cases_rate": 0.8055555555555556,
"min_avg_duration": 33.27567922266667,
"max_avg_duration": 51.497049624999995,
"min_tokens_per_second": 19.187625581350957,
"max_tokens_per_second": 19.39344271268353
},
{
"model": "mxbai-embed-large:latest",
"total_duration": 0.0,
"tokens_per_second": 0,
"test_results": {
"Fibonacci": {
"success_rate": 0.0,
"passed_cases": 0,
"total_cases": 18,
"success_cases_rate": 0.0,
"avg_duration": 0.0,
"avg_tokens_sec": 0
},
"Binary Search": {
"success_rate": 0.0,
"passed_cases": 0,
"total_cases": 18,
"success_cases_rate": 0.0,
"avg_duration": 0.0,
"avg_tokens_sec": 0
},
"Palindrome": {
"success_rate": 0.0,
"passed_cases": 0,
"total_cases": 18,
"success_cases_rate": 0.0,
"avg_duration": 0.0,
"avg_tokens_sec": 0
},
"Anagram Check": {
"success_rate": 0.0,
"passed_cases": 0,
"total_cases": 18,
"success_cases_rate": 0.0,
"avg_duration": 0.0,
"avg_tokens_sec": 0
}
},
"overall_success_rate": 0.0,
"overall_success_cases_rate": 0.0,
"min_avg_duration": 0.0,
"max_avg_duration": 0.0,
"min_tokens_per_second": 0,
"max_tokens_per_second": 0
},
{
"model": "llama3.2:3b",
"total_duration": 2.6735089863333332,
"tokens_per_second": 36.09389841880591,
"test_results": {
"Fibonacci": {
"success_rate": 77.77777777777779,
"passed_cases": 14,
"total_cases": 18,
"success_cases_rate": 0.7777777777777778,
"avg_duration": 2.6735089863333332,
"avg_tokens_sec": 36.09389841880591
},
"Binary Search": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 2.4100508473333333,
"avg_tokens_sec": 35.29015421496925
},
"Palindrome": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 1.0372564586666666,
"avg_tokens_sec": 30.851865251357125
},
"Anagram Check": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 2.762424833333333,
"avg_tokens_sec": 37.526840100741644
}
},
"overall_success_rate": 94.44444444444444,
"overall_success_cases_rate": 0.9444444444444444,
"min_avg_duration": 1.0372564586666666,
"max_avg_duration": 2.762424833333333,
"min_tokens_per_second": 30.851865251357125,
"max_tokens_per_second": 37.526840100741644
},
{
"model": "llama3.2:1b-instruct-q4_K_M",
"total_duration": 3.6423406526666664,
"tokens_per_second": 88.23564288140822,
"test_results": {
"Fibonacci": {
"success_rate": 27.77777777777778,
"passed_cases": 5,
"total_cases": 18,
"success_cases_rate": 0.2777777777777778,
"avg_duration": 3.6423406526666664,
"avg_tokens_sec": 88.23564288140822
},
"Binary Search": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 4.933635027666666,
"avg_tokens_sec": 88.80398801189928
},
"Palindrome": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 1.8736983196666668,
"avg_tokens_sec": 88.92932647066883
},
"Anagram Check": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 1.938491514,
"avg_tokens_sec": 88.64959146136897
}
},
"overall_success_rate": 81.94444444444444,
"overall_success_cases_rate": 0.8194444444444444,
"min_avg_duration": 1.8736983196666668,
"max_avg_duration": 4.933635027666666,
"min_tokens_per_second": 88.23564288140822,
"max_tokens_per_second": 88.92932647066883
},
{
"model": "llama3.1:8b",
"total_duration": 18.044723708666666,
"tokens_per_second": 17.92435455357372,
"test_results": {
"Fibonacci": {
"success_rate": 77.77777777777779,
"passed_cases": 14,
"total_cases": 18,
"success_cases_rate": 0.7777777777777778,
"avg_duration": 18.044723708666666,
"avg_tokens_sec": 17.92435455357372
},
"Binary Search": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 19.558708902666666,
"avg_tokens_sec": 18.175417784298393
},
"Palindrome": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 14.678415916666667,
"avg_tokens_sec": 18.32735204911305
},
"Anagram Check": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 16.556909722,
"avg_tokens_sec": 18.44864594957723
}
},
"overall_success_rate": 94.44444444444444,
"overall_success_cases_rate": 0.9444444444444444,
"min_avg_duration": 14.678415916666667,
"max_avg_duration": 19.558708902666666,
"min_tokens_per_second": 17.92435455357372,
"max_tokens_per_second": 18.44864594957723
},
{
"model": "llama3.2-vision:11b-instruct-q4_K_M",
"total_duration": 22.329897666666668,
"tokens_per_second": 15.683487784271444,
"test_results": {
"Fibonacci": {
"success_rate": 88.88888888888889,
"passed_cases": 16,
"total_cases": 18,
"success_cases_rate": 0.8888888888888888,
"avg_duration": 22.329897666666668,
"avg_tokens_sec": 15.683487784271444
},
"Binary Search": {
"success_rate": 94.44444444444444,
"passed_cases": 17,
"total_cases": 18,
"success_cases_rate": 0.9444444444444444,
"avg_duration": 28.854500333333334,
"avg_tokens_sec": 14.92370109676282
},
"Palindrome": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 16.310659278,
"avg_tokens_sec": 15.077964284743096
},
"Anagram Check": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 19.173385736,
"avg_tokens_sec": 15.919297403838554
}
},
"overall_success_rate": 95.83333333333334,
"overall_success_cases_rate": 0.9583333333333334,
"min_avg_duration": 16.310659278,
"max_avg_duration": 28.854500333333334,
"min_tokens_per_second": 14.92370109676282,
"max_tokens_per_second": 15.919297403838554
},
{
"model": "hhao/qwen2.5-coder-tools:7b",
"total_duration": 9.350419291333333,
"tokens_per_second": 17.75326132604395,
"test_results": {
"Fibonacci": {
"success_rate": 66.66666666666666,
"passed_cases": 12,
"total_cases": 18,
"success_cases_rate": 0.6666666666666666,
"avg_duration": 9.350419291333333,
"avg_tokens_sec": 17.75326132604395
},
"Binary Search": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 5.363153069333333,
"avg_tokens_sec": 16.969020266567018
},
"Palindrome": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 4.1743959723333335,
"avg_tokens_sec": 16.053159652852415
},
"Anagram Check": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 4.8510482223333335,
"avg_tokens_sec": 16.49191827040695
}
},
"overall_success_rate": 91.66666666666666,
"overall_success_cases_rate": 0.9166666666666666,
"min_avg_duration": 4.1743959723333335,
"max_avg_duration": 9.350419291333333,
"min_tokens_per_second": 16.053159652852415,
"max_tokens_per_second": 17.75326132604395
},
{
"model": "falcon3:10b",
"total_duration": 13.457690222666667,
"tokens_per_second": 13.205079847530891,
"test_results": {
"Fibonacci": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 13.457690222666667,
"avg_tokens_sec": 13.205079847530891
},
"Binary Search": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 8.738829985999999,
"avg_tokens_sec": 13.311836646406299
},
"Palindrome": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 9.371235944333334,
"avg_tokens_sec": 13.304170208123876
},
"Anagram Check": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 6.758179319666667,
"avg_tokens_sec": 12.528827123222568
}
},
"overall_success_rate": 100.0,
"overall_success_cases_rate": 1.0,
"min_avg_duration": 6.758179319666667,
"max_avg_duration": 13.457690222666667,
"min_tokens_per_second": 12.528827123222568,
"max_tokens_per_second": 13.311836646406299
},
{
"model": "deepseek-r1:8b",
"total_duration": 168.97134605566666,
"tokens_per_second": 17.431910724233774,
"test_results": {
"Fibonacci": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 168.97134605566666,
"avg_tokens_sec": 17.431910724233774
},
"Binary Search": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 107.91327369466667,
"avg_tokens_sec": 18.012494102204858
},
"Palindrome": {
"success_rate": 88.88888888888889,
"passed_cases": 16,
"total_cases": 18,
"success_cases_rate": 0.8888888888888888,
"avg_duration": 129.79005644466667,
"avg_tokens_sec": 17.961536992747714
},
"Anagram Check": {
"success_rate": 55.55555555555556,
"passed_cases": 10,
"total_cases": 18,
"success_cases_rate": 0.5555555555555556,
"avg_duration": 164.30645630566667,
"avg_tokens_sec": 17.291549519011483
}
},
"overall_success_rate": 86.11111111111111,
"overall_success_cases_rate": 0.8611111111111112,
"min_avg_duration": 107.91327369466667,
"max_avg_duration": 168.97134605566666,
"min_tokens_per_second": 17.291549519011483,
"max_tokens_per_second": 18.012494102204858
},
{
"model": "deepseek-r1:7b",
"total_duration": 336.8736558336667,
"tokens_per_second": 18.01216135231193,
"test_results": {
"Fibonacci": {
"success_rate": 55.55555555555556,
"passed_cases": 10,
"total_cases": 18,
"success_cases_rate": 0.5555555555555556,
"avg_duration": 336.8736558336667,
"avg_tokens_sec": 18.01216135231193
},
"Binary Search": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 120.31249633333333,
"avg_tokens_sec": 18.63905839923618
},
"Palindrome": {
"success_rate": 66.66666666666666,
"passed_cases": 12,
"total_cases": 18,
"success_cases_rate": 0.6666666666666666,
"avg_duration": 331.0699081943333,
"avg_tokens_sec": 18.283371808323412
},
"Anagram Check": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 78.71177402766666,
"avg_tokens_sec": 19.069151610886887
}
},
"overall_success_rate": 80.55555555555556,
"overall_success_cases_rate": 0.8055555555555556,
"min_avg_duration": 78.71177402766666,
"max_avg_duration": 336.8736558336667,
"min_tokens_per_second": 18.01216135231193,
"max_tokens_per_second": 19.069151610886887
},
{
"model": "deepseek-r1:1.5b-qwen-distill-q8_0",
"total_duration": 137.589384375,
"tokens_per_second": 57.36798365837254,
"test_results": {
"Fibonacci": {
"success_rate": 61.111111111111114,
"passed_cases": 11,
"total_cases": 18,
"success_cases_rate": 0.6111111111111112,
"avg_duration": 137.589384375,
"avg_tokens_sec": 57.36798365837254
},
"Binary Search": {
"success_rate": 66.66666666666666,
"passed_cases": 12,
"total_cases": 18,
"success_cases_rate": 0.6666666666666666,
"avg_duration": 41.38244445833333,
"avg_tokens_sec": 59.59776267442225
},
"Palindrome": {
"success_rate": 33.33333333333333,
"passed_cases": 6,
"total_cases": 18,
"success_cases_rate": 0.3333333333333333,
"avg_duration": 371.12627313866665,
"avg_tokens_sec": 53.88393896990123
},
"Anagram Check": {
"success_rate": 50.0,
"passed_cases": 9,
"total_cases": 18,
"success_cases_rate": 0.5,
"avg_duration": 52.969490597,
"avg_tokens_sec": 58.487717549099806
}
},
"overall_success_rate": 52.77777777777778,
"overall_success_cases_rate": 0.5277777777777778,
"min_avg_duration": 41.38244445833333,
"max_avg_duration": 371.12627313866665,
"min_tokens_per_second": 53.88393896990123,
"max_tokens_per_second": 59.59776267442225
},
{
"model": "deepseek-r1:14b",
"total_duration": 278.32433922266665,
"tokens_per_second": 9.047140217393457,
"test_results": {
"Fibonacci": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 278.32433922266665,
"avg_tokens_sec": 9.047140217393457
},
"Binary Search": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 174.302479847,
"avg_tokens_sec": 9.379116823142061
},
"Palindrome": {
"success_rate": 88.88888888888889,
"passed_cases": 16,
"total_cases": 18,
"success_cases_rate": 0.8888888888888888,
"avg_duration": 482.098123792,
"avg_tokens_sec": 9.218761695132402
},
"Anagram Check": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 308.77178886133333,
"avg_tokens_sec": 8.899496740455705
}
},
"overall_success_rate": 97.22222222222221,
"overall_success_cases_rate": 0.9722222222222222,
"min_avg_duration": 174.302479847,
"max_avg_duration": 482.098123792,
"min_tokens_per_second": 8.899496740455705,
"max_tokens_per_second": 9.379116823142061
},
{
"model": "Qwen2.5-Coder-7B-Instruct-s1k:latest",
"total_duration": 9.948125833333332,
"tokens_per_second": 18.377766002186945,
"test_results": {
"Fibonacci": {
"success_rate": 88.88888888888889,
"passed_cases": 16,
"total_cases": 18,
"success_cases_rate": 0.8888888888888888,
"avg_duration": 9.948125833333332,
"avg_tokens_sec": 18.377766002186945
},
"Binary Search": {
"success_rate": 66.66666666666666,
"passed_cases": 12,
"total_cases": 18,
"success_cases_rate": 0.6666666666666666,
"avg_duration": 12.908918361333333,
"avg_tokens_sec": 18.561003950076692
},
"Palindrome": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 9.064120069666666,
"avg_tokens_sec": 18.58546264084521
},
"Anagram Check": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 12.546619139,
"avg_tokens_sec": 18.9448229322312
}
},
"overall_success_rate": 88.88888888888889,
"overall_success_cases_rate": 0.8888888888888888,
"min_avg_duration": 9.064120069666666,
"max_avg_duration": 12.908918361333333,
"min_tokens_per_second": 18.377766002186945,
"max_tokens_per_second": 18.9448229322312
},
{
"model": "phi4-mini:latest",
"total_duration": 10.860303611333332,
"tokens_per_second": 29.361579428697542,
"test_results": {
"Fibonacci": {
"success_rate": 61.111111111111114,
"passed_cases": 11,
"total_cases": 18,
"success_cases_rate": 0.6111111111111112,
"avg_duration": 10.860303611333332,
"avg_tokens_sec": 29.361579428697542
},
"Binary Search": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 10.22926025,
"avg_tokens_sec": 29.360358027471495
},
"Palindrome": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 7.7338954719999995,
"avg_tokens_sec": 29.349959100715157
},
"Anagram Check": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 9.66612725,
"avg_tokens_sec": 29.794841927435822
}
},
"overall_success_rate": 90.27777777777779,
"overall_success_cases_rate": 0.9027777777777778,
"min_avg_duration": 7.7338954719999995,
"max_avg_duration": 10.860303611333332,
"min_tokens_per_second": 29.349959100715157,
"max_tokens_per_second": 29.794841927435822
}
]
},
{
"timestamp": "20250313_051856",
"results": [
{
"model": "gemma3:12b",
"total_duration": 17.904428624666668,
"tokens_per_second": 11.206900603314153,
"test_results": {
"Fibonacci": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 17.904428624666668,
"avg_tokens_sec": 11.206900603314153
},
"Binary Search": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 14.096915041666666,
"avg_tokens_sec": 11.209157987254114
},
"Palindrome": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 9.514898375333333,
"avg_tokens_sec": 11.037508677057549
},
"Anagram Check": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 24.419397555666666,
"avg_tokens_sec": 11.87609409055045
}
},
"overall_success_rate": 100.0,
"overall_success_cases_rate": 1.0,
"min_avg_duration": 9.514898375333333,
"max_avg_duration": 24.419397555666666,
"min_tokens_per_second": 11.037508677057549,
"max_tokens_per_second": 11.87609409055045
}
]
},
{
"timestamp": "20250314_024439",
"results": [
{
"model": "SiliconBasedWorld/Qwen2.5-7B-Instruct-1M",
"total_duration": 20.47047556933333,
"tokens_per_second": 19.721316911932245,
"test_results": {
"Fibonacci": {
"success_rate": 61.111111111111114,
"passed_cases": 11,
"total_cases": 18,
"success_cases_rate": 0.6111111111111112,
"avg_duration": 20.47047556933333,
"avg_tokens_sec": 19.721316911932245
},
"Binary Search": {
"success_rate": 66.66666666666666,
"passed_cases": 12,
"total_cases": 18,
"success_cases_rate": 0.6666666666666666,
"avg_duration": 89.59582123599999,
"avg_tokens_sec": 19.522371869517652
},
"Palindrome": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 29.476939527666666,
"avg_tokens_sec": 19.835750358255293
},
"Anagram Check": {
"success_rate": 33.33333333333333,
"passed_cases": 6,
"total_cases": 18,
"success_cases_rate": 0.3333333333333333,
"avg_duration": 52.099640236333336,
"avg_tokens_sec": 19.661776969493513
}
},
"overall_success_rate": 65.27777777777779,
"overall_success_cases_rate": 0.6527777777777778,
"min_avg_duration": 20.47047556933333,
"max_avg_duration": 89.59582123599999,
"min_tokens_per_second": 19.522371869517652,
"max_tokens_per_second": 19.835750358255293
}
]
},
{
"timestamp": "20250314_110909",
"results": [
{
"model": "olmo2:13b",
"total_duration": 25.239670416666666,
"tokens_per_second": 8.973277631244137,
"test_results": {
"Fibonacci": {
"success_rate": 61.111111111111114,
"passed_cases": 11,
"total_cases": 18,
"success_cases_rate": 0.6111111111111112,
"avg_duration": 25.239670416666666,
"avg_tokens_sec": 8.973277631244137
},
"Binary Search": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 10.511362861,
"avg_tokens_sec": 8.094987124683419
},
"Palindrome": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 7.803927528,
"avg_tokens_sec": 8.07489922259982
},
"Anagram Check": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 16.829488430333335,
"avg_tokens_sec": 8.85685146687769
}
},
"overall_success_rate": 90.27777777777779,
"overall_success_cases_rate": 0.9027777777777778,
"min_avg_duration": 7.803927528,
"max_avg_duration": 25.239670416666666,
"min_tokens_per_second": 8.07489922259982,
"max_tokens_per_second": 8.973277631244137
}
]
},
{
"timestamp": "20250314_111430",
"results": [
{
"model": "olmo2:13b-1124-instruct-q4_K_M",
"total_duration": 27.796664694333334,
"tokens_per_second": 9.16360668962085,
"test_results": {
"Fibonacci": {
"success_rate": 27.77777777777778,
"passed_cases": 5,
"total_cases": 18,
"success_cases_rate": 0.2777777777777778,
"avg_duration": 27.796664694333334,
"avg_tokens_sec": 9.16360668962085
},
"Binary Search": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 21.839994722333333,
"avg_tokens_sec": 9.000336176480124
},
"Palindrome": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 10.587036805333334,
"avg_tokens_sec": 8.492606444397637
},
"Anagram Check": {
"success_rate": 100.0,
"passed_cases": 18,
"total_cases": 18,
"success_cases_rate": 1.0,
"avg_duration": 9.969617250333334,
"avg_tokens_sec": 8.499243210997909
}
},
"overall_success_rate": 81.94444444444444,
"overall_success_cases_rate": 0.8194444444444444,
"min_avg_duration": 9.969617250333334,
"max_avg_duration": 27.796664694333334,
"min_tokens_per_second": 8.492606444397637,
"max_tokens_per_second": 9.16360668962085
}
]
}
]
}