tools calling functions compatibility

parent cddbbf844d
commit 049ef58f1e

README.md | 34
@@ -49,7 +49,7 @@ python3 main.py
 Available options:
 
 ```bash
-python3 main.py --server [local|remote] --model [model_name] --number [count|all] --verbose --plot-only --no-plot --file [results_file]
+python3 main.py --server [local|remote] --model [model_name] --number [count|all] --verbose --plot-only --no-plot --file [results_file] --funcall [model_name|all]
 ```
 
 ## Arguments:
@@ -61,6 +61,7 @@ python3 main.py --server [local|remote] --model [model_name] --number [count|all
 - --plot-only : Skip benchmarking and just generate graphs from existing results
 - --no-plot : Run benchmarking without plotting graphs at the end
 - --file : Specify a benchmark results file to use for plotting (only with --plot-only)
+- --funcall : Check function calling (tools) compatibility of models. Specify a model name or "all" for all models
 
 ## Supported Tests
 The tool currently tests models on these coding challenges:
@@ -198,6 +199,37 @@ The tool generates several output files in the `benchmark_results` directory:
 - High-resolution (300 DPI) visualization of model performance
 - Shows token processing speed, success rates, and duration ranges
 
+## Function Calling (Tools) Compatibility
+
+The tool now includes a feature to check which Ollama models support function calling (tools). This is useful for identifying models that can be used with applications requiring function calling capabilities.
+
+```bash
+# Check all models for function calling compatibility
+python3 main.py --funcall all
+
+# Check a specific model
+python3 main.py --funcall mistral:7b
+```
+
+Example output:
+
+```
+Tools Compatibility Report
+===================================================================
+Model                            Tools Support
+-------------------------------------------------------------------
+qwen2.5:14b                      ✅ Supported
+phi4-mini:latest                 ✅ Supported
+phi4:latest                      ❌ Not Supported
+mistral:7b                       ✅ Supported
+llama3.2:3b                      ✅ Supported
+llama3.2:1b-instruct-q4_K_M      ✅ Supported
+gemma3:12b                       ❌ Not Supported
+===================================================================
+```
+
+During benchmarking, the tool will now also display whether each model supports function calling before running the tests.
+
 ## Recent Updates
 
 ### March 2025 Updates

main.py | 34
@@ -14,6 +14,7 @@ import matplotlib.pyplot as plt
 from together import Together
 from cpuinfo import get_cpu_info
 import subprocess
+from tools import get_tools_compatible_models, print_tools_compatibility_table
 
 
 # ANSI color codes
@@ -473,6 +474,15 @@ def run_model_benchmark(model: str, server_url: str, num_runs: int = 4) -> Dict:
     """
     Run multiple benchmarks for a model and calculate average metrics.
     """
+    # Check function calling (tools) compatibility
+    from tools import test_model_tools_support
+    supports_tools, error = test_model_tools_support(model)
+    if supports_tools:
+        print(f"\n{SUCCESS}Function Calling (Tools): ✅ Supported{ENDC}")
+    else:
+        error_msg = f" ({error})" if error else ""
+        print(f"\n{ERROR}Function Calling (Tools): ❌ Not Supported{error_msg}{ENDC}")
+
     metrics = []
 
     for i in range(num_runs):
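Note: the `test_model_tools_support` helper imported in this hunk lives in `tools.py`, which is not part of the diff shown here. As a rough illustration only, such a probe could send a minimal `tools` payload to Ollama's `/api/chat` endpoint and treat a rejection as "not supported"; the payload, endpoint usage, and error handling below are assumptions, not the repository's actual implementation.

```python
# Hypothetical sketch of tools.py's test_model_tools_support (not taken from this commit).
# Assumes a reachable Ollama server and its /api/chat endpoint with a "tools" field.
import requests

def test_model_tools_support(model: str, server_url: str = "http://localhost:11434"):
    """Probe a model with a minimal tool definition; return (supported, error_message)."""
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": "What is the weather in Paris?"}],
        "tools": [{
            "type": "function",
            "function": {
                "name": "get_weather",  # hypothetical tool used only as a probe
                "description": "Get the current weather for a city",
                "parameters": {
                    "type": "object",
                    "properties": {"city": {"type": "string"}},
                    "required": ["city"],
                },
            },
        }],
        "stream": False,
    }
    try:
        resp = requests.post(f"{server_url}/api/chat", json=payload, timeout=120)
        if resp.status_code == 200:
            return True, None
        try:
            message = resp.json().get("error", "")
        except ValueError:
            message = resp.text
        return False, message or f"HTTP {resp.status_code}"
    except requests.RequestException as exc:
        return False, str(exc)
```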
@@ -987,6 +997,8 @@ def main():
                         help='Run benchmarking without plotting graphs at the end')
     parser.add_argument('--file', type=str,
                         help='Specify a benchmark results file to use for plotting (only with --plot-only)')
+    parser.add_argument('--funcall', type=str, nargs='?', const='all',
+                        help='Check function calling (tools) compatibility of models. Specify a model name or "all" for all models')
     args = parser.parse_args()
 
     # Set global verbose flag
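Because `--funcall` is declared with `nargs='?'` and `const='all'`, passing the flag with no value behaves the same as `--funcall all`, while omitting it leaves `args.funcall` as `None` so the normal benchmark path runs. A small standalone illustration of that argparse behavior (not project code):

```python
# Standalone demonstration of nargs='?' with const='all' (illustrative only).
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--funcall', type=str, nargs='?', const='all')

print(parser.parse_args([]).funcall)                           # None -> compatibility check skipped
print(parser.parse_args(['--funcall']).funcall)                # 'all' -> const value is used
print(parser.parse_args(['--funcall', 'mistral:7b']).funcall)  # 'mistral:7b'
```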
@@ -998,6 +1010,28 @@ def main():
         print(f"{INFO}Running in plot-only mode...{ENDC}")
         plot_benchmark_results(args.file)
         return
 
+    # Handle function calling compatibility check mode
+    if args.funcall is not None:
+        server_url = SERVERS[args.server]
+        print(f"{INFO}Checking function calling (tools) compatibility...{ENDC}")
+
+        if args.funcall.lower() == 'all':
+            # Check all available models
+            compatibility = get_tools_compatible_models(server_url=server_url)
+            print_tools_compatibility_table(compatibility)
+        else:
+            # Check specific model
+            model_name = args.funcall
+            print(f"{INFO}Checking function calling compatibility for {model_name}...{ENDC}")
+            supports_tools, error = get_tools_compatible_models(model=model_name)
+
+            if supports_tools:
+                print(f"{SUCCESS}✅ {model_name}: Supports function calling (tools){ENDC}")
+            else:
+                error_msg = f" ({error})" if error else ""
+                print(f"{ERROR}❌ {model_name}: Does not support function calling (tools){error_msg}{ENDC}")
+        return
+
     server_url = SERVERS[args.server]
 
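`get_tools_compatible_models` and `print_tools_compatibility_table` are likewise imported from the unshown `tools.py`. Judging only from how they are called above (a mapping of results when checking all models, a `(supported, error)` tuple when a single model is passed), they might look roughly like the sketch below, which reuses the hypothetical `test_model_tools_support` probe sketched earlier; the `/api/tags` listing and the table formatting are assumptions.

```python
# Hypothetical sketch of the remaining tools.py helpers (not taken from this commit).
# Relies on the test_model_tools_support probe sketched above and Ollama's /api/tags.
import requests

def get_tools_compatible_models(server_url: str = "http://localhost:11434", model: str = None):
    """Return {model_name: (supported, error)} for every installed model,
    or a single (supported, error) tuple when a specific model is given."""
    if model is not None:
        return test_model_tools_support(model, server_url)
    tags = requests.get(f"{server_url}/api/tags", timeout=30).json()
    names = [m["name"] for m in tags.get("models", [])]
    return {name: test_model_tools_support(name, server_url) for name in names}

def print_tools_compatibility_table(compatibility: dict) -> None:
    """Render a report in the style shown in the README."""
    print("Tools Compatibility Report")
    print("=" * 67)
    print(f"{'Model':<32}Tools Support")
    print("-" * 67)
    for name, (supported, _error) in compatibility.items():
        status = "✅ Supported" if supported else "❌ Not Supported"
        print(f"{name:<32}{status}")
    print("=" * 67)
```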

models.py | 22
@@ -48,14 +48,14 @@ def get_model_details(model_name):
         print(f"An error occurred: {e}")
         return None
 
-# List all available models using the Ollama Python library
-models = get_available_models(server_url)
-print("Available Models:")
-for model_name in models:
-    print(model_name)
-    details = get_model_details(model_name)
-
-    # Display detailed information about the model
-    if details:
-        print("\nModel Details:")
-        print(json.dumps(details, indent=4))
+# This code is commented out to prevent automatic execution when imported
+# models = get_available_models(server_url)
+# print("Available Models:")
+# for model_name in models:
+#     print(model_name)
+#     details = get_model_details(model_name)
+#
+#     # Display detailed information about the model
+#     if details:
+#         print("\nModel Details:")
+#         print(json.dumps(details, indent=4))
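Commenting this block out stops it from executing every time `models.py` is imported by `main.py`. An alternative that keeps the file usable as a standalone script is to guard the same code with an `if __name__ == "__main__":` check; a minimal sketch, assuming the `get_available_models`/`get_model_details` helpers, `server_url`, and the `json` import already present in `models.py`:

```python
# Sketch of an alternative to commenting the block out: only run it when
# models.py is executed directly, not when it is imported by main.py.
if __name__ == "__main__":
    models = get_available_models(server_url)
    print("Available Models:")
    for model_name in models:
        print(model_name)
        details = get_model_details(model_name)

        # Display detailed information about the model
        if details:
            print("\nModel Details:")
            print(json.dumps(details, indent=4))
```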