diff --git a/.DS_Store b/.DS_Store
index 8fbf2d4..ee3d1ef 100644
Binary files a/.DS_Store and b/.DS_Store differ
diff --git a/README.md b/README.md
index 78e6e91..c2b3ec3 100644
--- a/README.md
+++ b/README.md
@@ -49,7 +49,7 @@ python3 main.py
 Available options:
 
 ```bash
-python3 main.py --server [local|remote] --model [model_name] --number [count|all] --verbose --plot-only --no-plot --file [results_file]
+python3 main.py --server [local|remote] --model [model_name] --number [count|all] --verbose --plot-only --no-plot --file [results_file] --funcall [model_name|all]
 ```
 
 ## Arguments:
@@ -61,6 +61,7 @@ python3 main.py --server [local|remote] --model [model_name] --number [count|all]
 - --plot-only : Skip benchmarking and just generate graphs from existing results
 - --no-plot : Run benchmarking without plotting graphs at the end
 - --file : Specify a benchmark results file to use for plotting (only with --plot-only)
+- --funcall : Check function calling (tools) compatibility of models. Specify a model name or "all" for all models
 
 ## Supported Tests
 The tool currently tests models on these coding challenges:
@@ -198,6 +199,37 @@ The tool generates several output files in the `benchmark_results` directory:
 - High-resolution (300 DPI) visualization of model performance
 - Shows token processing speed, success rates, and duration ranges
 
+## Function Calling (Tools) Compatibility
+
+The tool now includes a feature to check which Ollama models support function calling (tools). This is useful for identifying models that can be used with applications that require tool use.
+
+```bash
+# Check all models for function calling compatibility
+python3 main.py --funcall all
+
+# Check a specific model
+python3 main.py --funcall mistral:7b
+```
+
+Example output:
+
+```
+Tools Compatibility Report
+===================================================================
+Model                                        Tools Support
+-------------------------------------------------------------------
+qwen2.5:14b                                  ✅ Supported
+phi4-mini:latest                             ✅ Supported
+phi4:latest                                  ❌ Not Supported
+mistral:7b                                   ✅ Supported
+llama3.2:3b                                  ✅ Supported
+llama3.2:1b-instruct-q4_K_M                  ✅ Supported
+gemma3:12b                                   ❌ Not Supported
+===================================================================
+```
+
+During benchmarking, the tool now also reports whether each model supports function calling before running the tests.
+
 ## Recent Updates
 
 ### March 2025 Updates
diff --git a/main.py b/main.py
index bc4f7fe..8a092d0 100755
--- a/main.py
+++ b/main.py
@@ -14,6 +14,7 @@ import matplotlib.pyplot as plt
 from together import Together
 from cpuinfo import get_cpu_info
 import subprocess
+from tools import get_tools_compatible_models, print_tools_compatibility_table, test_model_tools_support
 
 
 # ANSI color codes
@@ -473,6 +474,14 @@ def run_model_benchmark(model: str, server_url: str, num_runs: int = 4) -> Dict:
     """
     Run multiple benchmarks for a model and calculate average metrics.
     """
+    # Check function calling (tools) compatibility
+    supports_tools, error = test_model_tools_support(model)
+    if supports_tools:
+        print(f"\n{SUCCESS}Function Calling (Tools): ✅ Supported{ENDC}")
+    else:
+        error_msg = f" ({error})" if error else ""
+        print(f"\n{ERROR}Function Calling (Tools): ❌ Not Supported{error_msg}{ENDC}")
+
     metrics = []
 
     for i in range(num_runs):
@@ -987,6 +996,8 @@ def main():
                         help='Run benchmarking without plotting graphs at the end')
     parser.add_argument('--file', type=str,
                         help='Specify a benchmark results file to use for plotting (only with --plot-only)')
+    parser.add_argument('--funcall', type=str, nargs='?', const='all',
+                        help='Check function calling (tools) compatibility of models. Specify a model name or "all" for all models')
     args = parser.parse_args()
 
     # Set global verbose flag
@@ -998,6 +1009,28 @@ def main():
         print(f"{INFO}Running in plot-only mode...{ENDC}")
         plot_benchmark_results(args.file)
         return
+
+    # Handle function calling compatibility check mode
+    if args.funcall is not None:
+        server_url = SERVERS[args.server]
+        print(f"{INFO}Checking function calling (tools) compatibility...{ENDC}")
+
+        if args.funcall.lower() == 'all':
+            # Check all available models
+            compatibility = get_tools_compatible_models(server_url=server_url)
+            print_tools_compatibility_table(compatibility)
+        else:
+            # Check a specific model
+            model_name = args.funcall
+            print(f"{INFO}Checking function calling compatibility for {model_name}...{ENDC}")
+            supports_tools, error = test_model_tools_support(model_name)
+
+            if supports_tools:
+                print(f"{SUCCESS}✅ {model_name}: Supports function calling (tools){ENDC}")
+            else:
+                error_msg = f" ({error})" if error else ""
+                print(f"{ERROR}❌ {model_name}: Does not support function calling (tools){error_msg}{ENDC}")
+        return
 
     server_url = SERVERS[args.server]
diff --git a/models.py b/models.py
index 31c7877..4cc2746 100644
--- a/models.py
+++ b/models.py
@@ -48,14 +48,14 @@ def get_model_details(model_name):
         print(f"An error occurred: {e}")
         return None
 
-# List all available models using the Ollama Python library
-models = get_available_models(server_url)
-print("Available Models:")
-for model_name in models:
-    print(model_name)
-    details = get_model_details(model_name)
-
-    # Display detailed information about the model
-    if details:
-        print("\nModel Details:")
-        print(json.dumps(details, indent=4))
+# This code is commented out to prevent automatic execution when imported
+# models = get_available_models(server_url)
+# print("Available Models:")
+# for model_name in models:
+#     print(model_name)
+#     details = get_model_details(model_name)
+#
+#     # Display detailed information about the model
+#     if details:
+#         print("\nModel Details:")
+#         print(json.dumps(details, indent=4))