Function calling (tools) compatibility
parent cddbbf844d
commit 049ef58f1e

README.md (34 changed lines)
@@ -49,7 +49,7 @@ python3 main.py
 Available options:
 
 ```bash
-python3 main.py --server [local|remote] --model [model_name] --number [count|all] --verbose --plot-only --no-plot --file [results_file]
+python3 main.py --server [local|remote] --model [model_name] --number [count|all] --verbose --plot-only --no-plot --file [results_file] --funcall [model_name|all]
 ```
 
 ## Arguments:
@@ -61,6 +61,7 @@ python3 main.py --server [local|remote] --model [model_name] --number [count|all
 - --plot-only : Skip benchmarking and just generate graphs from existing results
 - --no-plot : Run benchmarking without plotting graphs at the end
 - --file : Specify a benchmark results file to use for plotting (only with --plot-only)
+- --funcall : Check function calling (tools) compatibility of models. Specify a model name or "all" for all models
 
 ## Supported Tests
 The tool currently tests models on these coding challenges:
@@ -198,6 +199,37 @@ The tool generates several output files in the `benchmark_results` directory:
   - High-resolution (300 DPI) visualization of model performance
   - Shows token processing speed, success rates, and duration ranges
 
+## Function Calling (Tools) Compatibility
+
+The tool now includes a feature to check which Ollama models support function calling (tools). This is useful for identifying models that can be used with applications requiring function calling capabilities.
+
+```bash
+# Check all models for function calling compatibility
+python3 main.py --funcall all
+
+# Check a specific model
+python3 main.py --funcall mistral:7b
+```
+
+Example output:
+
+```
+Tools Compatibility Report
+===================================================================
+Model                                             Tools Support
+-------------------------------------------------------------------
+qwen2.5:14b                                       ✅ Supported
+phi4-mini:latest                                  ✅ Supported
+phi4:latest                                       ❌ Not Supported
+mistral:7b                                        ✅ Supported
+llama3.2:3b                                       ✅ Supported
+llama3.2:1b-instruct-q4_K_M                       ✅ Supported
+gemma3:12b                                        ❌ Not Supported
+===================================================================
+```
+
+During benchmarking, the tool will now also display whether each model supports function calling before running the tests.
+
 ## Recent Updates
 
 ### March 2025 Updates
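The `tools` module that implements this check is not included in this commit view. The sketch below illustrates the kind of probe such a check typically performs against a local Ollama server: send one chat request that declares a dummy tool and treat a rejection as "not supported". The endpoint, payload shape, dummy tool, and error handling are assumptions for illustration only, not the repository's actual `tools.py`.

```python
# Hypothetical sketch -- not the repository's tools.py. Assumes a local Ollama
# server whose /api/chat endpoint rejects a request carrying a "tools" array
# when the model has no function-calling support.
import requests

DUMMY_TOOL = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get the current weather for a city",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}

def probe_tools_support(model: str, server_url: str = "http://localhost:11434"):
    """Return (supports_tools, error_message) for a single Ollama model."""
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": "What is the weather in Paris?"}],
        "tools": [DUMMY_TOOL],
        "stream": False,
    }
    try:
        resp = requests.post(f"{server_url}/api/chat", json=payload, timeout=120)
        if resp.status_code == 200:
            return True, None
        # Ollama usually answers with a JSON body explaining that the model
        # does not support tools.
        return False, resp.json().get("error", f"HTTP {resp.status_code}")
    except requests.RequestException as exc:
        return False, str(exc)

if __name__ == "__main__":
    print(probe_tools_support("mistral:7b"))
```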
							
								
								
									
main.py (34 changed lines)
@@ -14,6 +14,7 @@ import matplotlib.pyplot as plt
 from together import Together
 from cpuinfo import get_cpu_info
 import subprocess
+from tools import get_tools_compatible_models, print_tools_compatibility_table
 
 
 # ANSI color codes
@@ -473,6 +474,15 @@ def run_model_benchmark(model: str, server_url: str, num_runs: int = 4) -> Dict:
     """
     Run multiple benchmarks for a model and calculate average metrics.
     """
+    # Check function calling (tools) compatibility
+    from tools import test_model_tools_support
+    supports_tools, error = test_model_tools_support(model)
+    if supports_tools:
+        print(f"\n{SUCCESS}Function Calling (Tools): ✅ Supported{ENDC}")
+    else:
+        error_msg = f" ({error})" if error else ""
+        print(f"\n{ERROR}Function Calling (Tools): ❌ Not Supported{error_msg}{ENDC}")
+
     metrics = []
 
     for i in range(num_runs):
@@ -987,6 +997,8 @@ def main():
                       help='Run benchmarking without plotting graphs at the end')
     parser.add_argument('--file', type=str,
                       help='Specify a benchmark results file to use for plotting (only with --plot-only)')
+    parser.add_argument('--funcall', type=str, nargs='?', const='all',
+                      help='Check function calling (tools) compatibility of models. Specify a model name or "all" for all models')
     args = parser.parse_args()
 
     # Set global verbose flag
@@ -998,6 +1010,28 @@ def main():
         print(f"{INFO}Running in plot-only mode...{ENDC}")
         plot_benchmark_results(args.file)
         return
+
+    # Handle function calling compatibility check mode
+    if args.funcall is not None:
+        server_url = SERVERS[args.server]
+        print(f"{INFO}Checking function calling (tools) compatibility...{ENDC}")
+
+        if args.funcall.lower() == 'all':
+            # Check all available models
+            compatibility = get_tools_compatible_models(server_url=server_url)
+            print_tools_compatibility_table(compatibility)
+        else:
+            # Check specific model
+            model_name = args.funcall
+            print(f"{INFO}Checking function calling compatibility for {model_name}...{ENDC}")
+            supports_tools, error = get_tools_compatible_models(model=model_name)
+
+            if supports_tools:
+                print(f"{SUCCESS}✅ {model_name}: Supports function calling (tools){ENDC}")
+            else:
+                error_msg = f" ({error})" if error else ""
+                print(f"{ERROR}❌ {model_name}: Does not support function calling (tools){error_msg}{ENDC}")
+        return
 
     server_url = SERVERS[args.server]
 
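Two details of the main.py changes are easy to miss. First, `nargs='?'` combined with `const='all'` means a bare `--funcall` (no value) checks every model, while `--funcall mistral:7b` checks just one. Second, `print_tools_compatibility_table` is imported but its implementation is not shown in this commit. The sketch below demonstrates the argparse behaviour and a hypothetical table renderer shaped after the README's example report; both are illustrations under those assumptions, not the repository's code, and the `(supported, error)` pair structure is inferred from the call sites above.

```python
# Illustration only: argparse behaviour of the new flag, plus a hypothetical
# table renderer modelled on the README's example output.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--funcall', type=str, nargs='?', const='all')

print(parser.parse_args([]).funcall)                           # None -> normal benchmark run
print(parser.parse_args(['--funcall']).funcall)                # 'all' -> check every model
print(parser.parse_args(['--funcall', 'mistral:7b']).funcall)  # 'mistral:7b' -> check one model


def print_tools_compatibility_table(compatibility):
    """Render a report like the README example.

    `compatibility` is assumed to map model names to (supported, error) pairs,
    matching how main.py consumes get_tools_compatible_models(server_url=...).
    """
    print("Tools Compatibility Report")
    print("=" * 67)
    print(f"{'Model':<50}Tools Support")
    print("-" * 67)
    for name, (supported, _error) in compatibility.items():
        status = "✅ Supported" if supported else "❌ Not Supported"
        print(f"{name:<50}{status}")
    print("=" * 67)


print_tools_compatibility_table({
    "mistral:7b": (True, None),
    "gemma3:12b": (False, "does not support tools"),
})
```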
							
								
								
									
models.py (22 changed lines)
@@ -48,14 +48,14 @@ def get_model_details(model_name):
         print(f"An error occurred: {e}")
         return None
 
-# List all available models using the Ollama Python library
-models = get_available_models(server_url)
-print("Available Models:")
-for model_name in models:
-    print(model_name)
-    details = get_model_details(model_name)
-
-    # Display detailed information about the model
-    if details:
-        print("\nModel Details:")
-        print(json.dumps(details, indent=4))
+# This code is commented out to prevent automatic execution when imported
+# models = get_available_models(server_url)
+# print("Available Models:")
+# for model_name in models:
+#     print(model_name)
+#     details = get_model_details(model_name)
+#
+#     # Display detailed information about the model
+#     if details:
+#         print("\nModel Details:")
+#         print(json.dumps(details, indent=4))
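Commenting the block out does stop it from running when models.py is imported by main.py, but the more conventional way to keep a module-level demo is a `__main__` guard. The sketch below is only an alternative suggestion reusing the module's existing names (`get_available_models`, `server_url`, `get_model_details`, `json`); it is not part of this commit.

```python
# Alternative sketch (not in this commit): keep the demo runnable via
# `python3 models.py`, but inert when models.py is imported elsewhere.
if __name__ == "__main__":
    models = get_available_models(server_url)
    print("Available Models:")
    for model_name in models:
        print(model_name)
        details = get_model_details(model_name)

        # Display detailed information about the model
        if details:
            print("\nModel Details:")
            print(json.dumps(details, indent=4))
```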