import subprocess
import platform
import GPUtil
import psutil
import json
import re
from cpuinfo import get_cpu_info
from ollama import chat
from pydantic import BaseModel

print()
print("CPU py-cpuinfo Information:")
cpu_info = get_cpu_info()
for key, value in cpu_info.items():
    print(f"{key}: {value}")


def get_cpu_full_info():
    cpu_freq = psutil.cpu_freq()
    cpu_info = {
        "Architecture": platform.machine(),
        "Processor": platform.processor(),
        "Physical cores": psutil.cpu_count(logical=False),
        "Total cores": psutil.cpu_count(logical=True),
        "Max frequency": f"{cpu_freq.max:.2f} MHz",
        "Min frequency": f"{cpu_freq.min:.2f} MHz",
        "Current frequency": f"{cpu_freq.current:.2f} MHz",
        "CPU Usage Per Core": psutil.cpu_percent(interval=1, percpu=True),
        "Total CPU Usage": psutil.cpu_percent(interval=1)
    }
    return cpu_info


def print_cpu_fullinfo(cpu_info):
    print()
    print("CPU psutil Information:")
    for key, value in cpu_info.items():
        if isinstance(value, list):
            print(f"{key}:")
            for i, usage in enumerate(value):
                print(f"  Core {i}: {usage}%")
        else:
            print(f"{key}: {value}")


def get_cpu_moduleinfo():
    cpu_name = platform.processor()
    return {
        "name": cpu_name,
        "cores": psutil.cpu_count(logical=False),
        "threads": psutil.cpu_count(logical=True)
    }


def get_gpu_info():
    gpus = GPUtil.getGPUs()
    gpu_info = []
    for gpu in gpus:
        gpu_info.append({
            "id": gpu.id,
            "name": gpu.name,
            "memory_total": gpu.memoryTotal,  # in MB
            "memory_free": gpu.memoryFree,    # in MB
            "memory_used": gpu.memoryUsed     # in MB
        })
    return gpu_info


def calculate_theoretical_gpu_bandwidth(memory_clock_mhz, bus_width_bits):
    # Bandwidth (GB/s) = memory clock (Hz) * bus width (bits) * 2 transfers
    # per clock (double data rate), divided by 8 bits/byte and 1e9 bytes/GB.
    return (memory_clock_mhz * 1e6 * bus_width_bits * 2) / (8 * 1e9)  # GB/s
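
# main() below hard-codes an example memory clock. On machines with an NVIDIA
# GPU, the maximum memory clock can be read from nvidia-smi instead. A minimal
# sketch, assuming nvidia-smi is on PATH; note the memory bus width is not
# exposed by nvidia-smi's query interface, so it still has to be supplied by
# hand. nvidia-smi typically reports the clock before the data-rate
# multiplier, which pairs with the * 2 in calculate_theoretical_gpu_bandwidth.
def query_gpu_max_memory_clock_mhz(gpu_id=0):
    try:
        result = subprocess.run(
            ['nvidia-smi', f'--id={gpu_id}',
             '--query-gpu=clocks.max.memory',
             '--format=csv,noheader,nounits'],
            capture_output=True, text=True, check=True)
        return float(result.stdout.strip())  # reported in MHz
    except (subprocess.CalledProcessError, FileNotFoundError, ValueError):
        return None  # nvidia-smi missing, failed, or output not parseable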

def get_local_models():
    try:
        result = subprocess.run(['ollama', 'list'],
                                capture_output=True, text=True, check=True)
        models = result.stdout.strip().split('\n')[1:]  # Skip header row
        return [model.split()[0] for model in models]
    except subprocess.CalledProcessError:
        print("Error: Unable to retrieve local models. "
              "Make sure Ollama is installed and accessible.")
        return []


def get_model_info(model_name):
    try:
        result = subprocess.run(['ollama', 'show', model_name],
                                capture_output=True, text=True, check=True)
        modelfile = result.stdout

        # Parse the parameter count from the model name (e.g. "8b" -> 8e9,
        # "3.8b" -> 3.8e9).
        param_match = re.search(r'(\d+(?:\.\d+)?)b', model_name.lower())
        param_count = float(param_match.group(1)) * 1e9 if param_match else None

        # Parse the quantization tag from the model name (e.g. "q4" -> 4 bits).
        quant_match = re.search(r'q(\d+)', model_name.lower())
        quant_bits = int(quant_match.group(1)) if quant_match else 32  # Assume 32-bit if not specified

        return {
            'name': model_name,
            'parameters': param_count,
            'quantization_bits': quant_bits,
            'modelfile': modelfile
        }
    except subprocess.CalledProcessError:
        print(f"Error: Unable to retrieve information for model {model_name}")
        return None


def estimate_tps(model_info):
    # Rough estimate based on model size
    if model_info['parameters'] is None:
        return 100  # Default value
    param_billions = model_info['parameters'] / 1e9
    return max(10, int(200 - param_billions * 10))  # Simple linear decrease


def calculate_memory_throughput(model_info, tps):
    # Each generated token requires reading every weight once, so the
    # required bandwidth is (parameters * bytes per parameter) * tokens/s.
    P = model_info['parameters']
    Q = model_info['quantization_bits']
    if P and Q:
        bytes_per_parameter = Q / 8
        total_bytes = P * bytes_per_parameter
        return (total_bytes * tps) / 1e9  # Convert to GB/s
    return None


def calculate_ops(model_info, tps):
    P = model_info['parameters']
    if P:
        flops_per_token = 6 * P  # Estimate based on basic transformer architecture
        return flops_per_token * tps
    return None


def main():
    print()
    cpu_info = get_cpu_moduleinfo()
    print(f"CPU Info: {cpu_info}")

    print()
    gpu_info = get_gpu_info()
    print(f"GPU Info: {gpu_info}")

    print_cpu_fullinfo(get_cpu_full_info())

    # Example GPU theoretical bandwidth calculation (replace with actual values)
    for gpu in gpu_info:
        memory_clock_mhz = 14000  # Example value for GDDR6 (adjust as needed)
        bus_width_bits = 384      # Example value for high-end GPUs like RTX series
        theoretical_bandwidth = calculate_theoretical_gpu_bandwidth(memory_clock_mhz, bus_width_bits)
        print(f"GPU {gpu['name']} Theoretical Memory Bandwidth: {theoretical_bandwidth:.2f} GB/s")

    print()
    local_models = get_local_models()
    model_info_list = []

    for model in local_models:
        info = get_model_info(model)
        if info is None:
            continue
        print(info)
        tps = estimate_tps(info)
        info['estimated_tps'] = tps
        info['memory_throughput'] = calculate_memory_throughput(info, tps)
        info['operations_per_second'] = calculate_ops(info, tps)
        model_info_list.append(info)

        print(f"Model: {info['name']}")
        if info['parameters'] is not None:
            print(f"Parameters: {info['parameters'] / 1e9:.2f} Billion")
        else:
            print("Parameters: Unknown")
        print(f"Quantization: {info['quantization_bits']}-bit")
        print(f"Estimated TPS: {info['estimated_tps']}")
        if info['memory_throughput'] is not None:
            print(f"Required Memory Throughput: {info['memory_throughput']:.2f} GB/s")
        else:
            print("Required Memory Throughput: Unknown")
        if info['operations_per_second'] is not None:
            print(f"Operations per Second: {info['operations_per_second']:.2e}")
        else:
            print("Operations per Second: Unknown")
        print("---")

    with open('ollama_model_performance.json', 'w') as f:
        json.dump(model_info_list, f, indent=2)


if __name__ == "__main__":
    main()
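
# Optional, not called by main(): the `chat` import above can be used to check
# estimate_tps() against a real generation. The Ollama chat response carries
# eval_count (tokens generated) and eval_duration (nanoseconds spent
# generating), from which actual tokens/s follows directly. A minimal sketch,
# assuming the model is already pulled locally; the prompt is arbitrary.
def measure_actual_tps(model_name):
    response = chat(model=model_name,
                    messages=[{'role': 'user',
                               'content': 'Briefly describe what you can do.'}])
    # eval_duration is reported in nanoseconds by the Ollama API.
    return response['eval_count'] / (response['eval_duration'] / 1e9)

# Example usage (hypothetical model name):
#   measured = measure_actual_tps('llama3:8b')
#   print(f"Measured TPS: {measured:.1f}")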