"""Ollama provider implementation. Ollama enables 100% local AI execution with no costs and complete privacy. No internet connection required - perfect for sensitive enterprise environments. """ from typing import Optional from .base import AIProvider, AIProviderError class OllamaProvider(AIProvider): """Ollama provider for local AI execution.""" NAME = "ollama" REQUIRES_API_KEY = False DEFAULT_URL = "http://localhost:11434" def __init__(self, api_key: str = "", model: str = "", base_url: str = ""): """Initialize Ollama provider. Args: api_key: Not used for Ollama (local execution) model: Model name (user must select from loaded models) base_url: Ollama server URL (default: http://localhost:11434) """ super().__init__(api_key, model, base_url) # Use default URL if not provided if not self.base_url: self.base_url = self.DEFAULT_URL def generate(self, system_prompt: str, user_message: str, max_tokens: int = 200) -> Optional[str]: """Generate a response using local Ollama server. Args: system_prompt: System instructions user_message: User message to process max_tokens: Maximum response length (maps to num_predict) Returns: Generated text or None if failed Raises: AIProviderError: If Ollama server is unreachable """ url = f"{self.base_url.rstrip('/')}/api/chat" payload = { 'model': self.model, 'messages': [ {'role': 'system', 'content': system_prompt}, {'role': 'user', 'content': user_message}, ], 'stream': False, 'options': { 'num_predict': max_tokens, 'temperature': 0.3, } } headers = { 'Content-Type': 'application/json', } # Cloud models (e.g., kimi-k2.5:cloud, minimax-m2.7:cloud) need longer timeout # because requests go through: ProxMenux -> Ollama -> Cloud Provider -> back # Local models also need generous timeout for slower hardware (e.g., low-end CPUs, # no GPU acceleration, larger models like 8B parameters) is_cloud_model = ':cloud' in self.model.lower() timeout = 120 if is_cloud_model else 90 # 2 minutes for cloud, 90s for local try: result = self._make_request(url, payload, headers, timeout=timeout) except AIProviderError as e: if "Connection" in str(e) or "refused" in str(e).lower(): raise AIProviderError( f"Cannot connect to Ollama at {self.base_url}. " "Make sure Ollama is running (ollama serve)" ) raise try: message = result.get('message', {}) return message.get('content', '').strip() except (KeyError, AttributeError) as e: raise AIProviderError(f"Unexpected response format: {e}") def test_connection(self): """Test connection to Ollama server. Also checks if the specified model is available. """ import json import urllib.request import urllib.error # First check if server is running try: url = f"{self.base_url.rstrip('/')}/api/tags" req = urllib.request.Request(url, method='GET', headers={'User-Agent': 'ProxMenux/1.0'}) with urllib.request.urlopen(req, timeout=5) as resp: data = json.loads(resp.read().decode('utf-8')) # Get full model names (with tags) for comparison full_model_names = [m.get('name', '') for m in data.get('models', [])] # Also get base names (without tags) for fallback matching base_model_names = [name.split(':')[0] for name in full_model_names] # Check if the requested model matches any available model # Match by: exact name, base name, or requested model without tag requested_base = self.model.split(':')[0] if ':' in self.model else self.model model_found = ( self.model in full_model_names or # Exact match (e.g., "llama3.2:latest") self.model in base_model_names or # Base name match (e.g., "llama3.2") requested_base in base_model_names # Requested base matches available base ) if not model_found: display_models = full_model_names[:5] if full_model_names else ['none'] return { 'success': False, 'message': f"Model '{self.model}' not found. Available: {', '.join(display_models)}{'...' if len(full_model_names) > 5 else ''}", 'model': self.model } except urllib.error.URLError: return { 'success': False, 'message': f"Cannot connect to Ollama at {self.base_url}. Make sure Ollama is running.", 'model': self.model } except Exception as e: return { 'success': False, 'message': f"Error checking Ollama: {str(e)}", 'model': self.model } # If server is up and model exists, do the actual test # For cloud models, we skip the full test (which sends a message) # because it would take too long. The model availability check above is sufficient. is_cloud_model = ':cloud' in self.model.lower() if is_cloud_model: return { 'success': True, 'message': f"Cloud model '{self.model}' is available via Ollama", 'model': self.model } return super().test_connection()