Files
ProxMenux/AppImage/scripts/ai_providers/ollama_provider.py
2026-03-22 18:43:33 +01:00

150 lines
6.0 KiB
Python

"""Ollama provider implementation.
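Ollama enables 100% local AI execution with no costs and complete privacy.
Local models need no internet connection - perfect for sensitive enterprise environments.
Models with a ":cloud" tag are the exception: Ollama forwards those requests to a cloud provider.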
"""
from typing import Optional
from .base import AIProvider, AIProviderError
class OllamaProvider(AIProvider):
    """Ollama provider for local AI execution.

    Talks to an Ollama server over HTTP and needs no API key. Models whose
    name contains ``:cloud`` are treated as cloud-backed: they get a longer
    request timeout in ``generate`` and a lighter check in
    ``test_connection``.
    """

    NAME = "ollama"
    REQUIRES_API_KEY = False
    DEFAULT_URL = "http://localhost:11434"

    # Request timeouts in seconds for ``generate``.
    # Cloud models need longer because requests go through:
    # ProxMenux -> Ollama -> Cloud Provider -> back.
    # Local models also need a generous timeout for slower hardware
    # (low-end CPUs, no GPU acceleration, larger models).
    CLOUD_TIMEOUT = 120
    LOCAL_TIMEOUT = 90
    # Timeout in seconds for the lightweight model-listing probe.
    TAGS_TIMEOUT = 5

    def __init__(self, api_key: str = "", model: str = "", base_url: str = ""):
        """Initialize Ollama provider.

        Args:
            api_key: Not used for Ollama (local execution).
            model: Model name (user must select from loaded models).
            base_url: Ollama server URL (default: http://localhost:11434).
        """
        super().__init__(api_key, model, base_url)
        # Fall back to the local default when no URL was configured.
        if not self.base_url:
            self.base_url = self.DEFAULT_URL

    def _is_cloud_model(self) -> bool:
        """Return True when the configured model name contains ``:cloud``."""
        return ':cloud' in self.model.lower()

    def _api_url(self, path: str) -> str:
        """Join the configured base URL with an API path such as ``/api/chat``."""
        return f"{self.base_url.rstrip('/')}{path}"

    def _connection_result(self, success: bool, message: str) -> dict:
        """Build the status dict returned by ``test_connection``."""
        return {
            'success': success,
            'message': message,
            'model': self.model,
        }

    def generate(self, system_prompt: str, user_message: str,
                 max_tokens: int = 200) -> Optional[str]:
        """Generate a response using the Ollama server.

        Args:
            system_prompt: System instructions.
            user_message: User message to process.
            max_tokens: Maximum response length (sent as ``num_predict``).

        Returns:
            The stripped ``content`` of the response message; an empty
            string when the response carries no message content.

        Raises:
            AIProviderError: If the request fails (connection problems get a
                hint to start Ollama), or if the response does not have the
                expected shape.
        """
        url = self._api_url('/api/chat')
        payload = {
            'model': self.model,
            'messages': [
                {'role': 'system', 'content': system_prompt},
                {'role': 'user', 'content': user_message},
            ],
            'stream': False,
            'options': {
                'num_predict': max_tokens,
                'temperature': 0.3,
            },
        }
        headers = {
            'Content-Type': 'application/json',
        }
        timeout = self.CLOUD_TIMEOUT if self._is_cloud_model() else self.LOCAL_TIMEOUT

        try:
            result = self._make_request(url, payload, headers, timeout=timeout)
        except AIProviderError as e:
            error_text = str(e)
            if "Connection" in error_text or "refused" in error_text.lower():
                # Replace the low-level error with an actionable hint, but
                # keep the original as the cause for debugging.
                raise AIProviderError(
                    f"Cannot connect to Ollama at {self.base_url}. "
                    "Make sure Ollama is running (ollama serve)"
                ) from e
            raise

        try:
            message = result.get('message', {})
            return message.get('content', '').strip()
        except (KeyError, AttributeError) as e:
            # Raised when the result, the message, or the content is not of
            # the expected type (e.g. None instead of a dict or a string).
            raise AIProviderError(f"Unexpected response format: {e}") from e

    def test_connection(self):
        """Test connection to Ollama server.

        First lists the server's models via ``/api/tags`` and checks that
        the configured model is among them. For cloud models that check is
        the whole test; for other models the base class test runs afterwards.

        Returns:
            A dict with ``success``, ``message`` and ``model`` keys for every
            result produced here; otherwise whatever the base class
            ``test_connection`` returns.
        """
        import json
        import urllib.error
        import urllib.request

        # First check if the server is running and knows the model.
        try:
            req = urllib.request.Request(self._api_url('/api/tags'), method='GET')
            with urllib.request.urlopen(req, timeout=self.TAGS_TIMEOUT) as resp:
                data = json.loads(resp.read().decode('utf-8'))

            # Full model names (with tags), e.g. "llama3.2:latest".
            full_model_names = [m.get('name', '') for m in data.get('models', [])]
            # Base names (without tags) for fallback matching.
            base_model_names = [name.split(':')[0] for name in full_model_names]
            requested_base = self.model.split(':')[0]

            # Match by exact name, or by base name on either side.
            # NOTE(review): the base-name match is lenient - a different tag
            # of the same model counts as found. Confirm this is intended for
            # cloud models, where no real request is sent afterwards.
            model_found = (
                self.model in full_model_names
                or requested_base in base_model_names
            )
            if not model_found:
                shown = ', '.join(full_model_names[:5]) if full_model_names else 'none'
                suffix = '...' if len(full_model_names) > 5 else ''
                return self._connection_result(
                    False,
                    f"Model '{self.model}' not found. Available: {shown}{suffix}",
                )
        except urllib.error.HTTPError as e:
            # HTTPError subclasses URLError, so it must be caught first: the
            # server answered, so this is not a connection failure.
            return self._connection_result(
                False,
                f"Ollama at {self.base_url} returned HTTP {e.code}: {e.reason}",
            )
        except urllib.error.URLError:
            return self._connection_result(
                False,
                f"Cannot connect to Ollama at {self.base_url}. Make sure Ollama is running.",
            )
        except Exception as e:
            # Boundary handler: any other failure (bad JSON, unexpected
            # payload shape, invalid URL) is reported as a failed test
            # instead of propagating to the caller.
            return self._connection_result(
                False,
                f"Error checking Ollama: {str(e)}",
            )

        # Server is up and the model exists. For cloud models, skip the full
        # test (which sends a message) because it would take too long; the
        # availability check above is considered sufficient.
        if self._is_cloud_model():
            return self._connection_result(
                True,
                f"Cloud model '{self.model}' is available via Ollama",
            )
        return super().test_connection()