mirror of
https://github.com/MacRimi/ProxMenux.git
synced 2026-05-17 22:35:02 +00:00
update beta ProxMenux 1.2.1.1-beta
This commit is contained in:
@@ -8,6 +8,43 @@ class AIProviderError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
# Shared urllib3 PoolManager for AI providers. urllib's `urlopen` does
|
||||
# NOT pool connections — each call does a fresh TCP+TLS handshake (~100-
|
||||
# 300ms wasted per call). PoolManager keeps connections alive within the
|
||||
# `cleanup` window per (scheme, host, port). Providers can opt into this
|
||||
# by calling `pooled_request(...)` instead of `urllib.request.urlopen`.
|
||||
# Audit Tier 7 — Sin HTTP connection pooling.
|
||||
try:
|
||||
import urllib3 as _urllib3
|
||||
_HTTP_POOL = _urllib3.PoolManager(
|
||||
num_pools=8, # one slot per provider host (groq, openai, ...)
|
||||
maxsize=4, # parallel connections per host
|
||||
timeout=_urllib3.Timeout(connect=5, read=30),
|
||||
retries=False, # we handle retries at the dispatcher level
|
||||
)
|
||||
_POOL_AVAILABLE = True
|
||||
except Exception:
|
||||
_HTTP_POOL = None
|
||||
_POOL_AVAILABLE = False
|
||||
|
||||
|
||||
def pooled_request(method, url, headers=None, body=None, timeout=None):
|
||||
"""Issue an HTTP request through the shared pool. Returns urllib3.HTTPResponse.
|
||||
|
||||
Falls back to a plain urllib call if urllib3 isn't available, so the
|
||||
AppImage still works on systems without it. Callers that need the
|
||||
legacy `urllib.request.urlopen()` semantics can still use that
|
||||
directly — this helper is opt-in.
|
||||
"""
|
||||
if _POOL_AVAILABLE and _HTTP_POOL is not None:
|
||||
return _HTTP_POOL.request(method, url, headers=headers or {}, body=body,
|
||||
timeout=timeout)
|
||||
# Fallback: plain urllib.
|
||||
import urllib.request
|
||||
req = urllib.request.Request(url, data=body, headers=headers or {}, method=method)
|
||||
return urllib.request.urlopen(req, timeout=timeout if timeout else 10)
|
||||
|
||||
|
||||
class AIProvider(ABC):
|
||||
"""Abstract base class for AI providers.
|
||||
|
||||
@@ -68,17 +105,24 @@ class AIProvider(ABC):
|
||||
max_tokens=50 # Some providers (Gemini) need more tokens to return any content
|
||||
)
|
||||
if response:
|
||||
# Check if response contains our expected text
|
||||
# Require the sentinel to mark the connection as truly OK.
|
||||
# Previous code accepted any non-empty response, so a typo in
|
||||
# `ollama_url` that hit some other HTTP service would still
|
||||
# report "Connected (response received)" — masking a real
|
||||
# misconfiguration. Audit Tier 6 — `test_connection`
|
||||
# heuristic.
|
||||
if "CONNECTION_OK" in response.upper() or "CONNECTION" in response.upper():
|
||||
return {
|
||||
'success': True,
|
||||
'message': 'Connection successful',
|
||||
'model': self.model
|
||||
}
|
||||
# Even if different response, connection worked
|
||||
preview = response.strip()
|
||||
if len(preview) > 200:
|
||||
preview = preview[:200] + '...'
|
||||
return {
|
||||
'success': True,
|
||||
'message': f'Connected (response received)',
|
||||
'success': False,
|
||||
'message': f'Endpoint responded but not as an LLM (no sentinel). Response preview: {preview}',
|
||||
'model': self.model
|
||||
}
|
||||
return {
|
||||
@@ -132,46 +176,67 @@ class AIProvider(ABC):
|
||||
# Models are typically sorted, so first one is usually a good default
|
||||
return available[0]
|
||||
|
||||
def _make_request(self, url: str, payload: dict, headers: dict,
|
||||
timeout: int = 15) -> dict:
|
||||
"""Make HTTP request to AI provider API.
|
||||
|
||||
Args:
|
||||
url: API endpoint URL
|
||||
payload: JSON payload to send
|
||||
headers: HTTP headers
|
||||
timeout: Request timeout in seconds
|
||||
|
||||
Returns:
|
||||
Parsed JSON response
|
||||
|
||||
Raises:
|
||||
AIProviderError: If request fails
|
||||
def _make_request(self, url: str, payload: dict, headers: dict,
|
||||
timeout: int = 15, max_retries: int = 2) -> dict:
|
||||
"""Make HTTP request to AI provider API with retry/backoff on 429/5xx.
|
||||
|
||||
Retries with exponential backoff (1s, 2s, 4s) on transient failures:
|
||||
- HTTP 429 (rate limit) — provider asks us to slow down.
|
||||
- HTTP 5xx (server error) — provider hiccup, often resolves quickly.
|
||||
- URLError (DNS / connection refused / timeout).
|
||||
4xx errors other than 429 are returned without retry — those are bugs
|
||||
in our request, not transient.
|
||||
|
||||
Error bodies are NOT echoed into the exception message: provider
|
||||
responses can contain PII from our own prompt being reflected back,
|
||||
and that ends up in journald where any reader sees it. Audit Tier 3.2
|
||||
#5 (retry/backoff) and #6 (PII leak via error body).
|
||||
"""
|
||||
import json
|
||||
import time as _time
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
|
||||
|
||||
# Ensure User-Agent is set (Cloudflare blocks requests without it - error 1010)
|
||||
if 'User-Agent' not in headers:
|
||||
headers['User-Agent'] = 'ProxMenux/1.0'
|
||||
|
||||
|
||||
data = json.dumps(payload).encode('utf-8')
|
||||
req = urllib.request.Request(url, data=data, headers=headers, method='POST')
|
||||
|
||||
try:
|
||||
with urllib.request.urlopen(req, timeout=timeout) as resp:
|
||||
return json.loads(resp.read().decode('utf-8'))
|
||||
except urllib.error.HTTPError as e:
|
||||
error_body = ""
|
||||
|
||||
last_error = None
|
||||
for attempt in range(max_retries + 1):
|
||||
try:
|
||||
error_body = e.read().decode('utf-8')
|
||||
except Exception:
|
||||
pass
|
||||
raise AIProviderError(f"HTTP {e.code}: {error_body or e.reason}")
|
||||
except urllib.error.URLError as e:
|
||||
raise AIProviderError(f"Connection error: {e.reason}")
|
||||
except json.JSONDecodeError as e:
|
||||
raise AIProviderError(f"Invalid JSON response: {e}")
|
||||
except Exception as e:
|
||||
raise AIProviderError(f"Request failed: {str(e)}")
|
||||
req = urllib.request.Request(url, data=data, headers=headers, method='POST')
|
||||
with urllib.request.urlopen(req, timeout=timeout) as resp:
|
||||
return json.loads(resp.read().decode('utf-8'))
|
||||
except urllib.error.HTTPError as e:
|
||||
# Drain the body so we can decide whether to retry, but NEVER
|
||||
# include it in the raised exception (PII / API key in echo).
|
||||
try:
|
||||
e.read()
|
||||
except Exception:
|
||||
pass
|
||||
# Retry on 429 (rate limit) and 5xx (server error).
|
||||
retryable = e.code == 429 or 500 <= e.code < 600
|
||||
last_error = AIProviderError(f"HTTP {e.code}: {e.reason}")
|
||||
if retryable and attempt < max_retries:
|
||||
backoff = 2 ** attempt # 1, 2, 4 seconds
|
||||
_time.sleep(backoff)
|
||||
continue
|
||||
raise last_error
|
||||
except urllib.error.URLError as e:
|
||||
last_error = AIProviderError(f"Connection error: {e.reason}")
|
||||
if attempt < max_retries:
|
||||
backoff = 2 ** attempt
|
||||
_time.sleep(backoff)
|
||||
continue
|
||||
raise last_error
|
||||
except json.JSONDecodeError as e:
|
||||
# Not retryable — provider sent malformed response.
|
||||
raise AIProviderError(f"Invalid JSON response: {e}")
|
||||
except Exception as e:
|
||||
raise AIProviderError(f"Request failed: {type(e).__name__}")
|
||||
# Should be unreachable; keep mypy happy.
|
||||
if last_error:
|
||||
raise last_error
|
||||
raise AIProviderError("Request failed after retries")
|
||||
|
||||
Reference in New Issue
Block a user