update beta ProxMenux 1.2.1.1-beta

2026-05-17 22:35:02 +00:00 · 2026-05-09 18:59:59 +02:00
parent 5ed1fc44fd
commit 2f919de9e3
125 changed files with 16506 additions and 2877 deletions
@@ -8,6 +8,43 @@ class AIProviderError(Exception):
    pass


+# Shared urllib3 PoolManager for AI providers. urllib's `urlopen` does
+# NOT pool connections — each call does a fresh TCP+TLS handshake (~100-
+# 300ms wasted per call). PoolManager keeps connections alive within the
+# `cleanup` window per (scheme, host, port). Providers can opt into this
+# by calling `pooled_request(...)` instead of `urllib.request.urlopen`.
+# Audit Tier 7 — no HTTP connection pooling.
+try:
+    import urllib3 as _urllib3
+    _HTTP_POOL = _urllib3.PoolManager(
+        num_pools=8,           # one slot per provider host (groq, openai, ...)
+        maxsize=4,             # parallel connections per host
+        timeout=_urllib3.Timeout(connect=5, read=30),
+        retries=False,         # we handle retries at the dispatcher level
+    )
+    _POOL_AVAILABLE = True
+except Exception:
+    # Any failure here — urllib3 missing, or the PoolManager constructor
+    # raising — switches pooling off; `pooled_request` then takes its
+    # plain-urllib fallback path.
+    _HTTP_POOL = None
+    _POOL_AVAILABLE = False
+
+
+def pooled_request(method, url, headers=None, body=None, timeout=None):
+    """Issue an HTTP request through the shared pool when it is available.
+
+    Falls back to a plain urllib call if urllib3 isn't available, so the
+    AppImage still works on systems without it. Callers that need the
+    legacy `urllib.request.urlopen()` semantics can still use that
+    directly — this helper is opt-in.
+
+    Args:
+        method: HTTP method name (e.g. "POST").
+        url: Absolute URL to request.
+        headers: Optional dict of request headers.
+        body: Optional request body, passed through unchanged.
+        timeout: Optional per-call timeout. When None, the pooled path
+            keeps the timeout configured on the shared PoolManager and
+            the fallback path uses 10 seconds.
+
+    Returns:
+        Whatever `PoolManager.request` returns on the pooled path, or
+        whatever `urllib.request.urlopen` returns on the fallback path.
+        These are different response types, so callers should not rely
+        on urllib3-only attributes.
+    """
+    request_headers = headers or {}
+    if _POOL_AVAILABLE and _HTTP_POOL is not None:
+        # Forward `timeout` only when the caller supplied one: urllib3
+        # treats an explicit timeout=None as "no timeout", which would
+        # silently discard the connect/read limits set on the pool and
+        # let a stalled provider block the caller indefinitely.
+        extra = {} if timeout is None else {'timeout': timeout}
+        return _HTTP_POOL.request(method, url, headers=request_headers,
+                                  body=body, **extra)
+    # Fallback: plain urllib.
+    import urllib.request
+    req = urllib.request.Request(url, data=body, headers=request_headers, method=method)
+    return urllib.request.urlopen(req, timeout=timeout if timeout else 10)
+
+
 class AIProvider(ABC):
    """Abstract base class for AI providers.
    
@@ -68,17 +105,24 @@ class AIProvider(ABC):
                max_tokens=50  # Some providers (Gemini) need more tokens to return any content
            )
            if response:
-                # Check if response contains our expected text
+                # Require the sentinel to mark the connection as truly OK.
+                # Previous code accepted any non-empty response, so a typo in
+                # `ollama_url` that hit some other HTTP service would still
+                # report "Connected (response received)" — masking a real
+                # misconfiguration. Audit Tier 6 — `test_connection`
+                # heuristic.
                if "CONNECTION_OK" in response.upper() or "CONNECTION" in response.upper():
                    return {
                        'success': True,
                        'message': 'Connection successful',
                        'model': self.model
                    }
-                # Even if different response, connection worked
+                preview = response.strip()
+                if len(preview) > 200:
+                    preview = preview[:200] + '...'
                return {
-                    'success': True,
-                    'message': f'Connected (response received)',
+                    'success': False,
+                    'message': f'Endpoint responded but not as an LLM (no sentinel). Response preview: {preview}',
                    'model': self.model
                }
            return {
@@ -132,46 +176,67 @@ class AIProvider(ABC):
        # Models are typically sorted, so first one is usually a good default
        return available[0]
    
-    def _make_request(self, url: str, payload: dict, headers: dict, 
-                      timeout: int = 15) -> dict:
-        """Make HTTP request to AI provider API.
-        
-        Args:
-            url: API endpoint URL
-            payload: JSON payload to send
-            headers: HTTP headers
-            timeout: Request timeout in seconds
-            
-        Returns:
-            Parsed JSON response
-            
-        Raises:
-            AIProviderError: If request fails
+    def _make_request(self, url: str, payload: dict, headers: dict,
+                      timeout: int = 15, max_retries: int = 2) -> dict:
+        """Make HTTP request to AI provider API with retry/backoff on 429/5xx.
+
+        Transient failures are retried up to `max_retries` times, sleeping
+        2**attempt seconds before each retry (1s, then 2s with the default
+        of 2 retries):
+          - HTTP 429 (rate limit) — provider asks us to slow down.
+          - HTTP 5xx (server error) — provider hiccup, often resolves quickly.
+          - URLError (DNS / connection refused / timeout).
+        4xx errors other than 429 are raised without retry — those are bugs
+        in our request, not transient.
+
+        Error bodies are NOT echoed into the exception message: provider
+        responses can contain PII from our own prompt being reflected back,
+        and that ends up in journald where any reader sees it. Audit Tier 3.2
+        #5 (retry/backoff) and #6 (PII leak via error body).
+
+        Args:
+            url: API endpoint URL.
+            payload: JSON-serialisable payload, sent as the POST body.
+            headers: HTTP headers. A default User-Agent is added to a copy
+                when missing; the caller's dict is left untouched.
+            timeout: Per-attempt timeout in seconds.
+            max_retries: Extra attempts after the first one; negative
+                values are treated as 0.
+
+        Returns:
+            Parsed JSON response.
+
+        Raises:
+            AIProviderError: If the request fails or the response is not
+                valid JSON.
         """
         import json
+        import time as _time
         import urllib.request
         import urllib.error
-        
+
+        # Work on a copy so adding the default User-Agent below does not
+        # mutate the dict the caller passed in.
+        headers = dict(headers or {})
+
         # Ensure User-Agent is set (Cloudflare blocks requests without it - error 1010)
         if 'User-Agent' not in headers:
             headers['User-Agent'] = 'ProxMenux/1.0'
-        
+
         data = json.dumps(payload).encode('utf-8')
-        req = urllib.request.Request(url, data=data, headers=headers, method='POST')
-        
-        try:
-            with urllib.request.urlopen(req, timeout=timeout) as resp:
-                return json.loads(resp.read().decode('utf-8'))
-        except urllib.error.HTTPError as e:
-            error_body = ""
+
+        # A negative max_retries still gets one attempt instead of none.
+        max_retries = max(0, max_retries)
+        last_error = None
+        for attempt in range(max_retries + 1):
             try:
-                error_body = e.read().decode('utf-8')
-            except Exception:
-                pass
-            raise AIProviderError(f"HTTP {e.code}: {error_body or e.reason}")
-        except urllib.error.URLError as e:
-            raise AIProviderError(f"Connection error: {e.reason}")
-        except json.JSONDecodeError as e:
-            raise AIProviderError(f"Invalid JSON response: {e}")
-        except Exception as e:
-            raise AIProviderError(f"Request failed: {str(e)}")
+                req = urllib.request.Request(url, data=data, headers=headers, method='POST')
+                with urllib.request.urlopen(req, timeout=timeout) as resp:
+                    return json.loads(resp.read().decode('utf-8'))
+            except urllib.error.HTTPError as e:
+                # Release the error response before sleeping or raising so
+                # its socket is not held across the backoff. The body is
+                # deliberately NEVER read into the raised exception
+                # (PII / API key in echo).
+                try:
+                    e.close()
+                except Exception:
+                    # Best-effort cleanup only; the HTTP status below is
+                    # what decides the outcome.
+                    pass
+                # Retry on 429 (rate limit) and 5xx (server error).
+                retryable = e.code == 429 or 500 <= e.code < 600
+                last_error = AIProviderError(f"HTTP {e.code}: {e.reason}")
+                if retryable and attempt < max_retries:
+                    backoff = 2 ** attempt  # 1s, 2s, 4s, ... per retry
+                    _time.sleep(backoff)
+                    continue
+                raise last_error
+            except urllib.error.URLError as e:
+                last_error = AIProviderError(f"Connection error: {e.reason}")
+                if attempt < max_retries:
+                    backoff = 2 ** attempt
+                    _time.sleep(backoff)
+                    continue
+                raise last_error
+            except json.JSONDecodeError as e:
+                # Not retryable — provider sent malformed response.
+                raise AIProviderError(f"Invalid JSON response: {e}")
+            except Exception as e:
+                # Only the exception type is reported, not its message.
+                raise AIProviderError(f"Request failed: {type(e).__name__}")
+        # Should be unreachable; keep mypy happy.
+        if last_error:
+            raise last_error
+        raise AIProviderError("Request failed after retries")