update beta ProxMenux 1.2.1.1-beta

2026-05-17 22:35:02 +00:00 · 2026-05-09 18:59:59 +02:00
parent 5ed1fc44fd
commit 2f919de9e3
125 changed files with 16506 additions and 2877 deletions
@@ -10,49 +10,159 @@ import hashlib
 from pathlib import Path
 from collections import deque
 from flask import Blueprint, jsonify, request
-from notification_manager import notification_manager
+from notification_manager import notification_manager, SENSITIVE_PLACEHOLDER, validate_external_url
+from jwt_middleware import require_auth
+
+
+def _resolve_masked_api_key(provider, api_key):
+    """Swap the masked placeholder for the API key stored for ``provider``.
+
+    Any value other than ``SENSITIVE_PLACEHOLDER`` is handed back untouched.
+    When the placeholder is received, the key is read from
+    ``notification_manager._config`` under ``ai_api_key_<provider>``
+    (loading the config first when it is empty), so the UI can run a
+    "Test" without the user re-entering the key. An empty string is
+    returned when nothing is stored or the lookup raises.
+    """
+    if api_key == SENSITIVE_PLACEHOLDER:
+        manager = notification_manager
+        try:
+            if not manager._config:
+                manager._load_config()
+            resolved = manager._config.get(f'ai_api_key_{provider}', '') or ''
+        except Exception:
+            return ''
+        return resolved
+    return api_key


 # ─── Webhook Hardening Helpers ───────────────────────────────────

 class WebhookRateLimiter:
-    """Simple sliding-window rate limiter for the webhook endpoint."""
-    
+    """Per-IP sliding-window rate limiter for the webhook endpoint.
+
+    Was a single global bucket, which let one noisy/abusive caller fill it
+    and starve legitimate PVE webhooks. Each remote IP now gets its own
+    deque; total tracked IPs is capped to avoid memory growth from
+    drive-by random-IP probing. Thread-safe — Flask routes run in worker
+    threads.
+    """
+
+    _MAX_IPS = 1024
+
    def __init__(self, max_requests: int = 60, window_seconds: int = 60):
+        import threading as _threading
        self._max = max_requests
        self._window = window_seconds
-        self._timestamps: deque = deque()
-    
-    def allow(self) -> bool:
+        self._buckets: dict = {}
+        self._lock = _threading.Lock()
+
+    def allow(self, ip: str = '') -> bool:
+        key = ip or '_unknown'
        now = time.time()
-        # Prune entries outside the window
-        while self._timestamps and now - self._timestamps[0] > self._window:
-            self._timestamps.popleft()
-        if len(self._timestamps) >= self._max:
-            return False
-        self._timestamps.append(now)
-        return True
+        with self._lock:
+            # Drop the LRU IP (longest-idle bucket) before exceeding the cap.
+            if key not in self._buckets and len(self._buckets) >= self._MAX_IPS:
+                stale = min(
+                    self._buckets,
+                    key=lambda k: self._buckets[k][-1] if self._buckets[k] else 0
+                )
+                self._buckets.pop(stale, None)
+            bucket = self._buckets.setdefault(key, deque())
+            while bucket and now - bucket[0] > self._window:
+                bucket.popleft()
+            if len(bucket) >= self._max:
+                return False
+            bucket.append(now)
+            return True


 class ReplayCache:
-    """Bounded in-memory cache of recently seen request signatures (60s TTL)."""
-    
-    _MAX_SIZE = 2000  # Hard cap to prevent memory growth
-    
-    def __init__(self, ttl: int = 60):
+    """Replay-detection cache backed by SQLite.
+
+    The previous in-memory `OrderedDict` was per-process: when Flask
+    runs with multiple worker processes (gunicorn -w N) each worker
+    keeps its own table, so the same signed body can be replayed N
+    times before any one worker has seen it. Persisting to SQLite
+    shares state across workers (and survives reloads). The
+    `OrderedDict` is kept as an in-memory fast path for hot dedup
+    within a single request burst — we still hit the DB to be sure.
+    Audit Tier 3.1 — Replay cache per-process.
+    """
+
+    _MAX_SIZE = 2000  # In-memory hot-path cap
+
+    def __init__(self, ttl: int = 60, db_path: str = '/usr/local/share/proxmenux/health_monitor.db'):
+        from collections import OrderedDict as _OrderedDict
+        import threading as _threading_rc
        self._ttl = ttl
-        self._seen: dict = {}  # signature -> timestamp
-    
+        self._db_path = db_path
+        self._seen: _OrderedDict = _OrderedDict()
+        self._lock = _threading_rc.Lock()
+        self._init_db()
+
+    def _init_db(self):
+        """Create the shared replay table if it does not exist yet.
+
+        Ensures the parent directory of ``self._db_path`` exists, switches
+        the database to WAL journaling and creates ``webhook_replay_cache``
+        (``signature`` primary key, ``seen_ts`` timestamp). Any failure is
+        printed and swallowed so the cache object can still be constructed.
+        """
+        try:
+            import sqlite3 as _sqlite
+            from contextlib import closing as _closing
+            from pathlib import Path as _Path
+            _Path(self._db_path).parent.mkdir(parents=True, exist_ok=True)
+            # closing() releases the connection even when a statement below
+            # raises, so the handle is not left open on the error path.
+            with _closing(_sqlite.connect(self._db_path, timeout=5)) as conn:
+                conn.execute('PRAGMA journal_mode=WAL')
+                conn.execute('''
+                CREATE TABLE IF NOT EXISTS webhook_replay_cache (
+                    signature TEXT PRIMARY KEY,
+                    seen_ts REAL NOT NULL
+                )
+            ''')
+                conn.commit()
+        except Exception as e:
+            print(f"[ReplayCache] DB init failed: {e}")
+
    def check_and_record(self, signature: str) -> bool:
        """Return True if this signature was already seen (replay). Records it otherwise."""
        now = time.time()
-        # Periodic cleanup
-        if len(self._seen) > self._MAX_SIZE // 2:
-            cutoff = now - self._ttl
-            self._seen = {k: v for k, v in self._seen.items() if v > cutoff}
-        if signature in self._seen and now - self._seen[signature] < self._ttl:
-            return True  # Replay detected
-        self._seen[signature] = now
+        cutoff = now - self._ttl
+
+        # In-memory fast path (lock-protected).
+        with self._lock:
+            # Entries are inserted in time order, so expired ones sit at the
+            # front; pop until the oldest remaining entry is still fresh.
+            while self._seen:
+                oldest_key = next(iter(self._seen))
+                if self._seen[oldest_key] > cutoff:
+                    break
+                self._seen.popitem(last=False)
+            if signature in self._seen and now - self._seen[signature] < self._ttl:
+                return True
+            # Tentatively reserve in memory; if DB confirms we're first,
+            # this stands. Hard cap defends against runaway growth.
+            self._seen[signature] = now
+            while len(self._seen) > self._MAX_SIZE:
+                self._seen.popitem(last=False)
+
+        # Cross-worker check via SQLite. If another worker already
+        # recorded the signature within the TTL window, treat as replay.
+        try:
+            import sqlite3 as _sqlite
+            from contextlib import closing as _closing
+            # closing() guarantees the connection is released even when a
+            # statement raises (locked database, missing table, ...);
+            # without it every failed check left a connection open.
+            with _closing(_sqlite.connect(self._db_path, timeout=2)) as conn:
+                cur = conn.cursor()
+                # Opportunistic cleanup of stale rows.
+                cur.execute('DELETE FROM webhook_replay_cache WHERE seen_ts < ?', (cutoff,))
+                cur.execute(
+                    'SELECT seen_ts FROM webhook_replay_cache WHERE signature = ?',
+                    (signature,),
+                )
+                row = cur.fetchone()
+                replayed = row is not None and now - row[0] < self._ttl
+                if not replayed:
+                    cur.execute(
+                        'INSERT OR REPLACE INTO webhook_replay_cache (signature, seen_ts) VALUES (?, ?)',
+                        (signature, now),
+                    )
+                # Commit in both cases so the stale-row cleanup is kept.
+                conn.commit()
+            if replayed:
+                return True
+        except Exception as e:
+            # If the DB is unavailable, the in-memory check above still
+            # catches replays within a single worker — log and continue.
+            print(f"[ReplayCache] DB check failed (in-memory only): {e}")
        return False


@@ -63,20 +173,59 @@ _replay_cache = ReplayCache(ttl=60)
 # Timestamp validation window (seconds)
 _TIMESTAMP_MAX_DRIFT = 60

+# ─── Input validation whitelists ──────────────────────────────────
+# Used by the mutating routes (test, send) and the history filter.
+# `severity` is small enough to whitelist; `channel` mirrors
+# `notification_channels.CHANNEL_TYPES` plus 'all' for test_channel.
+# `event_type` is bounded by length + charset rather than enumerated —
+# the catalogue has 70+ entries and `render_template` already handles
+# unknown event types via a fallback. Audit Tier 3.1 — no validation
+# of event_type/severity/channel in mutating routes.
+_VALID_SEVERITIES = {'info', 'warning', 'critical', 'error', 'INFO', 'WARNING', 'CRITICAL', 'ERROR'}
+_VALID_CHANNELS = {'all', 'telegram', 'gotify', 'discord', 'email'}
+import re as _re_validate
+_EVENT_TYPE_RE = _re_validate.compile(r'^[a-zA-Z0-9_]{1,64}$')
+
+
+def _bad_request(msg: str):
+    """Build a JSON ``{'error': msg}`` response paired with HTTP status 400."""
+    return jsonify({'error': msg}), 400
+
+
+def _validate_event_type(value: str) -> bool:
+    """Return True when ``value`` is a str made only of ``[a-zA-Z0-9_]``, 1-64 chars.
+
+    Non-string input (e.g. a JSON list or number) is rejected rather than
+    raising.
+    """
+    # fullmatch, not match: with `match` the pattern's trailing `$` also
+    # accepts a value that ends in a newline (e.g. "custom\n"), letting a
+    # control character through the charset check.
+    return isinstance(value, str) and _EVENT_TYPE_RE.fullmatch(value) is not None
+
+
+def _validate_severity(value: str, allow_empty: bool = False) -> bool:
+    """Return True when ``value`` is one of ``_VALID_SEVERITIES``.
+
+    With ``allow_empty=True`` the empty string is accepted too (used for
+    optional query filters). Non-string input is rejected.
+    """
+    # JSON bodies can carry lists/dicts here; testing an unhashable value
+    # against a set raises TypeError, which the routes would report as a
+    # 500 instead of a 400.
+    if not isinstance(value, str):
+        return False
+    if allow_empty and value == '':
+        return True
+    return value in _VALID_SEVERITIES
+
+
+def _validate_channel(value: str, allow_empty: bool = False) -> bool:
+    """Return True when ``value`` is one of ``_VALID_CHANNELS``.
+
+    With ``allow_empty=True`` the empty string is accepted too (used for
+    optional query filters). Non-string input is rejected.
+    """
+    # JSON bodies can carry lists/dicts here; testing an unhashable value
+    # against a set raises TypeError, which the routes would report as a
+    # 500 instead of a 400.
+    if not isinstance(value, str):
+        return False
+    if allow_empty and value == '':
+        return True
+    return value in _VALID_CHANNELS
+
 notification_bp = Blueprint('notifications', __name__)


@notification_bp.route('/api/notifications/settings', methods=['GET'])
+@require_auth
 def get_notification_settings():
    """Get all notification settings for the UI."""
    try:
        settings = notification_manager.get_settings()
        return jsonify(settings)
    except Exception as e:
-        return jsonify({'error': str(e)}), 500
+        # Sanitize: include only the exception type, never the message,
+        # which can leak filesystem paths, internal class names and (in
+        # AI provider errors) reflected user prompts. Audit Tier 3.1 #7.
+        print(f"[notification_routes] {request.path} failed: {type(e).__name__}: {e}")
+        return jsonify({'error': f'Internal error ({type(e).__name__})'}), 500


@notification_bp.route('/api/notifications/settings', methods=['POST'])
+@require_auth
 def save_notification_settings():
    """Save notification settings from the UI."""
    try:
@@ -87,20 +236,32 @@ def save_notification_settings():
        result = notification_manager.save_settings(payload)
        return jsonify(result)
    except Exception as e:
-        return jsonify({'error': str(e)}), 500
+        # Sanitize: include only the exception type, never the message,
+        # which can leak filesystem paths, internal class names and (in
+        # AI provider errors) reflected user prompts. Audit Tier 3.1 #7.
+        print(f"[notification_routes] {request.path} failed: {type(e).__name__}: {e}")
+        return jsonify({'error': f'Internal error ({type(e).__name__})'}), 500


@notification_bp.route('/api/notifications/test', methods=['POST'])
+@require_auth
 def test_notification():
    """Send a test notification to one or all channels."""
    try:
        data = request.get_json() or {}
        channel = data.get('channel', 'all')
-        
+
+        if not _validate_channel(channel):
+            return _bad_request('Invalid channel')
+
        result = notification_manager.test_channel(channel)
        return jsonify(result)
    except Exception as e:
-        return jsonify({'error': str(e)}), 500
+        # Sanitize: include only the exception type, never the message,
+        # which can leak filesystem paths, internal class names and (in
+        # AI provider errors) reflected user prompts. Audit Tier 3.1 #7.
+        print(f"[notification_routes] {request.path} failed: {type(e).__name__}: {e}")
+        return jsonify({'error': f'Internal error ({type(e).__name__})'}), 500


 def load_verified_models():
@@ -130,6 +291,7 @@ def load_verified_models():


@notification_bp.route('/api/notifications/provider-models', methods=['POST'])
+@require_auth
 def get_provider_models():
    """Fetch available models from AI provider, filtered by verified models list.
    
@@ -156,12 +318,24 @@ def get_provider_models():
    try:
        data = request.get_json() or {}
        provider = data.get('provider', '')
-        api_key = data.get('api_key', '')
+        api_key = _resolve_masked_api_key(provider, data.get('api_key', ''))
        ollama_url = data.get('ollama_url', 'http://localhost:11434')
        openai_base_url = data.get('openai_base_url', '')
-        
+
        if not provider:
            return jsonify({'success': False, 'models': [], 'message': 'Provider not specified'})
+
+        # SSRF guard before we touch the URL. Ollama is local-by-design so
+        # loopback is allowed there; OpenAI base URL must be a real external
+        # endpoint so loopback / RFC1918 are blocked.
+        if provider == 'ollama':
+            ok, err = validate_external_url(ollama_url, allow_loopback=True)
+            if not ok:
+                return jsonify({'success': False, 'models': [], 'message': f'Invalid ollama_url: {err}'}), 400
+        if provider == 'openai' and openai_base_url:
+            ok, err = validate_external_url(openai_base_url, allow_loopback=False)
+            if not ok:
+                return jsonify({'success': False, 'models': [], 'message': f'Invalid openai_base_url: {err}'}), 400
        
        # Load verified models config
        verified_config = load_verified_models()
@@ -203,8 +377,12 @@ def get_provider_models():
                'message': f'{len(models)} verified models'
            })
        
-        # For other providers, fetch from API and filter by verified list
-        if not api_key:
+        # For other providers, fetch from API and filter by verified list.
+        # Custom OpenAI-compatible endpoints (LiteLLM, opencode.ai, vLLM,
+        # LocalAI…) often expose `/v1/models` without authentication, so
+        # we only require an api_key when there's no custom base URL to
+        # consult. Issue #11.5 — OpenCode provider Custom Base URL fetch.
+        if not api_key and not (provider == 'openai' and openai_base_url):
            return jsonify({'success': False, 'models': [], 'message': 'API key required'})
        
        from ai_providers import get_provider
@@ -295,6 +473,7 @@ def get_provider_models():


@notification_bp.route('/api/notifications/test-ai', methods=['POST'])
+@require_auth
 def test_ai_connection():
    """Test AI provider connection and configuration.
    
@@ -315,13 +494,25 @@ def test_ai_connection():
    """
    try:
        data = request.get_json() or {}
-        
+
        provider = data.get('provider', 'groq')
-        api_key = data.get('api_key', '')
+        api_key = _resolve_masked_api_key(provider, data.get('api_key', ''))
        model = data.get('model', '')
        ollama_url = data.get('ollama_url', 'http://localhost:11434')
        openai_base_url = data.get('openai_base_url', '')
-        
+
+        # Provider whitelist + bounds. Without these `provider` flows into
+        # `get_provider()` (importable name), `api_key` into HTTP headers
+        # (could be megabytes), and `model` into the path of paid LLM
+        # requests. Audit Tier 3.1 — `test-ai` validation gap.
+        _ALLOWED_PROVIDERS = {'groq', 'openai', 'anthropic', 'gemini', 'ollama', 'openrouter'}
+        if provider not in _ALLOWED_PROVIDERS:
+            return jsonify({'success': False, 'message': 'Unsupported provider', 'model': ''}), 400
+        if not isinstance(api_key, str) or len(api_key) > 512:
+            return jsonify({'success': False, 'message': 'api_key too long (max 512 chars)', 'model': ''}), 400
+        if not isinstance(model, str) or len(model) > 128:
+            return jsonify({'success': False, 'message': 'model too long (max 128 chars)', 'model': ''}), 400
+
        # Validate required fields
        if provider != 'ollama' and not api_key:
            return jsonify({
@@ -329,7 +520,17 @@ def test_ai_connection():
                'message': 'API key is required',
                'model': ''
            }), 400
-        
+
+        # SSRF guard — same policy as provider-models.
+        if provider == 'ollama':
+            ok, err = validate_external_url(ollama_url, allow_loopback=True)
+            if not ok:
+                return jsonify({'success': False, 'message': f'Invalid ollama_url: {err}', 'model': ''}), 400
+        if provider == 'openai' and openai_base_url:
+            ok, err = validate_external_url(openai_base_url, allow_loopback=False)
+            if not ok:
+                return jsonify({'success': False, 'message': f'Invalid openai_base_url: {err}', 'model': ''}), 400
+
        if provider == 'ollama' and not ollama_url:
            return jsonify({
                'success': False,
@@ -381,51 +582,97 @@ def test_ai_connection():


@notification_bp.route('/api/notifications/status', methods=['GET'])
+@require_auth
 def get_notification_status():
    """Get notification service status."""
    try:
        status = notification_manager.get_status()
        return jsonify(status)
    except Exception as e:
-        return jsonify({'error': str(e)}), 500
+        # Sanitize: include only the exception type, never the message,
+        # which can leak filesystem paths, internal class names and (in
+        # AI provider errors) reflected user prompts. Audit Tier 3.1 #7.
+        print(f"[notification_routes] {request.path} failed: {type(e).__name__}: {e}")
+        return jsonify({'error': f'Internal error ({type(e).__name__})'}), 500


@notification_bp.route('/api/notifications/history', methods=['GET'])
+@require_auth
 def get_notification_history():
-    """Get notification history with optional filters."""
+    """Get notification history with optional filters.
+
+    `limit` is capped at 500 to prevent memory blow-up. The audit (Tier 3.1)
+    flagged that without a cap, an authenticated client could request
+    `?limit=1000000` and force the manager to load the entire history table
+    into RAM and serialize it to JSON. Audit Tier 3.1 #5.
+    """
    try:
        limit = request.args.get('limit', 100, type=int)
        offset = request.args.get('offset', 0, type=int)
        severity = request.args.get('severity', '')
        channel = request.args.get('channel', '')
-        
+
+        # Sane bounds — clamp instead of erroring so well-behaved clients
+        # asking for "all" just get a reasonable page.
+        if limit is None or limit < 1:
+            limit = 100
+        if limit > 500:
+            limit = 500
+        if offset is None or offset < 0:
+            offset = 0
+
+        # Filter strings: whitelist or empty. Without this an attacker who
+        # finds a downstream sink that interpolates these (template,
+        # filename, log) gets a free string-injection vector.
+        if not _validate_severity(severity, allow_empty=True):
+            return _bad_request('Invalid severity filter')
+        if not _validate_channel(channel, allow_empty=True):
+            return _bad_request('Invalid channel filter')
+
        result = notification_manager.get_history(limit, offset, severity, channel)
        return jsonify(result)
    except Exception as e:
-        return jsonify({'error': str(e)}), 500
+        # Sanitize: include only the exception type, never the message,
+        # which can leak filesystem paths, internal class names and (in
+        # AI provider errors) reflected user prompts. Audit Tier 3.1 #7.
+        print(f"[notification_routes] {request.path} failed: {type(e).__name__}: {e}")
+        return jsonify({'error': f'Internal error ({type(e).__name__})'}), 500


@notification_bp.route('/api/notifications/history', methods=['DELETE'])
+@require_auth
 def clear_notification_history():
    """Clear all notification history."""
    try:
        result = notification_manager.clear_history()
        return jsonify(result)
    except Exception as e:
-        return jsonify({'error': str(e)}), 500
+        # Sanitize: include only the exception type, never the message,
+        # which can leak filesystem paths, internal class names and (in
+        # AI provider errors) reflected user prompts. Audit Tier 3.1 #7.
+        print(f"[notification_routes] {request.path} failed: {type(e).__name__}: {e}")
+        return jsonify({'error': f'Internal error ({type(e).__name__})'}), 500


@notification_bp.route('/api/notifications/send', methods=['POST'])
+@require_auth
 def send_notification():
    """Send a notification via API (for testing or external triggers)."""
    try:
        data = request.get_json()
        if not data:
            return jsonify({'error': 'No data provided'}), 400
-        
+
+        event_type = data.get('event_type', 'custom')
+        severity = data.get('severity', 'INFO')
+        if not _validate_event_type(event_type):
+            return _bad_request('Invalid event_type (alphanumeric/underscore, 1-64 chars)')
+        if not _validate_severity(severity):
+            return _bad_request('Invalid severity')
+
        result = notification_manager.send_notification(
-            event_type=data.get('event_type', 'custom'),
-            severity=data.get('severity', 'INFO'),
+            event_type=event_type,
+            severity=severity,
            title=data.get('title', ''),
            message=data.get('message', ''),
            data=data.get('data', {}),
@@ -433,13 +680,16 @@ def send_notification():
        )
        return jsonify(result)
    except Exception as e:
-        return jsonify({'error': str(e)}), 500
+        # Sanitize: include only the exception type, never the message,
+        # which can leak filesystem paths, internal class names and (in
+        # AI provider errors) reflected user prompts. Audit Tier 3.1 #7.
+        print(f"[notification_routes] {request.path} failed: {type(e).__name__}: {e}")
+        return jsonify({'error': f'Internal error ({type(e).__name__})'}), 500


 # ── PVE config constants ──
 _PVE_ENDPOINT_ID = 'proxmenux-webhook'
 _PVE_MATCHER_ID = 'proxmenux-default'
-_PVE_WEBHOOK_URL = 'http://127.0.0.1:8008/api/notifications/webhook'
 _PVE_NOTIFICATIONS_CFG = '/etc/pve/notifications.cfg'
 _PVE_PRIV_CFG = '/etc/pve/priv/notifications.cfg'
 _PVE_OUR_HEADERS = {
@@ -448,6 +698,31 @@ _PVE_OUR_HEADERS = {
 }


+def _pve_webhook_url() -> str:
+    """Pick the loopback webhook URL whose scheme matches the SSL config.
+
+    Returns the `https://` URL when `auth_manager.load_ssl_config()`
+    reports `enabled`, and the `http://` URL otherwise — including when
+    the config cannot be imported or read. A fixed `http://` URL used to
+    break webhook delivery once SSL was switched on (Issue #194). With a
+    self-signed certificate PVE may additionally need
+    `update-ca-certificates`; that remains a user-side step.
+    """
+    ssl_enabled = False
+    try:
+        from auth_manager import load_ssl_config
+        ssl_settings = load_ssl_config() or {}
+        ssl_enabled = bool(ssl_settings.get('enabled'))
+    except Exception:
+        # Best effort: any failure falls back to plain HTTP.
+        ssl_enabled = False
+    if ssl_enabled:
+        return 'https://127.0.0.1:8008/api/notifications/webhook'
+    return 'http://127.0.0.1:8008/api/notifications/webhook'
+
+
+# Backward-compat alias for callers that read this at import time. Most
+# call sites now use `_pve_webhook_url()` to pick up SSL state at write
+# time. This constant reflects the state at module-load only.
+_PVE_WEBHOOK_URL = _pve_webhook_url()
+
+
 def _pve_read_file(path):
    """Read file, return (content, error). Content is '' if missing."""
    try:
@@ -474,37 +749,59 @@ def _pve_backup_file(path):
        pass


+# Recognised PVE notifications.cfg header keywords. A header line begins
+# unindented with `<keyword>:` and the value names the entry. Anything
+# that doesn't match this regex is not treated as a header. The previous
+# parser treated any unindented line containing `:` as a header, so a
+# third-party `description: foo: bar` continuation or a comment with `:`
+# in it could be mistaken for one and corrupt user content. Audit
+# Tier 3.1 — fragile `_pve_remove_our_blocks` parser.
+import re as _re_pve_cfg
+_PVE_HEADER_RE = _re_pve_cfg.compile(
+    r'^(?P<kw>webhook|matcher|gotify|smtp|sendmail|ntfy):\s*(?P<name>[A-Za-z0-9_.\-]+)\s*$'
+)
+
+
 def _pve_remove_our_blocks(text, headers_to_remove):
    """Remove only blocks whose header line matches one of ours.
-    
+
    Preserves ALL other content byte-for-byte.
    A block = header line + indented continuation lines + trailing blank line.
    """
    lines = text.splitlines(keepends=True)
    cleaned = []
    skip_block = False
-    
+
    for line in lines:
        stripped = line.strip()
-        
-        if stripped and not line[0:1].isspace() and ':' in stripped:
+        is_header = (
+            bool(stripped)
+            and not line[0:1].isspace()
+            and bool(_PVE_HEADER_RE.match(stripped))
+        )
+
+        if is_header:
            if stripped in headers_to_remove:
                skip_block = True
                continue
            else:
                skip_block = False
-        
+
        if skip_block:
            if not stripped:
+                # Blank line ends our block; consume it so we don't leave
+                # a double blank gap in the output.
                skip_block = False
                continue
-            elif line[0:1].isspace():
+            if line[0:1].isspace():
+                # Indented continuation line of the block we're removing.
                continue
-            else:
-                skip_block = False
-        
+            # Non-blank, unindented, but not recognised as a header by
+            # the regex — leave the next iteration to figure it out.
+            skip_block = False
+
        cleaned.append(line)
-    
+
    return ''.join(cleaned)


@@ -520,7 +817,7 @@ def _build_webhook_fallback():
        f"webhook: {_PVE_ENDPOINT_ID}",
        f"\tbody {body_b64}",
        f"\tmethod post",
-        f"\turl {_PVE_WEBHOOK_URL}",
+        f"\turl {_pve_webhook_url()}",
        "",
        f"matcher: {_PVE_MATCHER_ID}",
        f"\ttarget {_PVE_ENDPOINT_ID}",
@@ -531,6 +828,46 @@ def _build_webhook_fallback():
    ]


+def _is_proxmenux_webhook_registered() -> bool:
+    """Cheap check: is our webhook block currently present in
+    /etc/pve/notifications.cfg? Used by `refresh_pve_webhook_url_if_registered`
+    to avoid auto-registering a webhook for users who never enabled
+    notifications.
+
+    Returns False when the file is missing, empty, unreadable, or does
+    not contain a line that is exactly our block header.
+    """
+    try:
+        text, err = _pve_read_file(_PVE_NOTIFICATIONS_CFG)
+        if err or not text:
+            return False
+        # Compare whole, unindented header lines instead of searching for
+        # a substring: a plain `in` test would also match e.g.
+        # `webhook: proxmenux-webhook-old` or the same text inside another
+        # entry's indented value.
+        header = f'webhook: {_PVE_ENDPOINT_ID}'
+        return any(line.rstrip() == header for line in text.splitlines())
+    except Exception:
+        return False
+
+
+def refresh_pve_webhook_url_if_registered() -> dict:
+    """Rewrite our PVE webhook block so its URL matches the current SSL state.
+
+    Meant for the moment SSL is switched on or off while notifications
+    are already configured: re-running the setup replaces a stale
+    `http://` (or `https://`) URL that PVE could no longer reach.
+
+    When no ProxMenux webhook block is registered, nothing is written and
+    `{'configured': False, 'skipped': True, 'reason': ...}` is returned.
+    Otherwise the result of `setup_pve_webhook_core()` is returned as-is.
+    """
+    if _is_proxmenux_webhook_registered():
+        return setup_pve_webhook_core()
+    return {
+        'configured': False,
+        'skipped': True,
+        'reason': 'no proxmenux webhook currently registered in PVE',
+    }
+
+
 def setup_pve_webhook_core() -> dict:
    """Core logic to configure PVE webhook. Callable from anywhere.
    
@@ -543,7 +880,7 @@ def setup_pve_webhook_core() -> dict:
        'configured': False,
        'endpoint_id': _PVE_ENDPOINT_ID,
        'matcher_id': _PVE_MATCHER_ID,
-        'url': _PVE_WEBHOOK_URL,
+        'url': _pve_webhook_url(),
        'fallback_commands': [],
        'error': None,
    }
@@ -602,7 +939,7 @@ def setup_pve_webhook_core() -> dict:
            f"webhook: {_PVE_ENDPOINT_ID}\n"
            f"\tbody {body_b64}\n"
            f"\tmethod post\n"
-            f"\turl {_PVE_WEBHOOK_URL}\n"
+            f"\turl {_pve_webhook_url()}\n"
        )
        
        matcher_block = (
@@ -641,8 +978,14 @@ def setup_pve_webhook_core() -> dict:
        # PVE REQUIRES a matching block in priv/notifications.cfg for every
        # webhook endpoint, even if it has no secrets. Without it PVE throws:
        #   "Could not instantiate endpoint: private config does not exist"
+        # Include the `secret` line so PVE actually sends the
+        # `X-Webhook-Secret` header on each delivery — without it the
+        # endpoint depends entirely on the localhost-bypass and any move
+        # to a non-loopback bind silently breaks auth. Audit Tier 3.1 —
+        # `setup_pve_webhook_core` does not write the secret to the priv cfg.
        priv_block = (
            f"webhook: {_PVE_ENDPOINT_ID}\n"
+            f"        secret name=X-Webhook-Secret,value={secret}\n"
        )
        
        if priv_text is not None:
@@ -676,6 +1019,7 @@ def setup_pve_webhook_core() -> dict:


@notification_bp.route('/api/notifications/proxmox/setup-webhook', methods=['POST'])
+@require_auth
 def setup_proxmox_webhook():
    """HTTP endpoint wrapper for webhook setup."""
    return jsonify(setup_pve_webhook_core()), 200
@@ -751,12 +1095,14 @@ def cleanup_pve_webhook_core() -> dict:


@notification_bp.route('/api/notifications/proxmox/cleanup-webhook', methods=['POST'])
+@require_auth
 def cleanup_proxmox_webhook():
    """HTTP endpoint wrapper for webhook cleanup."""
    return jsonify(cleanup_pve_webhook_core()), 200


@notification_bp.route('/api/notifications/proxmox/read-cfg', methods=['GET'])
+@require_auth
 def read_pve_notification_cfg():
    """Diagnostic: return raw content of PVE notification config files.
    
@@ -815,6 +1161,7 @@ def read_pve_notification_cfg():


@notification_bp.route('/api/notifications/proxmox/restore-cfg', methods=['POST'])
+@require_auth
 def restore_pve_notification_cfg():
    """Restore PVE notification config from our backup.
    
@@ -834,12 +1181,22 @@ def restore_pve_notification_cfg():
    
    for search_dir, target_path in files_to_restore.items():
        try:
-            candidates = sorted([
+            # Pick the most recent backup by mtime, not lexicographic name.
+            # An attacker (or accidental rename) with a write primitive
+            # could craft `notifications.cfg.proxmenux_backup_99999999_999999`
+            # and have it sort first, hijacking the restore. mtime follows
+            # the last content write, so a rename alone cannot promote a
+            # file (explicitly resetting mtime, e.g. `touch`, still can).
+            # Audit Tier 3.1 — restore-cfg lexicographic sort.
+            candidates = [
                f for f in os.listdir(search_dir)
                if 'proxmenux_backup' in f and f.startswith('notifications.cfg')
-            ], reverse=True)
-            
+            ]
+
            if candidates:
+                candidates.sort(
+                    key=lambda f: os.path.getmtime(os.path.join(search_dir, f)),
+                    reverse=True,
+                )
                backup_path = os.path.join(search_dir, candidates[0])
                shutil.copy2(backup_path, target_path)
                restored.append({'target': target_path, 'from_backup': backup_path})
@@ -866,12 +1223,21 @@ def proxmox_webhook():
      Remote: rate limiting + shared secret + timestamp + replay + IP allowlist.
    """
    _reject = lambda code, error, status: (jsonify({'accepted': False, 'error': error}), status)
-    
+
    client_ip = request.remote_addr or ''
    is_localhost = client_ip in ('127.0.0.1', '::1')
-    
-    # ── Layer 1: Rate limiting (always) ──
-    if not _webhook_limiter.allow():
+
+    # CSRF defence-in-depth: reject `application/x-www-form-urlencoded`
+    # and `multipart/form-data` bodies. PVE always sends `application/json`;
+    # form-encoded bodies are how a browser session would POST cross-origin
+    # without preflight, so accepting them here would open a CSRF vector once
+    # the route gets auth wrapped in the future.
+    # Audit Tier 6 — webhook accepts form bodies.
+    ct = (request.content_type or '').lower()
+    if ct.startswith('application/x-www-form-urlencoded') or ct.startswith('multipart/form-data'):
+        return _reject(415, 'unsupported_content_type', 415)
+
+    # ── Layer 1: Rate limiting (per-IP, always) ──
+    if not _webhook_limiter.allow(client_ip):
        resp = jsonify({'accepted': False, 'error': 'rate_limited'})
        resp.headers['Retry-After'] = '60'
        return resp, 429
@@ -918,53 +1284,50 @@ def proxmox_webhook():
    
    # ── Parse and process payload ──
    try:
-        content_type = request.content_type or ''
        raw_data = request.get_data(as_text=True) or ''
-        
-        # Try JSON first
+
+        # Try JSON first (with the newline-repair pass that PVE actually
+        # benefits from — its `{{ message }}` template inserts unescaped
+        # newlines that break strict JSON parsing).
        payload = request.get_json(silent=True) or {}
-        
-        # If not JSON, try form data
-        if not payload:
-            payload = dict(request.form)
-        
-        # If still empty, try parsing raw data as JSON (PVE may not set Content-Type)
        if not payload and raw_data:
            import json
            try:
                payload = json.loads(raw_data)
            except (json.JSONDecodeError, ValueError):
-                # PVE's {{ message }} may contain unescaped newlines/quotes
-                # that break JSON. Try to repair common issues.
                try:
                    repaired = raw_data.replace('\n', '\\n').replace('\r', '\\r')
                    payload = json.loads(repaired)
                except (json.JSONDecodeError, ValueError):
-                    # Try to extract fields with regex from broken JSON
-                    import re
-                    title_m = re.search(r'"title"\s*:\s*"([^"]*)"', raw_data)
-                    sev_m = re.search(r'"severity"\s*:\s*"([^"]*)"', raw_data)
-                    if title_m:
-                        payload = {
-                            'title': title_m.group(1),
-                            'body': raw_data[:1000],
-                            'severity': sev_m.group(1) if sev_m else 'info',
-                            'source': 'proxmox_hook',
-                        }
-        
-        # If still empty, try to salvage data from raw body
-        if not payload:
-            if raw_data:
-                # Last resort: treat raw text as the message body
-                payload = {
-                    'title': 'PVE Notification',
-                    'body': raw_data[:1000],
-                    'severity': 'info',
-                    'source': 'proxmox_hook',
-                }
-            else:
-                return _reject(400, 'empty_payload', 400)
-        
+                    payload = {}
+
+        # The previous regex-from-broken-JSON path and the raw-body
+        # fallback let arbitrary opaque bodies into `process_webhook` —
+        # an attacker who reaches the webhook (post-auth bypass) could
+        # smuggle arbitrary `title`/`severity`/`body` strings into the
+        # downstream pipeline. Audit Tier 3.1 — webhook payload schema.
+        if not isinstance(payload, dict) or not payload:
+            return _reject(400, 'invalid_payload', 400)
+
+        # Required fields: title must be a non-empty string. A non-string
+        # message is stringified locally (None becomes '') and may be empty.
+        title = payload.get('title') or payload.get('subject')
+        message = payload.get('message') or payload.get('body') or payload.get('text')
+        if not isinstance(title, str) or not title.strip():
+            return _reject(400, 'missing_title', 400)
+        if not isinstance(message, str):
+            message = str(message) if message is not None else ''
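+        # Bound runaway sizes — webhooks shouldn't exceed a few KB of text.
+        # NOTE(review): truncated values are stored under `title` / `message`
+        # only; a value that arrived via the `subject`, `body` or `text` alias
+        # stays untruncated under its original key — confirm which keys
+        # `process_webhook` reads.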
+        if len(title) > 256:
+            payload['title'] = title[:256]
+        if len(message) > 4096:
+            payload['message'] = message[:4096]
+        # Severity normalisation: accept the canonical set, default to 'info'.
+        sev = (payload.get('severity') or '').lower()
+        if sev not in {'info', 'warning', 'critical', 'error', 'notice'}:
+            payload['severity'] = 'info'
+        else:
+            payload['severity'] = sev
+
        result = notification_manager.process_webhook(payload)
        # Always return 200 to PVE -- a non-200 makes PVE report the webhook as broken.
        # The 'accepted' field in the JSON body indicates actual processing status.