diff --git a/AppImage/ProxMenux-1.2.1.2-beta.AppImage b/AppImage/ProxMenux-1.2.1.2-beta.AppImage index 2fbb7ad0..25175139 100755 Binary files a/AppImage/ProxMenux-1.2.1.2-beta.AppImage and b/AppImage/ProxMenux-1.2.1.2-beta.AppImage differ diff --git a/AppImage/ProxMenux-Monitor.AppImage.sha256 b/AppImage/ProxMenux-Monitor.AppImage.sha256 index 00c9bd75..20b53588 100644 --- a/AppImage/ProxMenux-Monitor.AppImage.sha256 +++ b/AppImage/ProxMenux-Monitor.AppImage.sha256 @@ -1 +1 @@ -1b72c977163192fba07cb6e18e8539d37c90e9624ff22e3ca2cc3c8a55ce8a8e ProxMenux-1.2.1.2-beta.AppImage +69aed2fd5627fa4542f1cd26163aa2eda5843f7e1a23d259298a173d8e377c36 ProxMenux-1.2.1.2-beta.AppImage diff --git a/AppImage/components/security.tsx b/AppImage/components/security.tsx index 143ac23d..ba2caebe 100644 --- a/AppImage/components/security.tsx +++ b/AppImage/components/security.tsx @@ -24,6 +24,13 @@ interface ApiTokenEntry { created_at: string expires_at: string revoked: boolean + /** Backend flag: `true` when JWT verifies under the current jwt_secret, + * `false` when the secret has been rotated since this token was minted + * (token returns 401 even though it looks stored), `null` for legacy + * rows that pre-date the tracking field. */ + valid?: boolean | null + /** Human reason populated when `valid === false`. */ + invalidation_reason?: string } // Replaces the previous `password.length < 6` check. Bumped the minimum @@ -2368,18 +2375,39 @@ ${(report.sections && report.sections.length > 0) ? `
- {existingTokens.map((token) => ( -
+ {existingTokens.map((token) => { + // `valid === false` → JWT signature broken by a + // jwt_secret rotation, every request returns 401 + // even though the entry still appears here. The + // operator needs to revoke and regenerate. + const isInvalid = token.valid === false + const isLegacy = token.valid === null || token.valid === undefined + const containerClass = isInvalid + ? "flex items-center justify-between p-3 bg-red-500/5 rounded-lg border border-red-500/30" + : "flex items-center justify-between p-3 bg-muted/50 rounded-lg border border-border" + return ( +
-
- +
+
-

{token.name}

-
+
+

{token.name}

+ {isInvalid && ( + + Invalid — regenerate + + )} + {isLegacy && ( + + Legacy + + )} +
+
{token.token_prefix} @@ -2388,6 +2416,11 @@ ${(report.sections && report.sections.length > 0) ? ` : "Unknown"}
+ {isInvalid && token.invalidation_reason && ( +

+ {token.invalidation_reason} +

+ )}
- ))} + ) + })}
)} diff --git a/AppImage/scripts/auth_manager.py b/AppImage/scripts/auth_manager.py index e5a7c98e..4a7cf2b8 100644 --- a/AppImage/scripts/auth_manager.py +++ b/AppImage/scripts/auth_manager.py @@ -152,13 +152,65 @@ def _get_jwt_secret(): config = load_auth_config() sec = config.get("jwt_secret") if isinstance(sec, str) and len(sec) >= 32: + _audit_api_tokens_against_jwt_secret(sec) return sec new_secret = secrets.token_urlsafe(48) config["jwt_secret"] = new_secret save_auth_config(config) + _audit_api_tokens_against_jwt_secret(new_secret) return new_secret +# One-shot startup audit: warn the operator (in journal) when stored +# api_tokens were minted under a previous jwt_secret. Those tokens +# remain in `api_tokens` metadata but their JWTs no longer verify, so +# the user's HTTP client (Home Assistant, custom script, …) gets a 401 +# while the token "looks valid" in the UI. We log once per process to +# make the failure mode searchable in journalctl without spamming. +_TOKEN_AUDIT_DONE = False +_TOKEN_AUDIT_LOCK = threading.Lock() + + +def _audit_api_tokens_against_jwt_secret(current_secret: str) -> None: + """One-time warning when stored api_tokens were signed under a + previous jwt_secret. Cheap: returns immediately after the first + successful run. Logs to stdout/stderr so the message lands in the + Monitor's journalctl output. + """ + global _TOKEN_AUDIT_DONE + with _TOKEN_AUDIT_LOCK: + if _TOKEN_AUDIT_DONE: + return + _TOKEN_AUDIT_DONE = True + + try: + config = load_auth_config() + tokens = config.get("api_tokens", []) + if not tokens: + return + current_fp = hashlib.sha256(current_secret.encode()).hexdigest()[:16] + stale = [t for t in tokens + if t.get("signed_with") is not None + and t.get("signed_with") != current_fp] + legacy = [t for t in tokens if t.get("signed_with") is None] + if stale: + ids = ", ".join(t.get("id", "?") for t in stale) + print(f"[ProxMenux][auth] WARNING: {len(stale)} API token(s) " + f"signed with a previous jwt_secret — they will return " + f"401 'Invalid or expired token'. Revoke and regenerate " + f"from Settings → API Tokens. Affected IDs: {ids}") + if legacy: + ids = ", ".join(t.get("id", "?") for t in legacy) + print(f"[ProxMenux][auth] NOTE: {len(legacy)} API token(s) " + f"have no signing-secret fingerprint (created before " + f"the tracking field was added). Their validity can " + f"only be confirmed by an actual auth attempt. " + f"Legacy IDs: {ids}") + except Exception as e: + # Audit is best-effort — failure must never break startup. + print(f"[ProxMenux][auth] token audit skipped: {e}") + + # Server-side mirror of the frontend's `validatePasswordStrength`. Defense # in depth: the UI enforces these rules but a direct API caller (curl, # scripted setup, custom client) bypasses the JS — so the same minimum has @@ -419,24 +471,45 @@ def verify_token(token): return None +def _jwt_secret_fingerprint(secret: str = None) -> str: + """Stable fingerprint of the active jwt_secret. + + First 16 hex chars of SHA256(secret). Used to detect whether a stored + api-token was minted under the *current* jwt_secret or under a + previous one (in which case the JWT can no longer be verified). + Never returns the secret itself. + """ + sec = secret if secret is not None else _get_jwt_secret() + if not sec: + return "" + return hashlib.sha256(sec.encode()).hexdigest()[:16] + + def store_api_token_metadata(token, token_name="API Token"): """ Store API token metadata (hash, name, creation date) for listing and revocation. The actual token is never stored - only a hash for identification. + + Also records the fingerprint of the jwt_secret that minted this token + (`signed_with`). At list time we compare this against the current + fingerprint so the UI can flag tokens whose signing secret has been + rotated since — those JWTs no longer verify and the operator needs + to regenerate them (see `list_api_tokens`). """ config = load_auth_config() token_hash = hashlib.sha256(token.encode()).hexdigest() token_id = token_hash[:16] - + token_entry = { "id": token_id, "name": token_name, "token_hash": token_hash, "token_prefix": token[:12] + "...", "created_at": datetime.utcnow().isoformat() + "Z", - "expires_at": (datetime.utcnow() + timedelta(days=365)).isoformat() + "Z" + "expires_at": (datetime.utcnow() + timedelta(days=365)).isoformat() + "Z", + "signed_with": _jwt_secret_fingerprint(), } - + config.setdefault("api_tokens", []) config["api_tokens"].append(token_entry) save_auth_config(config) @@ -444,24 +517,56 @@ def store_api_token_metadata(token, token_name="API Token"): def list_api_tokens(): - """ - List all stored API token metadata (no actual tokens are returned). - Returns list of token entries with id, name, prefix, creation and expiration dates. + """List stored API token metadata (no actual tokens are returned). + + Each entry carries: + * `revoked` — token hash is in the revocation list. + * `valid` — JWT can still be verified with the current secret. + `True` when `signed_with` matches the current + fingerprint, `False` when it doesn't (jwt_secret + rotated → JWT signature broken), `None` for legacy + entries created before this field existed (status + can only be confirmed by attempting a verify with + the real token, which we never see at list time). + * `invalidation_reason` — human-readable explanation when + `valid is False`, otherwise absent. + + The UI uses these flags to flag tokens that look stored but no + longer authenticate — preventing the "I have the token but it + returns 401" rabbit hole. """ config = load_auth_config() tokens = config.get("api_tokens", []) revoked = set(config.get("revoked_tokens", [])) - + current_fp = _jwt_secret_fingerprint() + result = [] for t in tokens: + signed_with = t.get("signed_with") + if signed_with is None: + valid = None # legacy entry — unknown + reason = None + elif signed_with == current_fp: + valid = True + reason = None + else: + valid = False + reason = ("Signed with a previous jwt_secret. The signing " + "secret has been rotated since this token was " + "issued — its JWT can no longer be verified. " + "Revoke this token and generate a new one.") + entry = { "id": t.get("id"), "name": t.get("name", "API Token"), "token_prefix": t.get("token_prefix", "***"), "created_at": t.get("created_at"), "expires_at": t.get("expires_at"), - "revoked": t.get("token_hash") in revoked + "revoked": t.get("token_hash") in revoked, + "valid": valid, } + if reason: + entry["invalidation_reason"] = reason result.append(entry) return result diff --git a/AppImage/scripts/flask_server.py b/AppImage/scripts/flask_server.py index fa99213c..62139d88 100644 --- a/AppImage/scripts/flask_server.py +++ b/AppImage/scripts/flask_server.py @@ -7,6 +7,31 @@ ProxMenux Flask Server - Integrates a web terminal powered by xterm.js """ +# ─── gevent monkey-patch — MUST be the first executable code ───────────── +# +# When SSL is enabled we serve the dashboard with `gevent.pywsgi.WSGIServer`. +# Without `monkey.patch_all()` gevent runs as a single-threaded cooperative +# event loop: a request that calls `subprocess.run(pvesh ...)` blocks the +# whole event loop, so every other request lined up in parallel returns 502 +# until that subprocess finishes. The frontend's `/api/vms` page fires 3-4 +# parallel requests on mount, which is exactly the symptom that surfaced as +# "first load 502, second load fine" under HTTPS. +# +# `patch_all()` replaces stdlib blocking primitives (socket, subprocess, +# select, threading, ssl, time.sleep, ...) with gevent-friendly equivalents +# that yield to the event loop instead of blocking it. This must run BEFORE +# any other import touches those primitives — otherwise the unpatched +# versions get bound in the module and the patch is silently ineffective. +# +# Wrapped in a try/except so a host without gevent installed (HTTP-only +# mode) still imports cleanly: the patch is only meaningful when gevent is +# actually being used as the WSGI server. +try: + from gevent import monkey + monkey.patch_all() +except ImportError: + pass + import glob import json import logging diff --git a/AppImage/scripts/notification_events.py b/AppImage/scripts/notification_events.py index 5c17acce..02b7d2d4 100644 --- a/AppImage/scripts/notification_events.py +++ b/AppImage/scripts/notification_events.py @@ -136,12 +136,30 @@ class NotificationEvent: return f"NotificationEvent({self.event_type}, {self.severity}, fp={self.fingerprint[:40]})" +_HOSTNAME_CACHE: Dict[str, Any] = {'value': None, 'ts': 0.0} +_HOSTNAME_CACHE_TTL = 5.0 # seconds + + def _hostname() -> str: """Get display hostname for notifications. - + Returns the custom display name from notification settings if configured, - otherwise falls back to the system hostname. + otherwise falls back to the system FQDN (NOT truncated at the first dot — + a host called ``px.seeindustry.com`` is rendered in full so multi-host + deployments stay distinguishable). + + Reads are cached for ~5 s so a burst of events (~tens per cycle) doesn't + hit the SQLite settings table on every call. The TTL is short enough that + a freshly-saved alias takes effect within seconds without restarting the + service — fixes the original behaviour where `self._hostname = _hostname()` + was cached in `__init__` and never refreshed. """ + now = time.time() + cached = _HOSTNAME_CACHE.get('value') + if cached is not None and (now - _HOSTNAME_CACHE['ts']) < _HOSTNAME_CACHE_TTL: + return cached + + resolved = '' # Try to read custom display name from notification settings try: db_path = Path('/usr/local/share/proxmenux/health_monitor.db') @@ -156,15 +174,24 @@ def _hostname() -> str: row = cursor.fetchone() conn.close() if row and row[0] and row[0].strip(): - return row[0].strip() + resolved = row[0].strip() except Exception: pass # Fall back to system hostname - - # Fall back to system hostname - try: - return socket.gethostname().split('.')[0] - except Exception: - return 'proxmox' + + if not resolved: + # Use FULL FQDN — never truncate at the first dot. The previous + # `.split('.')[0]` produced misleading bare labels like "px" when the + # alias was missing or unreadable, with no way for the operator to + # tell which of their `px.*.example.com` nodes the notification came + # from. The Display Name (alias) remains the recommended override. + try: + resolved = socket.gethostname() + except Exception: + resolved = 'proxmox' + + _HOSTNAME_CACHE['value'] = resolved + _HOSTNAME_CACHE['ts'] = now + return resolved def capture_journal_context(keywords: list, lines: int = 30, @@ -376,7 +403,10 @@ class JournalWatcher: self._running = False self._thread: Optional[threading.Thread] = None self._process: Optional[subprocess.Popen] = None - self._hostname = _hostname() + # `_hostname` is exposed as a @property below so every read returns + # the *current* alias from the settings DB (TTL-cached for 5 s in + # _hostname()). The old `__init__`-time cache made a fresh Display + # Name require a service restart to take effect. # Dedup: track recent events to avoid duplicates self._recent_events: Dict[str, float] = {} @@ -421,10 +451,14 @@ class JournalWatcher: # so we can suppress per-guest "Starting Backup of VM ..." noise self._last_backup_job_ts: float = 0 self._BACKUP_JOB_SUPPRESS_WINDOW = 7200 # 2h: suppress per-guest during active job - + # NOTE: Service failure batching is handled universally by # BurstAggregator in NotificationManager (AGGREGATION_RULES). - + + @property + def _hostname(self) -> str: + return _hostname() + def start(self): """Start the journal watcher thread.""" if self._running: @@ -1752,7 +1786,10 @@ class TaskWatcher: self._queue = event_queue self._running = False self._thread: Optional[threading.Thread] = None - self._hostname = _hostname() + # `_hostname` is exposed as a @property below so every read returns + # the *current* alias from the settings DB (TTL-cached for 5 s in + # _hostname()). The old `__init__`-time cache made a fresh Display + # Name require a service restart to take effect. self._last_position = 0 # Cache for active vzdump detection self._vzdump_active_cache: float = 0 # timestamp of last positive check @@ -1765,12 +1802,16 @@ class TaskWatcher: self._vzdump_grace_period = 120 # seconds after vzdump ends to still suppress # Track active-file UPIDs we've already seen, to avoid duplicate backup_start self._seen_active_upids: set = set() - + + @property + def _hostname(self) -> str: + return _hostname() + def start(self): if self._running: return self._running = True - + # Start at end of file if os.path.exists(self.TASK_LOG): try: @@ -2263,7 +2304,10 @@ class PollingCollector: self._running = False self._thread: Optional[threading.Thread] = None self._poll_interval = poll_interval - self._hostname = _hostname() + # `_hostname` is exposed as a @property below so every read returns + # the *current* alias from the settings DB (TTL-cached for 5 s in + # _hostname()). The old `__init__`-time cache made a fresh Display + # Name require a service restart to take effect. self._last_update_check = 0 self._last_proxmenux_check = 0 self._last_ai_model_check = 0 @@ -2312,7 +2356,11 @@ class PollingCollector: # subprocess per disk-with-error per poll cycle. Key: bare device # name (no /dev/). Value: bool (True = USB). self._is_usb_cache: Dict[str, bool] = {} - + + @property + def _hostname(self) -> str: + return _hostname() + def start(self): if self._running: return @@ -3703,8 +3751,15 @@ class ProxmoxHookWatcher: def __init__(self, event_queue: Queue): self._queue = event_queue - self._hostname = _hostname() - + # `_hostname` is exposed as a @property below so every read returns + # the *current* alias from the settings DB (TTL-cached for 5 s in + # _hostname()). The old `__init__`-time cache made a fresh Display + # Name require a service restart to take effect. + + @property + def _hostname(self) -> str: + return _hostname() + def process_webhook(self, payload: dict) -> dict: """Process an incoming Proxmox webhook payload. diff --git a/scripts/menus/config_menu.sh b/scripts/menus/config_menu.sh index 79aa1948..943d0ba4 100644 --- a/scripts/menus/config_menu.sh +++ b/scripts/menus/config_menu.sh @@ -404,6 +404,109 @@ toggle_monitor_service() { fi } +reset_monitor_password() { + # Recovery path for operators who lost the Monitor login credentials. + # Wipes only the identity claims from auth.json (username / password / + # 2FA secret / backup codes) so the next visit to the dashboard + # triggers the setup wizard with no password needed. Intentionally + # KEEPS `jwt_secret`, `api_tokens` and `revoked_tokens` — that means + # already-issued API tokens continue to work (Home Assistant / + # custom scripts don't need to be reconfigured) and only the + # interactive web login is reset. The operator chooses a new + # username + password on the next visit. + + local auth_file="$MONITOR_CONFIG_DIR/auth.json" + + if [ ! -f "$auth_file" ]; then + dialog --clear --backtitle "$BACKTITLE" \ + --title "$(translate "Reset Monitor Password")" \ + --msgbox "\n\n$(translate "ProxMenux Monitor authentication is not configured on this host — there is no password to reset.")" 11 70 + return + fi + + if ! dialog --clear --backtitle "$BACKTITLE" \ + --title "$(translate "Reset Monitor Password")" \ + --yesno "\n$(translate "This will RESET the ProxMenux Monitor login credentials on this host:")\n\n • $(translate "Username and password will be cleared.")\n • $(translate "Two-factor authentication and backup codes will be removed.")\n • $(translate "API tokens (Home Assistant, scripts) will keep working.")\n • $(translate "The next visit to the dashboard will show the initial setup wizard.")\n\n$(translate "Continue?")" 16 78; then + return + fi + + if ! command -v jq >/dev/null 2>&1; then + dialog --clear --backtitle "$BACKTITLE" \ + --title "$(translate "Reset Monitor Password")" \ + --msgbox "\n\n$(translate "jq is required for this operation but is not installed.")" 10 60 + return + fi + + show_proxmenux_logo + msg_title "$(translate "Reset Monitor Password")" + + # Timestamped backup so the operator can recover the previous state + # if the reset was a mistake. Includes the secret material — keep + # this file out of any shared location. + local backup_file + backup_file="${auth_file}.bak-$(date -u +%Y%m%d%H%M%S)" + if ! cp -a "$auth_file" "$backup_file" 2>/dev/null; then + msg_error "$(translate "Could not back up the existing auth.json")" + msg_success "$(translate "Press Enter to return to menu...")" + read -r + return + fi + chmod 0600 "$backup_file" 2>/dev/null || true + msg_ok "$(translate "Backup saved to:") $backup_file" + + msg_info "$(translate "Stopping ProxMenux Monitor service...")" + systemctl stop "$MONITOR_SERVICE" >/dev/null 2>&1 || true + msg_ok "$(translate "Service stopped.")" + + msg_info "$(translate "Clearing login credentials...")" + local tmp + tmp=$(mktemp) + if jq ' + .enabled = false + | .configured = false + | .username = "" + | .password_hash = "" + | .declined = false + | .totp_enabled = false + | .totp_secret = null + | .backup_codes = [] + ' "$auth_file" > "$tmp" 2>/dev/null; then + chmod 0600 "$tmp" 2>/dev/null || true + mv "$tmp" "$auth_file" + msg_ok "$(translate "Credentials cleared. jwt_secret and API tokens preserved.")" + else + rm -f "$tmp" + msg_error "$(translate "Failed to update auth.json — restoring backup.")" + cp -a "$backup_file" "$auth_file" + systemctl start "$MONITOR_SERVICE" >/dev/null 2>&1 || true + msg_success "$(translate "Press Enter to return to menu...")" + read -r + return + fi + + msg_info "$(translate "Restarting ProxMenux Monitor service...")" + if systemctl start "$MONITOR_SERVICE" >/dev/null 2>&1; then + msg_ok "$(translate "Service restarted.")" + else + msg_warn "$(translate "Could not restart the service — start it manually with systemctl start") $MONITOR_SERVICE" + fi + + local server_ip + server_ip=$(hostname -I | awk '{print $1}') + echo "" + msg_success "$(translate "Password reset completed.")" + echo "" + if [ -n "$server_ip" ]; then + msg_info2 "$(translate "Open the dashboard to create a new admin account:")" + echo -e "${TAB}${BL}http://${server_ip}:8008${CL}" + else + msg_info2 "$(translate "Open the dashboard from this host on port 8008 to create a new admin account.")" + fi + echo "" + msg_success "$(translate "Press Enter to return to menu...")" + read -r +} + show_monitor_status() { clear show_proxmenux_logo @@ -467,6 +570,10 @@ show_config_menu() { menu_options+=("$option_num" "$(translate "Show Monitor Service Status")") option_actions[$option_num]="show_monitor_status" ((option_num++)) + + menu_options+=("$option_num" "$(translate "Reset ProxMenux Monitor Password")") + option_actions[$option_num]="reset_monitor_password" + ((option_num++)) fi menu_options+=("$option_num" "$(translate "Change Release Channel")") @@ -517,6 +624,9 @@ show_config_menu() { "show_monitor_status") show_monitor_status ;; + "reset_monitor_password") + reset_monitor_password + ;; "change_release_channel") change_release_channel ;;