)}
diff --git a/AppImage/scripts/auth_manager.py b/AppImage/scripts/auth_manager.py
index e5a7c98e..4a7cf2b8 100644
--- a/AppImage/scripts/auth_manager.py
+++ b/AppImage/scripts/auth_manager.py
@@ -152,13 +152,65 @@ def _get_jwt_secret():
config = load_auth_config()
sec = config.get("jwt_secret")
if isinstance(sec, str) and len(sec) >= 32:
+ _audit_api_tokens_against_jwt_secret(sec)
return sec
new_secret = secrets.token_urlsafe(48)
config["jwt_secret"] = new_secret
save_auth_config(config)
+ _audit_api_tokens_against_jwt_secret(new_secret)
return new_secret
+# One-shot audit, triggered by the first `_get_jwt_secret()` call: warn the
+# operator (in journal) when stored api_tokens were minted under a previous
+# jwt_secret. Those tokens remain in `api_tokens` metadata but their JWTs no
+# longer verify, so the user's HTTP client (Home Assistant, custom script, …)
+# gets a 401 while the token "looks valid" in the UI. We log once per process
+# to make the failure mode searchable in journalctl without spamming.
+_TOKEN_AUDIT_DONE = False  # set to True under _TOKEN_AUDIT_LOCK by the first audit call
+_TOKEN_AUDIT_LOCK = threading.Lock()
+
+
+def _audit_api_tokens_against_jwt_secret(current_secret: str) -> None:
+    """Warn once per process about api_tokens the current secret cannot verify.
+
+    The done-flag is set under the lock before the audit body runs, so the
+    audit is attempted at most once (a failed attempt is not retried).
+    Messages are printed with flush=True so a buffered stdout cannot delay them.
+    """
+    global _TOKEN_AUDIT_DONE
+    with _TOKEN_AUDIT_LOCK:
+        if _TOKEN_AUDIT_DONE:
+            return
+        _TOKEN_AUDIT_DONE = True
+
+    try:
+        tokens = load_auth_config().get("api_tokens", [])
+        if not tokens:
+            return
+        current_fp = _jwt_secret_fingerprint(current_secret)
+        legacy = [t for t in tokens if t.get("signed_with") is None]
+        stale = [t for t in tokens
+                 if t.get("signed_with") not in (None, current_fp)]
+        if stale:
+            # str(): a malformed (non-string) id must not abort the audit.
+            ids = ", ".join(str(t.get("id", "?")) for t in stale)
+            print(f"[ProxMenux][auth] WARNING: {len(stale)} API token(s) "
+                  f"signed with a previous jwt_secret — they will return "
+                  f"401 'Invalid or expired token'. Revoke and regenerate "
+                  f"from Settings → API Tokens. Affected IDs: {ids}", flush=True)
+        if legacy:
+            ids = ", ".join(str(t.get("id", "?")) for t in legacy)
+            print(f"[ProxMenux][auth] NOTE: {len(legacy)} API token(s) "
+                  f"have no signing-secret fingerprint (created before "
+                  f"the tracking field was added). Their validity can "
+                  f"only be confirmed by an actual auth attempt. "
+                  f"Legacy IDs: {ids}", flush=True)
+    except Exception as e:
+        # Audit is best-effort — failure must never break startup.
+        print(f"[ProxMenux][auth] token audit skipped: {e}", flush=True)
+
+
# Server-side mirror of the frontend's `validatePasswordStrength`. Defense
# in depth: the UI enforces these rules but a direct API caller (curl,
# scripted setup, custom client) bypasses the JS — so the same minimum has
@@ -419,24 +471,45 @@ def verify_token(token):
return None
+def _jwt_secret_fingerprint(secret: str = None) -> str:
+    """Return a short, non-reversible tag identifying a jwt_secret.
+
+    The tag is the first 16 hex digits of SHA-256 over the secret; when
+    `secret` is None the active secret from `_get_jwt_secret()` is used.
+    An empty/missing secret yields "". Stored api-token entries carry
+    this tag so a later secret rotation can be detected by comparison.
+    """
+    if secret is None:
+        secret = _get_jwt_secret()
+    digest = hashlib.sha256(secret.encode()).hexdigest() if secret else ""
+    return digest[:16]
+
+
def store_api_token_metadata(token, token_name="API Token"):
"""
Store API token metadata (hash, name, creation date) for listing and revocation.
The actual token is never stored - only a hash for identification.
+
+ Also records the fingerprint of the jwt_secret that minted this token
+ (`signed_with`). At list time we compare this against the current
+ fingerprint so the UI can flag tokens whose signing secret has been
+ rotated since — those JWTs no longer verify and the operator needs
+ to regenerate them (see `list_api_tokens`).
"""
config = load_auth_config()
token_hash = hashlib.sha256(token.encode()).hexdigest()
token_id = token_hash[:16]
-
+
token_entry = {
"id": token_id,
"name": token_name,
"token_hash": token_hash,
"token_prefix": token[:12] + "...",
"created_at": datetime.utcnow().isoformat() + "Z",
- "expires_at": (datetime.utcnow() + timedelta(days=365)).isoformat() + "Z"
+ "expires_at": (datetime.utcnow() + timedelta(days=365)).isoformat() + "Z",
+ "signed_with": _jwt_secret_fingerprint(),
}
-
+
config.setdefault("api_tokens", [])
config["api_tokens"].append(token_entry)
save_auth_config(config)
@@ -444,24 +517,56 @@ def store_api_token_metadata(token, token_name="API Token"):
def list_api_tokens():
- """
- List all stored API token metadata (no actual tokens are returned).
- Returns list of token entries with id, name, prefix, creation and expiration dates.
+ """List stored API token metadata (no actual tokens are returned).
+
+ Each entry carries:
+ * `revoked` — token hash is in the revocation list.
+ * `valid` — JWT can still be verified with the current secret.
+ `True` when `signed_with` matches the current
+ fingerprint, `False` when it doesn't (jwt_secret
+ rotated → JWT signature broken), `None` for legacy
+ entries created before this field existed (status
+ can only be confirmed by attempting a verify with
+ the real token, which we never see at list time).
+ * `invalidation_reason` — human-readable explanation when
+ `valid is False`, otherwise absent.
+
+ The UI uses these flags to flag tokens that look stored but no
+ longer authenticate — preventing the "I have the token but it
+ returns 401" rabbit hole.
"""
config = load_auth_config()
tokens = config.get("api_tokens", [])
revoked = set(config.get("revoked_tokens", []))
-
+ current_fp = _jwt_secret_fingerprint()
+
result = []
for t in tokens:
+ signed_with = t.get("signed_with")
+ if signed_with is None:
+ valid = None # legacy entry — unknown
+ reason = None
+ elif signed_with == current_fp:
+ valid = True
+ reason = None
+ else:
+ valid = False
+ reason = ("Signed with a previous jwt_secret. The signing "
+ "secret has been rotated since this token was "
+ "issued — its JWT can no longer be verified. "
+ "Revoke this token and generate a new one.")
+
entry = {
"id": t.get("id"),
"name": t.get("name", "API Token"),
"token_prefix": t.get("token_prefix", "***"),
"created_at": t.get("created_at"),
"expires_at": t.get("expires_at"),
- "revoked": t.get("token_hash") in revoked
+ "revoked": t.get("token_hash") in revoked,
+ "valid": valid,
}
+ if reason:
+ entry["invalidation_reason"] = reason
result.append(entry)
return result
diff --git a/AppImage/scripts/flask_server.py b/AppImage/scripts/flask_server.py
index fa99213c..62139d88 100644
--- a/AppImage/scripts/flask_server.py
+++ b/AppImage/scripts/flask_server.py
@@ -7,6 +7,31 @@ ProxMenux Flask Server
- Integrates a web terminal powered by xterm.js
"""
+# ─── gevent monkey-patch — MUST be the first executable code ─────────────
+#
+# When SSL is enabled we serve the dashboard with `gevent.pywsgi.WSGIServer`.
+# Without `monkey.patch_all()` gevent runs as a single-threaded cooperative
+# event loop: a request that calls `subprocess.run(pvesh ...)` blocks the
+# whole event loop, so every other request lined up in parallel returns 502
+# until that subprocess finishes. The frontend's `/api/vms` page fires 3-4
+# parallel requests on mount, which is exactly the symptom that surfaced as
+# "first load 502, second load fine" under HTTPS.
+#
+# `patch_all()` replaces stdlib blocking primitives (socket, subprocess,
+# select, threading, ssl, time.sleep, ...) with gevent-friendly equivalents
+# that yield to the event loop instead of blocking it. This must run BEFORE
+# any other import touches those primitives — otherwise the unpatched
+# versions get bound in the module and the patch is silently ineffective.
+#
+# Wrapped in a try/except so a host without gevent installed (HTTP-only
+# mode) still imports cleanly. NOTE: whenever gevent is importable the
+# patch is applied, whether or not gevent ends up being the WSGI server.
+try:
+ from gevent import monkey
+ monkey.patch_all()
+except ImportError:
+ pass
+
import glob
import json
import logging
diff --git a/AppImage/scripts/notification_events.py b/AppImage/scripts/notification_events.py
index 5c17acce..02b7d2d4 100644
--- a/AppImage/scripts/notification_events.py
+++ b/AppImage/scripts/notification_events.py
@@ -136,12 +136,30 @@ class NotificationEvent:
return f"NotificationEvent({self.event_type}, {self.severity}, fp={self.fingerprint[:40]})"
+_HOSTNAME_CACHE: Dict[str, Any] = {'value': None, 'ts': 0.0}  # 'value': last resolved name, 'ts': time.time() of that resolve
+_HOSTNAME_CACHE_TTL = 5.0 # seconds
+
+
def _hostname() -> str:
"""Get display hostname for notifications.
-
+
Returns the custom display name from notification settings if configured,
- otherwise falls back to the system hostname.
+ otherwise falls back to ``socket.gethostname()`` verbatim (NOT truncated at
+ the first dot). Note this is whatever name the OS reports — a full FQDN
+ such as ``px.seeindustry.com`` only when the host itself is named that way.
+
+ Reads are cached for ~5 s so a burst of events (~tens per cycle) doesn't
+ hit the SQLite settings table on every call. The TTL is short enough that
+ a freshly-saved alias takes effect within seconds without restarting the
+ service — fixes the original behaviour where `self._hostname = _hostname()`
+ was cached in `__init__` and never refreshed.
"""
+ now = time.time()
+ cached = _HOSTNAME_CACHE.get('value')
+ if cached is not None and (now - _HOSTNAME_CACHE['ts']) < _HOSTNAME_CACHE_TTL:
+ return cached
+
+ resolved = ''
# Try to read custom display name from notification settings
try:
db_path = Path('/usr/local/share/proxmenux/health_monitor.db')
@@ -156,15 +174,24 @@ def _hostname() -> str:
row = cursor.fetchone()
conn.close()
if row and row[0] and row[0].strip():
- return row[0].strip()
+ resolved = row[0].strip()
except Exception:
pass # Fall back to system hostname
-
- # Fall back to system hostname
- try:
- return socket.gethostname().split('.')[0]
- except Exception:
- return 'proxmox'
+
+ if not resolved:
+ # Use the hostname as reported — never truncate at the first dot. The
+ # previous `.split('.')[0]` produced misleading bare labels like "px" when
+ # the alias was missing or unreadable, with no way for the operator to
+ # tell which of their `px.*.example.com` nodes the notification came
+ # from. The Display Name (alias) remains the recommended override.
+ try:
+ resolved = socket.gethostname()
+ except Exception:
+ resolved = 'proxmox'
+
+ _HOSTNAME_CACHE['value'] = resolved
+ _HOSTNAME_CACHE['ts'] = now
+ return resolved
def capture_journal_context(keywords: list, lines: int = 30,
@@ -376,7 +403,10 @@ class JournalWatcher:
self._running = False
self._thread: Optional[threading.Thread] = None
self._process: Optional[subprocess.Popen] = None
- self._hostname = _hostname()
+ # `_hostname` is exposed as a @property below so every read returns
+ # the *current* alias from the settings DB (TTL-cached for 5 s in
+ # _hostname()). The old `__init__`-time cache made a fresh Display
+ # Name require a service restart to take effect.
# Dedup: track recent events to avoid duplicates
self._recent_events: Dict[str, float] = {}
@@ -421,10 +451,14 @@ class JournalWatcher:
# so we can suppress per-guest "Starting Backup of VM ..." noise
self._last_backup_job_ts: float = 0
self._BACKUP_JOB_SUPPRESS_WINDOW = 7200 # 2h: suppress per-guest during active job
-
+
# NOTE: Service failure batching is handled universally by
# BurstAggregator in NotificationManager (AGGREGATION_RULES).
-
+
+ @property
+ def _hostname(self) -> str:
+ return _hostname()  # the module-level function (5 s TTL cache), not this property
+
def start(self):
"""Start the journal watcher thread."""
if self._running:
@@ -1752,7 +1786,10 @@ class TaskWatcher:
self._queue = event_queue
self._running = False
self._thread: Optional[threading.Thread] = None
- self._hostname = _hostname()
+ # `_hostname` is exposed as a @property below so every read returns
+ # the *current* alias from the settings DB (TTL-cached for 5 s in
+ # _hostname()). The old `__init__`-time cache made a fresh Display
+ # Name require a service restart to take effect.
self._last_position = 0
# Cache for active vzdump detection
self._vzdump_active_cache: float = 0 # timestamp of last positive check
@@ -1765,12 +1802,16 @@ class TaskWatcher:
self._vzdump_grace_period = 120 # seconds after vzdump ends to still suppress
# Track active-file UPIDs we've already seen, to avoid duplicate backup_start
self._seen_active_upids: set = set()
-
+
+ @property
+ def _hostname(self) -> str:
+ return _hostname()  # the module-level function (5 s TTL cache), not this property
+
def start(self):
if self._running:
return
self._running = True
-
+
# Start at end of file
if os.path.exists(self.TASK_LOG):
try:
@@ -2263,7 +2304,10 @@ class PollingCollector:
self._running = False
self._thread: Optional[threading.Thread] = None
self._poll_interval = poll_interval
- self._hostname = _hostname()
+ # `_hostname` is exposed as a @property below so every read returns
+ # the *current* alias from the settings DB (TTL-cached for 5 s in
+ # _hostname()). The old `__init__`-time cache made a fresh Display
+ # Name require a service restart to take effect.
self._last_update_check = 0
self._last_proxmenux_check = 0
self._last_ai_model_check = 0
@@ -2312,7 +2356,11 @@ class PollingCollector:
# subprocess per disk-with-error per poll cycle. Key: bare device
# name (no /dev/). Value: bool (True = USB).
self._is_usb_cache: Dict[str, bool] = {}
-
+
+ @property
+ def _hostname(self) -> str:
+ return _hostname()  # the module-level function (5 s TTL cache), not this property
+
def start(self):
if self._running:
return
@@ -3703,8 +3751,15 @@ class ProxmoxHookWatcher:
def __init__(self, event_queue: Queue):
self._queue = event_queue
- self._hostname = _hostname()
-
+ # `_hostname` is exposed as a @property below so every read returns
+ # the *current* alias from the settings DB (TTL-cached for 5 s in
+ # _hostname()). The old `__init__`-time cache made a fresh Display
+ # Name require a service restart to take effect.
+
+ @property
+ def _hostname(self) -> str:
+ return _hostname()  # the module-level function (5 s TTL cache), not this property
+
def process_webhook(self, payload: dict) -> dict:
"""Process an incoming Proxmox webhook payload.
diff --git a/scripts/menus/config_menu.sh b/scripts/menus/config_menu.sh
index 79aa1948..943d0ba4 100644
--- a/scripts/menus/config_menu.sh
+++ b/scripts/menus/config_menu.sh
@@ -404,6 +404,109 @@ toggle_monitor_service() {
fi
}
+reset_monitor_password() {
+    # Recovery path for operators who lost the Monitor login credentials.
+    # Clears only the identity fields of auth.json (username / password /
+    # 2FA secret / backup codes) so the next visit to the dashboard
+    # triggers the setup wizard with no password needed. Intentionally
+    # KEEPS `jwt_secret`, `api_tokens` and `revoked_tokens`, so issued
+    # API tokens are left untouched and only the interactive web login is
+    # reset. The rewritten file is staged next to auth.json and swapped
+    # in with a same-directory `mv`; "cleared" is reported only when both
+    # jq and the `mv` succeeded.
+
+    local auth_file="$MONITOR_CONFIG_DIR/auth.json"
+
+    if [ ! -f "$auth_file" ]; then
+        dialog --clear --backtitle "$BACKTITLE" \
+            --title "$(translate "Reset Monitor Password")" \
+            --msgbox "\n\n$(translate "ProxMenux Monitor authentication is not configured on this host — there is no password to reset.")" 11 70
+        return
+    fi
+
+    if ! dialog --clear --backtitle "$BACKTITLE" \
+        --title "$(translate "Reset Monitor Password")" \
+        --yesno "\n$(translate "This will RESET the ProxMenux Monitor login credentials on this host:")\n\n • $(translate "Username and password will be cleared.")\n • $(translate "Two-factor authentication and backup codes will be removed.")\n • $(translate "API tokens (Home Assistant, scripts) will keep working.")\n • $(translate "The next visit to the dashboard will show the initial setup wizard.")\n\n$(translate "Continue?")" 16 78; then
+        return
+    fi
+
+    if ! command -v jq >/dev/null 2>&1; then
+        dialog --clear --backtitle "$BACKTITLE" \
+            --title "$(translate "Reset Monitor Password")" \
+            --msgbox "\n\n$(translate "jq is required for this operation but is not installed.")" 10 60
+        return
+    fi
+
+    show_proxmenux_logo
+    msg_title "$(translate "Reset Monitor Password")"
+
+    # Timestamped backup so the operator can recover the previous state
+    # if the reset was a mistake. Includes the secret material — keep
+    # this file out of any shared location.
+    local backup_file
+    backup_file="${auth_file}.bak-$(date -u +%Y%m%d%H%M%S)"
+    if ! cp -a "$auth_file" "$backup_file" 2>/dev/null; then
+        msg_error "$(translate "Could not back up the existing auth.json")"
+        msg_success "$(translate "Press Enter to return to menu...")"
+        read -r
+        return
+    fi
+    chmod 0600 "$backup_file" 2>/dev/null || true
+    msg_ok "$(translate "Backup saved to:") $backup_file"
+
+    msg_info "$(translate "Stopping ProxMenux Monitor service...")"
+    systemctl stop "$MONITOR_SERVICE" >/dev/null 2>&1 || true
+    msg_ok "$(translate "Service stopped.")"
+
+    msg_info "$(translate "Clearing login credentials...")"
+    local tmp
+    tmp=$(mktemp "${auth_file}.tmp.XXXXXX")
+    if jq '
+        .enabled = false
+        | .configured = false
+        | .username = ""
+        | .password_hash = ""
+        | .declined = false
+        | .totp_enabled = false
+        | .totp_secret = null
+        | .backup_codes = []
+    ' "$auth_file" > "$tmp" 2>/dev/null \
+            && mv "$tmp" "$auth_file"; then
+        chmod 0600 "$auth_file" 2>/dev/null || true
+        msg_ok "$(translate "Credentials cleared. jwt_secret and API tokens preserved.")"
+    else
+        rm -f "$tmp"
+        msg_error "$(translate "Failed to update auth.json — restoring backup.")"
+        cp -a "$backup_file" "$auth_file"
+        systemctl start "$MONITOR_SERVICE" >/dev/null 2>&1 || true
+        msg_success "$(translate "Press Enter to return to menu...")"
+        read -r
+        return
+    fi
+
+    msg_info "$(translate "Restarting ProxMenux Monitor service...")"
+    if systemctl start "$MONITOR_SERVICE" >/dev/null 2>&1; then
+        msg_ok "$(translate "Service restarted.")"
+    else
+        msg_warn "$(translate "Could not restart the service — start it manually with systemctl start") $MONITOR_SERVICE"
+    fi
+
+    local server_ip
+    server_ip=$(hostname -I | awk '{print $1}')
+    echo ""
+    msg_success "$(translate "Password reset completed.")"
+    echo ""
+    if [ -n "$server_ip" ]; then
+        msg_info2 "$(translate "Open the dashboard to create a new admin account:")"
+        echo -e "${TAB}${BL}http://${server_ip}:8008${CL}"
+    else
+        msg_info2 "$(translate "Open the dashboard from this host on port 8008 to create a new admin account.")"
+    fi
+    echo ""
+    msg_success "$(translate "Press Enter to return to menu...")"
+    read -r
+}
+
show_monitor_status() {
clear
show_proxmenux_logo
@@ -467,6 +570,10 @@ show_config_menu() {
menu_options+=("$option_num" "$(translate "Show Monitor Service Status")")
option_actions[$option_num]="show_monitor_status"
((option_num++))
+
+ menu_options+=("$option_num" "$(translate "Reset ProxMenux Monitor Password")")
+ option_actions[$option_num]="reset_monitor_password"
+ ((option_num++))
fi
menu_options+=("$option_num" "$(translate "Change Release Channel")")
@@ -517,6 +624,9 @@ show_config_menu() {
"show_monitor_status")
show_monitor_status
;;
+ "reset_monitor_password")
+ reset_monitor_password
+ ;;
"change_release_channel")
change_release_channel
;;