Update AppImage 1.2.1.2

This commit is contained in:
MacRimi
2026-05-21 21:17:59 +02:00
parent 3e9dd599a6
commit f5b7a0a74b
7 changed files with 367 additions and 38 deletions
+113 -8
View File
@@ -152,13 +152,65 @@ def _get_jwt_secret():
config = load_auth_config()
sec = config.get("jwt_secret")
if isinstance(sec, str) and len(sec) >= 32:
_audit_api_tokens_against_jwt_secret(sec)
return sec
new_secret = secrets.token_urlsafe(48)
config["jwt_secret"] = new_secret
save_auth_config(config)
_audit_api_tokens_against_jwt_secret(new_secret)
return new_secret
# One-shot startup audit: warn the operator (in journal) when stored
# api_tokens were minted under a previous jwt_secret. Those tokens
# remain in `api_tokens` metadata but their JWTs no longer verify, so
# the user's HTTP client (Home Assistant, custom script, …) gets a 401
# while the token "looks valid" in the UI. We log once per process to
# make the failure mode searchable in journalctl without spamming.
_TOKEN_AUDIT_DONE = False
_TOKEN_AUDIT_LOCK = threading.Lock()


def _audit_api_tokens_against_jwt_secret(current_secret: str) -> None:
    """Warn, at most once per process, about stored API tokens that cannot verify.

    Compares the `signed_with` fingerprint of every entry in the
    `api_tokens` config list against the fingerprint of `current_secret`
    and prints:

    * a WARNING listing tokens whose fingerprint differs (signed under a
      previous jwt_secret), and
    * a NOTE listing legacy tokens that carry no fingerprint at all.

    The audit is attempted exactly once: the done-flag is claimed *before*
    the audit runs, so a failed attempt is not retried on later calls.
    Output goes through `print`.

    Args:
        current_secret: The active jwt_secret to fingerprint.

    Returns:
        None. Never raises — any failure is reported as "token audit skipped".
    """
    global _TOKEN_AUDIT_DONE
    with _TOKEN_AUDIT_LOCK:
        if _TOKEN_AUDIT_DONE:
            return
        # Claim the single attempt while holding the lock; the audit itself
        # runs outside the lock so it never blocks other callers.
        _TOKEN_AUDIT_DONE = True

    try:
        config = load_auth_config()
        tokens = config.get("api_tokens", [])
        if not tokens:
            return

        # Must stay identical to the scheme used by _jwt_secret_fingerprint():
        # first 16 hex chars of SHA256(secret).
        current_fp = hashlib.sha256(current_secret.encode()).hexdigest()[:16]

        # Single pass: no fingerprint -> legacy, different fingerprint -> stale.
        stale = []
        legacy = []
        for entry in tokens:
            signed_with = entry.get("signed_with")
            if signed_with is None:
                legacy.append(entry)
            elif signed_with != current_fp:
                stale.append(entry)

        if stale:
            # str(...) so a null / non-string id cannot abort the warning.
            ids = ", ".join(str(t.get("id") or "?") for t in stale)
            print(f"[ProxMenux][auth] WARNING: {len(stale)} API token(s) "
                  f"signed with a previous jwt_secret — they will return "
                  f"401 'Invalid or expired token'. Revoke and regenerate "
                  f"from Settings → API Tokens. Affected IDs: {ids}")
        if legacy:
            ids = ", ".join(str(t.get("id") or "?") for t in legacy)
            print(f"[ProxMenux][auth] NOTE: {len(legacy)} API token(s) "
                  f"have no signing-secret fingerprint (created before "
                  f"the tracking field was added). Their validity can "
                  f"only be confirmed by an actual auth attempt. "
                  f"Legacy IDs: {ids}")
    except Exception as e:
        # Audit is best-effort — failure must never break startup.
        print(f"[ProxMenux][auth] token audit skipped: {e}")
# Server-side mirror of the frontend's `validatePasswordStrength`. Defense
# in depth: the UI enforces these rules but a direct API caller (curl,
# scripted setup, custom client) bypasses the JS — so the same minimum has
@@ -419,24 +471,45 @@ def verify_token(token):
return None
def _jwt_secret_fingerprint(secret: str = None) -> str:
"""Stable fingerprint of the active jwt_secret.
First 16 hex chars of SHA256(secret). Used to detect whether a stored
api-token was minted under the *current* jwt_secret or under a
previous one (in which case the JWT can no longer be verified).
Never returns the secret itself.
"""
sec = secret if secret is not None else _get_jwt_secret()
if not sec:
return ""
return hashlib.sha256(sec.encode()).hexdigest()[:16]
def store_api_token_metadata(token, token_name="API Token"):
    """Persist metadata for a newly issued API token.

    Only identifying metadata is stored (SHA256 hash, short prefix, name,
    creation/expiry timestamps) so the token can be listed and revoked;
    the token itself is never written to the config.

    The entry also records `signed_with`, the fingerprint of the jwt_secret
    active when the token was minted. `list_api_tokens` compares it against
    the current fingerprint to flag tokens whose signing secret has been
    rotated since — those JWTs no longer verify and must be regenerated.

    Args:
        token: The raw token string.
        token_name: Human-readable label shown in listings.
    """
    config = load_auth_config()
    token_hash = hashlib.sha256(token.encode()).hexdigest()

    # Read the clock once so created_at and expires_at are derived from the
    # same instant (exactly 365 days apart).
    now = datetime.utcnow()

    token_entry = {
        "id": token_hash[:16],
        "name": token_name,
        "token_hash": token_hash,
        "token_prefix": token[:12] + "...",
        "created_at": now.isoformat() + "Z",
        "expires_at": (now + timedelta(days=365)).isoformat() + "Z",
        "signed_with": _jwt_secret_fingerprint(),
    }

    config.setdefault("api_tokens", [])
    config["api_tokens"].append(token_entry)
    save_auth_config(config)
@@ -444,24 +517,56 @@ def store_api_token_metadata(token, token_name="API Token"):
def list_api_tokens():
    """List stored API token metadata (no actual tokens are returned).

    Each returned entry carries `id`, `name`, `token_prefix`, `created_at`,
    `expires_at` plus:

    * `revoked` — the token hash is in the `revoked_tokens` list.
    * `valid`   — whether the JWT can still be verified with the current
                  secret: `True` when `signed_with` matches the current
                  fingerprint, `False` when it does not (jwt_secret
                  rotated → signature broken), `None` for legacy entries
                  stored without `signed_with` (their status can only be
                  confirmed by verifying the real token, which is never
                  available at list time).
    * `invalidation_reason` — human-readable explanation, present only
                  when `valid is False`.

    The UI uses these flags to mark tokens that look stored but no longer
    authenticate.

    Returns:
        A list of dicts, one per stored token, in stored order.
    """
    config = load_auth_config()
    tokens = config.get("api_tokens", [])
    revoked = set(config.get("revoked_tokens", []))
    current_fp = _jwt_secret_fingerprint()

    # Loop-invariant: the same explanation applies to every rotated token.
    rotated_reason = ("Signed with a previous jwt_secret. The signing "
                      "secret has been rotated since this token was "
                      "issued — its JWT can no longer be verified. "
                      "Revoke this token and generate a new one.")

    result = []
    for t in tokens:
        signed_with = t.get("signed_with")
        if signed_with is None:
            valid = None  # legacy entry — unknown
        else:
            valid = signed_with == current_fp

        entry = {
            "id": t.get("id"),
            "name": t.get("name", "API Token"),
            "token_prefix": t.get("token_prefix", "***"),
            "created_at": t.get("created_at"),
            "expires_at": t.get("expires_at"),
            "revoked": t.get("token_hash") in revoked,
            "valid": valid,
        }
        if valid is False:
            entry["invalidation_reason"] = rotated_reason
        result.append(entry)
    return result
+25
View File
@@ -7,6 +7,31 @@ ProxMenux Flask Server
- Integrates a web terminal powered by xterm.js
"""
# ─── gevent monkey-patch — MUST be the first executable code ─────────────
#
# When SSL is enabled we serve the dashboard with `gevent.pywsgi.WSGIServer`.
# Without `monkey.patch_all()` gevent runs as a single-threaded cooperative
# event loop: a request that calls `subprocess.run(pvesh ...)` blocks the
# whole event loop, so every other request lined up in parallel returns 502
# until that subprocess finishes. The frontend's `/api/vms` page fires 3-4
# parallel requests on mount, which is exactly the symptom that surfaced as
# "first load 502, second load fine" under HTTPS.
#
# `patch_all()` replaces stdlib blocking primitives (socket, subprocess,
# select, threading, ssl, time.sleep, ...) with gevent-friendly equivalents
# that yield to the event loop instead of blocking it. This must run BEFORE
# any other import touches those primitives — otherwise the unpatched
# versions get bound in the module and the patch is silently ineffective.
#
# Wrapped in a try/except so a host without gevent installed (HTTP-only
# mode) still imports cleanly: the patch is only meaningful when gevent is
# actually being used as the WSGI server.
try:
    # Kept ahead of every other import in this module so the stdlib
    # primitives are patched before anything else binds them.
    from gevent import monkey
    monkey.patch_all()
except ImportError:
    # gevent is not installed: nothing to patch, continue with the
    # unpatched standard library.
    pass
import glob
import json
import logging
+74 -19
View File
@@ -136,12 +136,30 @@ class NotificationEvent:
return f"NotificationEvent({self.event_type}, {self.severity}, fp={self.fingerprint[:40]})"
_HOSTNAME_CACHE: Dict[str, Any] = {'value': None, 'ts': 0.0}
_HOSTNAME_CACHE_TTL = 5.0 # seconds
def _hostname() -> str:
"""Get display hostname for notifications.
Returns the custom display name from notification settings if configured,
otherwise falls back to the system hostname.
otherwise falls back to the system FQDN (NOT truncated at the first dot —
a host called ``px.seeindustry.com`` is rendered in full so multi-host
deployments stay distinguishable).
Reads are cached for ~5 s so a burst of events (~tens per cycle) doesn't
hit the SQLite settings table on every call. The TTL is short enough that
a freshly-saved alias takes effect within seconds without restarting the
service — fixes the original behaviour where `self._hostname = _hostname()`
was cached in `__init__` and never refreshed.
"""
now = time.time()
cached = _HOSTNAME_CACHE.get('value')
if cached is not None and (now - _HOSTNAME_CACHE['ts']) < _HOSTNAME_CACHE_TTL:
return cached
resolved = ''
# Try to read custom display name from notification settings
try:
db_path = Path('/usr/local/share/proxmenux/health_monitor.db')
@@ -156,15 +174,24 @@ def _hostname() -> str:
row = cursor.fetchone()
conn.close()
if row and row[0] and row[0].strip():
return row[0].strip()
resolved = row[0].strip()
except Exception:
pass # Fall back to system hostname
# Fall back to system hostname
try:
return socket.gethostname().split('.')[0]
except Exception:
return 'proxmox'
if not resolved:
# Use FULL FQDN — never truncate at the first dot. The previous
# `.split('.')[0]` produced misleading bare labels like "px" when the
# alias was missing or unreadable, with no way for the operator to
# tell which of their `px.*.example.com` nodes the notification came
# from. The Display Name (alias) remains the recommended override.
try:
resolved = socket.gethostname()
except Exception:
resolved = 'proxmox'
_HOSTNAME_CACHE['value'] = resolved
_HOSTNAME_CACHE['ts'] = now
return resolved
def capture_journal_context(keywords: list, lines: int = 30,
@@ -376,7 +403,10 @@ class JournalWatcher:
self._running = False
self._thread: Optional[threading.Thread] = None
self._process: Optional[subprocess.Popen] = None
self._hostname = _hostname()
# `_hostname` is exposed as a @property below so every read returns
# the *current* alias from the settings DB (TTL-cached for 5 s in
# _hostname()). The old `__init__`-time cache made a fresh Display
# Name require a service restart to take effect.
# Dedup: track recent events to avoid duplicates
self._recent_events: Dict[str, float] = {}
@@ -421,10 +451,14 @@ class JournalWatcher:
# so we can suppress per-guest "Starting Backup of VM ..." noise
self._last_backup_job_ts: float = 0
self._BACKUP_JOB_SUPPRESS_WINDOW = 7200 # 2h: suppress per-guest during active job
# NOTE: Service failure batching is handled universally by
# BurstAggregator in NotificationManager (AGGREGATION_RULES).
@property
def _hostname(self) -> str:
    """Display hostname, re-resolved on every access.

    Delegates to the module-level ``_hostname()`` function instead of
    holding a value captured at construction time.
    """
    display_name = _hostname()
    return display_name
def start(self):
"""Start the journal watcher thread."""
if self._running:
@@ -1752,7 +1786,10 @@ class TaskWatcher:
self._queue = event_queue
self._running = False
self._thread: Optional[threading.Thread] = None
self._hostname = _hostname()
# `_hostname` is exposed as a @property below so every read returns
# the *current* alias from the settings DB (TTL-cached for 5 s in
# _hostname()). The old `__init__`-time cache made a fresh Display
# Name require a service restart to take effect.
self._last_position = 0
# Cache for active vzdump detection
self._vzdump_active_cache: float = 0 # timestamp of last positive check
@@ -1765,12 +1802,16 @@ class TaskWatcher:
self._vzdump_grace_period = 120 # seconds after vzdump ends to still suppress
# Track active-file UPIDs we've already seen, to avoid duplicate backup_start
self._seen_active_upids: set = set()
@property
def _hostname(self) -> str:
    """Display hostname, re-resolved on every access.

    Delegates to the module-level ``_hostname()`` function instead of
    holding a value captured at construction time.
    """
    display_name = _hostname()
    return display_name
def start(self):
if self._running:
return
self._running = True
# Start at end of file
if os.path.exists(self.TASK_LOG):
try:
@@ -2263,7 +2304,10 @@ class PollingCollector:
self._running = False
self._thread: Optional[threading.Thread] = None
self._poll_interval = poll_interval
self._hostname = _hostname()
# `_hostname` is exposed as a @property below so every read returns
# the *current* alias from the settings DB (TTL-cached for 5 s in
# _hostname()). The old `__init__`-time cache made a fresh Display
# Name require a service restart to take effect.
self._last_update_check = 0
self._last_proxmenux_check = 0
self._last_ai_model_check = 0
@@ -2312,7 +2356,11 @@ class PollingCollector:
# subprocess per disk-with-error per poll cycle. Key: bare device
# name (no /dev/). Value: bool (True = USB).
self._is_usb_cache: Dict[str, bool] = {}
@property
def _hostname(self) -> str:
    """Display hostname, re-resolved on every access.

    Delegates to the module-level ``_hostname()`` function instead of
    holding a value captured at construction time.
    """
    display_name = _hostname()
    return display_name
def start(self):
if self._running:
return
@@ -3703,8 +3751,15 @@ class ProxmoxHookWatcher:
def __init__(self, event_queue: Queue):
self._queue = event_queue
self._hostname = _hostname()
# `_hostname` is exposed as a @property below so every read returns
# the *current* alias from the settings DB (TTL-cached for 5 s in
# _hostname()). The old `__init__`-time cache made a fresh Display
# Name require a service restart to take effect.
@property
def _hostname(self) -> str:
    """Display hostname, re-resolved on every access.

    Delegates to the module-level ``_hostname()`` function instead of
    holding a value captured at construction time.
    """
    display_name = _hostname()
    return display_name
def process_webhook(self, payload: dict) -> dict:
"""Process an incoming Proxmox webhook payload.