Files
ProxMenux/AppImage/scripts/managed_installs.py
T
2026-05-09 18:59:59 +02:00

578 lines
22 KiB
Python

"""ProxMenux-managed installs registry.
Single source of truth for "things ProxMenux installed (or detected as
already installed) and can check for updates on". Replaces the
type-specific polling we had before — every check now flows through
this module, so adding a new tracked install (Coral driver, Frigate,
etc.) is one entry in DETECTORS + one entry in CHECKERS.
Two operation modes:
* **Detection** — at AppImage startup and every 24h, every registered
``DETECTOR`` runs against the host. If the probe finds the thing
installed and it's not in the registry, we add it (with
``installed_by="detected"`` so the operator sees we autodiscovered
it). If it's in the registry but the probe fails, we mark it
``removed_at`` instead of deleting — keeps history and avoids
spurious notifications when a probe transiently fails.
* **Update check** — for every active entry, the matching ``CHECKER``
runs and updates ``current_version`` + ``available`` + ``latest``.
Each checker is responsible for its own per-source cache (the
Tailscale OCI checker memoises for 24h, NVIDIA for 7 days). The
notification poll loop reads the registry, emits a notification when
``available`` flips false→true for a (type, latest) pair it hasn't
notified yet.
Persistence is a single JSON file at
``/usr/local/share/proxmenux/managed_installs.json``. Atomic writes
via tmp+rename so a crash mid-write can't leave a half-written file.
The module is concurrency-safe: a single ``threading.RLock`` guards
every read-modify-write so the periodic detector and a request handler
calling ``get_registry()`` can run in parallel without stepping on
each other.
"""
from __future__ import annotations
import datetime
import json
import os
import re
import subprocess
import threading
import time
import urllib.request
from typing import Any, Callable, Optional
# ─── Storage ──────────────────────────────────────────────────────────────────
_DB_DIR = "/usr/local/share/proxmenux"
_REGISTRY_PATH = os.path.join(_DB_DIR, "managed_installs.json")
_SCHEMA_VERSION = 1
_lock = threading.RLock()
def _now_iso() -> str:
return datetime.datetime.utcnow().isoformat() + "Z"
def _read_registry() -> dict:
"""Load the JSON file. Returns the canonical empty shape on first
run / parse error / permission issue — callers always see a valid
dict."""
try:
with open(_REGISTRY_PATH, "r", encoding="utf-8") as f:
data = json.load(f)
if isinstance(data, dict) and isinstance(data.get("items"), list):
return data
except (FileNotFoundError, IsADirectoryError, PermissionError):
pass
except (OSError, json.JSONDecodeError) as e:
print(f"[ProxMenux] managed_installs read failed ({e}); starting fresh")
return {"version": _SCHEMA_VERSION, "items": []}
def _write_registry(reg: dict) -> bool:
"""Atomic write — tmp + rename. Never raises; returns False on any
OS-level failure so the caller can decide whether to retry."""
try:
os.makedirs(_DB_DIR, exist_ok=True)
tmp = _REGISTRY_PATH + ".tmp"
with open(tmp, "w", encoding="utf-8") as f:
json.dump(reg, f, indent=2, ensure_ascii=False)
f.flush()
os.fsync(f.fileno())
os.replace(tmp, _REGISTRY_PATH)
return True
except OSError as e:
print(f"[ProxMenux] managed_installs write failed: {e}")
return False
# ─── Public read API ─────────────────────────────────────────────────────────
def get_registry() -> dict:
"""Return the full registry as a dict. Pure read — the caller can
inspect ``items`` freely. Don't mutate the returned dict."""
with _lock:
return _read_registry()
def get_active_items() -> list[dict]:
"""Items the host actually has installed right now (no
``removed_at``). Most callers want this, not the full history."""
with _lock:
reg = _read_registry()
return [it for it in reg.get("items", []) if not it.get("removed_at")]
def get_item(item_id: str) -> Optional[dict]:
with _lock:
reg = _read_registry()
for it in reg.get("items", []):
if it.get("id") == item_id:
return it
return None
# ─── DETECTORS — auto-discovery ──────────────────────────────────────────────
#
# Each detector is a `() -> Optional[dict]` that returns the *partial*
# entry shape (id, type, name, current_version, menu_label,
# menu_script — optional fields too) if the thing is installed on the
# host, or None if it's not. The framework merges this with the
# existing registry entry (preserving history) and rewrites if
# anything changed.
def _detect_nvidia_xfree86() -> Optional[dict]:
"""Detect a host-side NVIDIA driver via `nvidia-smi`."""
try:
proc = subprocess.run(
[
"nvidia-smi",
"--query-gpu=driver_version",
"--format=csv,noheader",
],
capture_output=True, text=True, timeout=5,
)
except (FileNotFoundError, OSError, subprocess.TimeoutExpired):
return None
if proc.returncode != 0:
return None
version = (proc.stdout or "").strip().splitlines()[0].strip() if proc.stdout else ""
if not re.match(r"^\d+\.\d+(\.\d+)?$", version):
return None
return {
"id": "nvidia-host",
"type": "nvidia_xfree86",
"name": "NVIDIA Host Driver",
"current_version": version,
"menu_label": "GPU & TPU → NVIDIA Driver",
"menu_script": "scripts/gpu_tpu/nvidia_installer.sh",
}
def _detect_oci_apps() -> list[dict]:
"""Bridge to the OCI manager so every OCI-installed app shows up
in the registry without a per-app detector here. The OCI manager
is the source of truth for OCI-specific state — we just project a
subset into our registry shape."""
try:
import oci_manager
except Exception:
return []
try:
installed = oci_manager.list_installed_apps() or []
except Exception as e:
print(f"[ProxMenux] managed_installs OCI bridge failed: {e}")
return []
out: list[dict] = []
for app in installed:
app_id = app.get("app_id") or app.get("id")
if not app_id:
continue
out.append({
"id": f"oci:{app_id}",
"type": "oci_app",
"name": app.get("name") or app_id,
"current_version": None, # filled by checker
"menu_label": "Settings → Secure Gateway",
"menu_script": None, # OCI apps update via the dashboard, no bash script
# Stash the raw app_id so the checker can find it without
# parsing the prefixed registry id.
"_oci_app_id": app_id,
})
return out
# Detectors registered here. Each returns either a single entry dict
# or a list (for sources that yield multiple items, like OCI). The
# framework normalises both shapes.
_DETECTORS: list[Callable[[], Any]] = [
_detect_nvidia_xfree86,
_detect_oci_apps,
]
def _normalise_detector_result(result: Any) -> list[dict]:
if not result:
return []
if isinstance(result, dict):
return [result]
if isinstance(result, list):
return [r for r in result if isinstance(r, dict)]
return []
def detect_and_register() -> dict:
"""Run every detector, merge results into the registry, persist.
Behaviour per item:
* detected + not in registry → add, ``installed_by="detected"``
* detected + in registry as removed → reactivate (clear removed_at)
* detected + already active → refresh ``current_version`` and any
metadata that changed (e.g. menu_label evolved)
* not detected + active in registry → mark ``removed_at``
Returns the new registry.
"""
discovered: dict[str, dict] = {}
for detector in _DETECTORS:
try:
result = detector()
except Exception as e:
print(f"[ProxMenux] managed_installs detector {detector.__name__} failed: {e}")
continue
for entry in _normalise_detector_result(result):
if not entry.get("id"):
continue
discovered[entry["id"]] = entry
with _lock:
reg = _read_registry()
items: list[dict] = list(reg.get("items", []))
index = {it.get("id"): i for i, it in enumerate(items) if it.get("id")}
now = _now_iso()
# 1. Add new + reactivate / refresh existing.
for item_id, entry in discovered.items():
if item_id in index:
existing = items[index[item_id]]
# Reactivate if it was previously removed
if existing.get("removed_at"):
existing.pop("removed_at", None)
existing["reactivated_at"] = now
# Refresh metadata fields that may have evolved
for k in ("name", "current_version", "menu_label", "menu_script"):
if k in entry and entry[k] is not None:
existing[k] = entry[k]
# Preserve internal helpers like `_oci_app_id`
for k, v in entry.items():
if k.startswith("_"):
existing[k] = v
existing["last_seen"] = now
else:
# Brand new entry
new_entry = {
"id": entry["id"],
"type": entry.get("type", "unknown"),
"name": entry.get("name", entry["id"]),
"current_version": entry.get("current_version"),
"menu_label": entry.get("menu_label"),
"menu_script": entry.get("menu_script"),
"installed_by": "detected",
"first_seen": now,
"last_seen": now,
"update_check": {
"last_check": None,
"available": False,
"latest": None,
"error": None,
},
}
# Carry over internals (`_oci_app_id` etc.)
for k, v in entry.items():
if k.startswith("_"):
new_entry[k] = v
items.append(new_entry)
# 2. Mark missing items as removed (don't delete — preserve
# history so a reinstall doesn't lose the audit trail).
for it in items:
if not it.get("id") or it.get("removed_at"):
continue
if it["id"] not in discovered:
it["removed_at"] = now
reg["items"] = items
reg["version"] = _SCHEMA_VERSION
reg["last_detect"] = now
_write_registry(reg)
return reg
# ─── CHECKERS — per-type update probes ───────────────────────────────────────
#
# A checker takes a registry entry and returns the *update* part of
# the registry shape:
# {available, latest, last_check, error?}
# It must be idempotent and may use its own internal cache so we don't
# pay the upstream cost on every call.
def _check_oci_app(entry: dict) -> dict:
"""Delegate to oci_manager — already has its own 24h cache."""
app_id = entry.get("_oci_app_id") or entry.get("id", "").removeprefix("oci:")
if not app_id:
return {"available": False, "latest": None, "last_check": _now_iso(),
"error": "no app_id in registry entry"}
try:
import oci_manager
state = oci_manager.check_app_update_available(app_id, force=False)
except Exception as e:
return {"available": False, "latest": None, "last_check": _now_iso(),
"error": str(e)}
if state.get("error"):
return {"available": False, "latest": None, "last_check": _now_iso(),
"error": state["error"]}
return {
"available": bool(state.get("available")),
"latest": state.get("latest_version"),
"current": state.get("current_version"),
"last_check": state.get("last_checked_iso") or _now_iso(),
"error": None,
"_packages": state.get("packages") or [],
}
# ── NVIDIA driver checker ──
#
# Source of truth for what's available upstream:
# `https://download.nvidia.com/XFree86/Linux-x86_64/latest.txt`
# returns the single newest version, e.g. "580.105.08"
# `https://download.nvidia.com/XFree86/Linux-x86_64/`
# HTML directory listing — we scrape it for per-branch latest
# (so a user on 570.x gets 570.x's latest, not pushed to 580.x
# unless their kernel forces a branch upgrade).
#
# Cache TTL is 7 days because NVIDIA's release cadence on each branch
# is roughly monthly. The cache is in-memory only; AppImage restarts
# refresh it for free.
_NVIDIA_BASE = "https://download.nvidia.com/XFree86/Linux-x86_64"
_NVIDIA_CACHE_TTL = 7 * 86400
_nvidia_cache: dict[str, Any] = {"versions": [], "fetched_at": 0}
def _nvidia_kernel_compat() -> dict:
"""Python port of `get_kernel_compatibility_info` in the bash
installer. Returns ``{kernel, min_version, recommended_branch,
note}``. Kept identical to the bash matrix so the recommendation
here matches what the installer would do."""
try:
kernel = subprocess.run(
["uname", "-r"], capture_output=True, text=True, timeout=2,
).stdout.strip()
except (OSError, subprocess.TimeoutExpired):
kernel = ""
parts = kernel.split(".") if kernel else []
try:
major = int(parts[0]) if len(parts) >= 1 else 0
minor = int(parts[1]) if len(parts) >= 2 else 0
except (ValueError, TypeError):
major, minor = 0, 0
if major >= 7 or (major == 6 and minor >= 17):
return {
"kernel": kernel,
"min_version": "580.105.08",
"recommended_branch": "580",
"note": (f"Kernel {kernel} requires NVIDIA driver 580.105.08 or "
f"newer (older 580.x builds fail to compile)"),
}
if major >= 6 and minor >= 8:
return {"kernel": kernel, "min_version": "550",
"recommended_branch": "580",
"note": f"Kernel {kernel} works with NVIDIA driver 550.x or newer"}
if major >= 6:
return {"kernel": kernel, "min_version": "535",
"recommended_branch": "550",
"note": f"Kernel {kernel} works with NVIDIA driver 535.x or newer"}
if major == 5 and minor >= 15:
return {"kernel": kernel, "min_version": "470",
"recommended_branch": "535",
"note": f"Kernel {kernel} works with NVIDIA driver 470.x or newer"}
return {"kernel": kernel, "min_version": "450",
"recommended_branch": "470",
"note": "For older kernels, compatibility may vary"}
def _version_tuple(v: str) -> tuple:
"""Convert ``580.105.08`` → ``(580, 105, 8)`` for comparison.
Pads to 3 components so ``580.82`` < ``580.105.08``."""
out = []
for chunk in v.split("."):
try:
out.append(int(chunk))
except (ValueError, TypeError):
out.append(0)
while len(out) < 3:
out.append(0)
return tuple(out[:3])
def _fetch_nvidia_versions(force: bool = False) -> list[str]:
"""Return the cached list of all upstream versions, or fetch fresh."""
now = time.time()
if not force and _nvidia_cache["versions"] and \
now - _nvidia_cache["fetched_at"] < _NVIDIA_CACHE_TTL:
return _nvidia_cache["versions"]
try:
req = urllib.request.Request(
_NVIDIA_BASE + "/",
headers={"User-Agent": "ProxMenux-Monitor/1.0"},
)
with urllib.request.urlopen(req, timeout=15) as resp:
html = resp.read().decode("utf-8", errors="replace")
except Exception as e:
print(f"[ProxMenux] NVIDIA version fetch failed: {e}")
return _nvidia_cache.get("versions", [])
versions = sorted(
{m.group(1) for m in re.finditer(
r"""href=['"](\d+\.\d+(?:\.\d+)?)/?['"]""", html)},
key=_version_tuple,
reverse=True,
)
if versions:
_nvidia_cache["versions"] = versions
_nvidia_cache["fetched_at"] = now
return versions
def _is_compat_with_kernel(version: str, kernel_compat: dict) -> bool:
"""Compare ``version`` (e.g. ``580.105.08``) against the kernel
compatibility floor. Mirrors the bash ``is_version_compatible``
helper (full-triple compare when min is dotted, major-only otherwise)."""
min_str = kernel_compat.get("min_version", "0")
if "." in min_str and re.match(r"^\d+\.\d+\.\d+$", min_str):
return _version_tuple(version) >= _version_tuple(min_str)
# Single-major threshold like "535" or "550"
try:
ver_major = int(version.split(".")[0])
min_major = int(min_str)
except (ValueError, TypeError):
return True
return ver_major >= min_major
def _check_nvidia_xfree86(entry: dict) -> dict:
"""Compute the update state for a host NVIDIA driver entry.
Policy (Option C from the design discussion):
1. Same-branch newer version available → notify.
2. Current branch no longer compatible with current kernel →
notify a branch upgrade with explicit messaging.
"""
current = entry.get("current_version")
if not current or not re.match(r"^\d+\.\d+(\.\d+)?$", current):
return {"available": False, "latest": None,
"last_check": _now_iso(), "error": "no installed version"}
versions = _fetch_nvidia_versions()
if not versions:
return {"available": False, "latest": None,
"last_check": _now_iso(),
"error": "could not parse upstream version listing"}
kernel_compat = _nvidia_kernel_compat()
current_branch = current.split(".")[0]
same_branch = [v for v in versions if v.split(".")[0] == current_branch
and _is_compat_with_kernel(v, kernel_compat)]
same_branch_latest = same_branch[0] if same_branch else None
notify_branch_upgrade = False
branch_upgrade_target: Optional[str] = None
if not _is_compat_with_kernel(current, kernel_compat):
# Current branch / version no longer works with current kernel.
# Recommend the kernel-recommended branch's latest.
rec_branch = kernel_compat["recommended_branch"]
rec_branch_versions = [v for v in versions
if v.split(".")[0] == rec_branch
and _is_compat_with_kernel(v, kernel_compat)]
if rec_branch_versions:
branch_upgrade_target = rec_branch_versions[0]
notify_branch_upgrade = True
available = False
latest: Optional[str] = None
upgrade_kind = None # "patch" | "branch_upgrade" | None
if notify_branch_upgrade and branch_upgrade_target:
latest = branch_upgrade_target
available = True
upgrade_kind = "branch_upgrade"
elif same_branch_latest and \
_version_tuple(same_branch_latest) > _version_tuple(current):
latest = same_branch_latest
available = True
upgrade_kind = "patch"
return {
"available": available,
"latest": latest,
"last_check": _now_iso(),
"error": None,
"_upgrade_kind": upgrade_kind,
"_kernel": kernel_compat.get("kernel"),
"_kernel_note": kernel_compat.get("note"),
}
_CHECKERS: dict[str, Callable[[dict], dict]] = {
"oci_app": _check_oci_app,
"nvidia_xfree86": _check_nvidia_xfree86,
}
def check_for_updates(force: bool = False) -> list[dict]:
"""Run every type-specific checker over active items, persist
the updated state, return the list of items that have an update
available right now.
The notification poller turns the returned list into events; the
UI reads ``get_active_items()`` to render the inline "update
available" line.
``force`` invalidates the per-source caches (currently only the
NVIDIA versions list — OCI keeps its own internal cache).
"""
if force:
_nvidia_cache["versions"] = []
_nvidia_cache["fetched_at"] = 0
updates_available: list[dict] = []
with _lock:
reg = _read_registry()
items = reg.get("items", [])
for it in items:
if it.get("removed_at"):
continue
checker = _CHECKERS.get(it.get("type"))
if not checker:
continue
try:
result = checker(it)
except Exception as e:
print(f"[ProxMenux] managed_installs checker failed for "
f"{it.get('id')}: {e}")
result = {"available": False, "latest": None,
"last_check": _now_iso(), "error": str(e)}
it["update_check"] = {
"available": bool(result.get("available")),
"latest": result.get("latest"),
"last_check": result.get("last_check") or _now_iso(),
"error": result.get("error"),
}
if result.get("current") and not it.get("current_version"):
it["current_version"] = result["current"]
for extra_key in ("_packages", "_upgrade_kind", "_kernel",
"_kernel_note"):
if extra_key in result:
it["update_check"][extra_key] = result[extra_key]
if it["update_check"]["available"]:
updates_available.append(it)
reg["items"] = items
reg["last_check_run"] = _now_iso()
_write_registry(reg)
return updates_available