diff --git a/AppImage/ProxMenux-1.2.1.3-beta.AppImage b/AppImage/ProxMenux-1.2.1.4-beta.AppImage similarity index 71% rename from AppImage/ProxMenux-1.2.1.3-beta.AppImage rename to AppImage/ProxMenux-1.2.1.4-beta.AppImage index 22aba271..04c9b6b9 100755 Binary files a/AppImage/ProxMenux-1.2.1.3-beta.AppImage and b/AppImage/ProxMenux-1.2.1.4-beta.AppImage differ diff --git a/AppImage/ProxMenux-Monitor.AppImage.sha256 b/AppImage/ProxMenux-Monitor.AppImage.sha256 index 442acfd4..743e7861 100644 --- a/AppImage/ProxMenux-Monitor.AppImage.sha256 +++ b/AppImage/ProxMenux-Monitor.AppImage.sha256 @@ -1 +1 @@ -1caca89b574241c9d754b9ac3bb11987c5eccc5f182d01a5c62e61623b62fda7 +fba0f824699660d18f77bc8558370acd725921cc34737508605c83ced3c947a4 diff --git a/AppImage/components/about.tsx b/AppImage/components/about.tsx index 630e48b7..86f91ef3 100644 --- a/AppImage/components/about.tsx +++ b/AppImage/components/about.tsx @@ -140,19 +140,30 @@ export function About() { v{APP_VERSION} - {/* Changelog goes to the web — the in-app modal version - duplicated content and lacked a close affordance on - some viewports, forcing a page refresh. The web - changelog is canonical and auto-syncs with releases. */} - - Changelog - - + {/* Beta versions surface their pre-release notes on the + GitHub Releases page (where each beta is tagged + signed); + stable versions point at the canonical web changelog + which only carries shipped releases. Detection: the + APP_VERSION string carries a "-beta" / "-rc" / + "-alpha" suffix for any non-stable build. */} + {(() => { + const isPrerelease = /-(beta|rc|alpha)/i.test(APP_VERSION) + const href = isPrerelease + ? "https://github.com/MacRimi/ProxMenux/releases" + : "https://proxmenux.com/en/changelog" + const label = isPrerelease ? "Release notes" : "Changelog" + return ( + + {label} + + + ) + })()} diff --git a/AppImage/components/auth-setup.tsx b/AppImage/components/auth-setup.tsx index 46ffa05a..1b137709 100644 --- a/AppImage/components/auth-setup.tsx +++ b/AppImage/components/auth-setup.tsx @@ -85,7 +85,7 @@ export function AuthSetup({ onComplete }: AuthSetupProps) { setOpen(false) onComplete() } catch (err) { - console.error("[v0] Auth skip error:", err) + console.error("Auth skip error:", err) setError(err instanceof Error ? err.message : "Failed to save preference") } finally { setLoading(false) @@ -203,7 +203,7 @@ export function AuthSetup({ onComplete }: AuthSetupProps) { setOpen(false) onComplete() } catch (err) { - console.error("[v0] Auth setup error:", err) + console.error("Auth setup error:", err) setError(err instanceof Error ? err.message : "Failed to setup authentication") } finally { setLoading(false) diff --git a/AppImage/components/hardware.tsx b/AppImage/components/hardware.tsx index fdf35d91..d986d7c4 100644 --- a/AppImage/components/hardware.tsx +++ b/AppImage/components/hardware.tsx @@ -260,7 +260,7 @@ export default function Hardware() { if (hardwareData?.storage_devices) { hardwareData.storage_devices.forEach((device) => { if (device.name.startsWith("nvme")) { - console.log(`[v0] NVMe device ${device.name}:`, { + console.log(`NVMe device ${device.name}:`, { pcie_gen: device.pcie_gen, pcie_width: device.pcie_width, pcie_max_gen: device.pcie_max_gen, @@ -452,7 +452,7 @@ export default function Hardware() { setDetailsLoading(false) } catch (error) { if (error instanceof Error && error.name !== "AbortError") { - console.error("[v0] Error fetching GPU realtime data:", error) + console.error("Error fetching GPU realtime data:", error) } setRealtimeGPUData({ has_monitoring_tool: false }) setDetailsLoading(false) diff --git a/AppImage/components/health-status-modal.tsx b/AppImage/components/health-status-modal.tsx index 26daa7f6..81cf78d4 100644 --- a/AppImage/components/health-status-modal.tsx +++ b/AppImage/components/health-status-modal.tsx @@ -7,6 +7,14 @@ import { getAuthToken } from "@/lib/api-config" import { Dialog, DialogContent, DialogDescription, DialogHeader, DialogTitle } from "@/components/ui/dialog" import { Badge } from "@/components/ui/badge" import { Button } from "@/components/ui/button" +import { + DropdownMenu, + DropdownMenuContent, + DropdownMenuItem, + DropdownMenuLabel, + DropdownMenuSeparator, + DropdownMenuTrigger, +} from "@/components/ui/dropdown-menu" import { Loader2, CheckCircle2, @@ -357,8 +365,15 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu } } - const handleAcknowledge = async (errorKey: string, e: React.MouseEvent) => { - e.stopPropagation() + // `suppressionHours` overrides the category default for this dismiss: + // - undefined → backend uses the category's configured suppression + // - 24, 168 (7 days) → silence for that many hours + // - -1 → permanent dismiss; only revertible from + // Settings → Active Suppressions + const handleAcknowledge = async ( + errorKey: string, + suppressionHours?: number, + ) => { setDismissingKey(errorKey) try { @@ -369,10 +384,15 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu headers["Authorization"] = `Bearer ${token}` } + const body: Record = { error_key: errorKey } + if (suppressionHours !== undefined) { + body.suppression_hours = suppressionHours + } + const response = await fetch(url, { method: "POST", headers, - body: JSON.stringify({ error_key: errorKey }), + body: JSON.stringify(body), }) const responseData = await response.json().catch(() => ({})) @@ -390,11 +410,16 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu severity: responseData.result?.original_severity || 'WARNING', reason: 'Dismissed by user', dismissed: true, - acknowledged_at: new Date().toISOString() + // Surface the chosen duration so the row shows the right badge + // (countdown vs. "Permanent") without waiting for the refetch. + permanent: suppressionHours === -1, + suppression_remaining_hours: suppressionHours === -1 ? -1 : undefined, + suppression_hours: suppressionHours, + acknowledged_at: new Date().toISOString(), } setDismissedItems(prev => [...prev, dismissedItem]) } - + // Fetch fresh data in background (non-blocking) fetchHealthDetails().catch(() => {}) } catch (err) { @@ -511,32 +536,25 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu {formatCheckLabel(checkKey)} {checkData.detail} {checkData.dismissed && ( - - Dismissed - + checkData.permanent ? ( + + Permanent + + ) : ( + + Dismissed + + ) )}
{(checkStatus === "WARNING" || checkStatus === "CRITICAL" || checkStatus === "UNKNOWN") && isDismissable && !checkData.dismissed && ( - + + handleAcknowledge(checkData.error_key || checkKey, hours) + } + busy={dismissingKey === (checkData.error_key || checkKey)} + /> )}
@@ -681,25 +699,12 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu

{reason}

{/* Show dismiss button for UNKNOWN status at category level when dismissable */} {status === "UNKNOWN" && categoryData?.dismissable && !hasChecks && ( - + + handleAcknowledge(`category_${key}_unknown`, hours) + } + busy={dismissingKey === `category_${key}_unknown`} + /> )} )} @@ -840,3 +845,56 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu ) } + +// Small split button: the visible click opens a 3-option menu so the user +// chooses how long this specific alert stays silenced. ``-1`` is the +// permanent sentinel — backend stores it as `suppression_hours = -1` and +// the alert can only be brought back from Settings → Active Suppressions. +function DismissDropdown({ + onSelect, + busy, +}: { + onSelect: (suppressionHours: number) => void + busy: boolean +}) { + return ( + + + + + e.stopPropagation()}> + + Silence this alert for + + onSelect(24)} className="text-xs"> + 24 hours + + onSelect(168)} className="text-xs"> + 7 days + + + onSelect(-1)} + className="text-xs text-red-500 focus:text-red-500 focus:bg-red-500/10" + > + Permanently + + + + ) +} diff --git a/AppImage/components/login.tsx b/AppImage/components/login.tsx index dba95ef7..4e7b0723 100644 --- a/AppImage/components/login.tsx +++ b/AppImage/components/login.tsx @@ -271,7 +271,7 @@ export function Login({ onLogin }: LoginProps) { -

ProxMenux Monitor v1.2.1.3-beta

+

ProxMenux Monitor v1.2.1.4-beta

) diff --git a/AppImage/components/network-card.tsx b/AppImage/components/network-card.tsx index bb658a21..aaf640b4 100644 --- a/AppImage/components/network-card.tsx +++ b/AppImage/components/network-card.tsx @@ -109,7 +109,7 @@ export function NetworkCard({ interface_, timeframe, onClick }: NetworkCardProps }) } } catch (error) { - console.error("[v0] Failed to fetch traffic data for card:", error) + console.error("Failed to fetch traffic data for card:", error) setTrafficData({ received: 0, sent: 0 }) } } diff --git a/AppImage/components/network-traffic-chart.tsx b/AppImage/components/network-traffic-chart.tsx index 8c6ac350..c96b9649 100644 --- a/AppImage/components/network-traffic-chart.tsx +++ b/AppImage/components/network-traffic-chart.tsx @@ -206,7 +206,7 @@ export function NetworkTrafficChart({ setIsInitialLoad(false) } } catch (err: any) { - console.error("[v0] Error fetching network metrics:", err) + console.error("Error fetching network metrics:", err) setError(err.message || "Error loading metrics") } finally { setLoading(false) diff --git a/AppImage/components/node-metrics-charts.tsx b/AppImage/components/node-metrics-charts.tsx index 30db761e..5f356223 100644 --- a/AppImage/components/node-metrics-charts.tsx +++ b/AppImage/components/node-metrics-charts.tsx @@ -95,12 +95,12 @@ export function NodeMetricsCharts() { if (!result.data || !Array.isArray(result.data)) { - console.error("[v0] Invalid data format - data is not an array:", result) + console.error("Invalid data format - data is not an array:", result) throw new Error("Invalid data format received from server") } if (result.data.length === 0) { - console.warn("[v0] No data points received") + console.warn("No data points received") setData([]) setLoading(false) return @@ -159,9 +159,9 @@ export function NodeMetricsCharts() { setData(transformedData) } catch (err: any) { - console.error("[v0] Error fetching node metrics:", err) - console.error("[v0] Error message:", err.message) - console.error("[v0] Error stack:", err.stack) + console.error("Error fetching node metrics:", err) + console.error("Error message:", err.message) + console.error("Error stack:", err.stack) setError(err.message || "Error loading metrics") } finally { setLoading(false) diff --git a/AppImage/components/notification-settings.tsx b/AppImage/components/notification-settings.tsx index bfbf4b1e..02816833 100644 --- a/AppImage/components/notification-settings.tsx +++ b/AppImage/components/notification-settings.tsx @@ -1831,27 +1831,34 @@ export function NotificationSettings() { {config.channels.apprise?.enabled && ( <> -
+
-
+
updateChannel("apprise", "url", e.target.value)} disabled={!editMode} />
-

+ {/* The examples row was overflowing on mobile because + every `` token is atomic — the whole line + would scroll horizontally on narrow viewports. + `break-all` on the wrapper lets the layout break + mid-token if the viewport is really tight; on + wider screens the natural commas/spaces still + control wrapping. */} +

A single URL that Apprise routes to the right service. Examples: tgram://, discord://, @@ -1871,7 +1878,10 @@ export function NotificationSettings() { .

-
+ {renderChannelCategories("apprise")} + {renderQuietHours("apprise")} + {renderDailyDigest("apprise")} +
+
+ ) + })} +
+ )} +
)} diff --git a/AppImage/components/storage-metrics.tsx b/AppImage/components/storage-metrics.tsx index a14df0af..dc83f57b 100644 --- a/AppImage/components/storage-metrics.tsx +++ b/AppImage/components/storage-metrics.tsx @@ -43,7 +43,7 @@ const fetchStorageData = async (): Promise => { const data = await response.json() return data } catch (error) { - console.error("[v0] Failed to fetch storage data from Flask server:", error) + console.error("Failed to fetch storage data from Flask server:", error) return null } } diff --git a/AppImage/components/storage-overview.tsx b/AppImage/components/storage-overview.tsx index 92ab02ca..4465518b 100644 --- a/AppImage/components/storage-overview.tsx +++ b/AppImage/components/storage-overview.tsx @@ -3624,7 +3624,7 @@ ${observationsHtml} diff --git a/AppImage/components/temperature-detail-modal.tsx b/AppImage/components/temperature-detail-modal.tsx index ce53ca4d..dc364175 100644 --- a/AppImage/components/temperature-detail-modal.tsx +++ b/AppImage/components/temperature-detail-modal.tsx @@ -93,7 +93,7 @@ export function TemperatureDetailModal({ open, onOpenChange, liveTemperature }: setStats(result.stats) } } catch (err) { - console.error("[v0] Failed to fetch temperature history:", err) + console.error("Failed to fetch temperature history:", err) } finally { setLoading(false) } diff --git a/AppImage/components/terminal-panel.tsx b/AppImage/components/terminal-panel.tsx index b855b852..42be1329 100644 --- a/AppImage/components/terminal-panel.tsx +++ b/AppImage/components/terminal-panel.tsx @@ -624,7 +624,7 @@ export const TerminalPanel: React.FC = ({ websocketUrl, onCl ws.onerror = (error) => { clearTimeout(timeoutId) - console.error("[v0] TerminalPanel: WebSocket error:", error) + console.error("TerminalPanel: WebSocket error:", error) setTerminals((prev) => prev.map((t) => { if (t.id === terminal.id) { if (t.pingInterval) { @@ -924,6 +924,7 @@ const handleClose = () => {
(containerRefs.current[terminal.id] = el)} className="w-full h-full flex-1 bg-black overflow-hidden" + translate="no" /> ))} @@ -956,6 +957,7 @@ const handleClose = () => { ref={(el) => (containerRefs.current[terminal.id] = el)} onClick={() => setActiveTerminalId(terminal.id)} className="flex-1 w-full max-w-full bg-black overflow-hidden cursor-pointer" + translate="no" data-terminal-container />
diff --git a/AppImage/components/virtual-machines.tsx b/AppImage/components/virtual-machines.tsx index f9b829b5..efdea823 100644 --- a/AppImage/components/virtual-machines.tsx +++ b/AppImage/components/virtual-machines.tsx @@ -720,7 +720,7 @@ export function VirtualMachines() { configs[lxc.vmid] = extractIPFromConfig(details.config, details.lxc_ip_info) } } catch (error) { - console.log(`[v0] Could not fetch IP for LXC ${lxc.vmid}`) + console.log(`Could not fetch IP for LXC ${lxc.vmid}`) configs[lxc.vmid] = "N/A" } }), diff --git a/AppImage/lib/api-config.ts b/AppImage/lib/api-config.ts index 45e9b047..527cc201 100644 --- a/AppImage/lib/api-config.ts +++ b/AppImage/lib/api-config.ts @@ -161,14 +161,14 @@ export async function fetchApi(endpoint: string, options?: RequestInit): Prom const contentType = response.headers.get("content-type") if (!contentType || !contentType.includes("application/json")) { const text = await response.text() - console.error("[v0] fetchApi: Expected JSON but got:", contentType, "- Body preview:", text.substring(0, 200)) + console.error("fetchApi: Expected JSON but got:", contentType, "- Body preview:", text.substring(0, 200)) throw new Error(`Expected JSON response but got ${contentType || "unknown content type"}`) } try { return await response.json() } catch (jsonError) { - console.error("[v0] fetchApi: JSON parse error for", endpoint, "-", jsonError) + console.error("fetchApi: JSON parse error for", endpoint, "-", jsonError) throw new Error(`Invalid JSON response from ${endpoint}`) } } diff --git a/AppImage/package.json b/AppImage/package.json index f1858eb4..dcdffa28 100644 --- a/AppImage/package.json +++ b/AppImage/package.json @@ -1,6 +1,6 @@ { "name": "ProxMenux-Monitor", - "version": "1.2.1.3-beta", + "version": "1.2.1.4-beta", "description": "Proxmox System Monitoring Dashboard", "private": true, "scripts": { diff --git a/AppImage/scripts/flask_health_routes.py b/AppImage/scripts/flask_health_routes.py index f0299f5b..4fc68a3c 100644 --- a/AppImage/scripts/flask_health_routes.py +++ b/AppImage/scripts/flask_health_routes.py @@ -63,14 +63,32 @@ def acknowledge_error(): Acknowledge/dismiss an error manually. Returns details about the acknowledged error including original severity and suppression period info. + + Body accepts an optional ``suppression_hours`` field — if omitted the + server uses the user-configured value for the error's category (current + behavior). When provided, the value overrides the category default for + this specific dismiss: + - positive integer N → silence for N hours + - ``-1`` → silence permanently (only revertible from + Settings → Active Suppressions) """ try: data = request.get_json() if not data or 'error_key' not in data: return jsonify({'error': 'error_key is required'}), 400 - + error_key = data['error_key'] - result = health_persistence.acknowledge_error(error_key) + sup_override = None + if 'suppression_hours' in data and data['suppression_hours'] is not None: + try: + sup_override = int(data['suppression_hours']) + # Accept positive durations and the permanent sentinel (-1) + # only. Zero / other negatives would be nonsensical here. + if sup_override < -1 or sup_override == 0: + return jsonify({'error': 'suppression_hours must be a positive integer or -1 (permanent)'}), 400 + except (ValueError, TypeError): + return jsonify({'error': 'suppression_hours must be an integer'}), 400 + result = health_persistence.acknowledge_error(error_key, suppression_hours=sup_override) if result.get('success'): # Invalidate cached health results so next fetch reflects the dismiss @@ -130,6 +148,53 @@ def acknowledge_error(): except Exception as e: return jsonify({'error': str(e)}), 500 +@health_bp.route('/api/health/un-acknowledge', methods=['POST']) +def unacknowledge_error(): + """ + Re-enable a previously dismissed error. + + Used by Settings → Active Suppressions when the user explicitly removes + a suppression (time-limited or permanent). After this call the error + becomes eligible to re-emit and re-notify on the next health scan if + the underlying condition is still present. + + Body: ``{"error_key": ""}`` + """ + try: + data = request.get_json() + if not data or 'error_key' not in data: + return jsonify({'error': 'error_key is required'}), 400 + error_key = data['error_key'] + result = health_persistence.unacknowledge_error(error_key) + + # Invalidate caches so the next health fetch reflects the new state + # (the alert may re-appear immediately if the condition still holds). + category = result.get('category', '') + cache_key_map = { + 'logs': 'logs_analysis', + 'pve_services': 'pve_services', + 'updates': 'updates_check', + 'security': 'security_check', + 'temperature': 'cpu_check', + 'network': 'network_check', + 'disks': 'storage_check', + 'vms': 'vms_check', + } + cache_key = cache_key_map.get(category) + if cache_key: + health_monitor.last_check_times.pop(cache_key, None) + health_monitor.cached_results.pop(cache_key, None) + for ck in ['_bg_overall', '_bg_detailed', 'overall_health']: + health_monitor.last_check_times.pop(ck, None) + health_monitor.cached_results.pop(ck, None) + + if not result.get('success'): + return jsonify(result), 404 + return jsonify(result) + except Exception as e: + return jsonify({'error': str(e)}), 500 + + @health_bp.route('/api/health/active-errors', methods=['GET']) def get_active_errors(): """Get all active persistent errors""" diff --git a/AppImage/scripts/flask_server.py b/AppImage/scripts/flask_server.py index 67352d3a..56ba5f77 100644 --- a/AppImage/scripts/flask_server.py +++ b/AppImage/scripts/flask_server.py @@ -265,18 +265,35 @@ def _apply_security_headers(response): # is banned in the 'proxmenux' fail2ban jail and blocks at app level. import subprocess as _f2b_subprocess import time as _f2b_time +import shutil as _f2b_shutil # Cache banned IPs for 30 seconds to avoid calling fail2ban-client on every request _f2b_banned_cache = {"ips": set(), "ts": 0, "ttl": 30} +# One-time check at module import — when Fail2Ban isn't installed we want +# the @app.before_request middleware to be a no-op. Without this guard +# every HTTP request to the Monitor went through _f2b_get_banned_ips() → +# execve fail2ban-client → ENOENT, and the negative result wasn't cached +# (only the success branch updated `ts`), so a missing binary triggered +# one failed execve per HTTP request. strace on a host without Fail2Ban +# captured 250+ failed execve attempts in 10 min from this single path. +# Fixed in v1.2.1.4 perf audit. +_F2B_BINARY = _f2b_shutil.which("fail2ban-client") + def _f2b_get_banned_ips(): """Get currently banned IPs from the proxmenux jail, with caching.""" + if _F2B_BINARY is None: + # Fail2Ban isn't installed on this host. Skip the subprocess + # entirely; the @app.before_request middleware will see an empty + # banned-IPs set and let every request through (which is the + # correct behaviour — there's no Fail2Ban to honour). + return _f2b_banned_cache["ips"] now = _f2b_time.time() if now - _f2b_banned_cache["ts"] < _f2b_banned_cache["ttl"]: return _f2b_banned_cache["ips"] try: result = _f2b_subprocess.run( - ["fail2ban-client", "status", "proxmenux"], + [_F2B_BINARY, "status", "proxmenux"], capture_output=True, text=True, timeout=5 ) if result.returncode == 0: @@ -285,10 +302,13 @@ def _f2b_get_banned_ips(): ip_str = line.split(":", 1)[1].strip() banned = set(ip.strip() for ip in ip_str.split() if ip.strip()) _f2b_banned_cache["ips"] = banned - _f2b_banned_cache["ts"] = now - return banned except Exception: pass + # Always update the timestamp — even on exception / non-zero rc / + # missing jail. Caches the negative result for the same TTL so a + # transient Fail2Ban outage doesn't trigger one subprocess call per + # HTTP request until it recovers. + _f2b_banned_cache["ts"] = now return _f2b_banned_cache["ips"] # XFF / X-Real-IP are only honored when the operator opts in by setting @@ -707,37 +727,45 @@ def _temperature_collector_loop(): - Cleanup: every 60 min at offset 120s """ import time as _time - + RECORD_INTERVAL = 60 TEMP_OFFSET = 40 # Record temp at :40 of each minute LATENCY_OFFSET = 25 # Record latency at :25 of each minute + # v1.2.1.4 perf audit: disk SMART polling used to fire on the exact + # same tick as CPU temp (offset :40). Keeping it on the same 60s + # cadence — operator wants per-minute disk temperature chart data — + # but shifted to offset :55 so the smartctl burst (one per disk) + # doesn't pile on top of the CPU temp read and the upcoming latency + # ping of the next cycle (:25 + 60). Net effect: load is now spread + # across :25 (latency), :40 (CPU temp), :55 (disk SMART burst) + # instead of stacking at :25 + :40. + DISK_TEMP_DELAY_AFTER_CPU = 15 CLEANUP_INTERVAL = 3600 # 60 minutes CLEANUP_OFFSET = 120 # Cleanup at 2 min after the hour mark - + # Initial delays to stagger from other collectors _time.sleep(LATENCY_OFFSET) # Start latency first - + last_temp = _time.monotonic() last_latency = _time.monotonic() last_cleanup = _time.monotonic() - CLEANUP_INTERVAL + CLEANUP_OFFSET # First cleanup after offset - + while True: now = _time.monotonic() - + # Latency pings (offset 25s - runs first in each cycle) if now - last_latency >= RECORD_INTERVAL: _record_latency() last_latency = now - - # Temperature record (offset 40s - 15s after latency) + + # CPU / sensors temperature record (offset 40s - 15s after latency) _time.sleep(15) _record_temperature() - # Sprint 14: piggy-back the per-disk temperature sampler on - # the same minute tick. The sampler enumerates non-USB - # disks and writes a row each via smartctl; total cost is - # well under a second on typical hosts. Wrapped in a - # try-block so a stuck smartctl call can't break the - # CPU/latency pipeline. + # Sprint 14: per-disk SMART temperature sampler — kept on every + # tick (operator-visible chart granularity) but offset further + # into the cycle so the smartctl subprocess burst (one per disk) + # doesn't collide with the cheap CPU/latency reads. + _time.sleep(DISK_TEMP_DELAY_AFTER_CPU) try: import disk_temperature_history disk_temperature_history.record_all_disk_temperatures() @@ -10536,7 +10564,7 @@ def api_health(): return jsonify({ 'status': 'healthy', 'timestamp': datetime.now().isoformat(), - 'version': '1.2.1.3-beta' + 'version': '1.2.1.4-beta' }) # ─── User-configurable health thresholds ───────────────────────────────────── @@ -10697,18 +10725,60 @@ def api_health_thresholds_reset(): @app.route('/api/health/acknowledge', methods=['POST']) @require_auth def api_health_acknowledge(): - """Acknowledge/dismiss a health error by error_key.""" + """Acknowledge/dismiss a health error by error_key. + + Optional ``suppression_hours`` body field overrides the category default + (positive integer for hours; ``-1`` for permanent dismiss). + """ try: data = request.get_json() error_key = data.get('error_key', '') if not error_key: return jsonify({'error': 'error_key is required'}), 400 - - result = health_persistence.acknowledge_error(error_key) + + sup_override = None + if 'suppression_hours' in data and data['suppression_hours'] is not None: + try: + sup_override = int(data['suppression_hours']) + if sup_override < -1 or sup_override == 0: + return jsonify({'error': 'suppression_hours must be a positive integer or -1 (permanent)'}), 400 + except (ValueError, TypeError): + return jsonify({'error': 'suppression_hours must be an integer'}), 400 + + result = health_persistence.acknowledge_error(error_key, suppression_hours=sup_override) return jsonify({'success': True, 'result': result}) except Exception as e: return jsonify({'error': str(e)}), 500 + +@app.route('/api/health/un-acknowledge', methods=['POST']) +@require_auth +def api_health_unacknowledge(): + """Reverse a previous dismiss — re-enables the alert so it can fire again. + + Used by the Settings → Active Suppressions panel. + """ + try: + data = request.get_json() + error_key = data.get('error_key', '') + if not error_key: + return jsonify({'error': 'error_key is required'}), 400 + + result = health_persistence.unacknowledge_error(error_key) + # Invalidate caches so the next health fetch reflects the new state. + for ck in ['_bg_overall', '_bg_detailed', 'overall_health', + 'storage_check', 'vms_check', 'logs_analysis', + 'pve_services', 'updates_check', 'security_check', + 'cpu_check', 'network_check']: + health_monitor.last_check_times.pop(ck, None) + health_monitor.cached_results.pop(ck, None) + + status = 200 if result.get('success') else 404 + return jsonify(result), status + except Exception as e: + return jsonify({'error': str(e)}), 500 + + @app.route('/api/prometheus', methods=['GET']) @require_auth def api_prometheus(): @@ -10979,7 +11049,7 @@ def api_info(): """Root endpoint with API information""" return jsonify({ 'name': 'ProxMenux Monitor API', - 'version': '1.2.1.3-beta', + 'version': '1.2.1.4-beta', 'endpoints': [ '/api/system', '/api/system-info', @@ -11728,7 +11798,7 @@ if __name__ == '__main__': try: import sqlite3 from pathlib import Path - MONITOR_VERSION = '1.2.1.3-beta' + MONITOR_VERSION = '1.2.1.4-beta' db_path = Path('/usr/local/share/proxmenux/health_monitor.db') if db_path.exists(): conn = sqlite3.connect(str(db_path), timeout=10) diff --git a/AppImage/scripts/health_monitor.py b/AppImage/scripts/health_monitor.py index 9c1a0a5f..87fd782c 100644 --- a/AppImage/scripts/health_monitor.py +++ b/AppImage/scripts/health_monitor.py @@ -793,7 +793,10 @@ class HealthMonitor: def _annotate_dismissed(check_dict): """Mutate check_dict in place to add `dismissed=True` if - its error_key is currently acknowledged in the DB. + its error_key is currently acknowledged in the DB. When the + dismiss was permanent (suppression_hours == -1) also tags + ``permanent=True`` so the UI can render a "🔒 Permanent" + badge distinct from the time-limited countdown. Returns True when the check should NOT contribute to the aggregate status.""" if not isinstance(check_dict, dict): @@ -804,6 +807,8 @@ class HealthMonitor: try: if health_persistence.is_error_acknowledged(ek): check_dict['dismissed'] = True + if health_persistence.is_error_permanently_acknowledged(ek): + check_dict['permanent'] = True return True except Exception: pass diff --git a/AppImage/scripts/health_persistence.py b/AppImage/scripts/health_persistence.py index aedbcf57..be3630df 100644 --- a/AppImage/scripts/health_persistence.py +++ b/AppImage/scripts/health_persistence.py @@ -794,17 +794,26 @@ class HealthPersistence: conn.commit() - def acknowledge_error(self, error_key: str) -> Dict[str, Any]: + def acknowledge_error(self, error_key: str, suppression_hours: Optional[int] = None) -> Dict[str, Any]: """ Manually acknowledge an error (dismiss). - - Looks up the category's configured suppression duration from user settings - - Stores suppression_hours on the error record (snapshot at dismiss time) + + Args: + error_key: the unique key of the error to dismiss. + suppression_hours: optional override for the dismiss duration. + - ``None`` (default): use the category's configured value (current behavior). + - positive integer: silence for that many hours. + - ``-1``: silence permanently — the user must re-enable from + Settings → Active Suppressions to bring the alert back. + + - Stores ``suppression_hours`` on the error record (snapshot at dismiss time). - Marks as acknowledged so it won't re-appear during the suppression period + (or ever, when ``suppression_hours == -1``). """ with self._db_lock: - return self._acknowledge_error_impl(error_key) - - def _acknowledge_error_impl(self, error_key): + return self._acknowledge_error_impl(error_key, suppression_hours_override=suppression_hours) + + def _acknowledge_error_impl(self, error_key, suppression_hours_override: Optional[int] = None): conn = self._get_conn() conn.row_factory = sqlite3.Row category = '' @@ -852,6 +861,11 @@ class HealthPersistence: sup_hours = int(stored) except (ValueError, TypeError): pass + # Caller-supplied override (e.g. per-error "permanent" dismiss + # picked by the user from the Health Monitor popover) trumps + # the category default. ``-1`` means silence permanently. + if suppression_hours_override is not None: + sup_hours = suppression_hours_override # Insert as acknowledged but NOT resolved - error remains active cursor.execute(''' @@ -892,6 +906,11 @@ class HealthPersistence: sup_hours = int(stored) except (ValueError, TypeError): pass + # Per-error override (e.g. user selected "Permanent" / "7 days" + # in the dismiss popover) takes precedence over the category + # default. + if suppression_hours_override is not None: + sup_hours = suppression_hours_override # Mark as acknowledged but DO NOT set resolved_at cursor.execute(''' @@ -946,10 +965,65 @@ class HealthPersistence: self._clear_notification_cooldown(error_key) return result - + + def unacknowledge_error(self, error_key: str) -> Dict[str, Any]: + """ + Reverse a previous dismiss (acknowledged → not acknowledged). + + Used by the Settings → Active Suppressions panel: the user explicitly + re-enables an alert they had silenced (time-limited or permanent). + After this call the error becomes eligible to re-emit on the next + scan if the underlying condition is still present. + """ + with self._db_lock: + conn = self._get_conn() + try: + cursor = conn.cursor() + cursor.execute( + 'SELECT category, severity, acknowledged FROM errors WHERE error_key = ?', + (error_key,), + ) + row = cursor.fetchone() + if not row: + return {'success': False, 'error': 'not_found', 'error_key': error_key} + category = row[0] or '' + severity = row[1] or 'WARNING' + was_acknowledged = bool(row[2]) + if not was_acknowledged: + # Nothing to do — keep the call idempotent. + return { + 'success': True, + 'error_key': error_key, + 'category': category, + 'changed': False, + } + # Clear acknowledgment + stored suppression. The next health + # scan will decide whether to re-record the error based on the + # actual condition. + now = datetime.now().isoformat() + cursor.execute(''' + UPDATE errors + SET acknowledged = 0, acknowledged_at = NULL, suppression_hours = NULL, + last_seen = ? + WHERE error_key = ? + ''', (now, error_key)) + self._record_event(cursor, 'unacknowledged', error_key, { + 'category': category, + 'severity': severity, + }) + conn.commit() + return { + 'success': True, + 'error_key': error_key, + 'category': category, + 'changed': True, + } + finally: + conn.close() + def is_error_acknowledged(self, error_key: str) -> bool: """Check if an error_key has been acknowledged and is still within suppression window. - + Uses acknowledged_at (not resolved_at) to calculate suppression expiration, since dismissed errors may have resolved_at = NULL. """ @@ -967,11 +1041,11 @@ class HealthPersistence: # Check if still within suppression window using acknowledged_at acknowledged_at = row['acknowledged_at'] sup_hours = row['suppression_hours'] or self.DEFAULT_SUPPRESSION_HOURS - + # -1 means permanently suppressed if sup_hours < 0: return True - + if acknowledged_at: try: acknowledged_dt = datetime.fromisoformat(acknowledged_at) @@ -982,6 +1056,24 @@ class HealthPersistence: return True except Exception: return False + + def is_error_permanently_acknowledged(self, error_key: str) -> bool: + """True only when the error is currently dismissed with + ``suppression_hours == -1``. Used by the health monitor to surface a + "🔒 Permanent" badge in the UI vs. the regular time-limited dismiss.""" + try: + with self._db_connection(row_factory=True) as conn: + cursor = conn.cursor() + cursor.execute( + 'SELECT acknowledged, suppression_hours FROM errors WHERE error_key = ?', + (error_key,), + ) + row = cursor.fetchone() + if not row or not row['acknowledged']: + return False + return (row['suppression_hours'] or 0) == -1 + except Exception: + return False def get_active_errors(self, category: Optional[str] = None) -> List[Dict[str, Any]]: """Get all active (unresolved AND not acknowledged) errors, optionally filtered by category. diff --git a/AppImage/scripts/mount_monitor.py b/AppImage/scripts/mount_monitor.py index 2a66244b..540ff74c 100644 --- a/AppImage/scripts/mount_monitor.py +++ b/AppImage/scripts/mount_monitor.py @@ -337,20 +337,36 @@ def _list_running_lxcs() -> list[dict[str, str]]: if not vmid: continue + # v1.2.1.4 perf audit: previously this called `lxc-info -n -p` + # for every running CT on every scan tick. With N CTs that's N + # subprocesses per cycle (lxc-info forks + execs + parses its own + # config to give us a single number we can read directly). The CT's + # init PID is the first child of the supervising lxc-start process + # we just identified — readable from /proc with zero subprocess + # cost. pid = '' try: - p2 = subprocess.run( - ['lxc-info', '-n', vmid, '-p'], - capture_output=True, text=True, timeout=2, - ) - if p2.returncode == 0: - for ln in p2.stdout.splitlines(): - # lxc-info output: "PID: 12345" - if ln.strip().lower().startswith('pid:'): - pid = ln.split(':', 1)[1].strip() - break - except (subprocess.TimeoutExpired, OSError): - pass + with open(f'/proc/{entry.name}/task/{entry.name}/children', 'r') as f: + children = f.read().split() + if children: + pid = children[0] + except (OSError, IOError): + # Fallback to lxc-info only if the /proc read failed — keeps + # behaviour identical for any edge case where the children + # file is unreadable (race with CT stop, kernel without + # CONFIG_PROC_CHILDREN, etc.). + try: + p2 = subprocess.run( + ['lxc-info', '-n', vmid, '-p'], + capture_output=True, text=True, timeout=2, + ) + if p2.returncode == 0: + for ln in p2.stdout.splitlines(): + if ln.strip().lower().startswith('pid:'): + pid = ln.split(':', 1)[1].strip() + break + except (subprocess.TimeoutExpired, OSError): + pass out.append({'vmid': vmid, 'name': _read_lxc_name(vmid), 'pid': pid}) diff --git a/beta_version.txt b/beta_version.txt index eece4c72..93f340a7 100644 --- a/beta_version.txt +++ b/beta_version.txt @@ -1 +1 @@ -1.2.1.3 \ No newline at end of file +1.2.1.4