Update ProxMenux 1.2.1.4-beta

This commit is contained in:
MacRimi
2026-05-30 21:54:32 +02:00
parent d2ef8f0899
commit 4bf49675d2
27 changed files with 690 additions and 166 deletions
+1 -1
View File
@@ -1 +1 @@
1caca89b574241c9d754b9ac3bb11987c5eccc5f182d01a5c62e61623b62fda7
fba0f824699660d18f77bc8558370acd725921cc34737508605c83ced3c947a4
+24 -13
View File
@@ -140,19 +140,30 @@ export function About() {
<Sparkles className="h-3 w-3" />
v{APP_VERSION}
</span>
{/* Changelog goes to the web — the in-app modal version
duplicated content and lacked a close affordance on
some viewports, forcing a page refresh. The web
changelog is canonical and auto-syncs with releases. */}
<a
href="https://proxmenux.com/changelog"
target="_blank"
rel="noopener noreferrer"
className="inline-flex items-center gap-1.5 rounded-md bg-muted hover:bg-muted/70 transition-colors text-foreground border border-border px-2.5 py-1 text-xs"
>
Changelog
<ExternalLink className="h-3 w-3" />
</a>
{/* Beta versions surface their pre-release notes on the
GitHub Releases page (where each beta is tagged + signed);
stable versions point at the canonical web changelog
which only carries shipped releases. Detection: the
APP_VERSION string carries a "-beta" / "-rc" /
"-alpha" suffix for any non-stable build. */}
{(() => {
const isPrerelease = /-(beta|rc|alpha)/i.test(APP_VERSION)
const href = isPrerelease
? "https://github.com/MacRimi/ProxMenux/releases"
: "https://proxmenux.com/en/changelog"
const label = isPrerelease ? "Release notes" : "Changelog"
return (
<a
href={href}
target="_blank"
rel="noopener noreferrer"
className="inline-flex items-center gap-1.5 rounded-md bg-muted hover:bg-muted/70 transition-colors text-foreground border border-border px-2.5 py-1 text-xs"
>
{label}
<ExternalLink className="h-3 w-3" />
</a>
)
})()}
</div>
</div>
</div>
+2 -2
View File
@@ -85,7 +85,7 @@ export function AuthSetup({ onComplete }: AuthSetupProps) {
setOpen(false)
onComplete()
} catch (err) {
console.error("[v0] Auth skip error:", err)
console.error("Auth skip error:", err)
setError(err instanceof Error ? err.message : "Failed to save preference")
} finally {
setLoading(false)
@@ -203,7 +203,7 @@ export function AuthSetup({ onComplete }: AuthSetupProps) {
setOpen(false)
onComplete()
} catch (err) {
console.error("[v0] Auth setup error:", err)
console.error("Auth setup error:", err)
setError(err instanceof Error ? err.message : "Failed to setup authentication")
} finally {
setLoading(false)
+2 -2
View File
@@ -260,7 +260,7 @@ export default function Hardware() {
if (hardwareData?.storage_devices) {
hardwareData.storage_devices.forEach((device) => {
if (device.name.startsWith("nvme")) {
console.log(`[v0] NVMe device ${device.name}:`, {
console.log(`NVMe device ${device.name}:`, {
pcie_gen: device.pcie_gen,
pcie_width: device.pcie_width,
pcie_max_gen: device.pcie_max_gen,
@@ -452,7 +452,7 @@ export default function Hardware() {
setDetailsLoading(false)
} catch (error) {
if (error instanceof Error && error.name !== "AbortError") {
console.error("[v0] Error fetching GPU realtime data:", error)
console.error("Error fetching GPU realtime data:", error)
}
setRealtimeGPUData({ has_monitoring_tool: false })
setDetailsLoading(false)
+104 -46
View File
@@ -7,6 +7,14 @@ import { getAuthToken } from "@/lib/api-config"
import { Dialog, DialogContent, DialogDescription, DialogHeader, DialogTitle } from "@/components/ui/dialog"
import { Badge } from "@/components/ui/badge"
import { Button } from "@/components/ui/button"
import {
DropdownMenu,
DropdownMenuContent,
DropdownMenuItem,
DropdownMenuLabel,
DropdownMenuSeparator,
DropdownMenuTrigger,
} from "@/components/ui/dropdown-menu"
import {
Loader2,
CheckCircle2,
@@ -357,8 +365,15 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu
}
}
const handleAcknowledge = async (errorKey: string, e: React.MouseEvent) => {
e.stopPropagation()
// `suppressionHours` overrides the category default for this dismiss:
// - undefined → backend uses the category's configured suppression
// - 24, 168 (7 days) → silence for that many hours
// - -1 → permanent dismiss; only revertible from
// Settings → Active Suppressions
const handleAcknowledge = async (
errorKey: string,
suppressionHours?: number,
) => {
setDismissingKey(errorKey)
try {
@@ -369,10 +384,15 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu
headers["Authorization"] = `Bearer ${token}`
}
const body: Record<string, unknown> = { error_key: errorKey }
if (suppressionHours !== undefined) {
body.suppression_hours = suppressionHours
}
const response = await fetch(url, {
method: "POST",
headers,
body: JSON.stringify({ error_key: errorKey }),
body: JSON.stringify(body),
})
const responseData = await response.json().catch(() => ({}))
@@ -390,11 +410,16 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu
severity: responseData.result?.original_severity || 'WARNING',
reason: 'Dismissed by user',
dismissed: true,
acknowledged_at: new Date().toISOString()
// Surface the chosen duration so the row shows the right badge
// (countdown vs. "Permanent") without waiting for the refetch.
permanent: suppressionHours === -1,
suppression_remaining_hours: suppressionHours === -1 ? -1 : undefined,
suppression_hours: suppressionHours,
acknowledged_at: new Date().toISOString(),
}
setDismissedItems(prev => [...prev, dismissedItem])
}
// Fetch fresh data in background (non-blocking)
fetchHealthDetails().catch(() => {})
} catch (err) {
@@ -511,32 +536,25 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu
<span className="font-medium shrink-0">{formatCheckLabel(checkKey)}</span>
<span className="text-muted-foreground break-words whitespace-pre-wrap min-w-0">{checkData.detail}</span>
{checkData.dismissed && (
<Badge variant="outline" className="text-[9px] px-1 py-0 h-4 shrink-0 text-blue-400 border-blue-400/30">
Dismissed
</Badge>
checkData.permanent ? (
<Badge variant="outline" className="text-[9px] px-1 py-0 h-4 shrink-0 text-amber-400 border-amber-400/40">
Permanent
</Badge>
) : (
<Badge variant="outline" className="text-[9px] px-1 py-0 h-4 shrink-0 text-blue-400 border-blue-400/30">
Dismissed
</Badge>
)
)}
</div>
<div className="flex items-center gap-1 sm:gap-1.5 shrink-0">
{(checkStatus === "WARNING" || checkStatus === "CRITICAL" || checkStatus === "UNKNOWN") && isDismissable && !checkData.dismissed && (
<Button
size="sm"
variant="outline"
className="h-5 px-1 sm:px-1.5 shrink-0 hover:bg-red-500/10 hover:border-red-500/50 bg-transparent text-[10px]"
disabled={dismissingKey === (checkData.error_key || checkKey)}
onClick={(e) => {
e.stopPropagation()
handleAcknowledge(checkData.error_key || checkKey, e)
}}
>
{dismissingKey === (checkData.error_key || checkKey) ? (
<Loader2 className="h-3 w-3 animate-spin" />
) : (
<>
<X className="h-3 w-3 sm:mr-0.5" />
<span className="hidden sm:inline">Dismiss</span>
</>
)}
</Button>
<DismissDropdown
onSelect={(hours) =>
handleAcknowledge(checkData.error_key || checkKey, hours)
}
busy={dismissingKey === (checkData.error_key || checkKey)}
/>
)}
</div>
</div>
@@ -681,25 +699,12 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu
<p className="text-xs text-muted-foreground break-words whitespace-pre-wrap flex-1">{reason}</p>
{/* Show dismiss button for UNKNOWN status at category level when dismissable */}
{status === "UNKNOWN" && categoryData?.dismissable && !hasChecks && (
<Button
size="sm"
variant="outline"
className="h-5 px-1.5 shrink-0 hover:bg-red-500/10 hover:border-red-500/50 bg-transparent text-[10px]"
disabled={dismissingKey === `category_${key}`}
onClick={(e) => {
e.stopPropagation()
handleAcknowledge(`category_${key}_unknown`, e)
}}
>
{dismissingKey === `category_${key}` ? (
<Loader2 className="h-3 w-3 animate-spin" />
) : (
<>
<X className="h-3 w-3 sm:mr-0.5" />
<span className="hidden sm:inline">Dismiss</span>
</>
)}
</Button>
<DismissDropdown
onSelect={(hours) =>
handleAcknowledge(`category_${key}_unknown`, hours)
}
busy={dismissingKey === `category_${key}_unknown`}
/>
)}
</div>
)}
@@ -840,3 +845,56 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu
</Dialog>
)
}
/**
 * Dismiss control for a single health alert.
 *
 * Clicking the button does not dismiss anything by itself: it opens a
 * three-option menu so the user picks how long this specific alert stays
 * silenced. The chosen duration is reported through `onSelect` in hours:
 * `24`, `168` (7 days) or `-1`. `-1` is the permanent sentinel — the backend
 * stores it as `suppression_hours = -1` and the alert can only be brought
 * back from Settings → Active Suppressions.
 *
 * @param onSelect Called with the chosen suppression duration in hours.
 * @param busy     While true the trigger is disabled and shows a spinner.
 */
function DismissDropdown({
  onSelect,
  busy,
}: {
  onSelect: (suppressionHours: number) => void
  busy: boolean
}) {
  return (
    <DropdownMenu>
      <DropdownMenuTrigger asChild>
        {/* The "Dismiss" text is hidden below the `sm` breakpoint and replaced
            by a spinner while busy, so the button needs an explicit accessible
            name for assistive technology. */}
        <Button
          size="sm"
          variant="outline"
          className="h-5 px-1 sm:px-1.5 shrink-0 hover:bg-red-500/10 hover:border-red-500/50 bg-transparent text-[10px]"
          disabled={busy}
          aria-label="Dismiss alert"
          onClick={(e) => e.stopPropagation()}
        >
          {busy ? (
            <Loader2 className="h-3 w-3 animate-spin" />
          ) : (
            <>
              <X className="h-3 w-3 sm:mr-0.5" />
              <span className="hidden sm:inline">Dismiss</span>
            </>
          )}
        </Button>
      </DropdownMenuTrigger>
      {/* Clicks are stopped here as well as on the trigger so that choosing
          an option does not bubble to click handlers on ancestor elements. */}
      <DropdownMenuContent align="end" className="w-44" onClick={(e) => e.stopPropagation()}>
        <DropdownMenuLabel className="text-[10px] uppercase tracking-wide text-muted-foreground">
          Silence this alert for
        </DropdownMenuLabel>
        <DropdownMenuItem onSelect={() => onSelect(24)} className="text-xs">
          <Clock className="h-3 w-3 mr-2 text-muted-foreground" /> 24 hours
        </DropdownMenuItem>
        <DropdownMenuItem onSelect={() => onSelect(168)} className="text-xs">
          <Clock className="h-3 w-3 mr-2 text-muted-foreground" /> 7 days
        </DropdownMenuItem>
        <DropdownMenuSeparator />
        {/* Permanent option: styled in red and separated from the timed ones
            because it is the only choice that never expires on its own. */}
        <DropdownMenuItem
          onSelect={() => onSelect(-1)}
          className="text-xs text-red-500 focus:text-red-500 focus:bg-red-500/10"
        >
          <BellOff className="h-3 w-3 mr-2" /> Permanently
        </DropdownMenuItem>
      </DropdownMenuContent>
    </DropdownMenu>
  )
}
+1 -1
View File
@@ -271,7 +271,7 @@ export function Login({ onLogin }: LoginProps) {
</form>
</div>
<p className="text-center text-sm text-muted-foreground">ProxMenux Monitor v1.2.1.3-beta</p>
<p className="text-center text-sm text-muted-foreground">ProxMenux Monitor v1.2.1.4-beta</p>
</div>
</div>
)
+1 -1
View File
@@ -109,7 +109,7 @@ export function NetworkCard({ interface_, timeframe, onClick }: NetworkCardProps
})
}
} catch (error) {
console.error("[v0] Failed to fetch traffic data for card:", error)
console.error("Failed to fetch traffic data for card:", error)
setTrafficData({ received: 0, sent: 0 })
}
}
@@ -206,7 +206,7 @@ export function NetworkTrafficChart({
setIsInitialLoad(false)
}
} catch (err: any) {
console.error("[v0] Error fetching network metrics:", err)
console.error("Error fetching network metrics:", err)
setError(err.message || "Error loading metrics")
} finally {
setLoading(false)
+5 -5
View File
@@ -95,12 +95,12 @@ export function NodeMetricsCharts() {
if (!result.data || !Array.isArray(result.data)) {
console.error("[v0] Invalid data format - data is not an array:", result)
console.error("Invalid data format - data is not an array:", result)
throw new Error("Invalid data format received from server")
}
if (result.data.length === 0) {
console.warn("[v0] No data points received")
console.warn("No data points received")
setData([])
setLoading(false)
return
@@ -159,9 +159,9 @@ export function NodeMetricsCharts() {
setData(transformedData)
} catch (err: any) {
console.error("[v0] Error fetching node metrics:", err)
console.error("[v0] Error message:", err.message)
console.error("[v0] Error stack:", err.stack)
console.error("Error fetching node metrics:", err)
console.error("Error message:", err.message)
console.error("Error stack:", err.stack)
setError(err.message || "Error loading metrics")
} finally {
setLoading(false)
+17 -7
View File
@@ -1831,27 +1831,34 @@ export function NotificationSettings() {
</div>
{config.channels.apprise?.enabled && (
<>
<div className="space-y-1.5">
<div className="space-y-1.5 min-w-0">
<Label className="text-[11px] text-muted-foreground">Apprise URL</Label>
<div className="flex items-center gap-1.5">
<div className="flex items-center gap-1.5 min-w-0">
<Input
type={showSecrets["apprise_url"] ? "text" : "password"}
className={`h-7 text-xs font-mono ${!editMode ? "opacity-50" : ""}`}
placeholder="tgram://bottoken/ChatID · ntfy://server/topic · discord://webhook_id/token · matrix://..."
className={`h-7 text-xs font-mono min-w-0 flex-1 ${!editMode ? "opacity-50" : ""}`}
placeholder="tgram://bottoken/ChatID"
value={config.channels.apprise?.url || ""}
onChange={e => updateChannel("apprise", "url", e.target.value)}
disabled={!editMode}
/>
<button
type="button"
className="h-7 w-7 flex items-center justify-center rounded-md border border-border hover:bg-muted text-muted-foreground"
className="h-7 w-7 shrink-0 flex items-center justify-center rounded-md border border-border hover:bg-muted text-muted-foreground"
onClick={() => setShowSecrets(s => ({ ...s, apprise_url: !s.apprise_url }))}
title={showSecrets["apprise_url"] ? "Hide URL" : "Show URL"}
>
{showSecrets["apprise_url"] ? <EyeOff className="h-3 w-3" /> : <Eye className="h-3 w-3" />}
</button>
</div>
<p className="text-[10px] text-muted-foreground leading-relaxed">
{/* The examples row was overflowing on mobile because
every `<code>` token is atomic — the whole line
would scroll horizontally on narrow viewports.
`break-all` on the wrapper lets the layout break
mid-token if the viewport is really tight; on
wider screens the natural commas/spaces still
control wrapping. */}
<p className="text-[10px] text-muted-foreground leading-relaxed break-all min-w-0">
A single URL that Apprise routes to the right service. Examples:
<code className="text-foreground/80 mx-0.5">tgram://</code>,
<code className="text-foreground/80 mx-0.5">discord://</code>,
@@ -1871,7 +1878,10 @@ export function NotificationSettings() {
</a>.
</p>
</div>
<div className="flex justify-end pt-1">
{renderChannelCategories("apprise")}
{renderQuietHours("apprise")}
{renderDailyDigest("apprise")}
<div className="flex justify-end pt-2 border-t border-border/50">
<button
className="h-7 px-3 text-xs rounded-md bg-cyan-600 hover:bg-cyan-700 text-white transition-colors disabled:opacity-50 flex items-center gap-1.5"
onClick={() => handleTest("apprise")}
+1 -1
View File
@@ -858,7 +858,7 @@ export function ProxmoxDashboard() {
</Tabs>
<footer className="mt-8 md:mt-12 pt-4 md:pt-6 border-t border-border text-center text-xs md:text-sm text-muted-foreground">
<p className="font-medium mb-2">ProxMenux Monitor v1.2.1.3-beta</p>
<p className="font-medium mb-2">ProxMenux Monitor v1.2.1.4-beta</p>
<p>
<a
href="https://ko-fi.com/macrimi"
+24 -27
View File
@@ -3,10 +3,10 @@
import { useState, useEffect } from "react"
import { Button } from "./ui/button"
import { Dialog, DialogContent, DialogTitle } from "./ui/dialog"
import { X, Sparkles, Thermometer, Activity, HardDrive, Shield, Globe, Cpu, Zap, Sliders, Wrench, RefreshCw, Server } from "lucide-react"
import { X, Sparkles, Thermometer, Activity, HardDrive, Shield, Globe, Cpu, Zap, Sliders, Wrench, RefreshCw, Server, BellOff, Bell } from "lucide-react"
import { Checkbox } from "./ui/checkbox"
const APP_VERSION = "1.2.1.3-beta" // Sync with AppImage/package.json
const APP_VERSION = "1.2.1.4-beta" // Sync with AppImage/package.json
interface ReleaseNote {
date: string
@@ -18,6 +18,23 @@ interface ReleaseNote {
}
export const CHANGELOG: Record<string, ReleaseNote> = {
"1.2.1.4-beta": {
date: "May 30, 2026",
changes: {
added: [
"Per-error dismiss duration - The Dismiss button on each Health Monitor alert now opens a small dropdown with three options: 24 hours, 7 days, or Permanently. The 24h / 7d paths behave like the existing time-limited dismiss (the alert reappears after the window expires). Permanent dismisses persist with suppression_hours = -1 in the persistence DB, never re-emit, never re-notify, and are marked with a distinct amber Permanent badge in the Health Monitor so the operator knows the alert is intentionally silenced",
"Active Suppressions panel in Settings - New section inside Settings -> Health Monitor (below the per-category suppression durations) that lists every currently-dismissed alert, both time-limited (with countdown) and permanent. Each row carries the error_key, category, severity, when it was dismissed, and a Re-enable button that clears the acknowledgment so the alert can fire again on the next scan. The Re-enable button is gated by the Health Monitor Edit mode (same gating as the rest of the Health settings) — toggle Edit at the top of the page first, then the buttons become active. Permanent dismisses can only be reverted from here, time-limited ones can also be force-revived if you don't want to wait for the countdown",
"Apprise channel - per-event toggles, Quiet Hours and Daily Digest - The Apprise tab now exposes the same Notification Categories block, per-event sub-toggles, Quiet Hours and Daily Digest controls as Telegram / Gotify / Discord / Email. The backend already supported per-channel filtering for Apprise via the generic channel_overrides logic; the UI just wasn't surfacing it",
],
changed: [
"POST /api/health/acknowledge accepts an optional suppression_hours body field - positive integer for the dismiss duration in hours, -1 for permanent. Omitting the field preserves the previous behaviour (uses the category's configured default). New endpoint POST /api/health/un-acknowledge {error_key} reverses a dismiss (used by Settings -> Active Suppressions and by future automations)",
"Health Monitor dismissed annotation - When an alert is currently acknowledged with suppression_hours = -1, the dashboard payload now tags the check with permanent: true alongside dismissed: true so the UI can render the Permanent badge separately from the standard time-limited Dismissed badge",
],
fixed: [
"Apprise URL section - Mobile overflow - On narrow viewports the Apprise URL row used to break the design: the placeholder packed four full example URLs into one line and the inline <code> examples in the description had no break-all rule, so the section pushed past the right edge of the viewport. The placeholder is now a single concise example (tgram://bottoken/ChatID), the URL input wrapper enforces min-w-0 / flex-1 / shrink-0 on its children, and the examples paragraph uses break-all min-w-0 so it wraps cleanly on any width",
],
},
},
"1.2.1.3-beta": {
date: "May 22, 2026",
changes: {
@@ -146,36 +163,16 @@ export const CHANGELOG: Record<string, ReleaseNote> = {
const CURRENT_VERSION_FEATURES = [
{
icon: <RefreshCw className="h-5 w-5" />,
text: "Post-install function update detection - The Monitor tracks installed ProxMenux optimizations and notifies when a newer version of any of them is available, with one-click apply",
icon: <BellOff className="h-5 w-5" />,
text: "Per-error dismiss duration - The Dismiss button on each Health Monitor alert now opens a small dropdown so you choose 24 hours, 7 days, or Permanently for that specific alert. Permanent dismisses get a distinct Permanent badge and never re-notify",
},
{
icon: <Sliders className="h-5 w-5" />,
text: "Health Monitor Thresholds - Per-category warning and critical levels for CPU, memory, temperature, storage and more, fully configurable from Settings",
text: "Active Suppressions section - New section inside Settings -> Health Monitor that lists every dismissed alert (time-limited and permanent) with a Re-enable button. Permanent dismisses can only be reverted from here. The Re-enable action is gated by Health Monitor Edit mode",
},
{
icon: <Cpu className="h-5 w-5" />,
text: "NVIDIA driver update notifications - Kernel-aware detection of new compatible driver versions, surfaced in the Hardware tab and as notifications when a newer build is published",
},
{
icon: <Globe className="h-5 w-5" />,
text: "Secure Gateway update flow - One-click Tailscale update from Settings, with version indicators and notification when a new release is available",
},
{
icon: <Wrench className="h-5 w-5" />,
text: "Helper-Scripts menu - Richer context and useful information for each entry, so you know what every script does before running it",
},
{
icon: <Thermometer className="h-5 w-5" />,
text: "Improved disk temperature monitoring - Better readings, smarter caching across SMART probes and a redesigned history modal that opens at 24h by default",
},
{
icon: <Server className="h-5 w-5" />,
text: "VM and LXC modal expanded - Additional information consolidated into a single panel so you don't have to look it up across multiple tabs",
},
{
icon: <Zap className="h-5 w-5" />,
text: "Faster page load and tighter security - Lighter network usage on the main tabs, plus stricter authentication checks across notification, scripts and terminal endpoints",
icon: <Bell className="h-5 w-5" />,
text: "Apprise channel parity - Apprise now exposes the same per-event toggles, Quiet Hours and Daily Digest controls as Telegram / Gotify / Discord / Email. Mobile overflow in the Apprise URL row is also fixed",
},
]
+201 -3
View File
@@ -2,7 +2,9 @@
import { useState, useEffect } from "react"
import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "./ui/card"
import { Wrench, Package, Ruler, HeartPulse, Cpu, MemoryStick, HardDrive, CircleDot, Network, Server, Settings2, FileText, RefreshCw, Shield, AlertTriangle, Info, Loader2, Check, Database, CloudOff, Code, X, Copy, Sparkles, ArrowUpCircle } from "lucide-react"
import { Wrench, Package, Ruler, HeartPulse, Cpu, MemoryStick, HardDrive, CircleDot, Network, Server, Settings2, FileText, RefreshCw, Shield, AlertTriangle, Info, Loader2, Check, Database, CloudOff, Code, X, Copy, Sparkles, ArrowUpCircle, BellOff } from "lucide-react"
import { Badge } from "./ui/badge"
import { Button } from "./ui/button"
import { NotificationSettings } from "./notification-settings"
import { HealthThresholds } from "./health-thresholds"
import { LxcUpdateDetection } from "./lxc-update-detection"
@@ -10,7 +12,6 @@ import { ScriptTerminalModal } from "./script-terminal-modal"
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "./ui/select"
import { Switch } from "./ui/switch"
import { Input } from "./ui/input"
import { Badge } from "./ui/badge"
import { getNetworkUnit } from "../lib/format-network"
import { fetchApi } from "../lib/api-config"
@@ -188,6 +189,63 @@ const CATEGORY_ICONS: Record<string, React.ElementType> = {
security: Shield,
}
// Tokens that are rendered fully uppercase in a normalized label instead of
// being title-cased (matched case-insensitively against each key segment).
const ERROR_KEY_ACRONYMS = new Set<string>([
  "CPU", "GPU", "IO", "RAM", "SSD", "HDD", "NIC", "API",
  "URL", "SSH", "TLS", "SSL", "DNS", "DHCP", "NTP",
  "NFS", "SMB", "CIFS", "ISCSI",
  "PBS", "PVE", "LXC", "VM", "SMART", "ZFS", "LVM", "RAID",
  "ID", "UUID", "MAC", "IP",
])

/**
 * Turns an internal error_key into a label suitable for display, e.g.
 * `pve_storage_full_PBS-Cloud` becomes `PVE Storage Full: PBS-Cloud`.
 *
 * The key is split on `_`. The longest run of trailing segments that look
 * like a resource identifier (they contain a hyphen, an uppercase letter or
 * a digit, or match a Linux block/network device name) is kept verbatim and
 * appended after `: `. Every remaining segment is title-cased, except known
 * acronyms which are uppercased. The first segment is always treated as
 * description, never as resource.
 *
 * @param key Raw error_key; an empty value yields an empty string.
 * @returns The human-readable label.
 */
function normalizeErrorKey(key: string): string {
  if (!key) return ""

  const tokens = key.split("_")

  const isResourceToken = (token: string): boolean =>
    token !== "" &&
    (token.includes("-") ||
      /[A-Z]/.test(token) ||
      /\d/.test(token) ||
      // Linux block/network device patterns
      /^(sd[a-z]+\d*|nvme\d+n\d+|vmbr\d+|eth\d+|ens\d+|enp\d+|wlp\d+|tap\d+|veth\w+|vtnet\d+|vnet\d+)$/.test(token))

  // Walk backwards from the end while segments still look like a resource.
  // The walk stops at index 1 so the first segment always stays in the
  // descriptive part.
  let boundary = tokens.length
  while (boundary > 1 && isResourceToken(tokens[boundary - 1])) {
    boundary -= 1
  }

  const heading = tokens
    .slice(0, boundary)
    .map((word) => {
      if (word === "") return word
      const shouted = word.toUpperCase()
      return ERROR_KEY_ACRONYMS.has(shouted)
        ? shouted
        : word.charAt(0).toUpperCase() + word.slice(1).toLowerCase()
    })
    .join(" ")

  const resource = tokens.slice(boundary)
  return resource.length > 0 ? `${heading}: ${resource.join("_")}` : heading
}
interface ProxMenuxTool {
key: string
name: string
@@ -295,6 +353,23 @@ export function Settings() {
const [loadingInterfaces, setLoadingInterfaces] = useState(true)
const [savingInterface, setSavingInterface] = useState<string | null>(null)
// Active Suppressions panel — lists every error currently dismissed
// (time-limited or permanent) so the user can re-enable individual
// alerts. Mirrors what /api/health/full returns under `dismissed`.
// NOTE(review): the loader in this component requests
// /api/health/dismissed — confirm both endpoints return the same row shape.
type ActiveSuppression = {
// Identifier of the dismissed alert; sent back to the server to re-enable
// it and used as the React key for the row.
error_key: string
// Health category the alert belongs to; shown as-is in the row.
category: string
// Severity shown in the row when present.
severity?: string
reason?: string
// Timestamp string of the dismiss; parsed with `new Date()` for display.
acknowledged_at?: string
// Duration chosen for the dismiss, in hours; -1 is the permanent sentinel.
suppression_hours?: number
// Hours left before the alert can fire again; rendered as "Nd remaining"
// or "Nh remaining", and as "Active" when the value is missing.
suppression_remaining_hours?: number
// True for permanent dismisses; the row shows the amber "Permanent" badge.
permanent?: boolean
}
// Rows currently shown in the panel.
const [activeSuppressions, setActiveSuppressions] = useState<ActiveSuppression[]>([])
// Starts true so the spinner shows until the first load attempt finishes.
const [loadingSuppressions, setLoadingSuppressions] = useState(true)
// error_key of the row whose re-enable request is in flight, or null.
const [reEnablingKey, setReEnablingKey] = useState<string | null>(null)
// Sprint 13 / issue #195: snippets storage selector. The bash helper
// resolves it on first GPU passthrough and saves to config.json; this
// card surfaces the same setting so the user can see/change it from
@@ -339,6 +414,7 @@ export function Settings() {
getUnitsSettings()
loadHealthSettings()
loadRemoteStorages()
loadActiveSuppressions()
loadNetworkInterfaces()
loadSnippetsStorage()
}, [])
@@ -561,6 +637,41 @@ export function Settings() {
}
}
// Loads the rows for the Active Suppressions panel from
// /api/health/dismissed. The list is replaced only when the response
// carries a `dismissed` array; any other payload leaves the current rows
// untouched. Failures are logged, and the loading flag is cleared whether
// the request succeeded or not.
const loadActiveSuppressions = async () => {
  try {
    const payload = await fetchApi<{ dismissed?: ActiveSuppression[] }>("/api/health/dismissed")
    const rows = payload?.dismissed
    if (Array.isArray(rows)) {
      setActiveSuppressions(rows)
    }
  } catch (err) {
    console.error("Failed to load active suppressions:", err)
  } finally {
    setLoadingSuppressions(false)
  }
}
// Click "Re-enable" on a suppression → POST /api/health/un-acknowledge.
// Does nothing unless Health Monitor Edit mode is on. Once the request
// succeeds the row is removed locally, then the list is re-fetched in the
// background to stay in sync with the server (which may have re-recorded
// the error if the condition is still active — that surfaces in the Health
// Monitor, not this panel). On failure the row is kept and the error is
// logged.
const handleReEnable = async (errorKey: string) => {
  if (!healthEditMode) return
  setReEnablingKey(errorKey)
  try {
    await fetchApi("/api/health/un-acknowledge", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ error_key: errorKey }),
    })
    setActiveSuppressions(prev => prev.filter(s => s.error_key !== errorKey))
    // Silent reconcile: not awaited so the button spinner clears right away.
    // loadActiveSuppressions handles its own errors, so nothing can reject
    // unhandled here.
    void loadActiveSuppressions()
  } catch (err) {
    console.error("Failed to re-enable alert:", err)
  } finally {
    setReEnablingKey(null)
  }
}
const handleStorageExclusionChange = async (storageName: string, storageType: string, excludeHealth: boolean, excludeNotifications: boolean) => {
setSavingStorage(storageName)
try {
@@ -953,10 +1064,97 @@ export function Settings() {
<div className="flex items-start gap-2 mt-3 pt-3 border-t border-border">
<Info className="h-3.5 w-3.5 text-blue-400 shrink-0 mt-0.5" />
<p className="text-[11px] text-muted-foreground leading-relaxed">
These settings apply when you dismiss a warning from the Health Monitor.
These settings apply when you dismiss a warning from the Health Monitor.
Critical CPU temperature alerts always trigger regardless of settings to protect your hardware.
</p>
</div>
{/* Active Suppressions subsection.
Lives inside the Health Monitor card (no separator).
Surfaces every currently-dismissed alert (time-limited
and permanent) with a Re-enable button gated by Edit
mode. Permanent dismisses chosen from the dashboard
"Dismiss → Permanently" dropdown can only be reverted
here, so this is the audit log + un-dismiss UI for
them. Time-limited dismisses (24h, 7d) are listed for
visibility and can also be force-revived from here. */}
<div className="pt-8">
<div className="flex items-center gap-2 mb-1.5">
<BellOff className="h-4 w-4 text-amber-500" />
<span className="text-sm font-medium">Active Suppressions</span>
</div>
<p className="text-sm text-muted-foreground mb-4 leading-relaxed">
Alerts you have silenced from the Health Monitor. Permanent dismisses can only be
reverted here. Editing requires the Health Monitor <span className="font-mono text-xs">Edit</span> mode at the top of this card.
</p>
{loadingSuppressions ? (
<div className="flex items-center justify-center py-4">
<div className="animate-spin h-5 w-5 border-4 border-amber-500 border-t-transparent rounded-full" />
</div>
) : activeSuppressions.length === 0 ? (
<div className="text-center py-4 text-sm text-muted-foreground">
No active suppressions. Dismissed alerts from the Health Monitor will appear here.
</div>
) : (
<div className="space-y-2">
{activeSuppressions.map((s) => {
const remaining = s.suppression_remaining_hours
const remainingLabel = s.permanent
? "Permanent"
: remaining === undefined || remaining === null
? "Active"
: remaining >= 24
? `${Math.round(remaining / 24)}d remaining`
: `${Math.max(0, Math.round(remaining))}h remaining`
const dismissedAtLabel = s.acknowledged_at
? new Date(s.acknowledged_at).toLocaleString()
: ""
return (
<div
key={s.error_key}
className="flex items-start sm:items-center justify-between gap-3 px-3 py-2.5 rounded-md border border-border hover:bg-muted/30 transition-colors"
>
<div className="flex items-start gap-2 min-w-0 flex-1">
{s.permanent ? (
<Badge variant="outline" className="text-sm px-2 py-0.5 shrink-0 text-amber-400 border-amber-400/40 mt-0.5 font-normal">
Permanent
</Badge>
) : (
<Badge variant="outline" className="text-sm px-2 py-0.5 shrink-0 text-blue-400 border-blue-400/30 mt-0.5 font-normal">
{remainingLabel}
</Badge>
)}
<div className="min-w-0 flex-1">
<div className="text-xs sm:text-sm font-medium text-foreground truncate" title={s.error_key}>
{normalizeErrorKey(s.error_key)}
</div>
<div className="text-sm text-muted-foreground flex flex-wrap gap-x-3 gap-y-0.5 mt-0.5">
<span>category: <span className="font-medium text-foreground/80">{s.category || "—"}</span></span>
{s.severity && <span>severity: <span className="font-medium text-foreground/80">{s.severity}</span></span>}
{dismissedAtLabel && <span>dismissed: {dismissedAtLabel}</span>}
</div>
</div>
</div>
<Button
size="sm"
variant="outline"
className="h-7 px-2.5 text-xs shrink-0 hover:bg-green-500/10 hover:border-green-500/50 bg-transparent"
disabled={!healthEditMode || reEnablingKey === s.error_key}
onClick={() => handleReEnable(s.error_key)}
title={!healthEditMode ? "Enable Health Monitor Edit mode to re-enable" : "Re-enable this alert"}
>
{reEnablingKey === s.error_key ? (
<Loader2 className="h-3 w-3 animate-spin" />
) : (
"Re-enable"
)}
</Button>
</div>
)
})}
</div>
)}
</div>
</div>
)}
</CardContent>
+1 -1
View File
@@ -43,7 +43,7 @@ const fetchStorageData = async (): Promise<StorageData | null> => {
const data = await response.json()
return data
} catch (error) {
console.error("[v0] Failed to fetch storage data from Flask server:", error)
console.error("Failed to fetch storage data from Flask server:", error)
return null
}
}
+1 -1
View File
@@ -3624,7 +3624,7 @@ ${observationsHtml}
<!-- Footer -->
<div class="rpt-footer">
<div>Report generated by ProxMenux Monitor</div>
<div>ProxMenux Monitor v1.2.1.3-beta</div>
<div>ProxMenux Monitor v1.2.1.4-beta</div>
</div>
</body>
@@ -93,7 +93,7 @@ export function TemperatureDetailModal({ open, onOpenChange, liveTemperature }:
setStats(result.stats)
}
} catch (err) {
console.error("[v0] Failed to fetch temperature history:", err)
console.error("Failed to fetch temperature history:", err)
} finally {
setLoading(false)
}
+3 -1
View File
@@ -624,7 +624,7 @@ export const TerminalPanel: React.FC<TerminalPanelProps> = ({ websocketUrl, onCl
ws.onerror = (error) => {
clearTimeout(timeoutId)
console.error("[v0] TerminalPanel: WebSocket error:", error)
console.error("TerminalPanel: WebSocket error:", error)
setTerminals((prev) => prev.map((t) => {
if (t.id === terminal.id) {
if (t.pingInterval) {
@@ -924,6 +924,7 @@ const handleClose = () => {
<div
ref={(el) => (containerRefs.current[terminal.id] = el)}
className="w-full h-full flex-1 bg-black overflow-hidden"
translate="no"
/>
</TabsContent>
))}
@@ -956,6 +957,7 @@ const handleClose = () => {
ref={(el) => (containerRefs.current[terminal.id] = el)}
onClick={() => setActiveTerminalId(terminal.id)}
className="flex-1 w-full max-w-full bg-black overflow-hidden cursor-pointer"
translate="no"
data-terminal-container
/>
</div>
+1 -1
View File
@@ -720,7 +720,7 @@ export function VirtualMachines() {
configs[lxc.vmid] = extractIPFromConfig(details.config, details.lxc_ip_info)
}
} catch (error) {
console.log(`[v0] Could not fetch IP for LXC ${lxc.vmid}`)
console.log(`Could not fetch IP for LXC ${lxc.vmid}`)
configs[lxc.vmid] = "N/A"
}
}),
+2 -2
View File
@@ -161,14 +161,14 @@ export async function fetchApi<T>(endpoint: string, options?: RequestInit): Prom
const contentType = response.headers.get("content-type")
if (!contentType || !contentType.includes("application/json")) {
const text = await response.text()
console.error("[v0] fetchApi: Expected JSON but got:", contentType, "- Body preview:", text.substring(0, 200))
console.error("fetchApi: Expected JSON but got:", contentType, "- Body preview:", text.substring(0, 200))
throw new Error(`Expected JSON response but got ${contentType || "unknown content type"}`)
}
try {
return await response.json()
} catch (jsonError) {
console.error("[v0] fetchApi: JSON parse error for", endpoint, "-", jsonError)
console.error("fetchApi: JSON parse error for", endpoint, "-", jsonError)
throw new Error(`Invalid JSON response from ${endpoint}`)
}
}
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "ProxMenux-Monitor",
"version": "1.2.1.3-beta",
"version": "1.2.1.4-beta",
"description": "Proxmox System Monitoring Dashboard",
"private": true,
"scripts": {
+67 -2
View File
@@ -63,14 +63,32 @@ def acknowledge_error():
Acknowledge/dismiss an error manually.
Returns details about the acknowledged error including original severity
and suppression period info.
Body accepts an optional ``suppression_hours`` field — if omitted the
server uses the user-configured value for the error's category (current
behavior). When provided, the value overrides the category default for
this specific dismiss:
- positive integer N → silence for N hours
- ``-1`` → silence permanently (only revertible from
Settings → Active Suppressions)
"""
try:
data = request.get_json()
if not data or 'error_key' not in data:
return jsonify({'error': 'error_key is required'}), 400
error_key = data['error_key']
result = health_persistence.acknowledge_error(error_key)
sup_override = None
if 'suppression_hours' in data and data['suppression_hours'] is not None:
try:
sup_override = int(data['suppression_hours'])
# Accept positive durations and the permanent sentinel (-1)
# only. Zero / other negatives would be nonsensical here.
if sup_override < -1 or sup_override == 0:
return jsonify({'error': 'suppression_hours must be a positive integer or -1 (permanent)'}), 400
except (ValueError, TypeError):
return jsonify({'error': 'suppression_hours must be an integer'}), 400
result = health_persistence.acknowledge_error(error_key, suppression_hours=sup_override)
if result.get('success'):
# Invalidate cached health results so next fetch reflects the dismiss
@@ -130,6 +148,53 @@ def acknowledge_error():
except Exception as e:
return jsonify({'error': str(e)}), 500
@health_bp.route('/api/health/un-acknowledge', methods=['POST'])
def unacknowledge_error():
    """
    Re-enable a previously dismissed error.

    Used by Settings → Active Suppressions when the user explicitly removes
    a suppression (time-limited or permanent). After this call the error
    becomes eligible to re-emit and re-notify on the next health scan if
    the underlying condition is still present.

    Body: ``{"error_key": "<key>"}``

    Responses:
        200 - persistence result when the call succeeded.
        400 - body is not a JSON object carrying a non-empty ``error_key``.
        404 - persistence layer reported ``success`` as false.
        500 - unexpected failure; the exception text is returned.
    """
    try:
        # silent=True: a missing body, wrong content type or malformed JSON
        # yields None instead of raising, so the caller gets the 400 below
        # rather than a 500 from the generic handler at the bottom.
        data = request.get_json(silent=True)
        if not isinstance(data, dict) or not data.get('error_key'):
            return jsonify({'error': 'error_key is required'}), 400

        error_key = data['error_key']
        result = health_persistence.unacknowledge_error(error_key)

        # Invalidate caches so the next health fetch reflects the new state
        # (the alert may re-appear immediately if the condition still holds).
        category = result.get('category', '')
        cache_key_map = {
            'logs': 'logs_analysis',
            'pve_services': 'pve_services',
            'updates': 'updates_check',
            'security': 'security_check',
            'temperature': 'cpu_check',
            'network': 'network_check',
            'disks': 'storage_check',
            'vms': 'vms_check',
        }
        cache_key = cache_key_map.get(category)
        if cache_key:
            health_monitor.last_check_times.pop(cache_key, None)
            health_monitor.cached_results.pop(cache_key, None)

        # The aggregate entries are always dropped, whatever the category.
        for ck in ['_bg_overall', '_bg_detailed', 'overall_health']:
            health_monitor.last_check_times.pop(ck, None)
            health_monitor.cached_results.pop(ck, None)

        if not result.get('success'):
            return jsonify(result), 404
        return jsonify(result)
    except Exception as e:
        return jsonify({'error': str(e)}), 500
@health_bp.route('/api/health/active-errors', methods=['GET'])
def get_active_errors():
"""Get all active persistent errors"""
+92 -22
View File
@@ -265,18 +265,35 @@ def _apply_security_headers(response):
# is banned in the 'proxmenux' fail2ban jail and blocks at app level.
import subprocess as _f2b_subprocess
import time as _f2b_time
import shutil as _f2b_shutil
# Cache banned IPs for 30 seconds to avoid calling fail2ban-client on every request
_f2b_banned_cache = {"ips": set(), "ts": 0, "ttl": 30}
# One-time check at module import — when Fail2Ban isn't installed we want
# the @app.before_request middleware to be a no-op. Without this guard
# every HTTP request to the Monitor went through _f2b_get_banned_ips() →
# execve fail2ban-client → ENOENT, and the negative result wasn't cached
# (only the success branch updated `ts`), so a missing binary triggered
# one failed execve per HTTP request. strace on a host without Fail2Ban
# captured 250+ failed execve attempts in 10 min from this single path.
# Fixed in v1.2.1.4 perf audit.
_F2B_BINARY = _f2b_shutil.which("fail2ban-client")
def _f2b_get_banned_ips():
"""Get currently banned IPs from the proxmenux jail, with caching."""
if _F2B_BINARY is None:
# Fail2Ban isn't installed on this host. Skip the subprocess
# entirely; the @app.before_request middleware will see an empty
# banned-IPs set and let every request through (which is the
# correct behaviour — there's no Fail2Ban to honour).
return _f2b_banned_cache["ips"]
now = _f2b_time.time()
if now - _f2b_banned_cache["ts"] < _f2b_banned_cache["ttl"]:
return _f2b_banned_cache["ips"]
try:
result = _f2b_subprocess.run(
["fail2ban-client", "status", "proxmenux"],
[_F2B_BINARY, "status", "proxmenux"],
capture_output=True, text=True, timeout=5
)
if result.returncode == 0:
@@ -285,10 +302,13 @@ def _f2b_get_banned_ips():
ip_str = line.split(":", 1)[1].strip()
banned = set(ip.strip() for ip in ip_str.split() if ip.strip())
_f2b_banned_cache["ips"] = banned
_f2b_banned_cache["ts"] = now
return banned
except Exception:
pass
# Always update the timestamp — even on exception / non-zero rc /
# missing jail. Caches the negative result for the same TTL so a
# transient Fail2Ban outage doesn't trigger one subprocess call per
# HTTP request until it recovers.
_f2b_banned_cache["ts"] = now
return _f2b_banned_cache["ips"]
# XFF / X-Real-IP are only honored when the operator opts in by setting
@@ -707,37 +727,45 @@ def _temperature_collector_loop():
- Cleanup: every 60 min at offset 120s
"""
import time as _time
RECORD_INTERVAL = 60
TEMP_OFFSET = 40 # Record temp at :40 of each minute
LATENCY_OFFSET = 25 # Record latency at :25 of each minute
# v1.2.1.4 perf audit: disk SMART polling used to fire on the exact
# same tick as CPU temp (offset :40). Keeping it on the same 60s
# cadence — operator wants per-minute disk temperature chart data —
# but shifted to offset :55 so the smartctl burst (one per disk)
# doesn't pile on top of the CPU temp read and the upcoming latency
# ping of the next cycle (:25 + 60). Net effect: load is now spread
# across :25 (latency), :40 (CPU temp), :55 (disk SMART burst)
# instead of stacking at :25 + :40.
DISK_TEMP_DELAY_AFTER_CPU = 15
CLEANUP_INTERVAL = 3600 # 60 minutes
CLEANUP_OFFSET = 120 # Cleanup at 2 min after the hour mark
# Initial delays to stagger from other collectors
_time.sleep(LATENCY_OFFSET) # Start latency first
last_temp = _time.monotonic()
last_latency = _time.monotonic()
last_cleanup = _time.monotonic() - CLEANUP_INTERVAL + CLEANUP_OFFSET # First cleanup after offset
while True:
now = _time.monotonic()
# Latency pings (offset 25s - runs first in each cycle)
if now - last_latency >= RECORD_INTERVAL:
_record_latency()
last_latency = now
# Temperature record (offset 40s - 15s after latency)
# CPU / sensors temperature record (offset 40s - 15s after latency)
_time.sleep(15)
_record_temperature()
# Sprint 14: piggy-back the per-disk temperature sampler on
# the same minute tick. The sampler enumerates non-USB
# disks and writes a row each via smartctl; total cost is
# well under a second on typical hosts. Wrapped in a
# try-block so a stuck smartctl call can't break the
# CPU/latency pipeline.
# Sprint 14: per-disk SMART temperature sampler — kept on every
# tick (operator-visible chart granularity) but offset further
# into the cycle so the smartctl subprocess burst (one per disk)
# doesn't collide with the cheap CPU/latency reads.
_time.sleep(DISK_TEMP_DELAY_AFTER_CPU)
try:
import disk_temperature_history
disk_temperature_history.record_all_disk_temperatures()
@@ -10536,7 +10564,7 @@ def api_health():
return jsonify({
'status': 'healthy',
'timestamp': datetime.now().isoformat(),
'version': '1.2.1.3-beta'
'version': '1.2.1.4-beta'
})
# ─── User-configurable health thresholds ─────────────────────────────────────
@@ -10697,18 +10725,60 @@ def api_health_thresholds_reset():
@app.route('/api/health/acknowledge', methods=['POST'])
@require_auth
def api_health_acknowledge():
"""Acknowledge/dismiss a health error by error_key."""
"""Acknowledge/dismiss a health error by error_key.
Optional ``suppression_hours`` body field overrides the category default
(positive integer for hours; ``-1`` for permanent dismiss).
"""
try:
data = request.get_json()
error_key = data.get('error_key', '')
if not error_key:
return jsonify({'error': 'error_key is required'}), 400
result = health_persistence.acknowledge_error(error_key)
sup_override = None
if 'suppression_hours' in data and data['suppression_hours'] is not None:
try:
sup_override = int(data['suppression_hours'])
if sup_override < -1 or sup_override == 0:
return jsonify({'error': 'suppression_hours must be a positive integer or -1 (permanent)'}), 400
except (ValueError, TypeError):
return jsonify({'error': 'suppression_hours must be an integer'}), 400
result = health_persistence.acknowledge_error(error_key, suppression_hours=sup_override)
return jsonify({'success': True, 'result': result})
except Exception as e:
return jsonify({'error': str(e)}), 500
@app.route('/api/health/un-acknowledge', methods=['POST'])
@require_auth
def api_health_unacknowledge():
    """Reverse a previous dismiss — re-enables the alert so it can fire again.

    Used by the Settings Active Suppressions panel.

    Body: ``{"error_key": "<key>"}``

    Responses:
        200 - persistence result when the call succeeded.
        400 - body is not a JSON object carrying a non-empty ``error_key``.
        404 - persistence layer reported ``success`` as false.
        500 - unexpected failure; the exception text is returned.
    """
    try:
        # silent=True returns None for a missing/malformed body instead of
        # raising; the isinstance guard also covers valid JSON that is not an
        # object (null, list, string). Both used to surface as a 500 because
        # ``.get`` was called on the unvalidated value.
        data = request.get_json(silent=True)
        error_key = data.get('error_key', '') if isinstance(data, dict) else ''
        if not error_key:
            return jsonify({'error': 'error_key is required'}), 400

        result = health_persistence.unacknowledge_error(error_key)

        # Invalidate caches so the next health fetch reflects the new state.
        for ck in ['_bg_overall', '_bg_detailed', 'overall_health',
                   'storage_check', 'vms_check', 'logs_analysis',
                   'pve_services', 'updates_check', 'security_check',
                   'cpu_check', 'network_check']:
            health_monitor.last_check_times.pop(ck, None)
            health_monitor.cached_results.pop(ck, None)

        status = 200 if result.get('success') else 404
        return jsonify(result), status
    except Exception as e:
        return jsonify({'error': str(e)}), 500
@app.route('/api/prometheus', methods=['GET'])
@require_auth
def api_prometheus():
@@ -10979,7 +11049,7 @@ def api_info():
"""Root endpoint with API information"""
return jsonify({
'name': 'ProxMenux Monitor API',
'version': '1.2.1.3-beta',
'version': '1.2.1.4-beta',
'endpoints': [
'/api/system',
'/api/system-info',
@@ -11728,7 +11798,7 @@ if __name__ == '__main__':
try:
import sqlite3
from pathlib import Path
MONITOR_VERSION = '1.2.1.3-beta'
MONITOR_VERSION = '1.2.1.4-beta'
db_path = Path('/usr/local/share/proxmenux/health_monitor.db')
if db_path.exists():
conn = sqlite3.connect(str(db_path), timeout=10)
+6 -1
View File
@@ -793,7 +793,10 @@ class HealthMonitor:
def _annotate_dismissed(check_dict):
"""Mutate check_dict in place to add `dismissed=True` if
its error_key is currently acknowledged in the DB.
its error_key is currently acknowledged in the DB. When the
dismiss was permanent (suppression_hours == -1) also tags
``permanent=True`` so the UI can render a "🔒 Permanent"
badge distinct from the time-limited countdown.
Returns True when the check should NOT contribute to the
aggregate status."""
if not isinstance(check_dict, dict):
@@ -804,6 +807,8 @@ class HealthMonitor:
try:
if health_persistence.is_error_acknowledged(ek):
check_dict['dismissed'] = True
if health_persistence.is_error_permanently_acknowledged(ek):
check_dict['permanent'] = True
return True
except Exception:
pass
+102 -10
View File
@@ -794,17 +794,26 @@ class HealthPersistence:
conn.commit()
def acknowledge_error(self, error_key: str) -> Dict[str, Any]:
def acknowledge_error(self, error_key: str, suppression_hours: Optional[int] = None) -> Dict[str, Any]:
"""
Manually acknowledge an error (dismiss).
- Looks up the category's configured suppression duration from user settings
- Stores suppression_hours on the error record (snapshot at dismiss time)
Args:
error_key: the unique key of the error to dismiss.
suppression_hours: optional override for the dismiss duration.
- ``None`` (default): use the category's configured value (current behavior).
- positive integer: silence for that many hours.
- ``-1``: silence permanently — the user must re-enable from
Settings → Active Suppressions to bring the alert back.
- Stores ``suppression_hours`` on the error record (snapshot at dismiss time).
- Marks as acknowledged so it won't re-appear during the suppression period
(or ever, when ``suppression_hours == -1``).
"""
with self._db_lock:
return self._acknowledge_error_impl(error_key)
def _acknowledge_error_impl(self, error_key):
return self._acknowledge_error_impl(error_key, suppression_hours_override=suppression_hours)
def _acknowledge_error_impl(self, error_key, suppression_hours_override: Optional[int] = None):
conn = self._get_conn()
conn.row_factory = sqlite3.Row
category = ''
@@ -852,6 +861,11 @@ class HealthPersistence:
sup_hours = int(stored)
except (ValueError, TypeError):
pass
# Caller-supplied override (e.g. per-error "permanent" dismiss
# picked by the user from the Health Monitor popover) trumps
# the category default. ``-1`` means silence permanently.
if suppression_hours_override is not None:
sup_hours = suppression_hours_override
# Insert as acknowledged but NOT resolved - error remains active
cursor.execute('''
@@ -892,6 +906,11 @@ class HealthPersistence:
sup_hours = int(stored)
except (ValueError, TypeError):
pass
# Per-error override (e.g. user selected "Permanent" / "7 days"
# in the dismiss popover) takes precedence over the category
# default.
if suppression_hours_override is not None:
sup_hours = suppression_hours_override
# Mark as acknowledged but DO NOT set resolved_at
cursor.execute('''
@@ -946,10 +965,65 @@ class HealthPersistence:
self._clear_notification_cooldown(error_key)
return result
def unacknowledge_error(self, error_key: str) -> Dict[str, Any]:
    """
    Undo a dismiss: flip the error row from acknowledged back to active.

    Called from the Settings → Active Suppressions panel when the user
    explicitly re-enables an alert they had silenced (time-limited or
    permanent).

    Args:
        error_key: unique key of the error row to re-enable.

    Returns:
        ``{'success': False, 'error': 'not_found', 'error_key': ...}`` when
        no row matches; otherwise ``{'success': True, 'error_key': ...,
        'category': ..., 'changed': <bool>}`` where ``changed`` is False if
        the row was not acknowledged to begin with (idempotent no-op).
    """
    with self._db_lock:
        db = self._get_conn()
        try:
            cur = db.cursor()
            cur.execute(
                'SELECT category, severity, acknowledged FROM errors WHERE error_key = ?',
                (error_key,),
            )
            record = cur.fetchone()
            if not record:
                return {'success': False, 'error': 'not_found', 'error_key': error_key}

            raw_category, raw_severity, ack_flag = record[0], record[1], record[2]
            category = raw_category or ''
            outcome = {
                'success': True,
                'error_key': error_key,
                'category': category,
                'changed': bool(ack_flag),
            }

            # Already active: report success without touching the row.
            if not outcome['changed']:
                return outcome

            # Drop the acknowledgment and its stored suppression window, and
            # refresh last_seen to the current time.
            stamp = datetime.now().isoformat()
            cur.execute('''
                UPDATE errors
                SET acknowledged = 0, acknowledged_at = NULL, suppression_hours = NULL,
                last_seen = ?
                WHERE error_key = ?
            ''', (stamp, error_key))

            event_payload = {
                'category': category,
                'severity': raw_severity or 'WARNING',
            }
            self._record_event(cur, 'unacknowledged', error_key, event_payload)
            db.commit()
            return outcome
        finally:
            db.close()
def is_error_acknowledged(self, error_key: str) -> bool:
"""Check if an error_key has been acknowledged and is still within suppression window.
Uses acknowledged_at (not resolved_at) to calculate suppression expiration,
since dismissed errors may have resolved_at = NULL.
"""
@@ -967,11 +1041,11 @@ class HealthPersistence:
# Check if still within suppression window using acknowledged_at
acknowledged_at = row['acknowledged_at']
sup_hours = row['suppression_hours'] or self.DEFAULT_SUPPRESSION_HOURS
# -1 means permanently suppressed
if sup_hours < 0:
return True
if acknowledged_at:
try:
acknowledged_dt = datetime.fromisoformat(acknowledged_at)
@@ -982,6 +1056,24 @@ class HealthPersistence:
return True
except Exception:
return False
def is_error_permanently_acknowledged(self, error_key: str) -> bool:
    """Report whether an error is currently dismissed without an expiry.

    Used by the health monitor to tag a check as permanent so the UI can
    render a "🔒 Permanent" badge instead of the time-limited countdown.

    Any negative ``suppression_hours`` counts as permanent (``-1`` is the
    value written by the dismiss API). This mirrors ``is_error_acknowledged``,
    which treats ``sup_hours < 0`` as "never expires"; testing only for
    ``-1`` would leave a row suppressed forever without ever flagging it.

    Args:
        error_key: unique key of the error row to inspect.

    Returns:
        True when the row exists, is acknowledged and has a negative
        ``suppression_hours``; False otherwise, including on any lookup
        failure (best-effort: this only drives a UI badge).
    """
    try:
        with self._db_connection(row_factory=True) as conn:
            cursor = conn.cursor()
            cursor.execute(
                'SELECT acknowledged, suppression_hours FROM errors WHERE error_key = ?',
                (error_key,),
            )
            row = cursor.fetchone()
            if not row or not row['acknowledged']:
                return False
            # NULL suppression_hours means "no stored override", not permanent.
            return (row['suppression_hours'] or 0) < 0
    except Exception:
        return False
def get_active_errors(self, category: Optional[str] = None) -> List[Dict[str, Any]]:
"""Get all active (unresolved AND not acknowledged) errors, optionally filtered by category.
+28 -12
View File
@@ -337,20 +337,36 @@ def _list_running_lxcs() -> list[dict[str, str]]:
if not vmid:
continue
# v1.2.1.4 perf audit: previously this called `lxc-info -n <vmid> -p`
# for every running CT on every scan tick. With N CTs that's N
# subprocesses per cycle (lxc-info forks + execs + parses its own
# config to give us a single number we can read directly). The CT's
# init PID is the first child of the supervising lxc-start process
# we just identified — readable from /proc with zero subprocess
# cost.
pid = ''
try:
p2 = subprocess.run(
['lxc-info', '-n', vmid, '-p'],
capture_output=True, text=True, timeout=2,
)
if p2.returncode == 0:
for ln in p2.stdout.splitlines():
# lxc-info output: "PID: 12345"
if ln.strip().lower().startswith('pid:'):
pid = ln.split(':', 1)[1].strip()
break
except (subprocess.TimeoutExpired, OSError):
pass
with open(f'/proc/{entry.name}/task/{entry.name}/children', 'r') as f:
children = f.read().split()
if children:
pid = children[0]
except (OSError, IOError):
# Fallback to lxc-info only if the /proc read failed — keeps
# behaviour identical for any edge case where the children
# file is unreadable (race with CT stop, kernel without
# CONFIG_PROC_CHILDREN, etc.).
try:
p2 = subprocess.run(
['lxc-info', '-n', vmid, '-p'],
capture_output=True, text=True, timeout=2,
)
if p2.returncode == 0:
for ln in p2.stdout.splitlines():
if ln.strip().lower().startswith('pid:'):
pid = ln.split(':', 1)[1].strip()
break
except (subprocess.TimeoutExpired, OSError):
pass
out.append({'vmid': vmid, 'name': _read_lxc_name(vmid), 'pid': pid})
+1 -1
View File
@@ -1 +1 @@
1.2.1.3
1.2.1.4