Update AppImage

This commit is contained in:
MacRimi
2026-05-20 18:14:32 +02:00
parent 1087a87ea2
commit 4112323961
20 changed files with 1638 additions and 261 deletions
Binary file not shown.
+1 -1
View File
@@ -1 +1 @@
9315f939f10353d0105a6a2cb8f3c7e21b02620a513b52ce9349a088b95751b8 ProxMenux-1.2.1.1-beta.AppImage 150694a49a5b0a4546a2bf5fedcc0914d37666d0cdeac1d9fdc58793c131b4bd ProxMenux-1.2.1.1-beta.AppImage
+31 -24
View File
@@ -398,31 +398,30 @@ export function HealthThresholds() {
if (!leaf) return null if (!leaf) return null
const key = pathKey(path) const key = pathKey(path)
const editingValue = pending[key] ?? String(leaf.value) const editingValue = pending[key] ?? String(leaf.value)
// Pick the badge palette from the leaf name so warning rows render // The input border carries the severity colour so the editable field
// amber and critical rows render red. `swap_critical` and any other // itself shows what kind of threshold this is — no separate badge
// *_critical key fall into the red bucket via the substring check. // duplicating the number, which users mistook for the "real" value.
// `swap_critical` and any other `*_critical` leaf falls into the red
// bucket via the substring check. A blue ring on top of the colour
// border signals "customised vs recommended" — two independent
// signals on the same widget.
const last = path[path.length - 1] || "" const last = path[path.length - 1] || ""
const isCritical = last.toLowerCase().includes("critical") const isCritical = last.toLowerCase().includes("critical")
const isWarning = last.toLowerCase().includes("warning") const isWarning = last.toLowerCase().includes("warning")
const badgeClasses = isCritical const severityBorder = isCritical
? "bg-red-500/10 text-red-500 border-red-500/30" ? "border-red-500/40 bg-red-500/5 focus-visible:border-red-500"
: isWarning : isWarning
? "bg-amber-500/10 text-amber-500 border-amber-500/30" ? "border-amber-500/40 bg-amber-500/5 focus-visible:border-amber-500"
: "bg-muted text-muted-foreground border-border" : ""
const isCustomised = leaf.customised && !(key in pending)
const customisedRing = isCustomised ? "ring-2 ring-blue-500/40" : ""
const recommendedTooltip = `Recommended: ${leaf.recommended}${leaf.unit}`
return ( return (
<div key={key} className="flex items-center justify-between gap-2 py-1.5 px-1"> <div key={key} className="flex items-center justify-between gap-2 py-1.5 px-1">
<span className="text-xs sm:text-sm text-foreground/90 min-w-0 flex items-center gap-2"> <span className="text-xs sm:text-sm text-foreground/90 min-w-0">
<span className="h-1.5 w-1.5 rounded-full bg-blue-500 flex-shrink-0" aria-hidden="true" />
{label} {label}
</span> </span>
<div className="flex items-center gap-2 flex-shrink-0"> <div className="flex items-center gap-2 flex-shrink-0">
<span
className={`inline-flex items-center justify-center h-6 px-2 rounded-md border text-[11px] font-mono tabular-nums ${badgeClasses}`}
title="Recommended default value"
>
{leaf.recommended}
{leaf.unit}
</span>
<Input <Input
type="number" type="number"
min={leaf.min} min={leaf.min}
@@ -430,14 +429,13 @@ export function HealthThresholds() {
step={leaf.step} step={leaf.step}
disabled={!editMode} disabled={!editMode}
value={editingValue} value={editingValue}
title={recommendedTooltip}
onChange={(e) => onChange={(e) =>
setPending((p) => ({ ...p, [key]: e.target.value })) setPending((p) => ({ ...p, [key]: e.target.value }))
} }
className={`w-20 h-7 text-xs text-right tabular-nums ${ className={`w-20 h-7 text-xs text-right tabular-nums ${
!editMode ? "opacity-70" : "" !editMode ? "opacity-70" : ""
} ${ } ${severityBorder} ${customisedRing}`}
leaf.customised && !(key in pending) ? "border-blue-500/40" : ""
}`}
/> />
<span className="text-[11px] text-muted-foreground w-6">{leaf.unit}</span> <span className="text-[11px] text-muted-foreground w-6">{leaf.unit}</span>
</div> </div>
@@ -507,9 +505,9 @@ export function HealthThresholds() {
</div> </div>
<CardDescription> <CardDescription>
The Health Monitor and notifications fire when these thresholds are crossed. The Health Monitor and notifications fire when these thresholds are crossed.
Recommended values are shown with their reference color (amber for warning, Amber inputs are warning levels, red inputs are critical levels. A blue ring
red for critical); your edits override them. Leave a value unchanged to keep marks a value you've customised away from the recommended default hover the
the recommended. field to see the recommendation, or use Reset to restore it.
</CardDescription> </CardDescription>
</CardHeader> </CardHeader>
<CardContent> <CardContent>
@@ -520,14 +518,22 @@ export function HealthThresholds() {
) : !tree ? ( ) : !tree ? (
<div className="text-sm text-muted-foreground">Failed to load thresholds.</div> <div className="text-sm text-muted-foreground">Failed to load thresholds.</div>
) : ( ) : (
<div className="space-y-4"> <div>
{error && ( {error && (
<div className="flex items-start gap-2 p-2.5 rounded-md bg-red-500/10 border border-red-500/30 text-red-500 text-xs"> <div className="mb-4 flex items-start gap-2 p-2.5 rounded-md bg-red-500/10 border border-red-500/30 text-red-500 text-xs">
<AlertCircle className="h-4 w-4 flex-shrink-0 mt-0.5" /> <AlertCircle className="h-4 w-4 flex-shrink-0 mt-0.5" />
<div className="flex-1">{error}</div> <div className="flex-1">{error}</div>
</div> </div>
)} )}
{/*
Masonry-style flow via CSS columns: cards keep their natural
height (CPU = 2 rows, Disk temperature = 8 rows) and the
browser packs them top-to-bottom into 1/2/3 columns based on
viewport. `break-inside-avoid` keeps each card whole.
Mobile (<md) stays single-column as today.
*/}
<div className="columns-1 md:columns-2 2xl:columns-3 gap-4 space-y-4 [&>*]:break-inside-avoid">
{SECTIONS.map((section) => { {SECTIONS.map((section) => {
const Icon = section.icon const Icon = section.icon
return ( return (
@@ -568,6 +574,7 @@ export function HealthThresholds() {
</div> </div>
) )
})} })}
</div>
</div> </div>
)} )}
</CardContent> </CardContent>
+24 -18
View File
@@ -492,11 +492,11 @@ export function NotificationSettings() {
<div className="space-y-2 pt-2 border-t border-border/50"> <div className="space-y-2 pt-2 border-t border-border/50">
<div className="flex items-center justify-between py-1"> <div className="flex items-center justify-between py-1">
<div> <div>
<Label className="text-xs font-medium flex items-center gap-1.5"> <Label className="text-xs sm:text-sm text-foreground/80 flex items-center gap-2">
<Moon className="h-3.5 w-3.5 text-blue-400" /> <Moon className="h-4 w-4 text-blue-400" />
Quiet hours Quiet hours
</Label> </Label>
<p className="text-[10px] text-muted-foreground"> <p className="text-xs text-muted-foreground mt-1">
During this window only CRITICAL events reach this channel. During this window only CRITICAL events reach this channel.
</p> </p>
</div> </div>
@@ -517,29 +517,35 @@ export function NotificationSettings() {
</div> </div>
{enabled && ( {enabled && (
<> <>
<div className="grid grid-cols-2 gap-2"> {/* Inline label + intrinsic-width inputs. The previous
<div> `grid-cols-2 + full-width inputs` rendered weirdly on
<Label className="text-[10px] text-muted-foreground">From</Label> iOS Safari (the native time picker centered "22:00"
inside a 200-px box with huge empty margins). flex +
w-24/w-28 keeps the input tight to the HH:MM text on
every viewport and the touch target stays comfortable. */}
<div className="flex flex-wrap items-center gap-x-4 gap-y-2 pt-1">
<div className="flex items-center gap-2">
<Label className="text-xs text-muted-foreground">From</Label>
<Input <Input
type="time" type="time"
value={start} value={start}
onChange={(e) => updateChannel(chName, "quiet_start", e.target.value)} onChange={(e) => updateChannel(chName, "quiet_start", e.target.value)}
disabled={!editMode} disabled={!editMode}
className="h-7 text-xs font-mono" className="h-9 w-28 text-sm font-mono"
/> />
</div> </div>
<div> <div className="flex items-center gap-2">
<Label className="text-[10px] text-muted-foreground">Until</Label> <Label className="text-xs text-muted-foreground">Until</Label>
<Input <Input
type="time" type="time"
value={end} value={end}
onChange={(e) => updateChannel(chName, "quiet_end", e.target.value)} onChange={(e) => updateChannel(chName, "quiet_end", e.target.value)}
disabled={!editMode} disabled={!editMode}
className="h-7 text-xs font-mono" className="h-9 w-28 text-sm font-mono"
/> />
</div> </div>
</div> </div>
<p className="text-[10px] text-muted-foreground"> <p className="text-xs text-muted-foreground">
{sameTime {sameTime
? "Set a different start and end time to activate." ? "Set a different start and end time to activate."
: live : live
@@ -571,11 +577,11 @@ export function NotificationSettings() {
<div className="space-y-2 pt-2 border-t border-border/50"> <div className="space-y-2 pt-2 border-t border-border/50">
<div className="flex items-center justify-between py-1"> <div className="flex items-center justify-between py-1">
<div> <div>
<Label className="text-xs font-medium flex items-center gap-1.5"> <Label className="text-xs sm:text-sm text-foreground/80 flex items-center gap-2">
<Newspaper className="h-3.5 w-3.5 text-violet-400" /> <Newspaper className="h-4 w-4 text-violet-400" />
Daily digest of INFO events Daily digest of INFO events
</Label> </Label>
<p className="text-[10px] text-muted-foreground"> <p className="text-xs text-muted-foreground mt-1">
All INFO events (backups OK, updates available, etc.) accumulate during the day and arrive once at this time as a single summary. CRITICAL and WARNING are never delayed. All INFO events (backups OK, updates available, etc.) accumulate during the day and arrive once at this time as a single summary. CRITICAL and WARNING are never delayed.
</p> </p>
</div> </div>
@@ -596,17 +602,17 @@ export function NotificationSettings() {
</div> </div>
{enabled && ( {enabled && (
<> <>
<div> <div className="flex items-center gap-2 pt-1">
<Label className="text-[10px] text-muted-foreground">Send at</Label> <Label className="text-xs text-muted-foreground">Send at</Label>
<Input <Input
type="time" type="time"
value={time} value={time}
onChange={(e) => updateChannel(chName, "digest_time", e.target.value)} onChange={(e) => updateChannel(chName, "digest_time", e.target.value)}
disabled={!editMode} disabled={!editMode}
className="h-7 text-xs font-mono" className="h-9 w-28 text-sm font-mono"
/> />
</div> </div>
<p className="text-[10px] text-muted-foreground">{nextLabel}</p> <p className="text-xs text-muted-foreground">{nextLabel}</p>
</> </>
)} )}
</div> </div>
+361 -75
View File
@@ -8,7 +8,7 @@ import { Badge } from "./ui/badge"
import { Progress } from "./ui/progress" import { Progress } from "./ui/progress"
import { Button } from "./ui/button" import { Button } from "./ui/button"
import { Dialog, DialogContent, DialogHeader, DialogTitle, DialogFooter, DialogDescription } from "./ui/dialog" import { Dialog, DialogContent, DialogHeader, DialogTitle, DialogFooter, DialogDescription } from "./ui/dialog"
import { Server, Play, Square, Cpu, MemoryStick, HardDrive, Network, Power, RotateCcw, StopCircle, Container, ChevronDown, ChevronUp, Terminal, Archive, Plus, Loader2, Clock, Database, Shield, Bell, FileText, Settings2, Activity } from 'lucide-react' import { Server, Play, Square, Cpu, MemoryStick, HardDrive, Network, Power, RotateCcw, StopCircle, Container, ChevronDown, ChevronUp, ChevronRight, Terminal, Archive, Plus, Loader2, Clock, Database, Shield, Bell, FileText, Settings2, Activity, Package } from 'lucide-react'
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "./ui/select" import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "./ui/select"
import { Checkbox } from "./ui/checkbox" import { Checkbox } from "./ui/checkbox"
import { Textarea } from "./ui/textarea" import { Textarea } from "./ui/textarea"
@@ -19,6 +19,28 @@ import { LxcTerminalModal } from "./lxc-terminal-modal"
import { formatStorage } from "../lib/utils" import { formatStorage } from "../lib/utils"
import { formatNetworkTraffic, getNetworkUnit } from "../lib/format-network" import { formatNetworkTraffic, getNetworkUnit } from "../lib/format-network"
import { fetchApi } from "../lib/api-config" import { fetchApi } from "../lib/api-config"
import DOMPurify from "dompurify"
import { marked } from "marked"
// Sent by /api/vms only for LXC rows, only when the user has enabled
// `lxc_updates_available` notifications. The Monitor populates this
// from managed_installs registry → frontend uses it to render the
// inline update badge + the modal's "Pending updates" section.
// One pending package upgrade reported for an LXC container.
interface LxcPackageUpdate {
// Package name; shown in the per-package list and used as the row key there.
name: string
// Version shown on the left of the list row; the UI prints "—" when empty.
// Presumably the currently installed version — confirm against the backend.
current: string
// Version shown on the right of the list row (presumably the upgrade
// candidate — confirm against the backend).
latest: string
// When true the list row gets a shield icon labelled "Security update".
security: boolean
}
// Per-LXC update summary attached to a VM row as `update_check`.
interface LxcUpdateCheck {
// Gate for all update UI: the badge and the Updates panel render only
// when this is true.
available: boolean
// Total number of pending packages; may exceed `packages.length`, in
// which case the Updates panel shows a summary instead of the list.
count: number
// Number of pending security updates; shown in the badge tooltip and in
// the summary view when greater than zero.
security_count: number
// Timestamp of the last check, parsed with `new Date(...)`; null renders
// as "—".
last_check: string | null
// NOTE(review): not read by the visible UI code — meaning unconfirmed.
latest: string | null
// NOTE(review): not read by the visible UI code — presumably a
// backend-side check failure message; confirm before relying on it.
error: string | null
// Per-package detail rows; the badge tooltip lists the first five names.
packages: LxcPackageUpdate[]
}
interface VMData { interface VMData {
vmid: number vmid: number
@@ -36,6 +58,7 @@ interface VMData {
diskread?: number diskread?: number
diskwrite?: number diskwrite?: number
ip?: string ip?: string
update_check?: LxcUpdateCheck
} }
interface VMConfig { interface VMConfig {
@@ -622,7 +645,7 @@ export function VirtualMachines() {
const [backupPbsChangeMode, setBackupPbsChangeMode] = useState<string>("default") const [backupPbsChangeMode, setBackupPbsChangeMode] = useState<string>("default")
// Tab state for modal // Tab state for modal
const [activeModalTab, setActiveModalTab] = useState<"status" | "mounts" | "backups">("status") const [activeModalTab, setActiveModalTab] = useState<"status" | "mounts" | "backups" | "updates">("status")
// Sprint 13.29: per-LXC mount points lazy-loaded when the user opens // Sprint 13.29: per-LXC mount points lazy-loaded when the user opens
// the LXC modal. We fetch alongside backups (one-shot) so switching // the LXC modal. We fetch alongside backups (one-shot) so switching
// tabs is instantaneous; the cost is small (parses one config file // tabs is instantaneous; the cost is small (parses one config file
@@ -984,6 +1007,74 @@ const handleDownloadLogs = async (vmid: number, vmName: string) => {
// Ensure vmData is always an array (backend may return object on error) // Ensure vmData is always an array (backend may return object on error)
const safeVMData = Array.isArray(vmData) ? vmData : [] const safeVMData = Array.isArray(vmData) ? vmData : []
// Render the violet "N updates" badge for an LXC.
//
// Call sites pass `vm.update_check`; the badge renders nothing unless the
// check reports `available` with a positive `count`.
//
//   uc      — update summary for the container (may be undefined).
//   compact — true for the small mobile-card variant (count only, no
//             "update(s)" label); false for the full-size variant that
//             matches the sibling "Uptime: …" text (text-sm + h-4 icon).
//   onClick — when provided the badge becomes an interactive shortcut
//             (role="button", focusable, trailing chevron). Without it the
//             badge is purely informational.
//
// Colour is violet, the shared accent for "managed updates" across
// notifications and UI. The security count is surfaced in the tooltip
// only; it is not drawn inside the badge itself.
const renderLxcUpdateBadge = (
  uc?: LxcUpdateCheck,
  compact = false,
  onClick?: () => void,
) => {
  if (!uc?.available || !uc.count || uc.count <= 0) return null
  const last = uc.last_check
    ? new Date(uc.last_check).toLocaleString()
    : "—"
  const topNames = (uc.packages || [])
    .slice(0, 5)
    .map((p) => p.name)
    .join(", ")
  const secHint =
    uc.security_count > 0 ? ` · ${uc.security_count} security` : ""
  // Tooltip leads with the action when the badge is clickable so the
  // affordance is explicit on hover — the chevron at the end of the
  // badge reinforces the same signal visually for users who don't
  // hover (mobile).
  const tooltipPrefix = onClick ? "Click to view pending packages · " : ""
  const tooltip = `${tooltipPrefix}Last checked: ${last}${secHint}${topNames ? ` · ${topNames}` : ""}`
  // Compact = mobile card; matches the surrounding 10-12px chrome
  // (ID line, type badge) so the count doesn't visually dominate.
  // Non-compact = desktop card row, sized to match "Uptime: ..." text.
  const sizing = compact
    ? "text-[11px] gap-1 px-1.5 py-0"
    : "text-sm gap-1.5 px-2 py-0.5"
  const iconSize = compact ? "h-3 w-3" : "h-4 w-4"
  // Only soften the bg on hover — no border change, no focus ring.
  // The Badge component's CVA base adds a
  // `focus:ring-2 focus:ring-ring focus:ring-offset-2` (rendered as a
  // white double border on tap/click) — explicitly cancel every piece
  // of it here.
  const clickable = onClick
    ? "cursor-pointer hover:bg-violet-500/20 transition-colors focus:outline-none focus:ring-0 focus:ring-offset-0 focus-visible:outline-none focus-visible:ring-0 focus-visible:ring-offset-0"
    : ""
  // A non-<button> element with role="button" does not get native
  // keyboard activation, so Enter / Space must be wired up by hand or
  // the focusable badge is a dead end for keyboard users.
  // preventDefault stops Space from scrolling the surrounding panel.
  const handleKeyDown = onClick
    ? (e: { key: string; preventDefault: () => void }) => {
        if (e.key === "Enter" || e.key === " ") {
          e.preventDefault()
          onClick()
        }
      }
    : undefined
  return (
    <Badge
      variant="outline"
      className={`bg-violet-500/10 text-violet-400 border-violet-500/30 flex items-center flex-shrink-0 ${sizing} ${clickable}`}
      title={tooltip}
      onClick={onClick}
      onKeyDown={handleKeyDown}
      role={onClick ? "button" : undefined}
      tabIndex={onClick ? 0 : undefined}
    >
      <Package className={iconSize} />
      {uc.count} {compact ? "" : (uc.count === 1 ? "update" : "updates")}
      {/* Chevron only when the badge is wired up as a clickable
          shortcut — its absence on the dashboard card avoids
          implying interactivity where there isn't any (the whole
          row is the click target there). */}
      {onClick && <ChevronRight className={`${iconSize} -mr-0.5 opacity-80`} />}
    </Badge>
  )
}
// Total allocated RAM for ALL VMs/LXCs (running + stopped) // Total allocated RAM for ALL VMs/LXCs (running + stopped)
const totalAllocatedMemoryGB = useMemo(() => { const totalAllocatedMemoryGB = useMemo(() => {
return (safeVMData.reduce((sum, vm) => sum + (vm.maxmem || 0), 0) / 1024 ** 3).toFixed(1) return (safeVMData.reduce((sum, vm) => sum + (vm.maxmem || 0), 0) / 1024 ** 3).toFixed(1)
@@ -1111,67 +1202,57 @@ const handleDownloadLogs = async (vmid: number, vmName: string) => {
return ( return (
<div className="space-y-6"> <div className="space-y-6">
{/*
styled-jsx is scoped by default — it adds a hash class to
selectors so they only match elements rendered by this
component. Content injected via `dangerouslySetInnerHTML`
does NOT get the hash, so descendant selectors like
`div[align="center"]` never matched the helper-script HTML
and notes rendered left-aligned. Wrapping the descendant
selectors in `:global(...)` keeps the parent class scoped
but lets the inner rules apply to the injected HTML.
*/}
<style jsx>{` <style jsx>{`
.proxmenux-notes { .proxmenux-notes {
/* Reset any inherited styles */
all: revert; all: revert;
/* Ensure links display inline */
a {
display: inline-block;
margin-right: 4px;
text-decoration: none;
}
/* Ensure images display inline */
img {
display: inline-block;
vertical-align: middle;
}
/* Ensure paragraphs with links display inline */
p {
margin: 0.5rem 0;
}
/* Override inline width and center the table */
table {
width: auto !important;
margin: 0 auto;
}
/* Ensure divs respect centering */
div[align="center"] {
text-align: center;
}
/* Remove border-left since logo already has the line, keep text left-aligned */
table td:nth-child(2) {
text-align: left;
padding-left: 16px;
}
/* Increase h1 font size for VM name */
table td:nth-child(2) h1 {
text-align: left;
font-size: 2rem;
font-weight: bold;
line-height: 1.2;
}
/* Ensure p in the second cell is left-aligned */
table td:nth-child(2) p {
text-align: left;
}
/* Add separator after tables */
table + p {
margin-top: 1rem;
padding-top: 1rem;
border-top: 1px solid rgba(255, 255, 255, 0.1);
}
} }
.proxmenux-notes :global(a) {
display: inline-block;
margin-right: 4px;
text-decoration: none;
}
.proxmenux-notes :global(img) {
display: inline-block;
vertical-align: middle;
}
.proxmenux-notes :global(p) {
margin: 0.5rem 0;
}
.proxmenux-notes :global(table) {
width: auto !important;
margin: 0 auto;
}
.proxmenux-notes :global(div[align="center"]) {
text-align: center;
}
.proxmenux-notes :global(table td:nth-child(2)) {
text-align: left;
padding-left: 16px;
}
.proxmenux-notes :global(table td:nth-child(2) h1) {
text-align: left;
font-size: 2rem;
font-weight: bold;
line-height: 1.2;
}
.proxmenux-notes :global(table td:nth-child(2) p) {
text-align: left;
}
.proxmenux-notes :global(table + p) {
margin-top: 1rem;
padding-top: 1rem;
border-top: 1px solid rgba(255, 255, 255, 0.1);
}
.proxmenux-notes-plaintext { .proxmenux-notes-plaintext {
white-space: pre-wrap; white-space: pre-wrap;
font-family: monospace; font-family: monospace;
@@ -1364,6 +1445,7 @@ const handleDownloadLogs = async (vmid: number, vmName: string) => {
</span> </span>
)} )}
<span className="text-sm text-muted-foreground ml-auto">Uptime: {formatUptime(vm.uptime)}</span> <span className="text-sm text-muted-foreground ml-auto">Uptime: {formatUptime(vm.uptime)}</span>
{vm.type === "lxc" && renderLxcUpdateBadge(vm.update_check)}
</div> </div>
<div className="grid grid-cols-2 md:grid-cols-5 gap-3"> <div className="grid grid-cols-2 md:grid-cols-5 gap-3">
@@ -1474,7 +1556,10 @@ const handleDownloadLogs = async (vmid: number, vmName: string) => {
{/* Name and ID */} {/* Name and ID */}
<div className="flex-1 min-w-0"> <div className="flex-1 min-w-0">
<div className="font-semibold text-foreground truncate">{vm.name}</div> <div className="font-semibold text-foreground truncate flex items-center gap-1.5">
<span className="truncate">{vm.name}</span>
{vm.type === "lxc" && renderLxcUpdateBadge(vm.update_check, true)}
</div>
<div className="text-[10px] text-muted-foreground">ID: {vm.vmid}</div> <div className="text-[10px] text-muted-foreground">ID: {vm.vmid}</div>
</div> </div>
@@ -1575,6 +1660,17 @@ const handleDownloadLogs = async (vmid: number, vmName: string) => {
Uptime: {formatUptime(selectedVM.uptime)} Uptime: {formatUptime(selectedVM.uptime)}
</span> </span>
)} )}
{/* Clickable badge — the sole entry point to
the Updates panel now that the tab is no
longer in the nav. Full-size so it reads
at the same weight as the surrounding
Uptime / Type / Status chips. */}
{selectedVM.type === "lxc" &&
renderLxcUpdateBadge(
selectedVM.update_check,
false,
() => setActiveModalTab("updates"),
)}
</div> </div>
</> </>
)} )}
@@ -1600,6 +1696,12 @@ const handleDownloadLogs = async (vmid: number, vmName: string) => {
Uptime: {formatUptime(selectedVM.uptime)} Uptime: {formatUptime(selectedVM.uptime)}
</span> </span>
)} )}
{selectedVM.type === "lxc" &&
renderLxcUpdateBadge(
selectedVM.update_check,
false,
() => setActiveModalTab("updates"),
)}
</div> </div>
)} )}
</div> </div>
@@ -1610,7 +1712,7 @@ const handleDownloadLogs = async (vmid: number, vmName: string) => {
<div className="flex border-b border-border px-6 shrink-0"> <div className="flex border-b border-border px-6 shrink-0">
<button <button
onClick={() => setActiveModalTab("status")} onClick={() => setActiveModalTab("status")}
className={`flex items-center gap-2 px-4 py-2.5 text-sm font-medium transition-colors border-b-2 -mb-px ${ className={`flex items-center gap-2 px-4 py-2.5 text-sm font-medium transition-colors border-b-2 -mb-px whitespace-nowrap ${
activeModalTab === "status" activeModalTab === "status"
? "border-cyan-500 text-cyan-500" ? "border-cyan-500 text-cyan-500"
: "border-transparent text-muted-foreground hover:text-foreground" : "border-transparent text-muted-foreground hover:text-foreground"
@@ -1643,7 +1745,7 @@ const handleDownloadLogs = async (vmid: number, vmName: string) => {
)} )}
<button <button
onClick={() => setActiveModalTab("backups")} onClick={() => setActiveModalTab("backups")}
className={`flex items-center gap-2 px-4 py-2.5 text-sm font-medium transition-colors border-b-2 -mb-px ${ className={`flex items-center gap-2 px-4 py-2.5 text-sm font-medium transition-colors border-b-2 -mb-px whitespace-nowrap ${
activeModalTab === "backups" activeModalTab === "backups"
? "border-amber-500 text-amber-500" ? "border-amber-500 text-amber-500"
: "border-transparent text-muted-foreground hover:text-foreground" : "border-transparent text-muted-foreground hover:text-foreground"
@@ -1655,6 +1757,13 @@ const handleDownloadLogs = async (vmid: number, vmName: string) => {
<Badge variant="secondary" className="text-xs h-5 ml-1">{vmBackups.length}</Badge> <Badge variant="secondary" className="text-xs h-5 ml-1">{vmBackups.length}</Badge>
)} )}
</button> </button>
{/* Updates is intentionally NOT a tab in the nav — the
extra tab created a scrolling tab strip on mobile
(especially once Mounts + Backups + Updates piled
up) and the swipe affordance was missed. The
clickable violet badge in the modal header is now
the sole entry point; the Updates content panel
below still mounts when activeModalTab === 'updates'. */}
</div> </div>
<div className="flex-1 overflow-y-auto px-6 py-4 min-h-0"> <div className="flex-1 overflow-y-auto px-6 py-4 min-h-0">
@@ -1929,14 +2038,20 @@ const handleDownloadLogs = async (vmid: number, vmName: string) => {
) : vmDetails.config.description ? ( ) : vmDetails.config.description ? (
<> <>
{(() => { {(() => {
// VM/CT notes are operator-controlled but historically were // VM/CT notes come in two flavours and we mirror the way
// rendered via `dangerouslySetInnerHTML` — a stored XSS sink // the PVE web UI handles each:
// for any user with write access to the VM config (a // • HTML (ProxMenux/community-script helper output with
// non-admin user with PVE permissions, or another admin in // <div align='center'>, tables, logos) → render the
// a multi-admin deployment). We now render the decoded // HTML verbatim. The stable `main` branch did exactly
// notes as plain text inside a <pre> with `white-space: // this with dangerouslySetInnerHTML — we keep that
// pre-wrap` so newlines and indentation are preserved // behaviour but pipe through DOMPurify so the audit
// without interpreting any HTML. See audit Tier 2 #13. // Tier 2 #13 XSS sink stays closed.
// • Plain text / markdown (e.g. qBittorrent's
// `## qBittorrent LXC`) → marked turns it into
// headings + autolinks + line breaks, matching PVE.
// Mixing the two paths breaks the HTML one because marked
// collapses indentation / wraps inline runs and the
// browser then ignores `align="center"`.
let decoded: string let decoded: string
try { try {
decoded = decodeRecursively(vmDetails.config.description) decoded = decodeRecursively(vmDetails.config.description)
@@ -1947,12 +2062,71 @@ const handleDownloadLogs = async (vmid: number, vmName: string) => {
</div> </div>
) )
} }
const looksLikeHtml = /<\/?[a-z][\s\S]*?>/i.test(decoded)
let html: string
if (looksLikeHtml) {
html = decoded
} else {
try {
html = marked.parse(decoded, {
breaks: true,
gfm: true,
async: false,
}) as string
} catch {
html = decoded.replace(/\n/g, "<br>")
}
}
// Promote legacy `align` HTML attribute to a real inline
// `style="text-align: …"` rule. Tailwind / parent CSS,
// styled-jsx scoping quirks and Safari's UA stylesheet
// can all swallow the bare `align` attribute on `<div>`
// (it's HTML4 obsolete syntax). An inline style is
// bullet-proof: highest specificity, no scope hash needed.
const promoteAlignAndHardenLinks = (node: Element) => {
  const a = node.getAttribute?.("align")
  if (a && /^(center|left|right)$/i.test(a)) {
    const cur = node.getAttribute("style") || ""
    const sep = cur && !cur.trim().endsWith(";") ? "; " : ""
    node.setAttribute(
      "style",
      `${cur}${sep}text-align: ${a.toLowerCase()}`,
    )
  }
  // Force `target=_blank` links to open in a new tab
  // safely (noopener prevents reverse-tabnabbing).
  if (node.tagName === "A" && node.getAttribute("target") === "_blank") {
    node.setAttribute("rel", "noopener noreferrer")
  }
}
// DOMPurify hooks live on the shared DOMPurify instance, not on a
// single sanitize() call. Register the hook only for the duration
// of this call and always remove it afterwards, so it neither
// leaks into other DOMPurify users in the app nor requires popping
// a hook that some other module may have registered.
let cleanHtml = ""
DOMPurify.addHook("afterSanitizeAttributes", promoteAlignAndHardenLinks)
try {
  cleanHtml = DOMPurify.sanitize(html, {
    ALLOWED_TAGS: [
      "a", "p", "br", "div", "span",
      "h1", "h2", "h3", "h4", "h5", "h6",
      "img",
      "table", "thead", "tbody", "tr", "th", "td",
      "ul", "ol", "li",
      "strong", "em", "b", "i", "u", "code", "pre",
      "blockquote", "hr",
      "small", "sub", "sup",
    ],
    ALLOWED_ATTR: [
      "href", "src", "alt", "title", "target",
      "rel", "style", "class",
      "align", "width", "height",
      "colspan", "rowspan",
    ],
    ALLOWED_URI_REGEXP:
      /^(?:(?:https?|mailto|data:image\/(?:png|jpeg|jpg|gif|svg\+xml|webp)):|\/|#)/i,
    ADD_ATTR: ["target"],
  })
} finally {
  // removeHook pops the most recently added hook for this entry
  // point, which is the one registered just above.
  DOMPurify.removeHook("afterSanitizeAttributes")
}
return ( return (
<pre <div
className="text-sm text-foreground proxmenux-notes-plaintext font-sans whitespace-pre-wrap break-words m-0" className="text-sm text-foreground proxmenux-notes break-words"
> // eslint-disable-next-line react/no-danger
{decoded} dangerouslySetInnerHTML={{ __html: cleanHtml }}
</pre> />
) )
})()} })()}
</> </>
@@ -2345,6 +2519,118 @@ const handleDownloadLogs = async (vmid: number, vmName: string) => {
</div> </div>
)} )}
{/* Updates Tab — LXC only, conditionally rendered.
Lives in its own tab so the per-package list (up to
30 rows) doesn't blow up the Status tab on mobile.
Violet matches the shared "managed updates" theme. */}
{activeModalTab === "updates" &&
selectedVM?.type === "lxc" &&
selectedVM?.update_check?.available && (
<div className="space-y-4" key={`updates-${selectedVM.vmid}`}>
<Card className="border border-border bg-card/50">
<CardContent className="p-4">
<div className="flex items-center justify-between mb-3 flex-wrap gap-2">
<div className="flex items-center gap-2">
<div className="p-1.5 rounded-md bg-violet-500/10">
<Package className="h-4 w-4 text-violet-400" />
</div>
<h3 className="text-sm font-semibold text-foreground">
Pending package updates
</h3>
</div>
<Badge
variant="outline"
className="text-xs bg-violet-500/10 text-violet-400 border-violet-500/30"
>
{selectedVM.update_check.count} total
</Badge>
</div>
<div className="text-xs text-muted-foreground mb-3 leading-relaxed">
Last checked:{" "}
{selectedVM.update_check.last_check
? new Date(selectedVM.update_check.last_check).toLocaleString()
: "—"}
{" · "}Apply with{" "}
<code className="text-foreground/80">pct enter {selectedVM.vmid}</code>
{" → "}
<code className="text-foreground/80">apt update &amp;&amp; apt upgrade</code>
</div>
{/* Two render modes:
• Full list when every pending package fits
(registry cap is 30 packages per CT — so
CTs with ≤30 updates show every row).
• Summary when the CT has more pending than
the registry stored. Showing 30 random
rows out of 139 misleads the user — a
count + security count + "inspect inside"
hint is honester. */}
{(() => {
const stored = selectedVM.update_check.packages?.length || 0
const total = selectedVM.update_check.count || 0
const sec = selectedVM.update_check.security_count || 0
const truncated = total > stored
if (!truncated && stored > 0) {
return (
<div className="border-t border-border divide-y divide-border/50">
{selectedVM.update_check.packages.map((p) => (
<div
key={p.name}
className="flex flex-col sm:flex-row sm:items-center sm:justify-between gap-0.5 sm:gap-2 py-2 text-sm"
>
<span className="font-mono text-foreground/90 flex items-center gap-2 min-w-0">
{p.security && (
<Shield
className="h-4 w-4 text-green-500 flex-shrink-0"
aria-label="Security update"
/>
)}
<span className="truncate">{p.name}</span>
</span>
<span className="flex items-center gap-1.5 text-muted-foreground flex-shrink-0 font-mono text-xs sm:text-sm">
<span>{p.current || "—"}</span>
<span></span>
<span className="text-foreground">{p.latest}</span>
</span>
</div>
))}
</div>
)
}
// Truncated OR no per-package detail — render a summary.
return (
<div className="border-t border-border pt-3 space-y-2 text-sm">
<div className="flex items-center gap-2">
<Package className="h-4 w-4 text-violet-400 flex-shrink-0" />
<span>
<span className="font-semibold">{total}</span> package
{total === 1 ? "" : "s"} pending
</span>
</div>
{sec > 0 && (
<div className="flex items-center gap-2">
<Shield className="h-4 w-4 text-green-500 flex-shrink-0" />
<span>
<span className="font-semibold">{sec}</span> security update
{sec === 1 ? "" : "s"}
</span>
</div>
)}
<div className="text-xs text-muted-foreground pt-1 leading-relaxed">
Full list available inside the container:{" "}
<code className="text-foreground/80">
pct enter {selectedVM.vmid}
</code>{" "}
{" "}
<code className="text-foreground/80">apt list --upgradable</code>
</div>
</div>
)
})()}
</CardContent>
</Card>
</div>
)}
{/* Sprint 13.29: Mount Points Tab — LXC only. {/* Sprint 13.29: Mount Points Tab — LXC only.
Renders configured mpX entries first, then any Renders configured mpX entries first, then any
ad-hoc NFS/CIFS/SMB mounts found inside the ad-hoc NFS/CIFS/SMB mounts found inside the
+21
View File
@@ -133,6 +133,27 @@ export async function fetchApi<T>(endpoint: string, options?: RequestInit): Prom
} }
throw new Error(`Unauthorized: ${endpoint}`) throw new Error(`Unauthorized: ${endpoint}`)
} }
// Surface the backend's JSON error payload instead of a bare
// `500 INTERNAL SERVER ERROR`. The Flask routes return `{error: "..."}`
// on failure (e.g. /api/vms/<id>/control includes the pvesh stderr —
// telling the user "no space left on device" is far more useful than
// the raw status text).
//
// The detail is only EXTRACTED inside the try; it is thrown after the
// block. Throwing inside the try would land in our own catch and be
// discarded, so the generic status-text error below would always win.
let backendDetail = ""
try {
  const contentType = response.headers.get("content-type") || ""
  if (contentType.includes("application/json")) {
    const payload = await response.json()
    const candidate = payload?.error || payload?.message
    // Only a string is usable as an Error message; anything else would
    // render as "[object Object]".
    if (typeof candidate === "string") {
      backendDetail = candidate.trim()
    }
  }
} catch {
  // Body missing or not valid JSON — fall through to the generic message.
}
if (backendDetail) {
  throw new Error(backendDetail)
}
throw new Error(`API request failed: ${response.status} ${response.statusText}`) throw new Error(`API request failed: ${response.status} ${response.statusText}`)
} }
+3
View File
@@ -43,7 +43,9 @@
"clsx": "^2.1.1", "clsx": "^2.1.1",
"cmdk": "1.0.4", "cmdk": "1.0.4",
"date-fns": "4.1.0", "date-fns": "4.1.0",
"dompurify": "^3.2.7",
"embla-carousel-react": "8.5.1", "embla-carousel-react": "8.5.1",
"marked": "^15.0.7",
"geist": "^1.3.1", "geist": "^1.3.1",
"input-otp": "1.4.1", "input-otp": "1.4.1",
"lucide-react": "^0.454.0", "lucide-react": "^0.454.0",
@@ -66,6 +68,7 @@
"zod": "3.25.67" "zod": "3.25.67"
}, },
"devDependencies": { "devDependencies": {
"@types/dompurify": "^3.0.5",
"@types/node": "^22", "@types/node": "^22",
"@types/react": "^18", "@types/react": "^18",
"@types/react-dom": "^18", "@types/react-dom": "^18",
+119 -4
View File
@@ -1019,10 +1019,16 @@ def _capture_health_journal_context(categories: list, reason: str = '') -> str:
if not pattern: if not pattern:
return "" return ""
# Capture recent journal entries matching keywords # Capture recent journal entries matching keywords.
# Use -b 0 to only include logs from the current boot # Use -b 0 to only include logs from the current boot.
# Filter out the Monitor's own stdout (AppRun, [HealthPersistence],
# proxmenux-auth, etc.) BEFORE keyword matching — otherwise a startup
# line like "[HealthPersistence] Database initialized with 13 tables"
# leaks into the AI context because grep -iE 'ata' matches the
# substring "ata" in "dATAbase". Self-logs are never system evidence.
cmd = ( cmd = (
f"journalctl -b 0 --since='10 minutes ago' --no-pager -n 500 2>/dev/null | " f"journalctl -b 0 --since='10 minutes ago' --no-pager -n 500 2>/dev/null | "
f"grep -vE 'AppRun\\[|proxmenux-auth|\\[HealthPersistence\\]|\\[ProxMenux\\]|\\[NotificationManager\\]|\\[AIEnhancer\\]' | "
f"grep -iE '{pattern}' | tail -n 30" f"grep -iE '{pattern}' | tail -n 30"
) )
@@ -1131,12 +1137,28 @@ def _health_collector_loop():
'updates': 'update_summary', 'updates': 'update_summary',
} }
# Sub-categories already rolled up into details['storage']
# by _check_proxmox_storage_status. Emitting them as their
# own health_degraded entries duplicates the same warning
# (e.g. "Storage Mounts & Space" + "PVE Storage Capacity"
# both saying "PBS-Cloud (pbs) usage ≥70%"). Skip them at
# the notification layer — they still update _prev_statuses
# so a future degradation transition is detected normally.
_STORAGE_SUBCATEGORIES = {
'pve_storage_capacity', 'zfs_pool_capacity',
'lxc_disk', 'lxc_mounts', 'remote_mounts',
}
for cat_key, cat_data in details.items(): for cat_key, cat_data in details.items():
cur_status = cat_data.get('status', 'OK') cur_status = cat_data.get('status', 'OK')
prev_status = _prev_statuses.get(cat_key, 'OK') prev_status = _prev_statuses.get(cat_key, 'OK')
cur_rank = _SEV_RANK.get(cur_status, 0) cur_rank = _SEV_RANK.get(cur_status, 0)
prev_rank = _SEV_RANK.get(prev_status, 0) prev_rank = _SEV_RANK.get(prev_status, 0)
if cat_key in _STORAGE_SUBCATEGORIES:
_prev_statuses[cat_key] = cur_status
continue
if cur_rank > prev_rank and cur_rank >= 2: # WARNING or CRITICAL if cur_rank > prev_rank and cur_rank >= 2: # WARNING or CRITICAL
reason = cat_data.get('reason', f'{cat_key} status changed to {cur_status}') reason = cat_data.get('reason', f'{cat_key} status changed to {cur_status}')
reason_lower = reason.lower() reason_lower = reason.lower()
@@ -4676,10 +4698,50 @@ def get_network_info():
'vm_lxc_total_count': 0 'vm_lxc_total_count': 0
} }
def _get_lxc_update_status_map() -> dict:
"""Read the managed_installs registry and project the LXC update
state into a quick lookup ``{vmid: {available, count, security_count,
last_check, packages[]}}``. Used to decorate ``/api/vms`` output
without forcing the frontend to fetch a second endpoint.
Returns an empty dict if the registry module isn't available or
nothing is registered callers must treat absence as "no info".
"""
try:
import managed_installs
except Exception:
return {}
try:
active = managed_installs.get_active_items() or []
except Exception:
return {}
out: dict = {}
for it in active:
if it.get('type') != 'lxc':
continue
vmid = it.get('_vmid') or it.get('id', '').removeprefix('lxc:')
if not vmid:
continue
update = it.get('update_check') or {}
out[str(vmid)] = {
'available': bool(update.get('available')),
'count': int(update.get('_count') or 0),
'security_count': int(update.get('_security_count') or 0),
'last_check': update.get('last_check'),
'latest': update.get('latest'),
'error': update.get('error'),
# Cap packages list shipped to UI — modal uses first 30 max
'packages': (update.get('_packages') or [])[:30],
}
return out
def get_proxmox_vms(): def get_proxmox_vms():
"""Get Proxmox VM and LXC information (requires pvesh command) - only from local node""" """Get Proxmox VM and LXC information (requires pvesh command) - only from local node"""
try: try:
all_vms = [] all_vms = []
lxc_updates_map = _get_lxc_update_status_map()
try: try:
# local_node = socket.gethostname() # local_node = socket.gethostname()
@@ -4694,11 +4756,12 @@ def get_proxmox_vms():
# print(f"[v0] Skipping VM {resource.get('vmid')} from remote node: {node}") # print(f"[v0] Skipping VM {resource.get('vmid')} from remote node: {node}")
continue continue
vm_type = 'lxc' if resource.get('type') == 'lxc' else 'qemu'
vm_data = { vm_data = {
'vmid': resource.get('vmid'), 'vmid': resource.get('vmid'),
'name': resource.get('name', f"VM-{resource.get('vmid')}"), 'name': resource.get('name', f"VM-{resource.get('vmid')}"),
'status': resource.get('status', 'unknown'), 'status': resource.get('status', 'unknown'),
'type': 'lxc' if resource.get('type') == 'lxc' else 'qemu', 'type': vm_type,
'cpu': resource.get('cpu', 0), 'cpu': resource.get('cpu', 0),
'mem': resource.get('mem', 0), 'mem': resource.get('mem', 0),
'maxmem': resource.get('maxmem', 0), 'maxmem': resource.get('maxmem', 0),
@@ -4710,6 +4773,14 @@ def get_proxmox_vms():
'diskread': resource.get('diskread', 0), 'diskread': resource.get('diskread', 0),
'diskwrite': resource.get('diskwrite', 0) 'diskwrite': resource.get('diskwrite', 0)
} }
# Decorate LXC rows with the apt update status if the
# managed_installs registry has it. Absent key means
# either the user hasn't enabled the feature or the
# CT isn't running / isn't Debian/Ubuntu.
if vm_type == 'lxc':
upd = lxc_updates_map.get(str(resource.get('vmid')))
if upd is not None:
vm_data['update_check'] = upd
all_vms.append(vm_data) all_vms.append(vm_data)
return all_vms return all_vms
@@ -11035,9 +11106,53 @@ def api_vm_control(vmid):
'message': f'Successfully executed {action} on {vm_info.get("name")}' 'message': f'Successfully executed {action} on {vm_info.get("name")}'
}) })
else: else:
# `pvesh` failed → fire the matching vm_fail / ct_fail
# notification so the user gets paged on their channels
# too, not just an in-dashboard alert. Previously this
# path silently returned a 500 to the browser and lost
# the event entirely (reported on .1.10: tried to start
# VM 106 while log2ram tmpfs was full → 500 in the UI
# but no Telegram message). The stderr is the most
# useful single line we have — `pvesh` reliably prints
# the underlying daemon failure there (e.g.
# "start failed: command '/usr/bin/kvm …' failed with
# exit code 1: no space left on device").
err_text = (control_result.stderr or '').strip() \
or (control_result.stdout or '').strip() \
or f'{action} returned exit code {control_result.returncode}'
# Truncate runaway stderr (some pvesh failures dump
# multi-KB tracebacks) — keep the notification readable.
if len(err_text) > 500:
err_text = err_text[:500] + ''
try:
from notification_manager import notification_manager as _nm
import socket as _sock
_host = _sock.gethostname()
event_type = 'ct_fail' if vm_type == 'lxc' else 'vm_fail'
_nm.emit_event(
event_type=event_type,
severity='CRITICAL',
data={
'hostname': _host,
'vmid': str(vmid),
'vmname': vm_info.get('name') or f'{vm_type}-{vmid}',
'reason': f'{action} failed: {err_text}',
'action': action,
},
source='dashboard',
entity='vm',
entity_id=str(vmid),
)
except Exception as _emit_err:
print(f"[api_vm_control] failed to emit {vm_type}_fail "
f"notification: {type(_emit_err).__name__}: {_emit_err}")
return jsonify({ return jsonify({
'success': False, 'success': False,
'error': control_result.stderr 'vmid': vmid,
'action': action,
'error': err_text,
}), 500 }), 500
else: else:
return jsonify({'error': 'Failed to get VM details'}), 500 return jsonify({'error': 'Failed to get VM details'}), 500
+41 -7
View File
@@ -92,7 +92,15 @@ class HealthPersistence:
self.data_dir.mkdir(parents=True, exist_ok=True) self.data_dir.mkdir(parents=True, exist_ok=True)
self.db_path = self.data_dir / 'health_monitor.db' self.db_path = self.data_dir / 'health_monitor.db'
self._db_lock = threading.Lock() # Reentrant lock: `record_disk_observation` acquires this and then
# calls `register_disk` which acquires it again on the same thread.
# With a plain `threading.Lock` that second acquire deadlocks and the
# caller hangs forever — visible symptom on RimegraVE (Pedro Rico
# 19/05): no disk_observation update since the day a thread first
# walked that path. `RLock` allows re-entry from the same thread
# while still serialising cross-thread writes, which is what the
# serialisation rationale (race-free UPSERT dedup) actually wants.
self._db_lock = threading.RLock()
self._init_database() self._init_database()
def _get_conn(self) -> sqlite3.Connection: def _get_conn(self) -> sqlite3.Connection:
@@ -229,6 +237,29 @@ class HealthPersistence:
'ON digest_pending(channel, ts)' 'ON digest_pending(channel, ts)'
) )
# Sibling table for events buffered DURING Quiet Hours. Same
# shape as digest_pending so the existing summary renderer can
# be reused. Kept separate because the lifecycle is different:
# digest_pending flushes once per day at digest_time, while
# quiet_pending flushes once per Quiet Hours close (an arbitrary
# time that depends on the user's window settings).
cursor.execute('''
CREATE TABLE IF NOT EXISTS quiet_pending (
id INTEGER PRIMARY KEY AUTOINCREMENT,
channel TEXT NOT NULL,
event_type TEXT NOT NULL,
event_group TEXT NOT NULL,
severity TEXT NOT NULL,
ts INTEGER NOT NULL,
title TEXT NOT NULL,
body TEXT NOT NULL
)
''')
cursor.execute(
'CREATE INDEX IF NOT EXISTS idx_quiet_pending_channel '
'ON quiet_pending(channel, ts)'
)
# Migration: add missing columns to errors table for existing DBs # Migration: add missing columns to errors table for existing DBs
cursor.execute("PRAGMA table_info(errors)") cursor.execute("PRAGMA table_info(errors)")
columns = [col[1] for col in cursor.fetchall()] columns = [col[1] for col in cursor.fetchall()]
@@ -2289,11 +2320,15 @@ class HealthPersistence:
# Upsert observation: if same (disk, type, signature), bump count + update last timestamp. # Upsert observation: if same (disk, type, signature), bump count + update last timestamp.
# IMPORTANT: Do NOT reset dismissed — if the user dismissed this observation, # IMPORTANT: Do NOT reset dismissed — if the user dismissed this observation,
# re-detecting the same journal entry must not un-dismiss it. Also do not # re-detecting the same journal entry must not un-dismiss it. BUT we DO
# increment the occurrence_count on dismissed rows (audit Tier 5 — once # keep counting + updating last_occurrence even when dismissed, because the
# the user has dismissed, we don't want the counter to keep growing for # responsible-monitoring contract is: every error counts toward the
# journal events that no longer interest them; this also stops the badge # accumulated total shown in the disk modal ("324 connection errors"),
# from drifting upward for dismissed conditions). # even errors of the same signature the user already saw once. Dismissed
# only mutes notifications, NOT the per-disk error history surfaced in the
# UI. Reverting the earlier "WHERE dismissed=0" gate that froze the
# counter and last_occurrence for /dev/sdh on 2026-05-09, leaving 10
# silent days of unreported ATA errors (Pedro Rico, 19/05).
cursor.execute(f''' cursor.execute(f'''
INSERT INTO disk_observations INSERT INTO disk_observations
(disk_registry_id, {type_col}, error_signature, {first_col}, (disk_registry_id, {type_col}, error_signature, {first_col},
@@ -2303,7 +2338,6 @@ class HealthPersistence:
{last_col} = excluded.{last_col}, {last_col} = excluded.{last_col},
occurrence_count = occurrence_count + 1, occurrence_count = occurrence_count + 1,
severity = CASE WHEN excluded.severity = 'critical' THEN 'critical' ELSE severity END severity = CASE WHEN excluded.severity = 'critical' THEN 'critical' ELSE severity END
WHERE dismissed = 0
''', (disk_id, error_type, error_signature, now, now, raw_message, severity)) ''', (disk_id, error_type, error_signature, now, now, raw_message, severity))
conn.commit() conn.commit()
+68 -9
View File
@@ -274,6 +274,12 @@ def _df_via_host_pid(host_pid: str, ct_target: str) -> dict[str, Optional[int]]:
numbers reflect the whole storage pool instead of the per-subvol numbers reflect the whole storage pool instead of the per-subvol
quota — without this the UI showed 851 GB total for a 150 GB ZFS quota — without this the UI showed 851 GB total for a 150 GB ZFS
subvol because pvesm reports the rpool's free space. subvol because pvesm reports the rpool's free space.
Note: this path does NOT measure NFS/CIFS mounts that were set up
from INSIDE the CT (`mount -t nfs` / `/etc/fstab` inside the
container). Those live in the CT's own mount namespace and aren't
visible to the host's `df` even through `/proc/<pid>/root`. Use
`_df_via_pct_exec` for ad-hoc mounts.
""" """
empty = {"total_bytes": None, "used_bytes": None, "available_bytes": None} empty = {"total_bytes": None, "used_bytes": None, "available_bytes": None}
if not host_pid or not ct_target: if not host_pid or not ct_target:
@@ -301,6 +307,44 @@ def _df_via_host_pid(host_pid: str, ct_target: str) -> dict[str, Optional[int]]:
return empty return empty
def _df_via_pct_exec(vmid: str, ct_target: str,
timeout: int = 6) -> dict[str, Optional[int]]:
"""``df`` a path from INSIDE the CT via ``pct exec``. Needed for
ad-hoc NFS/CIFS mounts that live in the CT's own mount namespace
and aren't visible from the host (so `_df_via_host_pid` returns
empty for them).
Heavier than the host-side df (full `pct exec` round-trip ~1-3s),
so we only use it for ad-hoc mounts. The 6s timeout is generous
enough for NFS over slow links but won't drag the request past
the proxy timeout.
"""
empty = {"total_bytes": None, "used_bytes": None, "available_bytes": None}
if not vmid or not ct_target:
return empty
try:
proc = subprocess.run(
[_PCT, "exec", vmid, "--", "df", "-B1",
"--output=size,used,avail", ct_target],
capture_output=True, text=True, timeout=timeout,
)
if proc.returncode != 0:
return empty
lines = [ln for ln in proc.stdout.strip().splitlines() if ln.strip()]
if len(lines) < 2:
return empty
parts = lines[-1].split()
if len(parts) < 3:
return empty
return {
"total_bytes": int(parts[0]),
"used_bytes": int(parts[1]),
"available_bytes": int(parts[2]),
}
except (subprocess.TimeoutExpired, OSError, ValueError):
return empty
def _capacity_for(source: str, classification: dict[str, Any], def _capacity_for(source: str, classification: dict[str, Any],
pve_storages: dict[str, dict[str, Any]], pve_storages: dict[str, dict[str, Any]],
config_options: Optional[dict[str, Any]] = None, config_options: Optional[dict[str, Any]] = None,
@@ -606,14 +650,29 @@ def get_lxc_mount_points(vmid: str) -> dict[str, Any]:
] ]
# Same parallelisation as the configured-mp loop: stat'ing # Same parallelisation as the configured-mp loop: stat'ing
# stale NFS exports serially can dominate the request and # stale NFS exports serially can dominate the request and
# push it past the proxy timeout. # push it past the proxy timeout. Capacity (`df`) is fetched
# in the SAME pool so the UI can render the usage bar for
# ad-hoc NFS/CIFS mounts too — null capacity was a regression
# spotted on CT 103 /mnt/Media. Skip df when stat already
# showed the mount as unreachable, otherwise the df subprocess
# blocks on the same broken export.
if ad_hoc_candidates: if ad_hoc_candidates:
with ThreadPoolExecutor(max_workers=max_workers) as pool: with ThreadPoolExecutor(max_workers=max_workers) as pool:
healths = list(pool.map( def _gather_adhoc(rt):
lambda rt: _stat_via_host(host_pid, rt["rt_target"]), h = _stat_via_host(host_pid, rt["rt_target"])
ad_hoc_candidates, if h.get("reachable"):
)) # NFS/CIFS mounts done inside the CT live in the
for rt, health in zip(ad_hoc_candidates, healths): # container's own mount namespace and aren't
# visible to `df` from the host even via
# /proc/<pid>/root — use `pct exec df` instead.
cap = _df_via_pct_exec(vmid, rt["rt_target"])
else:
cap = {"total_bytes": None, "used_bytes": None,
"available_bytes": None}
return rt, h, cap
results = list(pool.map(_gather_adhoc, ad_hoc_candidates))
for rt, health, cap in results:
ad_hoc.append({ ad_hoc.append({
"mp_index": "", "mp_index": "",
"source": rt["rt_source"], "source": rt["rt_source"],
@@ -624,9 +683,9 @@ def get_lxc_mount_points(vmid: str) -> dict[str, Any]:
"origin_label": rt["rt_source"], "origin_label": rt["rt_source"],
"config_options": {}, "config_options": {},
"config_flags": [], "config_flags": [],
"total_bytes": None, "total_bytes": cap["total_bytes"],
"used_bytes": None, "used_bytes": cap["used_bytes"],
"available_bytes": None, "available_bytes": cap["available_bytes"],
"runtime_mounted": True, "runtime_mounted": True,
"runtime_source": rt["rt_source"], "runtime_source": rt["rt_source"],
"runtime_fstype": rt["rt_fstype"], "runtime_fstype": rt["rt_fstype"],
+328 -1
View File
@@ -189,12 +189,169 @@ def _detect_oci_apps() -> list[dict]:
return out return out
# ── LXC containers (Phase 1: apt-based update detection) ────────────
#
# Each running Debian/Ubuntu CT becomes a registry entry of type "lxc".
# Detection is opt-in: gated on the `lxc_updates_available` notification
# being enabled somewhere, so the heavy `pct exec` work doesn't run on
# hosts where the user hasn't asked for this.
#
# Phase 2 hook: once helper-scripts metadata is integrated, entries can
# carry `_helper_script_app` so the checker swaps generic apt counting
# for app-specific upstream-release tracking (Vaultwarden, Jellyfin,
# etc.). For now every LXC uses the generic apt path.
# Path to the `pct` binary invoked by the LXC detector/checker helpers below.
_PCT_BIN = "/usr/sbin/pct"
# Timeout (seconds) for the package-listing `pct exec` call made by
# `_run_pct_pkg_listing`.
_LXC_EXEC_TIMEOUT_SEC = 10
# Timeout (seconds) for the `/etc/os-release` read made by `_probe_lxc_os`.
_LXC_OS_PROBE_TIMEOUT_SEC = 5
def _lxc_updates_notification_enabled() -> bool:
"""Return True if the user has enabled `lxc_updates_available` on
at least one configured channel. Used to gate the heavy detection
+ checker work — when disabled we don't touch any CT at all.
"""
try:
import notification_manager as _nm_mod
nm = _nm_mod.notification_manager
return bool(nm.is_event_enabled("lxc_updates_available"))
except Exception:
return False
def _list_pve_lxcs() -> list[dict]:
"""Return basic info per LXC on this node via ``pct list``. Each
item is ``{vmid, status, name}``. Empty list on any failure — never
raises so the detector caller can continue.
"""
try:
r = subprocess.run(
[_PCT_BIN, "list"],
capture_output=True, text=True, timeout=5,
)
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
return []
if r.returncode != 0:
return []
out: list[dict] = []
for line in r.stdout.splitlines()[1:]: # skip header row
# `pct list` columns: VMID Status Lock Name
# `Lock` is empty most of the time, so split max 4 ways
parts = line.split(None, 3)
if len(parts) < 2:
continue
vmid = parts[0]
status = parts[1]
# Name is the last column; in unlocked rows the 3rd col may
# be the name itself if Lock was omitted by the formatter.
name = parts[-1] if len(parts) >= 3 else ""
if not vmid.isdigit():
continue
out.append({"vmid": vmid, "status": status, "name": name})
return out
_SUPPORTED_OS_FAMILIES = ("debian", "ubuntu", "alpine")
def _probe_lxc_os(vmid: str) -> Optional[str]:
"""Return a normalized family identifier (``debian`` / ``ubuntu`` /
``alpine``) by reading ``/etc/os-release`` inside the running CT.
Returns None for distributions whose package manager we don't yet
speak — those CTs are skipped in detection so the framework
doesn't keep retrying a checker we can't run.
Cached per CT in the registry — re-probed only when the entry has
no ``_os_family`` yet, since the OS rarely changes for the life of
a CT.
"""
try:
r = subprocess.run(
[_PCT_BIN, "exec", vmid, "--", "cat", "/etc/os-release"],
capture_output=True, text=True,
timeout=_LXC_OS_PROBE_TIMEOUT_SEC,
)
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
return None
if r.returncode != 0:
return None
text = r.stdout.lower()
if "id=ubuntu" in text:
return "ubuntu"
if "id=debian" in text or "id_like=debian" in text:
return "debian"
if "id=alpine" in text:
return "alpine"
# Future Phase 1.5: CentOS/Rocky/Alma (dnf check-update), Arch
# (checkupdates), openSUSE (zypper list-updates). Each needs a
# parser similar to apt/apk — skip silently for now.
return None
def _detect_lxc_containers() -> list[dict]:
"""Enumerate running Debian/Ubuntu CTs as registry entries.
OS detection is cached in the registry entry (`_os_family`), so the
expensive ``pct exec cat /etc/os-release`` only runs the first time
a CT is seen. CT reinstalls with a different OS will keep the old
family cached until the user resets the registry — acceptable
trade-off vs paying the probe cost every 24h cycle.
"""
if not _lxc_updates_notification_enabled():
return []
# Read existing registry so we can preserve cached `_os_family`.
# No lock needed here — we only inspect; the framework holds the
# write lock when it merges back our results in detect_and_register.
try:
existing = _read_registry().get("items", [])
except Exception:
existing = []
existing_by_id = {
it.get("id"): it for it in existing
if isinstance(it, dict) and it.get("type") == "lxc"
}
cts = _list_pve_lxcs()
out: list[dict] = []
for ct in cts:
if ct["status"] != "running":
continue
vmid = ct["vmid"]
cid = f"lxc:{vmid}"
prior = existing_by_id.get(cid) or {}
os_family = prior.get("_os_family")
if not os_family:
os_family = _probe_lxc_os(vmid)
if os_family not in _SUPPORTED_OS_FAMILIES:
# Distribution we don't yet have a package-manager
# parser for. Skip silently. The framework marks any
# existing entry as removed_at if it stops appearing
# in the detector output.
continue
out.append({
"id": cid,
"type": "lxc",
"name": ct.get("name") or f"CT-{vmid}",
"current_version": None, # apt has no single version
"menu_label": None, # user upgrades inside the CT
"menu_script": None,
"_vmid": vmid,
"_os_family": os_family,
# Phase 2 hook: populate `_helper_script_app` here once we
# learn how to read the community-scripts marker.
})
return out
# Detectors registered here. Each returns either a single entry dict # Detectors registered here. Each returns either a single entry dict
# or a list (for sources that yield multiple items, like OCI). The # or a list (for sources that yield multiple items, like OCI). The
# framework normalises both shapes. # framework normalises both shapes.
_DETECTORS: list[Callable[[], Any]] = [ _DETECTORS: list[Callable[[], Any]] = [
_detect_nvidia_xfree86, _detect_nvidia_xfree86,
_detect_oci_apps, _detect_oci_apps,
_detect_lxc_containers,
] ]
@@ -514,9 +671,173 @@ def _check_nvidia_xfree86(entry: dict) -> dict:
} }
def _parse_apt_list_upgradable(text: str) -> list[dict]:
"""Parse the output of ``apt list --upgradable`` into structured rows.
Each upgradable line looks like::
package/release version arch [upgradable from: oldversion]
Returns a list of ``{name, current, latest, security}``. Lines that
can't be parsed are skipped; the header ``Listing...`` is ignored
because it lacks the ``[upgradable`` marker.
"security" flag is detected from the release/suite name (e.g.
``bookworm-security``, ``jammy-security``). Some derivatives don't
use that naming and will report security=False even when patches
are present — acceptable for Phase 1, refined later if needed.
"""
rows: list[dict] = []
for line in text.splitlines():
line = line.strip()
if not line or "[upgradable" not in line or "/" not in line:
continue
try:
head, _, tail = line.partition(" ")
name, _, release = head.partition("/")
tail_parts = tail.split()
if not tail_parts:
continue
new_ver = tail_parts[0]
old_ver = ""
if "from:" in line:
old_ver = line.split("from:", 1)[1].strip().rstrip("]").strip()
release_lower = release.lower()
is_security = "-security" in release_lower or "/security" in release_lower
rows.append({
"name": name,
"current": old_ver,
"latest": new_ver,
"security": is_security,
})
except Exception:
continue
return rows
def _parse_apk_list_upgradable(text: str) -> list[dict]:
"""Parse the output of ``apk list -u`` into structured rows.
Lines look like::
busybox-1.36.1-r29 x86_64 {busybox} (GPL-2.0-only) [upgradable from: busybox-1.36.1-r28]
apk smashes name + version into the leading token, so reliable
name/version splitting requires walking from the right (versions
end in ``-r<num>``). For the badge + notification we only need a
count and a representative sample, so we keep the parser tolerant
and surface the raw token as the package "name". Alpine's main
repos don't expose a separate "security" suite via apk metadata,
so we mark every row as ``security=False`` — security==0 always.
"""
rows: list[dict] = []
for line in text.splitlines():
line = line.strip()
if not line or "[upgradable" not in line:
continue
try:
first_tok = line.split(" ", 1)[0]
old = ""
if "from:" in line:
old = line.split("from:", 1)[1].strip().rstrip("]").strip()
rows.append({
"name": first_tok,
"current": old,
"latest": first_tok,
"security": False,
})
except Exception:
continue
return rows
def _run_pct_pkg_listing(vmid: str, cmd: str) -> tuple[bool, str, str]:
"""Run a package-listing command inside ``vmid`` via ``pct exec``.
Returns ``(ok, stdout, error_message)``. Centralises the timeout
and stderr handling so apt/apk callers stay symmetric.
"""
try:
r = subprocess.run(
[_PCT_BIN, "exec", vmid, "--", "sh", "-c", cmd],
capture_output=True, text=True,
timeout=_LXC_EXEC_TIMEOUT_SEC,
)
except subprocess.TimeoutExpired:
return False, "", f"{cmd.split()[0]} listing timed out"
except (FileNotFoundError, OSError) as e:
return False, "", str(e)
if r.returncode != 0:
return False, "", (r.stderr or "package listing failed").strip()[:200]
return True, r.stdout, ""
def _check_lxc_updates(entry: dict) -> dict:
"""Inspect pending package updates inside the LXC and report them.
Dispatches to the right package-manager parser based on the cached
``_os_family``. Uses the CT's existing metadata cache — never runs
``apt update`` / ``apk update`` from outside, so the user's own
update cadence (unattended-upgrades, cron) is preserved.
The dedup fingerprint (``latest``) combines count, security count
and the sorted top package names so a stable set of pending
updates doesn't re-notify daily, while a meaningfully different
update set does.
"""
vmid = entry.get("_vmid")
family = (entry.get("_os_family") or "").lower()
if not vmid:
return {
"available": False, "latest": None,
"last_check": _now_iso(), "error": "no vmid in entry",
}
if family in ("debian", "ubuntu"):
ok, stdout, err = _run_pct_pkg_listing(
vmid, "apt list --upgradable 2>/dev/null"
)
packages = _parse_apt_list_upgradable(stdout) if ok else []
elif family == "alpine":
ok, stdout, err = _run_pct_pkg_listing(
vmid, "apk list -u 2>/dev/null"
)
packages = _parse_apk_list_upgradable(stdout) if ok else []
else:
return {
"available": False, "latest": None,
"last_check": _now_iso(),
"error": f"unsupported family: {family}",
}
if not ok:
return {
"available": False, "latest": None,
"last_check": _now_iso(), "error": err,
}
count = len(packages)
sec_count = sum(1 for p in packages if p.get("security"))
available = count > 0
latest_fp = None
if available:
top_names = ",".join(sorted(p["name"] for p in packages)[:5])
latest_fp = f"{count}:{sec_count}:{top_names}"
return {
"available": available,
"latest": latest_fp,
"last_check": _now_iso(),
"error": None,
"_count": count,
"_security_count": sec_count,
"_packages": packages[:30], # cap to keep the registry compact
}
_CHECKERS: dict[str, Callable[[dict], dict]] = { _CHECKERS: dict[str, Callable[[dict], dict]] = {
"oci_app": _check_oci_app, "oci_app": _check_oci_app,
"nvidia_xfree86": _check_nvidia_xfree86, "nvidia_xfree86": _check_nvidia_xfree86,
"lxc": _check_lxc_updates,
} }
@@ -562,8 +883,14 @@ def check_for_updates(force: bool = False) -> list[dict]:
} }
if result.get("current") and not it.get("current_version"): if result.get("current") and not it.get("current_version"):
it["current_version"] = result["current"] it["current_version"] = result["current"]
# Per-checker extras carried through into the persisted
# `update_check` blob. Add new keys here when a future
# checker needs to surface fields beyond available/latest.
# `_count` + `_security_count` were missing originally, so
# the LXC checker's counts dropped on the floor and the
# frontend badge couldn't render.
for extra_key in ("_packages", "_upgrade_kind", "_kernel", for extra_key in ("_packages", "_upgrade_kind", "_kernel",
"_kernel_note"): "_kernel_note", "_count", "_security_count"):
if extra_key in result: if extra_key in result:
it["update_check"][extra_key] = result[extra_key] it["update_check"][extra_key] = result[extra_key]
+164 -19
View File
@@ -327,14 +327,27 @@ def is_vzdump_active_on_host() -> bool:
try: try:
with open(_VZDUMP_ACTIVE_FILE, 'r') as f: with open(_VZDUMP_ACTIVE_FILE, 'r') as f:
for line in f: for line in f:
# UPID format: UPID:node:pid:pstart:starttime:type:id:user: # tasks/active row layout (whitespace separated):
# "<UPID> 1" ← running
# "<UPID> 1 <endtime_hex> <STATUS>" ← finished
# PVE leaves finished rows lingering for hours
# sometimes — without the field-count check below the
# PID-recycling case fires a false positive (an
# unrelated process inherited the old vzdump's PID
# and `os.kill(pid, 0)` succeeds).
if ':vzdump:' not in line: if ':vzdump:' not in line:
continue continue
parts = line.strip().split(':') fields = line.split()
if len(parts) < 3: if not fields:
continue
# >2 fields means endtime + status are written → terminated.
if len(fields) > 2:
continue
upid_parts = fields[0].split(':')
if len(upid_parts) < 3:
continue continue
try: try:
pid = int(parts[2], 16) # PID in UPID is hex pid = int(upid_parts[2], 16) # PID in UPID is hex
os.kill(pid, 0) os.kill(pid, 0)
found = True found = True
break break
@@ -1033,21 +1046,28 @@ class JournalWatcher:
else: else:
resolved = re.sub(r'\d+$', '', raw_device) if raw_device.startswith('sd') else raw_device resolved = re.sub(r'\d+$', '', raw_device) if raw_device.startswith('sd') else raw_device
# ── Gate 1: SMART must confirm disk failure ── # ── ALWAYS persist the observation, regardless of SMART ──
# If the disk is healthy (PASSED) or we can't verify # The disk_observation_contract is explicit (memory note
# (UNKNOWN / unresolvable ATA port), do NOT notify. # disk-observation-contract): every kernel-surfaced disk
# error must be recorded in disk_observations *even when
# SMART reports PASSED*. Silent errors on a "healthy" disk
# are exactly the early-warning signal the modal histogram
# exists to surface ("324 connection errors on this disk").
# Previously this line lived AFTER a `return` gate keyed on
# smart_health != 'FAILED', so the 3162 ata8 errors on
# .1.10 (PASSED SMART) all dropped on the floor instead of
# accumulating in the per-disk audit history.
self._record_disk_io_observation(resolved, msg)
# ── Gate 1: only NOTIFY when SMART reports FAILED ──
# Observation is already saved above. We avoid spamming a
# CRITICAL notification for transient ATA/SCSI noise on
# otherwise-healthy disks — the modal histogram surfaces
# those without paging the user at 3 AM.
smart_health = self._quick_smart_health(resolved) smart_health = self._quick_smart_health(resolved)
if smart_health != 'FAILED': if smart_health != 'FAILED':
return return
# ── Persist observation (before the cooldown gate) ──
# The 24h cooldown below only suppresses RE-notification; the
# per-disk observations history must reflect every genuine
# detection. The DB UPSERT dedups same-signature events via
# occurrence_count, so calling this on every match is safe.
# Aligns with the parallel path in HealthMonitor._check_disks_optimized.
self._record_disk_io_observation(resolved, msg)
# ── Gate 2: 24-hour dedup per device ── # ── Gate 2: 24-hour dedup per device ──
# Check both in-memory cache AND the DB (user dismiss clears DB cooldowns). # Check both in-memory cache AND the DB (user dismiss clears DB cooldowns).
# If user dismissed the error, _clear_disk_io_cooldown() removed the DB # If user dismissed the error, _clear_disk_io_cooldown() removed the DB
@@ -1814,10 +1834,29 @@ class TaskWatcher:
line = line.strip() line = line.strip()
if not line: if not line:
continue continue
upid = line.split()[0] if line.split() else line parts = line.split()
if not parts:
continue
upid = parts[0]
current_upids.add(upid) current_upids.add(upid)
if ':vzdump:' in upid: if ':vzdump:' not in upid:
continue
# PVE writes each line in tasks/active as:
# "<UPID> 1" ← task still running
# "<UPID> 1 <endtime_hex> <STATUS>" ← task already finished
# PVE doesn't always prune finished rows from this
# file (observed on RimegraVE 19/05: 25 OK/error
# entries lingering for hours after job end). Just
# matching ':vzdump:' kept `_vzdump_running_since`
# permanently fresh, which then made
# `_is_vzdump_active()` return True forever and
# silenced every vm_start / vm_stop / vm_shutdown
# via the _BACKUP_NOISE filter. Only treat the row
# as a live vzdump when no end-time / status has
# been written yet (≤ 2 fields: UPID + version).
if len(parts) <= 2:
found_vzdump = True found_vzdump = True
# Keep _vzdump_running_since fresh as long as vzdump is in active # Keep _vzdump_running_since fresh as long as vzdump is in active
@@ -2175,6 +2214,16 @@ class PollingCollector:
# has an update". # has an update".
self._last_managed_check = 0 self._last_managed_check = 0
self._notified_managed_updates: dict[str, str] = {} self._notified_managed_updates: dict[str, str] = {}
# LXC notifications are grouped — one event per polling cycle
# covering every running Debian/Ubuntu CT with pending apt
# updates. The fingerprint encodes the per-CT state so a stable
# batch doesn't re-notify while a meaningful change does.
self._notified_lxc_batch: str | None = None
# Track previous state of the LXC-updates notification toggle
# so a user enabling it post-startup bypasses the 24h gate
# ONCE — the next polling cycle runs a fresh detection without
# waiting up to a day. Cleared after the forced run completes.
self._lxc_was_enabled: bool = False
# Track notified ProxMenux versions to avoid duplicates # Track notified ProxMenux versions to avoid duplicates
self._notified_proxmenux_version: str | None = None self._notified_proxmenux_version: str | None = None
self._notified_proxmenux_beta_version: str | None = None self._notified_proxmenux_beta_version: str | None = None
@@ -3101,7 +3150,24 @@ class PollingCollector:
NVIDIA driver → ``nvidia_driver_update_available``, etc.). NVIDIA driver → ``nvidia_driver_update_available``, etc.).
""" """
now = time.time() now = time.time()
if now - self._last_managed_check < self.UPDATE_CHECK_INTERVAL:
# Detect OFF→ON transition of the LXC update toggle. Without
# this, the first polling cycle after service start always sets
# the 24h gate — so a user who enables the toggle later (which
# is the normal flow, since the toggle defaults to OFF) would
# have to wait up to 24h or restart the service before the
# detector ran. A one-shot bypass on the transition fixes that
# without weakening the 24h cadence in steady state.
try:
import managed_installs as _mi
lxc_enabled_now = _mi._lxc_updates_notification_enabled()
except Exception:
lxc_enabled_now = False
lxc_just_enabled = lxc_enabled_now and not self._lxc_was_enabled
self._lxc_was_enabled = lxc_enabled_now
if (not lxc_just_enabled
and now - self._last_managed_check < self.UPDATE_CHECK_INTERVAL):
return return
self._last_managed_check = now self._last_managed_check = now
@@ -3117,8 +3183,15 @@ class PollingCollector:
print(f"[PollingCollector] managed_installs update run failed: {e}") print(f"[PollingCollector] managed_installs update run failed: {e}")
return return
# Split LXC updates out of the per-item event stream — they get
# one grouped notification per cycle instead of one per CT, to
# avoid spamming the user when 15 CTs have pending updates the
# same day. Non-LXC types keep their existing per-item flow.
lxc_updates = [u for u in updates if u.get('type') == 'lxc']
other_updates = [u for u in updates if u.get('type') != 'lxc']
seen_ids: set[str] = set() seen_ids: set[str] = set()
for item in updates: for item in other_updates:
item_id = item.get('id', '') item_id = item.get('id', '')
if not item_id: if not item_id:
continue continue
@@ -3143,6 +3216,17 @@ class PollingCollector:
entity_id=f'managed_{item_id}', entity_id=f'managed_{item_id}',
)) ))
# LXC: emit one grouped event with all CTs that have pending
# updates. The batch fingerprint is recomputed every cycle and
# compared with the last notified one — if the set of CTs or
# their per-CT fingerprints changed, we notify again.
if lxc_updates:
self._emit_lxc_updates_batch(lxc_updates)
else:
# Empty batch — clear the dedup so a fresh batch later fires
# a new notification even with the same CTs/versions.
self._notified_lxc_batch = None
# Forget items that no longer have an update available. If # Forget items that no longer have an update available. If
# the user installs the update and then a later release lands, # the user installs the update and then a later release lands,
# the dedup state is already cleared so the next notification # the dedup state is already cleared so the next notification
@@ -3159,6 +3243,67 @@ class PollingCollector:
if stale_id not in active_with_update: if stale_id not in active_with_update:
self._notified_managed_updates.pop(stale_id, None) self._notified_managed_updates.pop(stale_id, None)
def _emit_lxc_updates_batch(self, items: list[dict]) -> None:
"""Build and queue a single ``lxc_updates_available`` event for
every running CT that currently has pending apt updates.
The batch fingerprint combines every CT's per-CT fingerprint
(count + security_count + top package names). A new CT entering
the set OR an existing CT changing its per-CT fingerprint
produces a new batch fingerprint, so the cooldown is broken and
the event fires. A truly stable batch is silenced via the
equality check below.
"""
# Stable order so the fingerprint is deterministic
items_sorted = sorted(items, key=lambda x: x.get('id', ''))
ct_lines: list[str] = []
per_ct_fps: list[str] = []
total_packages = 0
total_security = 0
for idx, it in enumerate(items_sorted):
update = it.get('update_check', {}) or {}
count = int(update.get('_count') or 0)
sec_count = int(update.get('_security_count') or 0)
total_packages += count
total_security += sec_count
vmid = it.get('_vmid') or it.get('id', '').removeprefix('lxc:') or '?'
name = it.get('name') or f'CT {vmid}'
# Each CT renders across two/three lines so the count and the
# security count don't compete with the CT label on the same
# row — much easier to read in Telegram/Discord at a glance.
# A blank line before every CT except the first separates
# entries cleanly without a trailing blank at the end.
if idx > 0:
ct_lines.append("")
ct_lines.append(f"🏷️ CT {vmid} ({name}):")
ct_lines.append(f" 📦 {count} update(s)")
if sec_count:
ct_lines.append(f" 🔒 {sec_count} security")
per_ct_fps.append(f"{it.get('id', '')}={update.get('latest', '')}")
batch_fingerprint = '|'.join(per_ct_fps)
if self._notified_lxc_batch == batch_fingerprint:
return # same batch as last time — silent
self._notified_lxc_batch = batch_fingerprint
data = {
'hostname': self._hostname,
'count': len(items_sorted),
'total_packages': total_packages,
'security_count': total_security,
'ct_list': '\n'.join(ct_lines),
}
self._queue.put(NotificationEvent(
'lxc_updates_available', 'INFO', data,
source='polling',
entity='node',
# Hash so different batches get distinct cooldown keys
entity_id=f'lxc_batch_{abs(hash(batch_fingerprint)) % 10**10}',
))
def _build_managed_install_event(self, item: dict) -> tuple[str, dict]: def _build_managed_install_event(self, item: dict) -> tuple[str, dict]:
"""Translate a registry item into a (event_type, template_data) """Translate a registry item into a (event_type, template_data)
pair. Per-type bodies live here so the registry stays pair. Per-type bodies live here so the registry stays
+166 -13
View File
@@ -973,6 +973,8 @@ class NotificationManager:
cleanup_interval = 3600 # Cleanup cooldowns every hour cleanup_interval = 3600 # Cleanup cooldowns every hour
flush_interval = 5 # Flush aggregation buckets every 5s flush_interval = 5 # Flush aggregation buckets every 5s
digest_check_interval = 60 # Re-evaluate digest schedule every minute digest_check_interval = 60 # Re-evaluate digest schedule every minute
last_quiet_check = 0.0
quiet_check_interval = 60 # Re-evaluate per-channel quiet window every minute
while self._running: while self._running:
try: try:
@@ -990,6 +992,13 @@ class NotificationManager:
if now_mono - last_digest_check > digest_check_interval: if now_mono - last_digest_check > digest_check_interval:
self._maybe_flush_digests() self._maybe_flush_digests()
last_digest_check = now_mono last_digest_check = now_mono
# Quiet Hours close → flush buffered sub-CRITICAL events
# as a single grouped summary. Has to run even when the
# queue is idle, otherwise users who don't generate any
# events post-window would never see their summary.
if now_mono - last_quiet_check > quiet_check_interval:
self._maybe_flush_quiet_hours()
last_quiet_check = now_mono
continue continue
try: try:
@@ -998,9 +1007,21 @@ class NotificationManager:
print(f"[NotificationManager] Dispatch error: {e}") print(f"[NotificationManager] Dispatch error: {e}")
# Also flush aggregation after each event # Also flush aggregation after each event
if time.monotonic() - last_flush > flush_interval: now_mono = time.monotonic()
if now_mono - last_flush > flush_interval:
self._flush_aggregation() self._flush_aggregation()
last_flush = time.monotonic() last_flush = now_mono
# Re-check digest schedule after each event too. The idle-only
# check above misses the daily flush window when the queue stays
# busy through the digest_time minute (rare but real: a burst of
# journal events arriving at the same minute as the target). The
# 23h guard inside _maybe_flush_digests keeps it idempotent.
if now_mono - last_digest_check > digest_check_interval:
self._maybe_flush_digests()
last_digest_check = now_mono
if now_mono - last_quiet_check > quiet_check_interval:
self._maybe_flush_quiet_hours()
last_quiet_check = now_mono
def _flush_aggregation(self): def _flush_aggregation(self):
"""Flush expired aggregation buckets and dispatch summaries.""" """Flush expired aggregation buckets and dispatch summaries."""
@@ -1171,20 +1192,20 @@ class NotificationManager:
# ── Per-channel quiet hours ── # ── Per-channel quiet hours ──
# The user marks a window (e.g. 22:00 → 06:00) during which only # The user marks a window (e.g. 22:00 → 06:00) during which only
# CRITICAL events reach this channel. Anything below CRITICAL is # CRITICAL events reach this channel. Sub-CRITICAL events are
# dropped silently — not buffered, not retried — because the # **buffered** to `quiet_pending` and flushed as a SINGLE grouped
# whole point is "don't wake me up at 3 AM unless the disk # summary when the window closes — so the user doesn't get
# exploded". CRITICAL always wins. The window is configured # paged at 3 AM but also doesn't lose 8h of activity overnight.
# per-channel; same channel can have different rules from # CRITICAL always wins. The window is configured per-channel.
# another. See _in_quiet_hours() for boundary semantics. # See _in_quiet_hours() for boundary semantics.
# `_dispatch_to_channels` does NOT receive the NotificationEvent # `_dispatch_to_channels` does NOT receive the NotificationEvent
# object — only the rendered primitives. Using `event.X` here # object — only the rendered primitives. Using `event.X` here
# raised `NameError: name 'event' is not defined` for every # raised `NameError` for every event passing through, silenced
# event passing through (silenced by the dispatch loop's broad # by the dispatch loop's broad except → no notifications EVER
# except → no notifications EVER delivered after Quiet Hours + # delivered after Quiet Hours + Daily Digest were merged.
# Daily Digest were merged). All community-reported "stopped
# receiving notifications after update" cases trace back here.
if severity != 'CRITICAL' and self._in_quiet_hours(ch_name): if severity != 'CRITICAL' and self._in_quiet_hours(ch_name):
self._buffer_quiet_event(ch_name, event_type, event_group,
severity, title, body)
continue continue
# ── Per-channel daily digest ── # ── Per-channel daily digest ──
@@ -1537,6 +1558,126 @@ class NotificationManager:
) )
return '\n'.join(lines).rstrip() + '\n' return '\n'.join(lines).rstrip() + '\n'
# ─── Quiet Hours buffer + flush ────────────────────────────
# Reused infrastructure: `quiet_pending` table (created in
# health_persistence) has the same shape as `digest_pending`, so
# `_compose_digest_body` renders the summary unchanged. What
# differs is the lifecycle — quiet_pending flushes when each
# channel's window CLOSES, not at a fixed daily time. We track
# that transition via `self._was_in_quiet_hours[ch_name]`.
def _buffer_quiet_event(self, ch_name: str, event_type: str,
                        event_group: str, severity: str,
                        title: str, body: str) -> None:
    """Append one event to the ``quiet_pending`` SQLite table.

    Args:
        ch_name: channel the event was suppressed for.
        event_type: event type identifier, stored as-is.
        event_group: event group, stored as-is.
        severity: severity label, stored as-is.
        title: rendered notification title.
        body: rendered notification body.

    The row is stamped with the current wall-clock time in whole seconds.
    Any failure is logged and swallowed, so a storage problem never
    propagates to the caller.
    """
    try:
        conn = sqlite3.connect(str(DB_PATH), timeout=10)
        try:
            conn.execute('PRAGMA journal_mode=WAL')
            conn.execute('PRAGMA busy_timeout=5000')
            conn.execute(
                'INSERT INTO quiet_pending '
                '(channel, event_type, event_group, severity, ts, title, body) '
                'VALUES (?, ?, ?, ?, ?, ?, ?)',
                (ch_name, event_type, event_group, severity,
                 int(time.time()), title, body),
            )
            conn.commit()
        finally:
            # Close even when a PRAGMA / INSERT / commit raised, so a
            # failing write does not leak the connection handle.
            conn.close()
    except Exception as e:
        print(f"[NotificationManager] quiet_pending write failed: {e}")
def _maybe_flush_quiet_hours(self) -> None:
"""Detect per-channel quiet-hours close (in→out transition) and
emit one summary notification with everything buffered during
the window. Called every ~60s from the dispatch loop.
State held in-memory: `self._was_in_quiet_hours[ch_name]`. On
first run after restart all channels start as "unknown" — we
seed with the current window status WITHOUT firing a summary,
so a Monitor restart in the middle of someone's quiet window
doesn't trigger a fake close-of-window flush.
"""
if not hasattr(self, '_was_in_quiet_hours'):
self._was_in_quiet_hours = {}
for ch_name, channel in list(self._channels.items()):
currently_in = self._in_quiet_hours(ch_name)
previously_in = self._was_in_quiet_hours.get(ch_name)
self._was_in_quiet_hours[ch_name] = currently_in
# Seed run (no prior state) — don't fire anything.
if previously_in is None:
continue
# Still in the window → just buffer.
if currently_in:
continue
# Was in window, now out → close transition → flush.
if previously_in and not currently_in:
try:
self._flush_quiet_for_channel(ch_name, channel)
except Exception as e:
print(f"[NotificationManager] quiet flush failed for "
f"{ch_name}: {e}")
def _flush_quiet_for_channel(self, ch_name: str, channel: Any) -> None:
    """Send one grouped summary of ``ch_name``'s buffered events, then
    delete the rows that were sent.

    Args:
        ch_name: channel whose ``quiet_pending`` rows are flushed.
        channel: channel object; its ``send(title, body, severity=...,
            data=...)`` method delivers the summary.

    Rows are read oldest-first and rendered with
    ``self._compose_digest_body``. They are deleted only after ``send``
    returns without raising, so a transport failure keeps them in the
    table. Every failure (read, send, cleanup) is logged and swallowed.
    """
    try:
        conn = sqlite3.connect(str(DB_PATH), timeout=10)
        try:
            conn.execute('PRAGMA journal_mode=WAL')
            rows = conn.execute(
                'SELECT id, event_type, event_group, ts, title, body '
                'FROM quiet_pending WHERE channel = ? ORDER BY ts ASC',
                (ch_name,),
            ).fetchall()
        finally:
            # Release the handle even when the PRAGMA or SELECT raised.
            conn.close()
    except Exception as e:
        print(f"[NotificationManager] quiet read failed for {ch_name}: {e}")
        return

    if not rows:
        return

    host = _hostname(self._config)
    summary_title = (
        f"{host}: {len(rows)} events buffered during Quiet Hours"
    )
    summary_body = self._compose_digest_body(rows)

    try:
        channel.send(summary_title, summary_body, severity='INFO',
                     data={'_quiet_hours_summary': True, '_count': len(rows)})
    except Exception as e:
        print(f"[NotificationManager] quiet send failed for "
              f"{ch_name}: {e}")
        return

    # Only drop the rows after a successful send so a transient
    # transport failure (Telegram timeout, SMTP outage) doesn't
    # lose the user's overnight context.
    try:
        ids = [r[0] for r in rows]
        conn = sqlite3.connect(str(DB_PATH), timeout=10)
        try:
            conn.execute('PRAGMA journal_mode=WAL')
            # Delete in bounded chunks: one placeholder per row in a
            # single statement can exceed SQLite's host-parameter limit
            # after a busy night, which would leave the rows in place to
            # be re-sent at every later window close.
            chunk_size = 500
            for start in range(0, len(ids), chunk_size):
                chunk = ids[start:start + chunk_size]
                placeholders = ','.join('?' * len(chunk))
                conn.execute(
                    f'DELETE FROM quiet_pending WHERE id IN ({placeholders})',
                    chunk,
                )
            # Single commit so the cleanup stays all-or-nothing.
            conn.commit()
        finally:
            conn.close()
    except Exception as e:
        print(f"[NotificationManager] quiet cleanup failed for "
              f"{ch_name}: {e}")
def _passes_cooldown(self, event: NotificationEvent) -> bool: def _passes_cooldown(self, event: NotificationEvent) -> bool:
"""Check if the event passes cooldown rules WITHOUT stamping. """Check if the event passes cooldown rules WITHOUT stamping.
@@ -2315,6 +2456,18 @@ class NotificationManager:
ch_cfg: Dict[str, Any] = { ch_cfg: Dict[str, Any] = {
'enabled': self._config.get(f'{ch_type}.enabled', 'false') == 'true', 'enabled': self._config.get(f'{ch_type}.enabled', 'false') == 'true',
'rich_format': self._config.get(f'{ch_type}.rich_format', 'false') == 'true', 'rich_format': self._config.get(f'{ch_type}.rich_format', 'false') == 'true',
# Quiet Hours + Daily Digest live in the same per-channel
# namespace but weren't being projected back to the UI —
# the toggles round-tripped through POST but the GET only
# returned `enabled`/`rich_format` plus channel-specific
# config_keys, so after a reload the user saw the toggle
# off even though the DB had it on. Reported on .1.10
# along with the post-window delivery bug.
'quiet_enabled': self._config.get(f'{ch_type}.quiet_enabled', 'false') == 'true',
'quiet_start': self._config.get(f'{ch_type}.quiet_start', '22:00'),
'quiet_end': self._config.get(f'{ch_type}.quiet_end', '06:00'),
'digest_enabled': self._config.get(f'{ch_type}.digest_enabled', 'false') == 'true',
'digest_time': self._config.get(f'{ch_type}.digest_time', '09:00'),
} }
for config_key in info['config_keys']: for config_key in info['config_keys']:
full_key = f'{ch_type}.{config_key}' full_key = f'{ch_type}.{config_key}'
+41 -10
View File
@@ -484,6 +484,23 @@ TEMPLATES = {
}, },
# ── VM / CT events ── # ── VM / CT events ──
# Phase 1: apt-based update detection inside running Debian/Ubuntu
# LXCs. Grouped — one notification per cycle covers every CT with
# pending updates. Opt-in (default_enabled=False) because the check
# uses `pct exec` to inspect package state inside the user's CTs.
# Phase 2 (community-scripts metadata) will extend this without
# changing the event type.
'lxc_updates_available': {
'title': '{hostname}: {count} LXC(s) with package updates available',
'body': (
'📊 {count} LXC(s) with pending package updates '
'(📦 {total_packages} total, 🔒 {security_count} security):\n\n'
'{ct_list}'
),
'label': 'LXC updates available (experimental)',
'group': 'vm_ct',
'default_enabled': False,
},
'vm_start': { 'vm_start': {
'title': '{hostname}: VM {vmname} ({vmid}) started', 'title': '{hostname}: VM {vmname} ({vmid}) started',
'body': 'Virtual machine {vmname} (ID: {vmid}) is now running.', 'body': 'Virtual machine {vmname} (ID: {vmid}) is now running.',
@@ -1109,8 +1126,8 @@ TEMPLATES = {
'title': '{hostname}: {count} ProxMenux optimization update(s) available', 'title': '{hostname}: {count} ProxMenux optimization update(s) available',
'body': ( 'body': (
'{count} optimization update(s) detected on this host.\n\n' '{count} optimization update(s) detected on this host.\n\n'
'Tools:\n{tool_list}\n\n' '🛠️ Tools:\n{tool_list}\n\n'
'How to apply:\n' '💡 How to apply:\n'
' • ProxMenux Monitor → Settings → ProxMenux Optimizations\n' ' • ProxMenux Monitor → Settings → ProxMenux Optimizations\n'
' • Or run the post-install menu (option 2) → "Apply available updates"' ' • Or run the post-install menu (option 2) → "Apply available updates"'
), ),
@@ -1129,12 +1146,12 @@ TEMPLATES = {
'secure_gateway_update_available': { 'secure_gateway_update_available': {
'title': '{hostname}: {app_name} update available — v{latest_version}', 'title': '{hostname}: {app_name} update available — v{latest_version}',
'body': ( 'body': (
'{app_name} (managed by ProxMenux) has {package_count} package update(s) ' '{app_name} (managed by ProxMenux) has 📦 {package_count} package update(s) '
'pending in its container.\n' 'pending in its container.\n'
'Current Tailscale: v{current_version} → Latest: v{latest_version}\n\n' '🔹 Current Tailscale: v{current_version} 🟢 Latest: v{latest_version}\n\n'
'Open ProxMenux Monitor > Settings > Secure Gateway and click ' '💡 Open ProxMenux Monitor > Settings > Secure Gateway and click '
'"Update" to apply.\n\n' '"Update" to apply.\n\n'
'Packages:\n{package_list}' '🗂️ Packages:\n{package_list}'
), ),
'label': 'Secure Gateway update available', 'label': 'Secure Gateway update available',
'group': 'updates', 'group': 'updates',
@@ -1147,10 +1164,10 @@ TEMPLATES = {
'title': '{hostname}: NVIDIA driver update available — v{latest_version}', 'title': '{hostname}: NVIDIA driver update available — v{latest_version}',
'body': ( 'body': (
'A newer NVIDIA driver compatible with kernel {kernel} is available.\n' 'A newer NVIDIA driver compatible with kernel {kernel} is available.\n'
'Currently installed: v{current_version}\n' '🔹 Currently installed: v{current_version}\n'
'Latest available: v{latest_version}\n\n' '🟢 Latest available: v{latest_version}\n\n'
'{upgrade_reason}\n\n' '{upgrade_reason}\n\n'
'To reinstall:\n' '💡 To reinstall:\n'
' • From the ProxMenux post-install menu: {menu_label}\n\n' ' • From the ProxMenux post-install menu: {menu_label}\n\n'
'Reinstalling rebuilds the DKMS module against the running kernel and ' 'Reinstalling rebuilds the DKMS module against the running kernel and '
'requires a reboot to load the new driver.' 'requires a reboot to load the new driver.'
@@ -1465,6 +1482,7 @@ CATEGORY_EMOJI = {
# Event-specific title icons (override category default when present) # Event-specific title icons (override category default when present)
EVENT_EMOJI = { EVENT_EMOJI = {
# VM / CT # VM / CT
'lxc_updates_available': '\U0001F4E6', # \uD83D\uDCE6 package \u2014 pending CT updates
'vm_start': '\u25B6\uFE0F', # play button 'vm_start': '\u25B6\uFE0F', # play button
'vm_start_warning': '\u26A0\uFE0F', # warning sign - started with warnings 'vm_start_warning': '\u26A0\uFE0F', # warning sign - started with warnings
'vm_stop': '\u23F9\uFE0F', # stop button 'vm_stop': '\u23F9\uFE0F', # stop button
@@ -1768,6 +1786,14 @@ Your job: translate alerts into {language} and enrich them with context when pro
═══ ABSOLUTE CONSTRAINTS (NO EXCEPTIONS) ═══ ═══ ABSOLUTE CONSTRAINTS (NO EXCEPTIONS) ═══
- NO HALLUCINATIONS: Do not invent causes, solutions, or facts not present in the provided data - NO HALLUCINATIONS: Do not invent causes, solutions, or facts not present in the provided data
- NO SPECULATION: If something is unclear, state what IS known, not what MIGHT be - NO SPECULATION: If something is unclear, state what IS known, not what MIGHT be
- NO FILLER LINES: Every output line must derive from the input message, the journal context,
or the known-error database. NEVER add generic statements like "Event detected during normal
operation", "No further issues", or padding lines just to fill space. If a field has no evidence,
OMIT it — a shorter output is always better than invented content.
- 📝 Log lines: ONLY include when the journal context contains an actual relevant log line.
Convey its meaning faithfully, do not invent one. If no relevant log exists, OMIT the 📝 line.
- ⏱️ Duration/timing lines: ONLY for backup/migration durations explicitly present in the input.
NEVER use ⏱️ for vague "event detected at X" filler.
- NO CONVERSATIONAL TEXT: Never write "Here is...", "I've translated...", "Let me explain..." - NO CONVERSATIONAL TEXT: Never write "Here is...", "I've translated...", "Let me explain..."
- ONLY use information from: the message, journal context, and known error database (if provided) - ONLY use information from: the message, journal context, and known error database (if provided)
@@ -1884,7 +1910,12 @@ Your goal is to maintain the original structure of the message while using emoji
ESPECIALLY when adding new context, formatting technical data, or writing tips. ESPECIALLY when adding new context, formatting technical data, or writing tips.
RULES: RULES:
1. PRESERVE BASE STRUCTURE: Respect the original fields and layout provided in the input message. 1. PRESERVE BASE STRUCTURE AND INPUT EMOJIS: Respect the original fields and layout provided in
the input message. **CRITICAL: every emoji already present in the input (📊, 🏷️, 📦, 🔒, 🛠️,
💡, ⚠️, ✨, 🌐, 🔥, 💧, 📝, ⏱️, etc.) MUST appear in the output, in the same position relative
to its label.** Translating the surrounding words is fine; deleting or relocating the emoji is
not. You may add additional context-appropriate emojis from BODY EMOJIS below, but never strip
the ones the template already provides.
2. ENHANCE WITH ICONS: Place emojis at the START of a line to identify the data type. 2. ENHANCE WITH ICONS: Place emojis at the START of a line to identify the data type.
3. NEW CONTEXT: When adding journal info, SMART data, or known errors, use appropriate icons to make it readable. 3. NEW CONTEXT: When adding journal info, SMART data, or known errors, use appropriate icons to make it readable.
4. NO SPAM: Do not put emojis in the middle or end of sentences. Use 1-3 emojis at START of lines where they add clarity. Combine when meaningful (💾✅ backup ok). 4. NO SPAM: Do not put emojis in the middle or end of sentences. Use 1-3 emojis at START of lines where they add clarity. Combine when meaningful (💾✅ backup ok).
+8 -2
View File
@@ -791,7 +791,8 @@ install_normal_version() {
fi fi
for pkg in "${BASIC_DEPS[@]}"; do for pkg in "${BASIC_DEPS[@]}"; do
if ! dpkg -l | grep -qw "$pkg"; then # Strict per-package check — see comment in install_translation_version().
if ! dpkg-query -W -f='${Status}' "$pkg" 2>/dev/null | grep -q "ok installed"; then
if apt-get install -y "$pkg" > /dev/null 2>&1; then if apt-get install -y "$pkg" > /dev/null 2>&1; then
update_config "$pkg" "installed" update_config "$pkg" "installed"
else else
@@ -939,7 +940,12 @@ install_translation_version() {
DEPS=("dialog" "curl" "git" "python3" "python3-venv" "python3-pip") DEPS=("dialog" "curl" "git" "python3" "python3-venv" "python3-pip")
for pkg in "${DEPS[@]}"; do for pkg in "${DEPS[@]}"; do
if ! dpkg -l | grep -qw "$pkg"; then # `dpkg -l | grep -qw "$pkg"` treats `-` as a word boundary, so a
# query for `python3` would falsely match `python3-pip` and skip
# the real `python3` install. `dpkg-query -W -f='${Status}'` asks
# for the EXACT package and reports "install ok installed" only
# when truly present. Issue #205 traced back here.
if ! dpkg-query -W -f='${Status}' "$pkg" 2>/dev/null | grep -q "ok installed"; then
if apt-get install -y "$pkg" > /dev/null 2>&1; then if apt-get install -y "$pkg" > /dev/null 2>&1; then
update_config "$pkg" "installed" update_config "$pkg" "installed"
else else
+5 -1
View File
@@ -569,7 +569,11 @@ install_beta() {
fi fi
for pkg in "${BASIC_DEPS[@]}"; do for pkg in "${BASIC_DEPS[@]}"; do
if ! dpkg -l | grep -qw "$pkg"; then # Strict per-package check — `dpkg -l | grep -qw python3` falsely
# matches `python3-pip` (the `-` is a word boundary), so dpkg-query
# for the EXACT package name is the only reliable test.
# Issue #205.
if ! dpkg-query -W -f='${Status}' "$pkg" 2>/dev/null | grep -q "ok installed"; then
if apt-get install -y "$pkg" > /dev/null 2>&1; then if apt-get install -y "$pkg" > /dev/null 2>&1; then
update_config "$pkg" "installed" update_config "$pkg" "installed"
else else
+204
View File
@@ -997,3 +997,207 @@ pmx_ask_permanent_mount() {
echo "false" echo "false"
fi fi
} }
# ==========================================================
# Inspect the filesystem behind a path inside a CT and report
# which POSIX features it supports. Used by `samba_lxc_server.sh`
# and `nfs_lxc_server.sh` to decide whether traditional
# chown/chmod is enough, ACLs are needed, or the filesystem
# (exFAT, FAT32, NTFS via fuseblk) supports neither — in which
# case the only viable path is configuring the HOST mount with
# `uid=`/`gid=`/`fmask=`/`dmask=` options.
#
# Args:
#   $1 = CTID
#   $2 = path inside the CT (e.g. /mnt/media)
#
# Echoes a single line with 4 tab-separated fields:
#   <fstype>\t<can_chown>\t<can_acl>\t<unprivileged>
# where can_chown / can_acl / unprivileged are "yes" / "no".
# <fstype> is whatever `stat -f -c '%T'` prints inside the CT
# ("unknown" when stat prints nothing). GNU stat reports
# ext2/ext3/ext4 under the shared name "ext2/ext3".
#
# Sample outputs:
#   "ext2/ext3  yes  yes  no"   → ext4 on privileged CT, full POSIX
#   "zfs        yes  no   no"   → ZFS without acltype=posixacl
#   "exfat      no   no   no"   → exFAT, no POSIX semantics at all
#   "ext2/ext3  yes  yes  yes"  → ext4 on unprivileged CT (caller
#                                  must keep in mind chown from
#                                  inside is likely to fail anyway)
# ==========================================================
pmx_detect_share_target_caps() {
    local ctid="$1"
    local path="$2"

    # Filesystem reported by the kernel (NOT what fstab claims —
    # the actual mounted FS as seen from inside the CT).
    local fstype
    fstype=$(pct exec "$ctid" -- stat -f -c '%T' "$path" 2>/dev/null)
    fstype="${fstype:-unknown}"

    local can_chown="yes"
    local can_acl="yes"

    case "$fstype" in
        ext2*|ext3*|ext4*|xfs|btrfs|tmpfs|nfs*|cifs*|smb*)
            # Treated as POSIX-capable without probing.
            # NOTE(review): network filesystems (nfs/cifs/smb) may still
            # refuse chown or ACLs depending on export / mount options —
            # consider routing them through the probes below.
            ;;
        zfs)
            # ZFS supports chown natively, but POSIX ACL only when
            # acltype=posixacl — probe it.
            _pmx_probe_posix_acl "$ctid" "$path" || can_acl="no"
            ;;
        msdos|vfat|exfat|ntfs|fuseblk)
            # These filesystems do not carry POSIX ownership / mode
            # / ACL at all. Permissions come exclusively from the
            # mount-time options (uid=, gid=, fmask=, dmask=).
            can_chown="no"
            can_acl="no"
            ;;
        *)
            # Unknown FS — probe both. chown to the CURRENT owner is a
            # no-op when it succeeds. Numeric IDs (%u:%g) are used
            # because the symbolic form (%U:%G) prints "UNKNOWN" for
            # IDs without a passwd/group entry, which chown rejects
            # and would make the probe fail for the wrong reason.
            local cur_owner
            cur_owner=$(pct exec "$ctid" -- stat -c '%u:%g' "$path" 2>/dev/null)
            if [[ -z "$cur_owner" ]] || ! pct exec "$ctid" -- chown "$cur_owner" "$path" >/dev/null 2>&1; then
                can_chown="no"
            fi
            _pmx_probe_posix_acl "$ctid" "$path" || can_acl="no"
            ;;
    esac

    # CT type — privileged (unprivileged: 0) lets chown / chmod
    # run as effective host root. Unprivileged CTs have a user
    # namespace mapping and chown from inside the CT typically
    # fails on host-side bind mounts.
    local unprivileged
    unprivileged=$(pct config "$ctid" 2>/dev/null | awk -F': ' '/^unprivileged:/ {print $2; exit}')
    local unpriv_flag="no"
    [[ "$unprivileged" == "1" ]] && unpriv_flag="yes"

    printf '%s\t%s\t%s\t%s\n' "$fstype" "$can_chown" "$can_acl" "$unpriv_flag"
}

# Private helper: return 0 when an EXTENDED POSIX ACL entry can be
# stored on path $2 inside CT $1, non-zero otherwise.
#
# A base-only entry such as `u::rwx` is useless as a probe: it is
# equivalent to a plain mode, so setfacl falls back to chmod on
# filesystems without ACL support and reports success (and it also
# rewrites the owner permission bits). A named-user entry (`u:0:rwx`)
# genuinely needs ACL storage. The access ACL is captured with getfacl
# beforehand and written back afterwards so the probe leaves the path
# as it found it.
_pmx_probe_posix_acl() {
    local ctid="$1"
    local path="$2"

    # Make sure the acl tools exist in the CT (best-effort install).
    if ! pct exec "$ctid" -- bash -c "command -v setfacl >/dev/null" 2>/dev/null; then
        pct exec "$ctid" -- bash -c "apt-get install -y -qq acl >/dev/null 2>&1" || true
    fi

    # The path is handed over as a positional argument so that quotes
    # or spaces in it cannot break the inner script.
    pct exec "$ctid" -- bash -c '
        target="$1"
        saved=$(getfacl --access --omit-header --absolute-names -- "$target" 2>/dev/null) || exit 1
        [ -n "$saved" ] || exit 1
        setfacl -m "u:0:rwx" -- "$target" 2>/dev/null || exit 1
        # Put the original access ACL back; a failed restore only leaves
        # an extra rwx entry for root behind, so it is not fatal.
        printf "%s\n" "$saved" | setfacl --set-file=- -- "$target" 2>/dev/null
        exit 0
    ' _ "$path" >/dev/null 2>&1
}
# ==========================================================
# Configure ownership / permissions on a shared mountpoint so
# the given Samba/NFS user can write to it. Branches by the
# filesystem capabilities reported by pmx_detect_share_target_caps.
#
# Args:
#   $1 = CTID
#   $2 = mount point inside the CT
#   $3 = username inside the CT (must already exist). May be empty
#        (NFS path): only the `sharedfiles` group is configured and
#        the per-user write test is skipped.
#
# Returns:
#   0 on success or partial success (warnings shown).
#   1 when the write test for the given user fails — the caller
#     should refuse to proceed.
#
# Expects the global helper `sharedfiles` group to already exist
# in the CT (caller is responsible for that — see
# setup_universal_sharedfiles_group).
# ==========================================================
pmx_setup_share_permissions() {
    local ctid="$1"
    local mp="$2"
    local username="$3"

    # Probe filesystem capabilities.
    local caps fstype can_chown can_acl unpriv
    caps=$(pmx_detect_share_target_caps "$ctid" "$mp")
    IFS=$'\t' read -r fstype can_chown can_acl unpriv <<<"$caps"
    msg_info "$(translate "Detected filesystem at $mp:") $fstype (chown=$can_chown, acl=$can_acl, unprivileged_ct=$unpriv)"

    # Always ensure the user is in the sharedfiles group — this
    # is harmless regardless of FS capabilities. Skip when no user
    # was passed (NFS path: only the group matters, no per-user ACL).
    if [[ -n "$username" ]]; then
        pct exec "$ctid" -- usermod -aG sharedfiles "$username" 2>/dev/null || true
    fi

    # ACL spec — include the user only when one is provided.
    local acl_spec="g:sharedfiles:rwx,m::rwx"
    if [[ -n "$username" ]]; then
        acl_spec="u:$username:rwx,$acl_spec"
    fi

    # Tracks whether at least one permission change really took effect,
    # so the final message never claims a setup that did not happen.
    local applied="no"

    if [[ "$can_chown" == "yes" ]]; then
        # POSIX-friendly filesystem. Set group ownership +
        # setgid bit so new files inherit the group.
        if pct exec "$ctid" -- chown root:sharedfiles "$mp" 2>/dev/null \
           && pct exec "$ctid" -- chmod 2775 "$mp" 2>/dev/null; then
            msg_ok "$(translate "Ownership set to root:sharedfiles with 2775 on:") $mp"
            applied="yes"
        else
            msg_warn "$(translate "chown/chmod failed — likely unprivileged CT against host bind mount. Falling back to ACL.")"
        fi

        if [[ "$can_acl" == "yes" ]]; then
            # Access + default ACL so new files clients create
            # inherit write permission for the sharedfiles group
            # (and the Samba user, when one is provided). Without
            # `-d` (default ACL) the parent's ACL doesn't propagate
            # to children → new files end up with restrictive 755
            # and clients get "permission denied" on the next write.
            # `m::rwx` keeps the ACL mask from clipping rwx grants.
            # Failures stay non-fatal, but success is only reported
            # when both setfacl runs actually succeeded.
            local acl_ok="yes"
            pct exec "$ctid" -- setfacl -R -m "$acl_spec" "$mp" 2>/dev/null || acl_ok="no"
            pct exec "$ctid" -- setfacl -R -d -m "$acl_spec" "$mp" 2>/dev/null || acl_ok="no"
            if [[ "$acl_ok" == "yes" ]]; then
                msg_ok "$(translate "POSIX ACLs applied (access + default for inheritance).")"
                applied="yes"
            else
                msg_warn "$(translate "setfacl reported errors — ACLs may be missing on some entries under:") $mp"
            fi
        else
            msg_warn "$(translate "Filesystem $fstype does not support POSIX ACLs — relying on group ownership only.")"
            if [[ "$fstype" == "zfs" ]]; then
                msg_warn "$(translate "Tip: zfs set acltype=posixacl xattr=sa <pool>/<dataset> enables full ACL support.")"
            fi
        fi
    else
        # exFAT / FAT32 / NTFS-fuse / similar — permissions live
        # entirely in the host mount options. Don't waste cycles
        # trying chown/chmod/setfacl; tell the user what to do
        # and refuse to silently produce a broken share.
        local uid_in_ct="" gid_in_ct
        # Only look the UID up when a user was given; otherwise the
        # hint below falls back to its default.
        if [[ -n "$username" ]]; then
            uid_in_ct=$(pct exec "$ctid" -- id -u "$username" 2>/dev/null)
        fi
        gid_in_ct=$(pct exec "$ctid" -- getent group sharedfiles 2>/dev/null | cut -d: -f3)
        msg_warn "$(translate "Filesystem $fstype does NOT support chown/chmod/ACL.")"
        msg_warn "$(translate "On a privileged CT the mount options carry the only permissions.")"
        msg_warn "$(translate "Stop the CT, unmount the disk on the HOST, and remount with:")"
        echo
        echo "    mount -o uid=${uid_in_ct:-1000},gid=${gid_in_ct:-100},fmask=0002,dmask=0002 <device> <hostpath>"
        echo
        msg_warn "$(translate "Then update /etc/fstab on the host with the same options.")"
        msg_warn "$(translate "Recommendation: reformat the disk to ext4 for a robust setup — see docs.")"
    fi

    # NFS path: no specific user to test as — the NFS server itself
    # decides UID mapping at export time. Report success only when a
    # change was really applied above.
    if [[ -z "$username" ]]; then
        if [[ "$applied" == "yes" ]]; then
            msg_ok "$(translate "Directory configured for sharedfiles group access on:") $mp"
        else
            msg_warn "$(translate "No ownership or ACL change could be applied — clients may receive 'permission denied' on:") $mp"
        fi
        return 0
    fi

    # Verify the user can actually write. `runuser` instead of
    # `su` — `pct exec ... su -` raises 'cannot set groups:
    # Operation not permitted' due to a PAM/cap quirk with the
    # exec entry path; runuser doesn't have that issue.
    # The mount point travels as a positional argument so a path
    # containing quotes cannot break (or inject into) the inner script.
    local has_access
    has_access=$(pct exec "$ctid" -- runuser -u "$username" -- \
        bash -c 'test -w "$1" && echo yes || echo no' _ "$mp" 2>/dev/null)
    if [[ "$has_access" == "yes" ]]; then
        msg_ok "$(translate "Write access verified for user:") $username"
        return 0
    else
        msg_error "$(translate "Write access test FAILED for user:") $username"
        msg_warn "$(translate "Samba/NFS clients will likely receive 'permission denied'. Review the steps above.")"
        return 1
    fi
}
+8 -5
View File
@@ -602,12 +602,9 @@ EOF
install_log2ram_auto() { install_log2ram_auto() {
local FUNC_VERSION="1.2" local FUNC_VERSION="1.2"
# description: Install Log2RAM with size auto-tuned to host RAM (128M/256M/512M); SSD/M.2 detection skips on rotational disks. # description: Install Log2RAM with size auto-tuned to host RAM (128M/256M/512M); SSD/M.2 detection skips on rotational disks.
# ── Reinstall detection ─────────────────────────────────────────────────
# If log2ram was previously installed by ProxMenux, skip hardware detection
# and reinstall directly — no prompts, transparent to user. Sprint 12A:
# also matches the new structured form `{"installed": true, ...}` written by
# the updated register_tool, in addition to the legacy boolean true entry.
if [[ -f "$TOOLS_JSON" ]] && jq -e '.log2ram == true or .log2ram.installed == true' "$TOOLS_JSON" >/dev/null 2>&1; then if [[ -f "$TOOLS_JSON" ]] && jq -e '.log2ram == true or .log2ram.installed == true' "$TOOLS_JSON" >/dev/null 2>&1; then
msg_ok "$(translate "Log2RAM already registered — updating to latest configuration")" msg_ok "$(translate "Log2RAM already registered — updating to latest configuration")"
else else
@@ -854,6 +851,11 @@ EOF
#msg_ok "$(translate "Backup created:") /etc/systemd/journald.conf.bak.$(date +%Y%m%d-%H%M%S)" #msg_ok "$(translate "Backup created:") /etc/systemd/journald.conf.bak.$(date +%Y%m%d-%H%M%S)"
msg_ok "$(translate "Journald configuration adjusted to") ${USE_MB}M (Log2RAM ${LOG2RAM_SIZE})" msg_ok "$(translate "Journald configuration adjusted to") ${USE_MB}M (Log2RAM ${LOG2RAM_SIZE})"
systemctl daemon-reload >/dev/null 2>&1 || true
systemctl restart log2ram >/dev/null 2>&1 || true
log2ram clean >/dev/null 2>&1 || true
log2ram write >/dev/null 2>&1 || true
systemctl restart rsyslog >/dev/null 2>&1 || true
register_tool "log2ram" true "$FUNC_VERSION" register_tool "log2ram" true "$FUNC_VERSION"
} }
@@ -933,6 +935,7 @@ enable_zfs_autotrim() {
fi fi
if ! pool_supports_autotrim "$pool"; then if ! pool_supports_autotrim "$pool"; then
stop_spinner
msg_info2 "$(translate "Pool does not appear to use SSD/NVMe devices with discard support. Skipping ZFS autotrim for pool:") $pool" msg_info2 "$(translate "Pool does not appear to use SSD/NVMe devices with discard support. Skipping ZFS autotrim for pool:") $pool"
continue continue
fi fi
+7 -3
View File
@@ -280,9 +280,13 @@ create_nfs_export() {
msg_info "$(translate "Setting directory ownership and permissions...")" msg_info "$(translate "Setting directory ownership and permissions...")"
pct exec "$CTID" -- chown root:sharedfiles "$MOUNT_POINT" # Hand off ownership/perm setup to the shared helper. It detects the
pct exec "$CTID" -- chmod 2775 "$MOUNT_POINT" # underlying filesystem (ext4/xfs/zfs/exfat/ntfs-fuse/…), picks the
msg_ok "$(translate "Directory configured with sharedfiles group ownership")" # right strategy (chown+chmod, ACLs, or just inform the user when
# the FS can't carry POSIX permissions), and verifies the result
# with `runuser`. Empty username — NFS doesn't authenticate per-user
# the way Samba does; the `sharedfiles` group is all we need.
pmx_setup_share_permissions "$CTID" "$MOUNT_POINT" ""
+29 -60
View File
@@ -173,11 +173,10 @@ create_share() {
if [[ -n "$IS_MOUNTED" ]]; then if [[ -n "$IS_MOUNTED" ]]; then
msg_info "$(translate "Detected a mounted directory from host. Setting up shared group...")" msg_info "$(translate "Detected a mounted directory from host. Setting up shared group...")"
# Match the GID `nfs_lxc_server.sh` uses (101000) so the same # The `sharedfiles` group bridges Samba- and NFS-served paths so a
# `sharedfiles` group bridges Samba- and NFS-served paths. The # file written by one protocol is writable by the other. Fixed GID
# previous `999` was inconsistent — files written via Samba were # 101000 keeps the group ID consistent across CTs / hosts that
# owned by GID 999 and not visible to NFS clients accessing the # share the same mount.
# same dataset. Audit Tier 6 — GID inconsistente.
SHARE_GID=101000 SHARE_GID=101000
GROUP_EXISTS=$(pct exec "$CTID" -- getent group sharedfiles || true) GROUP_EXISTS=$(pct exec "$CTID" -- getent group sharedfiles || true)
GID_IN_USE=$(pct exec "$CTID" -- getent group "$SHARE_GID" | cut -d: -f1 || true) GID_IN_USE=$(pct exec "$CTID" -- getent group "$SHARE_GID" | cut -d: -f1 || true)
@@ -194,64 +193,22 @@ create_share() {
msg_ok "$(translate "Group 'sharedfiles' already exists inside the CT")" msg_ok "$(translate "Group 'sharedfiles' already exists inside the CT")"
fi fi
if pct exec "$CTID" -- getent group sharedfiles >/dev/null; then # Hand off ownership/perm setup to the shared helper. It detects
pct exec "$CTID" -- usermod -aG sharedfiles "$USERNAME" # the underlying filesystem (ext4/xfs/zfs/exfat/ntfs-fuse/…), picks
# chown/chmod on a host bind-mount FAIL with "Operation not # the right strategy (chown+chmod, ACLs, or just inform the user
# permitted" inside an unprivileged CT — the kernel won't let # if the FS can't carry POSIX permissions), and verifies write
# an unprivileged user namespace change ownership of files # access with `runuser` (avoids the `su: cannot set groups`
# that belong to a different (real-host) UID. The host owns # PAM quirk that hits `pct exec`).
# the directory; we only need write access for $USERNAME and pmx_setup_share_permissions "$CTID" "$MOUNT_POINT" "$USERNAME"
# the `sharedfiles` group, which the ACL block below handles.
# Silence the failure so it doesn't look alarming in the log.
pct exec "$CTID" -- chown root:sharedfiles "$MOUNT_POINT" 2>/dev/null || true
pct exec "$CTID" -- chmod 2775 "$MOUNT_POINT" 2>/dev/null || true
else
msg_error "$(translate "Group 'sharedfiles' was not created successfully. Skipping chown/usermod.")"
fi
# Apply BOTH access and default POSIX ACLs unconditionally.
# Previously this ran only when `test -w` failed for $USERNAME —
# but a local `test -w` says nothing about whether Samba can
# write through the share. Once Windows creates a *new* file or
# subfolder, it inherits the parent's effective ACL; without a
# `default:` entry the new entry has no ACL at all and falls
# back to the host bind-mount's restrictive 755 → Windows shows
# "permission denied" even though the same user can write from
# inside the CT shell. The `-d` flag is what fixes that.
# `m::rwx` keeps the ACL mask from clipping rwx grants.
if pct exec "$CTID" -- bash -c "command -v setfacl >/dev/null"; then
pct exec "$CTID" -- setfacl -R \
-m "u:$USERNAME:rwx,g:sharedfiles:rwx,m::rwx" \
"$MOUNT_POINT" 2>/dev/null || true
pct exec "$CTID" -- setfacl -R -d \
-m "u:$USERNAME:rwx,g:sharedfiles:rwx,m::rwx" \
"$MOUNT_POINT" 2>/dev/null || true
else
pct exec "$CTID" -- apt-get install -y -qq acl >/dev/null 2>&1 || true
pct exec "$CTID" -- setfacl -R \
-m "u:$USERNAME:rwx,g:sharedfiles:rwx,m::rwx" \
"$MOUNT_POINT" 2>/dev/null || true
pct exec "$CTID" -- setfacl -R -d \
-m "u:$USERNAME:rwx,g:sharedfiles:rwx,m::rwx" \
"$MOUNT_POINT" 2>/dev/null || true
fi
HAS_ACCESS=$(pct exec "$CTID" -- su -s /bin/bash -c "test -w '$MOUNT_POINT' && echo yes || echo no" "$USERNAME" 2>/dev/null)
if [ "$HAS_ACCESS" = "no" ]; then
msg_warn "$(translate "ACL applied but write test still failed — check host-side permissions of:") $MOUNT_POINT"
else
msg_ok "$(translate "Write access (incl. default ACL for new files) confirmed for user:") $USERNAME"
fi
else else
msg_ok "$(translate "No shared mount detected. Applying standard local access.")" msg_ok "$(translate "No shared mount detected. Applying standard local access.")"
# Local (CT-internal) path — chown/chmod should normally succeed, # Local (CT-internal) path: rootfs is always POSIX-friendly, so
# but on rare bind setups (e.g. zfs with acltype=off) they can # chown/chmod always succeed. Keep the previous behaviour.
# still trip. Suppress stderr to keep the log clean; the pct exec "$CTID" -- chown -R "$USERNAME:$USERNAME" "$MOUNT_POINT"
# write-access probe below is the source of truth. pct exec "$CTID" -- chmod -R 755 "$MOUNT_POINT"
pct exec "$CTID" -- chown -R "$USERNAME:$USERNAME" "$MOUNT_POINT" 2>/dev/null || true
pct exec "$CTID" -- chmod -R 755 "$MOUNT_POINT" 2>/dev/null || true
HAS_ACCESS=$(pct exec "$CTID" -- su -s /bin/bash -c "test -w '$MOUNT_POINT' && echo yes || echo no" "$USERNAME" 2>/dev/null) HAS_ACCESS=$(pct exec "$CTID" -- runuser -u "$USERNAME" -- \
bash -c "test -w '$MOUNT_POINT' && echo yes || echo no" 2>/dev/null)
if [ "$HAS_ACCESS" = "no" ]; then if [ "$HAS_ACCESS" = "no" ]; then
pct exec "$CTID" -- setfacl -R -m "u:$USERNAME:rwx" "$MOUNT_POINT" 2>/dev/null || true pct exec "$CTID" -- setfacl -R -m "u:$USERNAME:rwx" "$MOUNT_POINT" 2>/dev/null || true
msg_warn "$(translate "ACL permissions applied for local access for user:") $USERNAME" msg_warn "$(translate "ACL permissions applied for local access for user:") $USERNAME"
@@ -268,6 +225,14 @@ create_share() {
SHARE_NAME=$(basename "$MOUNT_POINT") SHARE_NAME=$(basename "$MOUNT_POINT")
# `force user = $USERNAME` makes every Samba file operation happen
# under that unix UID regardless of the connecting Windows account.
# Combined with `force group = sharedfiles` and the matching
# ownership / ACLs applied earlier, this is what keeps writes
# consistent on host bind-mounts where the kernel sees Samba's
# impersonated UID — without it Windows can authenticate fine but
# writes silently fail because Samba ends up writing as some other
# mapped UID with no permission on the target.
case "$SHARE_OPTIONS" in case "$SHARE_OPTIONS" in
rw) rw)
CONFIG=$(cat <<EOF CONFIG=$(cat <<EOF
@@ -279,6 +244,7 @@ create_share() {
browseable = yes browseable = yes
guest ok = no guest ok = no
valid users = $USERNAME valid users = $USERNAME
force user = $USERNAME
force group = sharedfiles force group = sharedfiles
create mask = 0664 create mask = 0664
directory mask = 2775 directory mask = 2775
@@ -298,6 +264,7 @@ EOF
browseable = yes browseable = yes
guest ok = no guest ok = no
valid users = $USERNAME valid users = $USERNAME
force user = $USERNAME
force group = sharedfiles force group = sharedfiles
veto files = /lost+found/ veto files = /lost+found/
EOF EOF
@@ -310,6 +277,7 @@ EOF
comment = Custom shared folder for $USERNAME comment = Custom shared folder for $USERNAME
path = $MOUNT_POINT path = $MOUNT_POINT
valid users = $USERNAME valid users = $USERNAME
force user = $USERNAME
force group = sharedfiles force group = sharedfiles
$CUSTOM_CONFIG $CUSTOM_CONFIG
veto files = /lost+found/ veto files = /lost+found/
@@ -326,6 +294,7 @@ EOF
browseable = yes browseable = yes
guest ok = no guest ok = no
valid users = $USERNAME valid users = $USERNAME
force user = $USERNAME
force group = sharedfiles force group = sharedfiles
create mask = 0664 create mask = 0664
directory mask = 2775 directory mask = 2775