mirror of
https://github.com/MacRimi/ProxMenux.git
synced 2026-05-21 16:15:05 +00:00
Update AppImage
This commit is contained in:
Binary file not shown.
@@ -1 +1 @@
|
||||
9315f939f10353d0105a6a2cb8f3c7e21b02620a513b52ce9349a088b95751b8 ProxMenux-1.2.1.1-beta.AppImage
|
||||
150694a49a5b0a4546a2bf5fedcc0914d37666d0cdeac1d9fdc58793c131b4bd ProxMenux-1.2.1.1-beta.AppImage
|
||||
|
||||
@@ -398,31 +398,30 @@ export function HealthThresholds() {
|
||||
if (!leaf) return null
|
||||
const key = pathKey(path)
|
||||
const editingValue = pending[key] ?? String(leaf.value)
|
||||
// Pick the badge palette from the leaf name so warning rows render
|
||||
// amber and critical rows render red. `swap_critical` and any other
|
||||
// *_critical key fall into the red bucket via the substring check.
|
||||
// The input border carries the severity colour so the editable field
|
||||
// itself shows what kind of threshold this is — no separate badge
|
||||
// duplicating the number, which users mistook for the "real" value.
|
||||
// `swap_critical` and any other `*_critical` leaf fall into the red
|
||||
// bucket via the substring check. A blue ring on top of the colour
|
||||
// border signals "customised vs recommended" — two independent
|
||||
// signals on the same widget.
|
||||
const last = path[path.length - 1] || ""
|
||||
const isCritical = last.toLowerCase().includes("critical")
|
||||
const isWarning = last.toLowerCase().includes("warning")
|
||||
const badgeClasses = isCritical
|
||||
? "bg-red-500/10 text-red-500 border-red-500/30"
|
||||
const severityBorder = isCritical
|
||||
? "border-red-500/40 bg-red-500/5 focus-visible:border-red-500"
|
||||
: isWarning
|
||||
? "bg-amber-500/10 text-amber-500 border-amber-500/30"
|
||||
: "bg-muted text-muted-foreground border-border"
|
||||
? "border-amber-500/40 bg-amber-500/5 focus-visible:border-amber-500"
|
||||
: ""
|
||||
const isCustomised = leaf.customised && !(key in pending)
|
||||
const customisedRing = isCustomised ? "ring-2 ring-blue-500/40" : ""
|
||||
const recommendedTooltip = `Recommended: ${leaf.recommended}${leaf.unit}`
|
||||
return (
|
||||
<div key={key} className="flex items-center justify-between gap-2 py-1.5 px-1">
|
||||
<span className="text-xs sm:text-sm text-foreground/90 min-w-0 flex items-center gap-2">
|
||||
<span className="h-1.5 w-1.5 rounded-full bg-blue-500 flex-shrink-0" aria-hidden="true" />
|
||||
<span className="text-xs sm:text-sm text-foreground/90 min-w-0">
|
||||
{label}
|
||||
</span>
|
||||
<div className="flex items-center gap-2 flex-shrink-0">
|
||||
<span
|
||||
className={`inline-flex items-center justify-center h-6 px-2 rounded-md border text-[11px] font-mono tabular-nums ${badgeClasses}`}
|
||||
title="Recommended default value"
|
||||
>
|
||||
{leaf.recommended}
|
||||
{leaf.unit}
|
||||
</span>
|
||||
<Input
|
||||
type="number"
|
||||
min={leaf.min}
|
||||
@@ -430,14 +429,13 @@ export function HealthThresholds() {
|
||||
step={leaf.step}
|
||||
disabled={!editMode}
|
||||
value={editingValue}
|
||||
title={recommendedTooltip}
|
||||
onChange={(e) =>
|
||||
setPending((p) => ({ ...p, [key]: e.target.value }))
|
||||
}
|
||||
className={`w-20 h-7 text-xs text-right tabular-nums ${
|
||||
!editMode ? "opacity-70" : ""
|
||||
} ${
|
||||
leaf.customised && !(key in pending) ? "border-blue-500/40" : ""
|
||||
}`}
|
||||
} ${severityBorder} ${customisedRing}`}
|
||||
/>
|
||||
<span className="text-[11px] text-muted-foreground w-6">{leaf.unit}</span>
|
||||
</div>
|
||||
@@ -507,9 +505,9 @@ export function HealthThresholds() {
|
||||
</div>
|
||||
<CardDescription>
|
||||
The Health Monitor and notifications fire when these thresholds are crossed.
|
||||
Recommended values are shown with their reference color (amber for warning,
|
||||
red for critical); your edits override them. Leave a value unchanged to keep
|
||||
the recommended.
|
||||
Amber inputs are warning levels, red inputs are critical levels. A blue ring
|
||||
marks a value you've customised away from the recommended default — hover the
|
||||
field to see the recommendation, or use Reset to restore it.
|
||||
</CardDescription>
|
||||
</CardHeader>
|
||||
<CardContent>
|
||||
@@ -520,14 +518,22 @@ export function HealthThresholds() {
|
||||
) : !tree ? (
|
||||
<div className="text-sm text-muted-foreground">Failed to load thresholds.</div>
|
||||
) : (
|
||||
<div className="space-y-4">
|
||||
<div>
|
||||
{error && (
|
||||
<div className="flex items-start gap-2 p-2.5 rounded-md bg-red-500/10 border border-red-500/30 text-red-500 text-xs">
|
||||
<div className="mb-4 flex items-start gap-2 p-2.5 rounded-md bg-red-500/10 border border-red-500/30 text-red-500 text-xs">
|
||||
<AlertCircle className="h-4 w-4 flex-shrink-0 mt-0.5" />
|
||||
<div className="flex-1">{error}</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/*
|
||||
Masonry-style flow via CSS columns: cards keep their natural
|
||||
height (CPU = 2 rows, Disk temperature = 8 rows) and the
|
||||
browser packs them top-to-bottom into 1/2/3 columns based on
|
||||
viewport. `break-inside-avoid` keeps each card whole.
|
||||
Mobile (<md) stays single-column as today.
|
||||
*/}
|
||||
<div className="columns-1 md:columns-2 2xl:columns-3 gap-4 space-y-4 [&>*]:break-inside-avoid">
|
||||
{SECTIONS.map((section) => {
|
||||
const Icon = section.icon
|
||||
return (
|
||||
@@ -568,6 +574,7 @@ export function HealthThresholds() {
|
||||
</div>
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</CardContent>
|
||||
|
||||
@@ -492,11 +492,11 @@ export function NotificationSettings() {
|
||||
<div className="space-y-2 pt-2 border-t border-border/50">
|
||||
<div className="flex items-center justify-between py-1">
|
||||
<div>
|
||||
<Label className="text-xs font-medium flex items-center gap-1.5">
|
||||
<Moon className="h-3.5 w-3.5 text-blue-400" />
|
||||
<Label className="text-xs sm:text-sm text-foreground/80 flex items-center gap-2">
|
||||
<Moon className="h-4 w-4 text-blue-400" />
|
||||
Quiet hours
|
||||
</Label>
|
||||
<p className="text-[10px] text-muted-foreground">
|
||||
<p className="text-xs text-muted-foreground mt-1">
|
||||
During this window only CRITICAL events reach this channel.
|
||||
</p>
|
||||
</div>
|
||||
@@ -517,29 +517,35 @@ export function NotificationSettings() {
|
||||
</div>
|
||||
{enabled && (
|
||||
<>
|
||||
<div className="grid grid-cols-2 gap-2">
|
||||
<div>
|
||||
<Label className="text-[10px] text-muted-foreground">From</Label>
|
||||
{/* Inline label + intrinsic-width inputs. The previous
|
||||
`grid-cols-2 + full-width inputs` rendered weirdly on
|
||||
iOS Safari (the native time picker centered "22:00"
|
||||
inside a 200-px box with huge empty margins). flex +
|
||||
w-24/w-28 keeps the input tight to the HH:MM text on
|
||||
every viewport and the touch target stays comfortable. */}
|
||||
<div className="flex flex-wrap items-center gap-x-4 gap-y-2 pt-1">
|
||||
<div className="flex items-center gap-2">
|
||||
<Label className="text-xs text-muted-foreground">From</Label>
|
||||
<Input
|
||||
type="time"
|
||||
value={start}
|
||||
onChange={(e) => updateChannel(chName, "quiet_start", e.target.value)}
|
||||
disabled={!editMode}
|
||||
className="h-7 text-xs font-mono"
|
||||
className="h-9 w-28 text-sm font-mono"
|
||||
/>
|
||||
</div>
|
||||
<div>
|
||||
<Label className="text-[10px] text-muted-foreground">Until</Label>
|
||||
<div className="flex items-center gap-2">
|
||||
<Label className="text-xs text-muted-foreground">Until</Label>
|
||||
<Input
|
||||
type="time"
|
||||
value={end}
|
||||
onChange={(e) => updateChannel(chName, "quiet_end", e.target.value)}
|
||||
disabled={!editMode}
|
||||
className="h-7 text-xs font-mono"
|
||||
className="h-9 w-28 text-sm font-mono"
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
<p className="text-[10px] text-muted-foreground">
|
||||
<p className="text-xs text-muted-foreground">
|
||||
{sameTime
|
||||
? "Set a different start and end time to activate."
|
||||
: live
|
||||
@@ -571,11 +577,11 @@ export function NotificationSettings() {
|
||||
<div className="space-y-2 pt-2 border-t border-border/50">
|
||||
<div className="flex items-center justify-between py-1">
|
||||
<div>
|
||||
<Label className="text-xs font-medium flex items-center gap-1.5">
|
||||
<Newspaper className="h-3.5 w-3.5 text-violet-400" />
|
||||
<Label className="text-xs sm:text-sm text-foreground/80 flex items-center gap-2">
|
||||
<Newspaper className="h-4 w-4 text-violet-400" />
|
||||
Daily digest of INFO events
|
||||
</Label>
|
||||
<p className="text-[10px] text-muted-foreground">
|
||||
<p className="text-xs text-muted-foreground mt-1">
|
||||
All INFO events (backups OK, updates available, etc.) accumulate during the day and arrive once at this time as a single summary. CRITICAL and WARNING are never delayed.
|
||||
</p>
|
||||
</div>
|
||||
@@ -596,17 +602,17 @@ export function NotificationSettings() {
|
||||
</div>
|
||||
{enabled && (
|
||||
<>
|
||||
<div>
|
||||
<Label className="text-[10px] text-muted-foreground">Send at</Label>
|
||||
<div className="flex items-center gap-2 pt-1">
|
||||
<Label className="text-xs text-muted-foreground">Send at</Label>
|
||||
<Input
|
||||
type="time"
|
||||
value={time}
|
||||
onChange={(e) => updateChannel(chName, "digest_time", e.target.value)}
|
||||
disabled={!editMode}
|
||||
className="h-7 text-xs font-mono"
|
||||
className="h-9 w-28 text-sm font-mono"
|
||||
/>
|
||||
</div>
|
||||
<p className="text-[10px] text-muted-foreground">{nextLabel}</p>
|
||||
<p className="text-xs text-muted-foreground">{nextLabel}</p>
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
|
||||
@@ -8,7 +8,7 @@ import { Badge } from "./ui/badge"
|
||||
import { Progress } from "./ui/progress"
|
||||
import { Button } from "./ui/button"
|
||||
import { Dialog, DialogContent, DialogHeader, DialogTitle, DialogFooter, DialogDescription } from "./ui/dialog"
|
||||
import { Server, Play, Square, Cpu, MemoryStick, HardDrive, Network, Power, RotateCcw, StopCircle, Container, ChevronDown, ChevronUp, Terminal, Archive, Plus, Loader2, Clock, Database, Shield, Bell, FileText, Settings2, Activity } from 'lucide-react'
|
||||
import { Server, Play, Square, Cpu, MemoryStick, HardDrive, Network, Power, RotateCcw, StopCircle, Container, ChevronDown, ChevronUp, ChevronRight, Terminal, Archive, Plus, Loader2, Clock, Database, Shield, Bell, FileText, Settings2, Activity, Package } from 'lucide-react'
|
||||
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "./ui/select"
|
||||
import { Checkbox } from "./ui/checkbox"
|
||||
import { Textarea } from "./ui/textarea"
|
||||
@@ -19,6 +19,28 @@ import { LxcTerminalModal } from "./lxc-terminal-modal"
|
||||
import { formatStorage } from "../lib/utils"
|
||||
import { formatNetworkTraffic, getNetworkUnit } from "../lib/format-network"
|
||||
import { fetchApi } from "../lib/api-config"
|
||||
import DOMPurify from "dompurify"
|
||||
import { marked } from "marked"
|
||||
|
||||
// Sent by /api/vms only for LXC rows, only when the user has enabled
|
||||
// `lxc_updates_available` notifications. The Monitor populates this
|
||||
// from managed_installs registry → frontend uses it to render the
|
||||
// inline update badge + the modal's "Pending updates" section.
|
||||
/** One pending package update reported for an LXC container. */
interface LxcPackageUpdate {
|
||||
// Package name; listed per row in the Updates panel and in the badge tooltip.
name: string
|
||||
// Version rendered on the left of the "→" arrow (the UI shows "—" when empty).
current: string
|
||||
// Version rendered on the right of the "→" arrow.
latest: string
|
||||
// When true the row is marked with the "Security update" shield icon.
security: boolean
|
||||
}
|
||||
/** Update-check summary attached to an LXC row (see `VMData.update_check`). */
interface LxcUpdateCheck {
|
||||
// Gate for the UI: the badge and the Updates panel render only when truthy.
available: boolean
|
||||
// Total number of pending updates; may exceed `packages.length`, in which
// case the panel renders a summary instead of the per-package list.
count: number
|
||||
// Number of pending security updates; shown in the tooltip and the summary.
security_count: number
|
||||
// Timestamp string parsed with `new Date(...)`; null is rendered as "—".
last_check: string | null
|
||||
// NOTE(review): not read by the UI code visible here — meaning unconfirmed.
latest: string | null
|
||||
// NOTE(review): presumably the failure message of the last check; not read
// by the UI code visible here — confirm against the backend.
error: string | null
|
||||
// Per-package detail for the pending updates (possibly a truncated subset).
packages: LxcPackageUpdate[]
|
||||
}
|
||||
|
||||
interface VMData {
|
||||
vmid: number
|
||||
@@ -36,6 +58,7 @@ interface VMData {
|
||||
diskread?: number
|
||||
diskwrite?: number
|
||||
ip?: string
|
||||
update_check?: LxcUpdateCheck
|
||||
}
|
||||
|
||||
interface VMConfig {
|
||||
@@ -622,7 +645,7 @@ export function VirtualMachines() {
|
||||
const [backupPbsChangeMode, setBackupPbsChangeMode] = useState<string>("default")
|
||||
|
||||
// Tab state for modal
|
||||
const [activeModalTab, setActiveModalTab] = useState<"status" | "mounts" | "backups">("status")
|
||||
const [activeModalTab, setActiveModalTab] = useState<"status" | "mounts" | "backups" | "updates">("status")
|
||||
// Sprint 13.29: per-LXC mount points lazy-loaded when the user opens
|
||||
// the LXC modal. We fetch alongside backups (one-shot) so switching
|
||||
// tabs is instantaneous; the cost is small (parses one config file
|
||||
@@ -984,6 +1007,74 @@ const handleDownloadLogs = async (vmid: number, vmName: string) => {
|
||||
// Ensure vmData is always an array (backend may return object on error)
|
||||
const safeVMData = Array.isArray(vmData) ? vmData : []
|
||||
|
||||
// Render the "📦 N updates / 🛡 N security" badge next to an LXC in
|
||||
// the dashboard list (desktop card row alongside Uptime, compact
|
||||
// variant on the mobile card). The modal header reuses it with an
|
||||
// `onClick` so the badge doubles as the shortcut to the Updates panel.
|
||||
//
|
||||
// Sizing matches the sibling "Uptime: …" text (text-sm + h-4 icon)
|
||||
// so the row reads as a single visual unit. Colour is violet, the
|
||||
// shared accent for "managed updates" across notifications and UI
|
||||
// (mirrors the Secure Gateway visual treatment). The security count
|
||||
// is not drawn on the badge itself; it only appears in the hover
|
||||
// tooltip (`· N security`).
|
||||
/**
 * Builds the violet "pending updates" badge for an LXC container.
 *
 * @param uc      Update-check payload for the container. Nothing is rendered
 *                unless it is flagged `available` with a positive `count`.
 * @param compact When true, renders the small variant (count only, without
 *                the "update(s)" word).
 * @param onClick Optional activation handler. When supplied, the badge is
 *                exposed as a button (role, tab stop, trailing chevron) and
 *                reacts to click as well as Enter / Space.
 * @returns The Badge element, or `null` when there is nothing to show.
 */
const renderLxcUpdateBadge = (
  uc?: LxcUpdateCheck,
  compact = false,
  onClick?: () => void,
) => {
  if (!uc?.available || !uc.count || uc.count <= 0) return null

  // Guard against an unparseable timestamp, which would otherwise be shown
  // to the user as the literal text "Invalid Date".
  const checkedAt = uc.last_check ? new Date(uc.last_check) : null
  const last =
    checkedAt && !Number.isNaN(checkedAt.getTime())
      ? checkedAt.toLocaleString()
      : "—"

  // Only the first five package names go into the tooltip to keep it short.
  const topNames = (uc.packages || [])
    .slice(0, 5)
    .map((p) => p.name)
    .join(", ")
  const secHint =
    uc.security_count > 0 ? ` · ${uc.security_count} security` : ""

  // Tooltip leads with the action when the badge is clickable so the
  // affordance is explicit on hover — the chevron at the end of the
  // badge reinforces the same signal visually for users who don't
  // hover (mobile).
  const tooltipPrefix = onClick ? "Click to view pending packages · " : ""
  const tooltip = `${tooltipPrefix}Last checked: ${last}${secHint}${topNames ? ` · ${topNames}` : ""}`

  // Compact = mobile card; matches the surrounding 10-12px chrome
  // (ID line, type badge) so the count doesn't visually dominate.
  // Non-compact = desktop card row, sized to match "Uptime: ..." text.
  const sizing = compact
    ? "text-[11px] gap-1 px-1.5 py-0"
    : "text-sm gap-1.5 px-2 py-0.5"
  const iconSize = compact ? "h-3 w-3" : "h-4 w-4"

  // Only soften the bg on hover — no border change, no focus ring.
  // The Badge component's CVA base adds a
  // `focus:ring-2 focus:ring-ring focus:ring-offset-2` (the white
  // double border seen on tap/click) — every piece of it is cancelled here.
  // NOTE(review): this also hides the focus indicator from keyboard users;
  // consider re-adding a `focus-visible:` ring that does not fire on tap.
  const clickable = onClick
    ? "cursor-pointer hover:bg-violet-500/20 transition-colors focus:outline-none focus:ring-0 focus:ring-offset-0 focus-visible:outline-none focus-visible:ring-0 focus-visible:ring-offset-0"
    : ""

  // `role="button"` on a non-<button> element does not get native keyboard
  // activation, so Enter / Space are wired up by hand. Space is
  // default-prevented to stop the surrounding scroll container from paging.
  const handleKeyDown = onClick
    ? (e: { key: string; preventDefault: () => void }) => {
        if (e.key === "Enter" || e.key === " ") {
          e.preventDefault()
          onClick()
        }
      }
    : undefined

  const label = compact
    ? String(uc.count)
    : `${uc.count} ${uc.count === 1 ? "update" : "updates"}`

  return (
    <Badge
      variant="outline"
      className={`bg-violet-500/10 text-violet-400 border-violet-500/30 flex items-center flex-shrink-0 ${sizing} ${clickable}`}
      title={tooltip}
      onClick={onClick}
      onKeyDown={handleKeyDown}
      role={onClick ? "button" : undefined}
      tabIndex={onClick ? 0 : undefined}
    >
      <Package className={iconSize} />
      {label}
      {/* Chevron only when the badge is wired up as a clickable
          shortcut — its absence on the dashboard card avoids
          implying interactivity where there isn't any (the whole
          row is the click target there). */}
      {onClick && <ChevronRight className={`${iconSize} -mr-0.5 opacity-80`} />}
    </Badge>
  )
}
|
||||
|
||||
// Total allocated RAM for ALL VMs/LXCs (running + stopped)
|
||||
const totalAllocatedMemoryGB = useMemo(() => {
|
||||
return (safeVMData.reduce((sum, vm) => sum + (vm.maxmem || 0), 0) / 1024 ** 3).toFixed(1)
|
||||
@@ -1111,67 +1202,57 @@ const handleDownloadLogs = async (vmid: number, vmName: string) => {
|
||||
|
||||
return (
|
||||
<div className="space-y-6">
|
||||
{/*
|
||||
styled-jsx is scoped by default — it adds a hash class to
|
||||
selectors so they only match elements rendered by this
|
||||
component. Content injected via `dangerouslySetInnerHTML`
|
||||
does NOT get the hash, so descendant selectors like
|
||||
`div[align="center"]` never matched the helper-script HTML
|
||||
and notes rendered left-aligned. Wrapping the descendant
|
||||
selectors in `:global(...)` keeps the parent class scoped
|
||||
but lets the inner rules apply to the injected HTML.
|
||||
*/}
|
||||
<style jsx>{`
|
||||
.proxmenux-notes {
|
||||
/* Reset any inherited styles */
|
||||
all: revert;
|
||||
|
||||
/* Ensure links display inline */
|
||||
a {
|
||||
display: inline-block;
|
||||
margin-right: 4px;
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
/* Ensure images display inline */
|
||||
img {
|
||||
display: inline-block;
|
||||
vertical-align: middle;
|
||||
}
|
||||
|
||||
/* Ensure paragraphs with links display inline */
|
||||
p {
|
||||
margin: 0.5rem 0;
|
||||
}
|
||||
|
||||
/* Override inline width and center the table */
|
||||
table {
|
||||
width: auto !important;
|
||||
margin: 0 auto;
|
||||
}
|
||||
|
||||
/* Ensure divs respect centering */
|
||||
div[align="center"] {
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
/* Remove border-left since logo already has the line, keep text left-aligned */
|
||||
table td:nth-child(2) {
|
||||
text-align: left;
|
||||
padding-left: 16px;
|
||||
}
|
||||
|
||||
/* Increase h1 font size for VM name */
|
||||
table td:nth-child(2) h1 {
|
||||
text-align: left;
|
||||
font-size: 2rem;
|
||||
font-weight: bold;
|
||||
line-height: 1.2;
|
||||
}
|
||||
|
||||
/* Ensure p in the second cell is left-aligned */
|
||||
table td:nth-child(2) p {
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
/* Add separator after tables */
|
||||
table + p {
|
||||
margin-top: 1rem;
|
||||
padding-top: 1rem;
|
||||
border-top: 1px solid rgba(255, 255, 255, 0.1);
|
||||
}
|
||||
}
|
||||
|
||||
.proxmenux-notes :global(a) {
|
||||
display: inline-block;
|
||||
margin-right: 4px;
|
||||
text-decoration: none;
|
||||
}
|
||||
.proxmenux-notes :global(img) {
|
||||
display: inline-block;
|
||||
vertical-align: middle;
|
||||
}
|
||||
.proxmenux-notes :global(p) {
|
||||
margin: 0.5rem 0;
|
||||
}
|
||||
.proxmenux-notes :global(table) {
|
||||
width: auto !important;
|
||||
margin: 0 auto;
|
||||
}
|
||||
.proxmenux-notes :global(div[align="center"]) {
|
||||
text-align: center;
|
||||
}
|
||||
.proxmenux-notes :global(table td:nth-child(2)) {
|
||||
text-align: left;
|
||||
padding-left: 16px;
|
||||
}
|
||||
.proxmenux-notes :global(table td:nth-child(2) h1) {
|
||||
text-align: left;
|
||||
font-size: 2rem;
|
||||
font-weight: bold;
|
||||
line-height: 1.2;
|
||||
}
|
||||
.proxmenux-notes :global(table td:nth-child(2) p) {
|
||||
text-align: left;
|
||||
}
|
||||
.proxmenux-notes :global(table + p) {
|
||||
margin-top: 1rem;
|
||||
padding-top: 1rem;
|
||||
border-top: 1px solid rgba(255, 255, 255, 0.1);
|
||||
}
|
||||
.proxmenux-notes-plaintext {
|
||||
white-space: pre-wrap;
|
||||
font-family: monospace;
|
||||
@@ -1364,6 +1445,7 @@ const handleDownloadLogs = async (vmid: number, vmName: string) => {
|
||||
</span>
|
||||
)}
|
||||
<span className="text-sm text-muted-foreground ml-auto">Uptime: {formatUptime(vm.uptime)}</span>
|
||||
{vm.type === "lxc" && renderLxcUpdateBadge(vm.update_check)}
|
||||
</div>
|
||||
|
||||
<div className="grid grid-cols-2 md:grid-cols-5 gap-3">
|
||||
@@ -1474,7 +1556,10 @@ const handleDownloadLogs = async (vmid: number, vmName: string) => {
|
||||
|
||||
{/* Name and ID */}
|
||||
<div className="flex-1 min-w-0">
|
||||
<div className="font-semibold text-foreground truncate">{vm.name}</div>
|
||||
<div className="font-semibold text-foreground truncate flex items-center gap-1.5">
|
||||
<span className="truncate">{vm.name}</span>
|
||||
{vm.type === "lxc" && renderLxcUpdateBadge(vm.update_check, true)}
|
||||
</div>
|
||||
<div className="text-[10px] text-muted-foreground">ID: {vm.vmid}</div>
|
||||
</div>
|
||||
|
||||
@@ -1575,6 +1660,17 @@ const handleDownloadLogs = async (vmid: number, vmName: string) => {
|
||||
Uptime: {formatUptime(selectedVM.uptime)}
|
||||
</span>
|
||||
)}
|
||||
{/* Clickable badge — the sole entry point to
|
||||
the Updates panel now that the tab is no
|
||||
longer in the nav. Full-size so it reads
|
||||
at the same weight as the surrounding
|
||||
Uptime / Type / Status chips. */}
|
||||
{selectedVM.type === "lxc" &&
|
||||
renderLxcUpdateBadge(
|
||||
selectedVM.update_check,
|
||||
false,
|
||||
() => setActiveModalTab("updates"),
|
||||
)}
|
||||
</div>
|
||||
</>
|
||||
)}
|
||||
@@ -1600,6 +1696,12 @@ const handleDownloadLogs = async (vmid: number, vmName: string) => {
|
||||
Uptime: {formatUptime(selectedVM.uptime)}
|
||||
</span>
|
||||
)}
|
||||
{selectedVM.type === "lxc" &&
|
||||
renderLxcUpdateBadge(
|
||||
selectedVM.update_check,
|
||||
false,
|
||||
() => setActiveModalTab("updates"),
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
@@ -1610,7 +1712,7 @@ const handleDownloadLogs = async (vmid: number, vmName: string) => {
|
||||
<div className="flex border-b border-border px-6 shrink-0">
|
||||
<button
|
||||
onClick={() => setActiveModalTab("status")}
|
||||
className={`flex items-center gap-2 px-4 py-2.5 text-sm font-medium transition-colors border-b-2 -mb-px ${
|
||||
className={`flex items-center gap-2 px-4 py-2.5 text-sm font-medium transition-colors border-b-2 -mb-px whitespace-nowrap ${
|
||||
activeModalTab === "status"
|
||||
? "border-cyan-500 text-cyan-500"
|
||||
: "border-transparent text-muted-foreground hover:text-foreground"
|
||||
@@ -1643,7 +1745,7 @@ const handleDownloadLogs = async (vmid: number, vmName: string) => {
|
||||
)}
|
||||
<button
|
||||
onClick={() => setActiveModalTab("backups")}
|
||||
className={`flex items-center gap-2 px-4 py-2.5 text-sm font-medium transition-colors border-b-2 -mb-px ${
|
||||
className={`flex items-center gap-2 px-4 py-2.5 text-sm font-medium transition-colors border-b-2 -mb-px whitespace-nowrap ${
|
||||
activeModalTab === "backups"
|
||||
? "border-amber-500 text-amber-500"
|
||||
: "border-transparent text-muted-foreground hover:text-foreground"
|
||||
@@ -1655,6 +1757,13 @@ const handleDownloadLogs = async (vmid: number, vmName: string) => {
|
||||
<Badge variant="secondary" className="text-xs h-5 ml-1">{vmBackups.length}</Badge>
|
||||
)}
|
||||
</button>
|
||||
{/* Updates is intentionally NOT a tab in the nav — the
|
||||
extra tab created a scrolling tab strip on mobile
|
||||
(especially once Mounts + Backups + Updates piled
|
||||
up) and the swipe affordance was missed. The
|
||||
clickable violet badge in the modal header is now
|
||||
the sole entry point; the Updates content panel
|
||||
below still mounts when activeModalTab === 'updates'. */}
|
||||
</div>
|
||||
|
||||
<div className="flex-1 overflow-y-auto px-6 py-4 min-h-0">
|
||||
@@ -1929,14 +2038,20 @@ const handleDownloadLogs = async (vmid: number, vmName: string) => {
|
||||
) : vmDetails.config.description ? (
|
||||
<>
|
||||
{(() => {
|
||||
// VM/CT notes are operator-controlled but historically were
|
||||
// rendered via `dangerouslySetInnerHTML` — a stored XSS sink
|
||||
// for any user with write access to the VM config (a
|
||||
// non-admin user with PVE permissions, or another admin in
|
||||
// a multi-admin deployment). We now render the decoded
|
||||
// notes as plain text inside a <pre> with `white-space:
|
||||
// pre-wrap` so newlines and indentation are preserved
|
||||
// without interpreting any HTML. See audit Tier 2 #13.
|
||||
// VM/CT notes come in two flavours and we mirror the way
|
||||
// the PVE web UI handles each:
|
||||
// • HTML (ProxMenux/community-script helper output with
|
||||
// <div align='center'>, tables, logos) → render the
|
||||
// HTML verbatim. The stable `main` branch did exactly
|
||||
// this with dangerouslySetInnerHTML — we keep that
|
||||
// behaviour but pipe through DOMPurify so the audit
|
||||
// Tier 2 #13 XSS sink stays closed.
|
||||
// • Plain text / markdown (e.g. qBittorrent's
|
||||
// `## qBittorrent LXC`) → marked turns it into
|
||||
// headings + autolinks + line breaks, matching PVE.
|
||||
// Mixing the two paths breaks the HTML one because marked
|
||||
// collapses indentation / wraps inline runs and the
|
||||
// browser then ignores `align="center"`.
|
||||
let decoded: string
|
||||
try {
|
||||
decoded = decodeRecursively(vmDetails.config.description)
|
||||
@@ -1947,12 +2062,71 @@ const handleDownloadLogs = async (vmid: number, vmName: string) => {
|
||||
</div>
|
||||
)
|
||||
}
|
||||
const looksLikeHtml = /<\/?[a-z][\s\S]*?>/i.test(decoded)
|
||||
let html: string
|
||||
if (looksLikeHtml) {
|
||||
html = decoded
|
||||
} else {
|
||||
try {
|
||||
html = marked.parse(decoded, {
|
||||
breaks: true,
|
||||
gfm: true,
|
||||
async: false,
|
||||
}) as string
|
||||
} catch {
|
||||
html = decoded.replace(/\n/g, "<br>")
|
||||
}
|
||||
}
|
||||
// Promote legacy `align` HTML attribute to a real inline
|
||||
// `style="text-align: …"` rule. Tailwind / parent CSS,
|
||||
// styled-jsx scoping quirks and Safari's UA stylesheet
|
||||
// can all swallow the bare `align` attribute on `<div>`
|
||||
// (it's HTML4 obsolete syntax). An inline style is
|
||||
// bullet-proof: highest specificity, no scope hash needed.
|
||||
DOMPurify.removeHook("afterSanitizeAttributes")
|
||||
DOMPurify.addHook("afterSanitizeAttributes", (node: Element) => {
|
||||
const a = node.getAttribute?.("align")
|
||||
if (a && /^(center|left|right)$/i.test(a)) {
|
||||
const cur = node.getAttribute("style") || ""
|
||||
const sep = cur && !cur.trim().endsWith(";") ? "; " : ""
|
||||
node.setAttribute(
|
||||
"style",
|
||||
`${cur}${sep}text-align: ${a.toLowerCase()}`,
|
||||
)
|
||||
}
|
||||
// Force `target=_blank` links to open in a new tab
|
||||
// safely (noopener prevents reverse-tabnabbing).
|
||||
if (node.tagName === "A" && node.getAttribute("target") === "_blank") {
|
||||
node.setAttribute("rel", "noopener noreferrer")
|
||||
}
|
||||
})
|
||||
const cleanHtml = DOMPurify.sanitize(html, {
|
||||
ALLOWED_TAGS: [
|
||||
"a", "p", "br", "div", "span",
|
||||
"h1", "h2", "h3", "h4", "h5", "h6",
|
||||
"img",
|
||||
"table", "thead", "tbody", "tr", "th", "td",
|
||||
"ul", "ol", "li",
|
||||
"strong", "em", "b", "i", "u", "code", "pre",
|
||||
"blockquote", "hr",
|
||||
"small", "sub", "sup",
|
||||
],
|
||||
ALLOWED_ATTR: [
|
||||
"href", "src", "alt", "title", "target",
|
||||
"rel", "style", "class",
|
||||
"align", "width", "height",
|
||||
"colspan", "rowspan",
|
||||
],
|
||||
ALLOWED_URI_REGEXP:
|
||||
/^(?:(?:https?|mailto|data:image\/(?:png|jpeg|jpg|gif|svg\+xml|webp)):|\/|#)/i,
|
||||
ADD_ATTR: ["target"],
|
||||
})
|
||||
return (
|
||||
<pre
|
||||
className="text-sm text-foreground proxmenux-notes-plaintext font-sans whitespace-pre-wrap break-words m-0"
|
||||
>
|
||||
{decoded}
|
||||
</pre>
|
||||
<div
|
||||
className="text-sm text-foreground proxmenux-notes break-words"
|
||||
// eslint-disable-next-line react/no-danger
|
||||
dangerouslySetInnerHTML={{ __html: cleanHtml }}
|
||||
/>
|
||||
)
|
||||
})()}
|
||||
</>
|
||||
@@ -2345,6 +2519,118 @@ const handleDownloadLogs = async (vmid: number, vmName: string) => {
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Updates panel — LXC only, conditionally rendered.
|
||||
Opened from the header badge rather than a nav tab, so the per-package list (up to
|
||||
30 rows) doesn't blow up the Status tab on mobile.
|
||||
Violet matches the shared "managed updates" theme. */}
|
||||
{activeModalTab === "updates" &&
|
||||
selectedVM?.type === "lxc" &&
|
||||
selectedVM?.update_check?.available && (
|
||||
<div className="space-y-4" key={`updates-${selectedVM.vmid}`}>
|
||||
<Card className="border border-border bg-card/50">
|
||||
<CardContent className="p-4">
|
||||
<div className="flex items-center justify-between mb-3 flex-wrap gap-2">
|
||||
<div className="flex items-center gap-2">
|
||||
<div className="p-1.5 rounded-md bg-violet-500/10">
|
||||
<Package className="h-4 w-4 text-violet-400" />
|
||||
</div>
|
||||
<h3 className="text-sm font-semibold text-foreground">
|
||||
Pending package updates
|
||||
</h3>
|
||||
</div>
|
||||
<Badge
|
||||
variant="outline"
|
||||
className="text-xs bg-violet-500/10 text-violet-400 border-violet-500/30"
|
||||
>
|
||||
{selectedVM.update_check.count} total
|
||||
</Badge>
|
||||
</div>
|
||||
<div className="text-xs text-muted-foreground mb-3 leading-relaxed">
|
||||
Last checked:{" "}
|
||||
{selectedVM.update_check.last_check
|
||||
? new Date(selectedVM.update_check.last_check).toLocaleString()
|
||||
: "—"}
|
||||
{" · "}Apply with{" "}
|
||||
<code className="text-foreground/80">pct enter {selectedVM.vmid}</code>
|
||||
{" → "}
|
||||
<code className="text-foreground/80">apt update && apt upgrade</code>
|
||||
</div>
|
||||
{/* Two render modes:
|
||||
• Full list when every pending package fits
|
||||
(registry cap is 30 packages per CT — so
|
||||
CTs with ≤30 updates show every row).
|
||||
• Summary when the CT has more pending than
|
||||
the registry stored. Showing 30 random
|
||||
rows out of 139 misleads the user — a
|
||||
count + security count + "inspect inside"
|
||||
hint is more honest. */}
|
||||
{(() => {
|
||||
const stored = selectedVM.update_check.packages?.length || 0
|
||||
const total = selectedVM.update_check.count || 0
|
||||
const sec = selectedVM.update_check.security_count || 0
|
||||
const truncated = total > stored
|
||||
if (!truncated && stored > 0) {
|
||||
return (
|
||||
<div className="border-t border-border divide-y divide-border/50">
|
||||
{selectedVM.update_check.packages.map((p) => (
|
||||
<div
|
||||
key={p.name}
|
||||
className="flex flex-col sm:flex-row sm:items-center sm:justify-between gap-0.5 sm:gap-2 py-2 text-sm"
|
||||
>
|
||||
<span className="font-mono text-foreground/90 flex items-center gap-2 min-w-0">
|
||||
{p.security && (
|
||||
<Shield
|
||||
className="h-4 w-4 text-green-500 flex-shrink-0"
|
||||
aria-label="Security update"
|
||||
/>
|
||||
)}
|
||||
<span className="truncate">{p.name}</span>
|
||||
</span>
|
||||
<span className="flex items-center gap-1.5 text-muted-foreground flex-shrink-0 font-mono text-xs sm:text-sm">
|
||||
<span>{p.current || "—"}</span>
|
||||
<span>→</span>
|
||||
<span className="text-foreground">{p.latest}</span>
|
||||
</span>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
// Truncated OR no per-package detail — render a summary.
|
||||
return (
|
||||
<div className="border-t border-border pt-3 space-y-2 text-sm">
|
||||
<div className="flex items-center gap-2">
|
||||
<Package className="h-4 w-4 text-violet-400 flex-shrink-0" />
|
||||
<span>
|
||||
<span className="font-semibold">{total}</span> package
|
||||
{total === 1 ? "" : "s"} pending
|
||||
</span>
|
||||
</div>
|
||||
{sec > 0 && (
|
||||
<div className="flex items-center gap-2">
|
||||
<Shield className="h-4 w-4 text-green-500 flex-shrink-0" />
|
||||
<span>
|
||||
<span className="font-semibold">{sec}</span> security update
|
||||
{sec === 1 ? "" : "s"}
|
||||
</span>
|
||||
</div>
|
||||
)}
|
||||
<div className="text-xs text-muted-foreground pt-1 leading-relaxed">
|
||||
Full list available inside the container:{" "}
|
||||
<code className="text-foreground/80">
|
||||
pct enter {selectedVM.vmid}
|
||||
</code>{" "}
|
||||
→{" "}
|
||||
<code className="text-foreground/80">apt list --upgradable</code>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
})()}
|
||||
</CardContent>
|
||||
</Card>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Sprint 13.29: Mount Points Tab — LXC only.
|
||||
Renders configured mpX entries first, then any
|
||||
ad-hoc NFS/CIFS/SMB mounts found inside the
|
||||
|
||||
@@ -133,6 +133,27 @@ export async function fetchApi<T>(endpoint: string, options?: RequestInit): Prom
|
||||
}
|
||||
throw new Error(`Unauthorized: ${endpoint}`)
|
||||
}
|
||||
// Try to surface the backend's JSON error payload instead of a bare
// `500 INTERNAL SERVER ERROR`. The Flask routes consistently return
// `{error: "..."}` on failure (e.g. /api/vms/<id>/control includes the
// pvesh stderr — telling the user "no space left on device" is far more
// useful than the raw status text).
//
// The payload is only *read* inside the try block; the throw happens
// after it. Throwing the detail error inside the try would be caught by
// the parse-failure handler below and silently replaced with the generic
// status message, so the backend detail would never reach the caller.
let detail = ""
try {
  const ct = response.headers.get("content-type") || ""
  if (ct.includes("application/json")) {
    const body = await response.json()
    const raw = body && (body.error || body.message)
    if (raw) {
      // Backends normally send a string; stringify anything else so the
      // Error message never degrades to "[object Object]".
      detail = typeof raw === "string" ? raw : JSON.stringify(raw)
    }
  }
} catch {
  // Body was not valid JSON (or could not be read) — fall through to
  // the generic message.
}
if (detail) {
  throw new Error(detail)
}
throw new Error(`API request failed: ${response.status} ${response.statusText}`)
|
||||
}
|
||||
|
||||
|
||||
@@ -43,7 +43,9 @@
|
||||
"clsx": "^2.1.1",
|
||||
"cmdk": "1.0.4",
|
||||
"date-fns": "4.1.0",
|
||||
"dompurify": "^3.2.7",
|
||||
"embla-carousel-react": "8.5.1",
|
||||
"marked": "^15.0.7",
|
||||
"geist": "^1.3.1",
|
||||
"input-otp": "1.4.1",
|
||||
"lucide-react": "^0.454.0",
|
||||
@@ -66,6 +68,7 @@
|
||||
"zod": "3.25.67"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/dompurify": "^3.0.5",
|
||||
"@types/node": "^22",
|
||||
"@types/react": "^18",
|
||||
"@types/react-dom": "^18",
|
||||
|
||||
@@ -1019,10 +1019,16 @@ def _capture_health_journal_context(categories: list, reason: str = '') -> str:
|
||||
if not pattern:
|
||||
return ""
|
||||
|
||||
# Capture recent journal entries matching keywords
|
||||
# Use -b 0 to only include logs from the current boot
|
||||
# Capture recent journal entries matching keywords.
|
||||
# Use -b 0 to only include logs from the current boot.
|
||||
# Filter out the Monitor's own stdout (AppRun, [HealthPersistence],
|
||||
# proxmenux-auth, etc.) BEFORE keyword matching — otherwise a startup
|
||||
# line like "[HealthPersistence] Database initialized with 13 tables"
|
||||
# leaks into the AI context because grep -iE 'ata' matches the
|
||||
# substring "ata" in "dATAbase". Self-logs are never system evidence.
|
||||
cmd = (
|
||||
f"journalctl -b 0 --since='10 minutes ago' --no-pager -n 500 2>/dev/null | "
|
||||
f"grep -vE 'AppRun\\[|proxmenux-auth|\\[HealthPersistence\\]|\\[ProxMenux\\]|\\[NotificationManager\\]|\\[AIEnhancer\\]' | "
|
||||
f"grep -iE '{pattern}' | tail -n 30"
|
||||
)
|
||||
|
||||
@@ -1131,12 +1137,28 @@ def _health_collector_loop():
|
||||
'updates': 'update_summary',
|
||||
}
|
||||
|
||||
# Sub-categories already rolled up into details['storage']
|
||||
# by _check_proxmox_storage_status. Emitting them as their
|
||||
# own health_degraded entries duplicates the same warning
|
||||
# (e.g. "Storage Mounts & Space" + "PVE Storage Capacity"
|
||||
# both saying "PBS-Cloud (pbs) usage ≥70%"). Skip them at
|
||||
# the notification layer — they still update _prev_statuses
|
||||
# so a future degradation transition is detected normally.
|
||||
_STORAGE_SUBCATEGORIES = {
|
||||
'pve_storage_capacity', 'zfs_pool_capacity',
|
||||
'lxc_disk', 'lxc_mounts', 'remote_mounts',
|
||||
}
|
||||
|
||||
for cat_key, cat_data in details.items():
|
||||
cur_status = cat_data.get('status', 'OK')
|
||||
prev_status = _prev_statuses.get(cat_key, 'OK')
|
||||
cur_rank = _SEV_RANK.get(cur_status, 0)
|
||||
prev_rank = _SEV_RANK.get(prev_status, 0)
|
||||
|
||||
if cat_key in _STORAGE_SUBCATEGORIES:
|
||||
_prev_statuses[cat_key] = cur_status
|
||||
continue
|
||||
|
||||
if cur_rank > prev_rank and cur_rank >= 2: # WARNING or CRITICAL
|
||||
reason = cat_data.get('reason', f'{cat_key} status changed to {cur_status}')
|
||||
reason_lower = reason.lower()
|
||||
@@ -4676,16 +4698,56 @@ def get_network_info():
|
||||
'vm_lxc_total_count': 0
|
||||
}
|
||||
|
||||
def _get_lxc_update_status_map() -> dict:
|
||||
"""Read the managed_installs registry and project the LXC update
|
||||
state into a quick lookup ``{vmid: {available, count, security_count,
|
||||
last_check, packages[]}}``. Used to decorate ``/api/vms`` output
|
||||
without forcing the frontend to fetch a second endpoint.
|
||||
|
||||
Returns an empty dict if the registry module isn't available or
|
||||
nothing is registered — callers must treat absence as "no info".
|
||||
"""
|
||||
try:
|
||||
import managed_installs
|
||||
except Exception:
|
||||
return {}
|
||||
try:
|
||||
active = managed_installs.get_active_items() or []
|
||||
except Exception:
|
||||
return {}
|
||||
|
||||
out: dict = {}
|
||||
for it in active:
|
||||
if it.get('type') != 'lxc':
|
||||
continue
|
||||
vmid = it.get('_vmid') or it.get('id', '').removeprefix('lxc:')
|
||||
if not vmid:
|
||||
continue
|
||||
update = it.get('update_check') or {}
|
||||
out[str(vmid)] = {
|
||||
'available': bool(update.get('available')),
|
||||
'count': int(update.get('_count') or 0),
|
||||
'security_count': int(update.get('_security_count') or 0),
|
||||
'last_check': update.get('last_check'),
|
||||
'latest': update.get('latest'),
|
||||
'error': update.get('error'),
|
||||
# Cap packages list shipped to UI — modal uses first 30 max
|
||||
'packages': (update.get('_packages') or [])[:30],
|
||||
}
|
||||
return out
|
||||
|
||||
|
||||
def get_proxmox_vms():
|
||||
"""Get Proxmox VM and LXC information (requires pvesh command) - only from local node"""
|
||||
try:
|
||||
all_vms = []
|
||||
|
||||
lxc_updates_map = _get_lxc_update_status_map()
|
||||
|
||||
try:
|
||||
# local_node = socket.gethostname()
|
||||
local_node = get_proxmox_node_name()
|
||||
# print(f"[v0] Local node detected: {local_node}")
|
||||
|
||||
|
||||
resources = get_cached_pvesh_cluster_resources_vm()
|
||||
if resources:
|
||||
for resource in resources:
|
||||
@@ -4693,12 +4755,13 @@ def get_proxmox_vms():
|
||||
if node != local_node:
|
||||
# print(f"[v0] Skipping VM {resource.get('vmid')} from remote node: {node}")
|
||||
continue
|
||||
|
||||
|
||||
vm_type = 'lxc' if resource.get('type') == 'lxc' else 'qemu'
|
||||
vm_data = {
|
||||
'vmid': resource.get('vmid'),
|
||||
'name': resource.get('name', f"VM-{resource.get('vmid')}"),
|
||||
'status': resource.get('status', 'unknown'),
|
||||
'type': 'lxc' if resource.get('type') == 'lxc' else 'qemu',
|
||||
'type': vm_type,
|
||||
'cpu': resource.get('cpu', 0),
|
||||
'mem': resource.get('mem', 0),
|
||||
'maxmem': resource.get('maxmem', 0),
|
||||
@@ -4710,6 +4773,14 @@ def get_proxmox_vms():
|
||||
'diskread': resource.get('diskread', 0),
|
||||
'diskwrite': resource.get('diskwrite', 0)
|
||||
}
|
||||
# Decorate LXC rows with the apt update status if the
|
||||
# managed_installs registry has it. Absent key means
|
||||
# either the user hasn't enabled the feature or the
|
||||
# CT isn't running / isn't Debian/Ubuntu.
|
||||
if vm_type == 'lxc':
|
||||
upd = lxc_updates_map.get(str(resource.get('vmid')))
|
||||
if upd is not None:
|
||||
vm_data['update_check'] = upd
|
||||
all_vms.append(vm_data)
|
||||
|
||||
return all_vms
|
||||
@@ -11035,9 +11106,53 @@ def api_vm_control(vmid):
|
||||
'message': f'Successfully executed {action} on {vm_info.get("name")}'
|
||||
})
|
||||
else:
|
||||
# `pvesh` failed → fire the matching vm_fail / ct_fail
|
||||
# notification so the user gets paged on their channels
|
||||
# too, not just an in-dashboard alert. Previously this
|
||||
# path silently returned a 500 to the browser and lost
|
||||
# the event entirely (reported on .1.10: tried to start
|
||||
# VM 106 while log2ram tmpfs was full → 500 in the UI
|
||||
# but no Telegram message). The stderr is the most
|
||||
# useful single line we have — `pvesh` reliably prints
|
||||
# the underlying daemon failure there (e.g.
|
||||
# "start failed: command '/usr/bin/kvm …' failed with
|
||||
# exit code 1: no space left on device").
|
||||
err_text = (control_result.stderr or '').strip() \
|
||||
or (control_result.stdout or '').strip() \
|
||||
or f'{action} returned exit code {control_result.returncode}'
|
||||
# Truncate runaway stderr (some pvesh failures dump
|
||||
# multi-KB tracebacks) — keep the notification readable.
|
||||
if len(err_text) > 500:
|
||||
err_text = err_text[:500] + ' …'
|
||||
|
||||
try:
|
||||
from notification_manager import notification_manager as _nm
|
||||
import socket as _sock
|
||||
_host = _sock.gethostname()
|
||||
event_type = 'ct_fail' if vm_type == 'lxc' else 'vm_fail'
|
||||
_nm.emit_event(
|
||||
event_type=event_type,
|
||||
severity='CRITICAL',
|
||||
data={
|
||||
'hostname': _host,
|
||||
'vmid': str(vmid),
|
||||
'vmname': vm_info.get('name') or f'{vm_type}-{vmid}',
|
||||
'reason': f'{action} failed: {err_text}',
|
||||
'action': action,
|
||||
},
|
||||
source='dashboard',
|
||||
entity='vm',
|
||||
entity_id=str(vmid),
|
||||
)
|
||||
except Exception as _emit_err:
|
||||
print(f"[api_vm_control] failed to emit {vm_type}_fail "
|
||||
f"notification: {type(_emit_err).__name__}: {_emit_err}")
|
||||
|
||||
return jsonify({
|
||||
'success': False,
|
||||
'error': control_result.stderr
|
||||
'vmid': vmid,
|
||||
'action': action,
|
||||
'error': err_text,
|
||||
}), 500
|
||||
else:
|
||||
return jsonify({'error': 'Failed to get VM details'}), 500
|
||||
|
||||
@@ -92,7 +92,15 @@ class HealthPersistence:
|
||||
self.data_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
self.db_path = self.data_dir / 'health_monitor.db'
|
||||
self._db_lock = threading.Lock()
|
||||
# Reentrant lock: `record_disk_observation` acquires this and then
|
||||
# calls `register_disk` which acquires it again on the same thread.
|
||||
# With a plain `threading.Lock` that second acquire deadlocks and the
|
||||
# caller hangs forever — visible symptom on RimegraVE (Pedro Rico
|
||||
# 19/05): no disk_observation update since the day a thread first
|
||||
# walked that path. `RLock` allows re-entry from the same thread
|
||||
# while still serialising cross-thread writes, which is what the
|
||||
# serialisation rationale (race-free UPSERT dedup) actually wants.
|
||||
self._db_lock = threading.RLock()
|
||||
self._init_database()
|
||||
|
||||
def _get_conn(self) -> sqlite3.Connection:
|
||||
@@ -228,6 +236,29 @@ class HealthPersistence:
|
||||
'CREATE INDEX IF NOT EXISTS idx_digest_pending_channel '
|
||||
'ON digest_pending(channel, ts)'
|
||||
)
|
||||
|
||||
# Sibling table for events buffered DURING Quiet Hours. Same
|
||||
# shape as digest_pending so the existing summary renderer can
|
||||
# be reused. Kept separate because the lifecycle is different:
|
||||
# digest_pending flushes once per day at digest_time, while
|
||||
# quiet_pending flushes once per Quiet Hours close (an arbitrary
|
||||
# time that depends on the user's window settings).
|
||||
cursor.execute('''
|
||||
CREATE TABLE IF NOT EXISTS quiet_pending (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
channel TEXT NOT NULL,
|
||||
event_type TEXT NOT NULL,
|
||||
event_group TEXT NOT NULL,
|
||||
severity TEXT NOT NULL,
|
||||
ts INTEGER NOT NULL,
|
||||
title TEXT NOT NULL,
|
||||
body TEXT NOT NULL
|
||||
)
|
||||
''')
|
||||
cursor.execute(
|
||||
'CREATE INDEX IF NOT EXISTS idx_quiet_pending_channel '
|
||||
'ON quiet_pending(channel, ts)'
|
||||
)
|
||||
|
||||
# Migration: add missing columns to errors table for existing DBs
|
||||
cursor.execute("PRAGMA table_info(errors)")
|
||||
@@ -2289,11 +2320,15 @@ class HealthPersistence:
|
||||
|
||||
# Upsert observation: if same (disk, type, signature), bump count + update last timestamp.
|
||||
# IMPORTANT: Do NOT reset dismissed — if the user dismissed this observation,
|
||||
# re-detecting the same journal entry must not un-dismiss it. Also do not
|
||||
# increment the occurrence_count on dismissed rows (audit Tier 5 — once
|
||||
# the user has dismissed, we don't want the counter to keep growing for
|
||||
# journal events that no longer interest them; this also stops the badge
|
||||
# from drifting upward for dismissed conditions).
|
||||
# re-detecting the same journal entry must not un-dismiss it. BUT we DO
|
||||
# keep counting + updating last_occurrence even when dismissed, because the
|
||||
# responsible-monitoring contract is: every error counts toward the
|
||||
# accumulated total shown in the disk modal ("324 connection errors"),
|
||||
# even errors of the same signature the user already saw once. Dismissed
|
||||
# only mutes notifications, NOT the per-disk error history surfaced in the
|
||||
# UI. Reverting the earlier "WHERE dismissed=0" gate that froze the
|
||||
# counter and last_occurrence for /dev/sdh on 2026-05-09, leaving 10
|
||||
# silent days of unreported ATA errors (Pedro Rico, 19/05).
|
||||
cursor.execute(f'''
|
||||
INSERT INTO disk_observations
|
||||
(disk_registry_id, {type_col}, error_signature, {first_col},
|
||||
@@ -2303,7 +2338,6 @@ class HealthPersistence:
|
||||
{last_col} = excluded.{last_col},
|
||||
occurrence_count = occurrence_count + 1,
|
||||
severity = CASE WHEN excluded.severity = 'critical' THEN 'critical' ELSE severity END
|
||||
WHERE dismissed = 0
|
||||
''', (disk_id, error_type, error_signature, now, now, raw_message, severity))
|
||||
|
||||
conn.commit()
|
||||
|
||||
@@ -274,6 +274,12 @@ def _df_via_host_pid(host_pid: str, ct_target: str) -> dict[str, Optional[int]]:
|
||||
numbers reflect the whole storage pool instead of the per-subvol
|
||||
quota — without this the UI showed 851 GB total for a 150 GB ZFS
|
||||
subvol because pvesm reports the rpool's free space.
|
||||
|
||||
Note: this path does NOT measure NFS/CIFS mounts that were set up
|
||||
from INSIDE the CT (`mount -t nfs` / `/etc/fstab` inside the
|
||||
container). Those live in the CT's own mount namespace and aren't
|
||||
visible to the host's `df` even through `/proc/<pid>/root`. Use
|
||||
`_df_via_pct_exec` for ad-hoc mounts.
|
||||
"""
|
||||
empty = {"total_bytes": None, "used_bytes": None, "available_bytes": None}
|
||||
if not host_pid or not ct_target:
|
||||
@@ -301,6 +307,44 @@ def _df_via_host_pid(host_pid: str, ct_target: str) -> dict[str, Optional[int]]:
|
||||
return empty
|
||||
|
||||
|
||||
def _df_via_pct_exec(vmid: str, ct_target: str,
|
||||
timeout: int = 6) -> dict[str, Optional[int]]:
|
||||
"""``df`` a path from INSIDE the CT via ``pct exec``. Needed for
|
||||
ad-hoc NFS/CIFS mounts that live in the CT's own mount namespace
|
||||
and aren't visible from the host (so `_df_via_host_pid` returns
|
||||
empty for them).
|
||||
|
||||
Heavier than the host-side df (full `pct exec` round-trip ~1-3s),
|
||||
so we only use it for ad-hoc mounts. The 6s timeout is generous
|
||||
enough for NFS over slow links but won't drag the request past
|
||||
the proxy timeout.
|
||||
"""
|
||||
empty = {"total_bytes": None, "used_bytes": None, "available_bytes": None}
|
||||
if not vmid or not ct_target:
|
||||
return empty
|
||||
try:
|
||||
proc = subprocess.run(
|
||||
[_PCT, "exec", vmid, "--", "df", "-B1",
|
||||
"--output=size,used,avail", ct_target],
|
||||
capture_output=True, text=True, timeout=timeout,
|
||||
)
|
||||
if proc.returncode != 0:
|
||||
return empty
|
||||
lines = [ln for ln in proc.stdout.strip().splitlines() if ln.strip()]
|
||||
if len(lines) < 2:
|
||||
return empty
|
||||
parts = lines[-1].split()
|
||||
if len(parts) < 3:
|
||||
return empty
|
||||
return {
|
||||
"total_bytes": int(parts[0]),
|
||||
"used_bytes": int(parts[1]),
|
||||
"available_bytes": int(parts[2]),
|
||||
}
|
||||
except (subprocess.TimeoutExpired, OSError, ValueError):
|
||||
return empty
|
||||
|
||||
|
||||
def _capacity_for(source: str, classification: dict[str, Any],
|
||||
pve_storages: dict[str, dict[str, Any]],
|
||||
config_options: Optional[dict[str, Any]] = None,
|
||||
@@ -606,14 +650,29 @@ def get_lxc_mount_points(vmid: str) -> dict[str, Any]:
|
||||
]
|
||||
# Same parallelisation as the configured-mp loop: stat'ing
|
||||
# stale NFS exports serially can dominate the request and
|
||||
# push it past the proxy timeout.
|
||||
# push it past the proxy timeout. Capacity (`df`) is fetched
|
||||
# in the SAME pool so the UI can render the usage bar for
|
||||
# ad-hoc NFS/CIFS mounts too — null capacity was a regression
|
||||
# spotted on CT 103 /mnt/Media. Skip df when stat already
|
||||
# showed the mount as unreachable, otherwise the df subprocess
|
||||
# blocks on the same broken export.
|
||||
if ad_hoc_candidates:
|
||||
with ThreadPoolExecutor(max_workers=max_workers) as pool:
|
||||
healths = list(pool.map(
|
||||
lambda rt: _stat_via_host(host_pid, rt["rt_target"]),
|
||||
ad_hoc_candidates,
|
||||
))
|
||||
for rt, health in zip(ad_hoc_candidates, healths):
|
||||
def _gather_adhoc(rt):
|
||||
h = _stat_via_host(host_pid, rt["rt_target"])
|
||||
if h.get("reachable"):
|
||||
# NFS/CIFS mounts done inside the CT live in the
|
||||
# container's own mount namespace and aren't
|
||||
# visible to `df` from the host even via
|
||||
# /proc/<pid>/root — use `pct exec df` instead.
|
||||
cap = _df_via_pct_exec(vmid, rt["rt_target"])
|
||||
else:
|
||||
cap = {"total_bytes": None, "used_bytes": None,
|
||||
"available_bytes": None}
|
||||
return rt, h, cap
|
||||
|
||||
results = list(pool.map(_gather_adhoc, ad_hoc_candidates))
|
||||
for rt, health, cap in results:
|
||||
ad_hoc.append({
|
||||
"mp_index": "",
|
||||
"source": rt["rt_source"],
|
||||
@@ -624,9 +683,9 @@ def get_lxc_mount_points(vmid: str) -> dict[str, Any]:
|
||||
"origin_label": rt["rt_source"],
|
||||
"config_options": {},
|
||||
"config_flags": [],
|
||||
"total_bytes": None,
|
||||
"used_bytes": None,
|
||||
"available_bytes": None,
|
||||
"total_bytes": cap["total_bytes"],
|
||||
"used_bytes": cap["used_bytes"],
|
||||
"available_bytes": cap["available_bytes"],
|
||||
"runtime_mounted": True,
|
||||
"runtime_source": rt["rt_source"],
|
||||
"runtime_fstype": rt["rt_fstype"],
|
||||
|
||||
@@ -189,12 +189,169 @@ def _detect_oci_apps() -> list[dict]:
|
||||
return out
|
||||
|
||||
|
||||
# ── LXC containers (Phase 1: apt-based update detection) ────────────
|
||||
#
|
||||
# Each running Debian/Ubuntu CT becomes a registry entry of type "lxc".
|
||||
# Detection is opt-in: gated on the `lxc_updates_available` notification
|
||||
# being enabled somewhere, so the heavy `pct exec` work doesn't run on
|
||||
# hosts where the user hasn't asked for this.
|
||||
#
|
||||
# Phase 2 hook: once helper-scripts metadata is integrated, entries can
|
||||
# carry `_helper_script_app` so the checker swaps generic apt counting
|
||||
# for app-specific upstream-release tracking (Vaultwarden, Jellyfin,
|
||||
# etc.). For now every LXC uses the generic apt path.
|
||||
|
||||
_PCT_BIN = "/usr/sbin/pct"
|
||||
_LXC_EXEC_TIMEOUT_SEC = 10
|
||||
_LXC_OS_PROBE_TIMEOUT_SEC = 5
|
||||
|
||||
|
||||
def _lxc_updates_notification_enabled() -> bool:
|
||||
"""Return True if the user has enabled `lxc_updates_available` on
|
||||
at least one configured channel. Used to gate the heavy detection
|
||||
+ checker work — when disabled we don't touch any CT at all.
|
||||
"""
|
||||
try:
|
||||
import notification_manager as _nm_mod
|
||||
nm = _nm_mod.notification_manager
|
||||
return bool(nm.is_event_enabled("lxc_updates_available"))
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def _list_pve_lxcs() -> list[dict]:
    """List the LXC containers reported by ``pct list``.

    Each element is ``{"vmid", "status", "name"}`` (all strings). The
    function never raises for the expected failure modes (missing
    binary, timeout, non-zero exit); it returns an empty list instead so
    the detector caller can carry on.
    """
    try:
        listing = subprocess.run(
            [_PCT_BIN, "list"],
            capture_output=True, text=True, timeout=5,
        )
    except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
        return []
    if listing.returncode != 0:
        return []

    containers: list[dict] = []
    data_rows = listing.stdout.splitlines()[1:]  # first row is the header
    for row in data_rows:
        # Columns are: VMID Status Lock Name. `Lock` is usually blank, in
        # which case the row collapses to three tokens and the name ends
        # up third — taking the LAST token covers both layouts.
        cols = row.split(None, 3)
        if len(cols) < 2 or not cols[0].isdigit():
            continue
        containers.append({
            "vmid": cols[0],
            "status": cols[1],
            "name": cols[-1] if len(cols) >= 3 else "",
        })
    return containers
|
||||
|
||||
|
||||
_SUPPORTED_OS_FAMILIES = ("debian", "ubuntu", "alpine")
|
||||
|
||||
|
||||
def _probe_lxc_os(vmid: str) -> Optional[str]:
|
||||
"""Return a normalized family identifier (``debian`` / ``ubuntu`` /
|
||||
``alpine``) by reading ``/etc/os-release`` inside the running CT.
|
||||
Returns None for distributions whose package manager we don't yet
|
||||
speak — those CTs are skipped in detection so the framework
|
||||
doesn't keep retrying a checker we can't run.
|
||||
|
||||
Cached per CT in the registry — re-probed only when the entry has
|
||||
no ``_os_family`` yet, since the OS rarely changes for the life of
|
||||
a CT.
|
||||
"""
|
||||
try:
|
||||
r = subprocess.run(
|
||||
[_PCT_BIN, "exec", vmid, "--", "cat", "/etc/os-release"],
|
||||
capture_output=True, text=True,
|
||||
timeout=_LXC_OS_PROBE_TIMEOUT_SEC,
|
||||
)
|
||||
except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
|
||||
return None
|
||||
if r.returncode != 0:
|
||||
return None
|
||||
text = r.stdout.lower()
|
||||
if "id=ubuntu" in text:
|
||||
return "ubuntu"
|
||||
if "id=debian" in text or "id_like=debian" in text:
|
||||
return "debian"
|
||||
if "id=alpine" in text:
|
||||
return "alpine"
|
||||
# Future Phase 1.5: CentOS/Rocky/Alma (dnf check-update), Arch
|
||||
# (checkupdates), openSUSE (zypper list-updates). Each needs a
|
||||
# parser similar to apt/apk — skip silently for now.
|
||||
return None
|
||||
|
||||
|
||||
def _detect_lxc_containers() -> list[dict]:
    """Build one registry entry per running CT whose OS family is
    supported (see ``_SUPPORTED_OS_FAMILIES``).

    Returns an empty list straight away when the `lxc_updates_available`
    notification is not enabled, so no container is probed.

    The OS family is cached on the registry entry (`_os_family`), which
    means the costly ``pct exec cat /etc/os-release`` only runs the first
    time a CT is seen. A CT reinstalled with a different OS keeps its old
    cached family until the user resets the registry — an accepted
    trade-off against paying the probe cost on every 24h cycle.
    """
    if not _lxc_updates_notification_enabled():
        return []

    # Read-only peek at the registry to recover cached `_os_family`
    # values. No lock is taken: the framework holds the write lock when
    # it merges our results back in detect_and_register.
    try:
        registry_items = _read_registry().get("items", [])
    except Exception:
        registry_items = []
    cached: dict = {}
    for item in registry_items:
        if isinstance(item, dict) and item.get("type") == "lxc":
            cached[item.get("id")] = item

    entries: list[dict] = []
    for ct in _list_pve_lxcs():
        if ct["status"] != "running":
            continue
        vmid = ct["vmid"]
        entry_id = f"lxc:{vmid}"
        known = cached.get(entry_id) or {}
        family = known.get("_os_family") or _probe_lxc_os(vmid)
        if family not in _SUPPORTED_OS_FAMILIES:
            # No package-manager parser for this distribution yet — leave
            # it out quietly. The framework marks an existing entry as
            # removed_at once it stops showing up in detector output.
            continue
        entries.append({
            "id": entry_id,
            "type": "lxc",
            "name": ct.get("name") or f"CT-{vmid}",
            "current_version": None,   # apt has no single version
            "menu_label": None,        # user upgrades inside the CT
            "menu_script": None,
            "_vmid": vmid,
            "_os_family": family,
            # Phase 2 hook: populate `_helper_script_app` here once we
            # learn how to read the community-scripts marker.
        })
    return entries
|
||||
|
||||
|
||||
# Detectors registered here. Each returns either a single entry dict
|
||||
# or a list (for sources that yield multiple items, like OCI). The
|
||||
# framework normalises both shapes.
|
||||
_DETECTORS: list[Callable[[], Any]] = [
|
||||
_detect_nvidia_xfree86,
|
||||
_detect_oci_apps,
|
||||
_detect_lxc_containers,
|
||||
]
|
||||
|
||||
|
||||
@@ -514,9 +671,173 @@ def _check_nvidia_xfree86(entry: dict) -> dict:
|
||||
}
|
||||
|
||||
|
||||
def _parse_apt_list_upgradable(text: str) -> list[dict]:
|
||||
"""Parse the output of ``apt list --upgradable`` into structured rows.
|
||||
|
||||
Each upgradable line looks like::
|
||||
|
||||
package/release version arch [upgradable from: oldversion]
|
||||
|
||||
Returns a list of ``{name, current, latest, security}``. Lines that
|
||||
can't be parsed are skipped; the header ``Listing...`` is ignored
|
||||
because it lacks the ``[upgradable`` marker.
|
||||
|
||||
"security" flag is detected from the release/suite name (e.g.
|
||||
``bookworm-security``, ``jammy-security``). Some derivatives don't
|
||||
use that naming and will report security=False even when patches
|
||||
are present — acceptable for Phase 1, refined later if needed.
|
||||
"""
|
||||
rows: list[dict] = []
|
||||
for line in text.splitlines():
|
||||
line = line.strip()
|
||||
if not line or "[upgradable" not in line or "/" not in line:
|
||||
continue
|
||||
try:
|
||||
head, _, tail = line.partition(" ")
|
||||
name, _, release = head.partition("/")
|
||||
tail_parts = tail.split()
|
||||
if not tail_parts:
|
||||
continue
|
||||
new_ver = tail_parts[0]
|
||||
old_ver = ""
|
||||
if "from:" in line:
|
||||
old_ver = line.split("from:", 1)[1].strip().rstrip("]").strip()
|
||||
release_lower = release.lower()
|
||||
is_security = "-security" in release_lower or "/security" in release_lower
|
||||
rows.append({
|
||||
"name": name,
|
||||
"current": old_ver,
|
||||
"latest": new_ver,
|
||||
"security": is_security,
|
||||
})
|
||||
except Exception:
|
||||
continue
|
||||
return rows
|
||||
|
||||
|
||||
def _parse_apk_list_upgradable(text: str) -> list[dict]:
|
||||
"""Parse the output of ``apk list -u`` into structured rows.
|
||||
|
||||
Lines look like::
|
||||
|
||||
busybox-1.36.1-r29 x86_64 {busybox} (GPL-2.0-only) [upgradable from: busybox-1.36.1-r28]
|
||||
|
||||
apk smashes name + version into the leading token, so reliable
|
||||
name/version splitting requires walking from the right (versions
|
||||
end in ``-r<num>``). For the badge + notification we only need a
|
||||
count and a representative sample, so we keep the parser tolerant
|
||||
and surface the raw token as the package "name". Alpine's main
|
||||
repos don't expose a separate "security" suite via apk metadata,
|
||||
so we mark every row as ``security=False`` — security==0 always.
|
||||
"""
|
||||
rows: list[dict] = []
|
||||
for line in text.splitlines():
|
||||
line = line.strip()
|
||||
if not line or "[upgradable" not in line:
|
||||
continue
|
||||
try:
|
||||
first_tok = line.split(" ", 1)[0]
|
||||
old = ""
|
||||
if "from:" in line:
|
||||
old = line.split("from:", 1)[1].strip().rstrip("]").strip()
|
||||
rows.append({
|
||||
"name": first_tok,
|
||||
"current": old,
|
||||
"latest": first_tok,
|
||||
"security": False,
|
||||
})
|
||||
except Exception:
|
||||
continue
|
||||
return rows
|
||||
|
||||
|
||||
def _run_pct_pkg_listing(vmid: str, cmd: str) -> tuple[bool, str, str]:
    """Execute a package-listing shell command inside container ``vmid``
    through ``pct exec … sh -c``.

    Returns ``(ok, stdout, error_message)``:

    * success      → ``(True, <stdout>, "")``
    * timeout      → ``(False, "", "<first word of cmd> listing timed out")``
    * spawn error  → ``(False, "", <exception text>)``
    * non-zero exit→ ``(False, "", <stderr, max 200 chars, or a generic
      fallback message>)``

    Keeping timeout and stderr handling here lets the apt and apk
    callers stay symmetric.
    """
    argv = [_PCT_BIN, "exec", vmid, "--", "sh", "-c", cmd]
    try:
        completed = subprocess.run(
            argv,
            capture_output=True, text=True,
            timeout=_LXC_EXEC_TIMEOUT_SEC,
        )
    except subprocess.TimeoutExpired:
        tool = cmd.split()[0]
        return False, "", f"{tool} listing timed out"
    except (FileNotFoundError, OSError) as exc:
        return False, "", str(exc)

    if completed.returncode == 0:
        return True, completed.stdout, ""
    reason = (completed.stderr or "package listing failed").strip()[:200]
    return False, "", reason
|
||||
|
||||
|
||||
def _check_lxc_updates(entry: dict) -> dict:
    """Report the package updates pending inside an LXC registry entry.

    The entry's cached ``_os_family`` selects the listing command and
    parser (apt for debian/ubuntu, apk for alpine). Only the CT's
    existing package metadata is read — ``apt update`` / ``apk update``
    is never triggered from outside, so the user's own refresh cadence
    (unattended-upgrades, cron) is left alone.

    Returns a dict with ``available``, ``latest``, ``last_check`` and
    ``error``; on a successful listing it additionally carries
    ``_count``, ``_security_count`` and ``_packages`` (first 30 rows).
    ``latest`` doubles as the dedup fingerprint: it combines the total
    count, the security count and the first five sorted package names,
    so an unchanged set of pending updates does not re-notify daily
    while a meaningfully different set does.
    """
    def _no_result(message):
        # Shared shape for every "could not check" outcome.
        return {
            "available": False, "latest": None,
            "last_check": _now_iso(), "error": message,
        }

    vmid = entry.get("_vmid")
    family = (entry.get("_os_family") or "").lower()
    if not vmid:
        return _no_result("no vmid in entry")

    if family == "alpine":
        command, parse = "apk list -u 2>/dev/null", _parse_apk_list_upgradable
    elif family in ("debian", "ubuntu"):
        command, parse = "apt list --upgradable 2>/dev/null", _parse_apt_list_upgradable
    else:
        return _no_result(f"unsupported family: {family}")

    ok, stdout, err = _run_pct_pkg_listing(vmid, command)
    if not ok:
        return _no_result(err)

    packages = parse(stdout)
    total = len(packages)
    security_total = len([pkg for pkg in packages if pkg.get("security")])

    fingerprint = None
    if total > 0:
        leading = sorted(pkg["name"] for pkg in packages)[:5]
        fingerprint = f"{total}:{security_total}:{','.join(leading)}"

    return {
        "available": total > 0,
        "latest": fingerprint,
        "last_check": _now_iso(),
        "error": None,
        "_count": total,
        "_security_count": security_total,
        "_packages": packages[:30],  # cap to keep the registry compact
    }
|
||||
|
||||
|
||||
_CHECKERS: dict[str, Callable[[dict], dict]] = {
|
||||
"oci_app": _check_oci_app,
|
||||
"nvidia_xfree86": _check_nvidia_xfree86,
|
||||
"lxc": _check_lxc_updates,
|
||||
}
|
||||
|
||||
|
||||
@@ -562,8 +883,14 @@ def check_for_updates(force: bool = False) -> list[dict]:
|
||||
}
|
||||
if result.get("current") and not it.get("current_version"):
|
||||
it["current_version"] = result["current"]
|
||||
# Per-checker extras carried through into the persisted
|
||||
# `update_check` blob. Add new keys here when a future
|
||||
# checker needs to surface fields beyond available/latest.
|
||||
# `_count` + `_security_count` were missing originally, so
|
||||
# the LXC checker's counts dropped on the floor and the
|
||||
# frontend badge couldn't render.
|
||||
for extra_key in ("_packages", "_upgrade_kind", "_kernel",
|
||||
"_kernel_note"):
|
||||
"_kernel_note", "_count", "_security_count"):
|
||||
if extra_key in result:
|
||||
it["update_check"][extra_key] = result[extra_key]
|
||||
|
||||
|
||||
@@ -327,14 +327,27 @@ def is_vzdump_active_on_host() -> bool:
|
||||
try:
|
||||
with open(_VZDUMP_ACTIVE_FILE, 'r') as f:
|
||||
for line in f:
|
||||
# UPID format: UPID:node:pid:pstart:starttime:type:id:user:
|
||||
# tasks/active row layout (whitespace separated):
|
||||
# "<UPID> 1" ← running
|
||||
# "<UPID> 1 <endtime_hex> <STATUS>" ← finished
|
||||
# PVE leaves finished rows lingering for hours
|
||||
# sometimes — without the field-count check below the
|
||||
# PID-recycling case fires a false positive (an
|
||||
# unrelated process inherited the old vzdump's PID
|
||||
# and `os.kill(pid, 0)` succeeds).
|
||||
if ':vzdump:' not in line:
|
||||
continue
|
||||
parts = line.strip().split(':')
|
||||
if len(parts) < 3:
|
||||
fields = line.split()
|
||||
if not fields:
|
||||
continue
|
||||
# >2 fields means endtime + status are written → terminated.
|
||||
if len(fields) > 2:
|
||||
continue
|
||||
upid_parts = fields[0].split(':')
|
||||
if len(upid_parts) < 3:
|
||||
continue
|
||||
try:
|
||||
pid = int(parts[2], 16) # PID in UPID is hex
|
||||
pid = int(upid_parts[2], 16) # PID in UPID is hex
|
||||
os.kill(pid, 0)
|
||||
found = True
|
||||
break
|
||||
@@ -1033,21 +1046,28 @@ class JournalWatcher:
|
||||
else:
|
||||
resolved = re.sub(r'\d+$', '', raw_device) if raw_device.startswith('sd') else raw_device
|
||||
|
||||
# ── Gate 1: SMART must confirm disk failure ──
|
||||
# If the disk is healthy (PASSED) or we can't verify
|
||||
# (UNKNOWN / unresolvable ATA port), do NOT notify.
|
||||
# ── ALWAYS persist the observation, regardless of SMART ──
|
||||
# The disk_observation_contract is explicit (memory note
|
||||
# disk-observation-contract): every kernel-surfaced disk
|
||||
# error must be recorded in disk_observations *even when
|
||||
# SMART reports PASSED*. Silent errors on a "healthy" disk
|
||||
# are exactly the early-warning signal the modal histogram
|
||||
# exists to surface ("324 connection errors on this disk").
|
||||
# Previously this line lived AFTER a `return` gate keyed on
|
||||
# smart_health != 'FAILED', so the 3162 ata8 errors on
|
||||
# .1.10 (PASSED SMART) all dropped on the floor instead of
|
||||
# accumulating in the per-disk audit history.
|
||||
self._record_disk_io_observation(resolved, msg)
|
||||
|
||||
# ── Gate 1: only NOTIFY when SMART reports FAILED ──
|
||||
# Observation is already saved above. We avoid spamming a
|
||||
# CRITICAL notification for transient ATA/SCSI noise on
|
||||
# otherwise-healthy disks — the modal histogram surfaces
|
||||
# those without paging the user at 3 AM.
|
||||
smart_health = self._quick_smart_health(resolved)
|
||||
if smart_health != 'FAILED':
|
||||
return
|
||||
|
||||
# ── Persist observation (before the cooldown gate) ──
|
||||
# The 24h cooldown below only suppresses RE-notification; the
|
||||
# per-disk observations history must reflect every genuine
|
||||
# detection. The DB UPSERT dedups same-signature events via
|
||||
# occurrence_count, so calling this on every match is safe.
|
||||
# Aligns with the parallel path in HealthMonitor._check_disks_optimized.
|
||||
self._record_disk_io_observation(resolved, msg)
|
||||
|
||||
# ── Gate 2: 24-hour dedup per device ──
|
||||
# Check both in-memory cache AND the DB (user dismiss clears DB cooldowns).
|
||||
# If user dismissed the error, _clear_disk_io_cooldown() removed the DB
|
||||
@@ -1814,12 +1834,31 @@ class TaskWatcher:
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue
|
||||
upid = line.split()[0] if line.split() else line
|
||||
parts = line.split()
|
||||
if not parts:
|
||||
continue
|
||||
upid = parts[0]
|
||||
current_upids.add(upid)
|
||||
|
||||
if ':vzdump:' in upid:
|
||||
|
||||
if ':vzdump:' not in upid:
|
||||
continue
|
||||
|
||||
# PVE writes each line in tasks/active as:
|
||||
# "<UPID> 1" ← task still running
|
||||
# "<UPID> 1 <endtime_hex> <STATUS>" ← task already finished
|
||||
# PVE doesn't always prune finished rows from this
|
||||
# file (observed on RimegraVE 19/05: 25 OK/error
|
||||
# entries lingering for hours after job end). Just
|
||||
# matching ':vzdump:' kept `_vzdump_running_since`
|
||||
# permanently fresh, which then made
|
||||
# `_is_vzdump_active()` return True forever and
|
||||
# silenced every vm_start / vm_stop / vm_shutdown
|
||||
# via the _BACKUP_NOISE filter. Only treat the row
|
||||
# as a live vzdump when no end-time / status has
|
||||
# been written yet (≤ 2 fields: UPID + version).
|
||||
if len(parts) <= 2:
|
||||
found_vzdump = True
|
||||
|
||||
|
||||
# Keep _vzdump_running_since fresh as long as vzdump is in active
|
||||
if found_vzdump:
|
||||
self._vzdump_running_since = time.time()
|
||||
@@ -2175,6 +2214,16 @@ class PollingCollector:
|
||||
# has an update".
|
||||
self._last_managed_check = 0
|
||||
self._notified_managed_updates: dict[str, str] = {}
|
||||
# LXC notifications are grouped — one event per polling cycle
|
||||
# covering every running Debian/Ubuntu CT with pending apt
|
||||
# updates. The fingerprint encodes the per-CT state so a stable
|
||||
# batch doesn't re-notify while a meaningful change does.
|
||||
self._notified_lxc_batch: str | None = None
|
||||
# Track previous state of the LXC-updates notification toggle
|
||||
# so a user enabling it post-startup bypasses the 24h gate
|
||||
# ONCE — the next polling cycle runs a fresh detection without
|
||||
# waiting up to a day. Cleared after the forced run completes.
|
||||
self._lxc_was_enabled: bool = False
|
||||
# Track notified ProxMenux versions to avoid duplicates
|
||||
self._notified_proxmenux_version: str | None = None
|
||||
self._notified_proxmenux_beta_version: str | None = None
|
||||
@@ -3101,7 +3150,24 @@ class PollingCollector:
|
||||
NVIDIA driver → ``nvidia_driver_update_available``, etc.).
|
||||
"""
|
||||
now = time.time()
|
||||
if now - self._last_managed_check < self.UPDATE_CHECK_INTERVAL:
|
||||
|
||||
# Detect OFF→ON transition of the LXC update toggle. Without
|
||||
# this, the first polling cycle after service start always sets
|
||||
# the 24h gate — so a user who enables the toggle later (which
|
||||
# is the normal flow, since the toggle defaults to OFF) would
|
||||
# have to wait up to 24h or restart the service before the
|
||||
# detector ran. A one-shot bypass on the transition fixes that
|
||||
# without weakening the 24h cadence in steady state.
|
||||
try:
|
||||
import managed_installs as _mi
|
||||
lxc_enabled_now = _mi._lxc_updates_notification_enabled()
|
||||
except Exception:
|
||||
lxc_enabled_now = False
|
||||
lxc_just_enabled = lxc_enabled_now and not self._lxc_was_enabled
|
||||
self._lxc_was_enabled = lxc_enabled_now
|
||||
|
||||
if (not lxc_just_enabled
|
||||
and now - self._last_managed_check < self.UPDATE_CHECK_INTERVAL):
|
||||
return
|
||||
self._last_managed_check = now
|
||||
|
||||
@@ -3117,8 +3183,15 @@ class PollingCollector:
|
||||
print(f"[PollingCollector] managed_installs update run failed: {e}")
|
||||
return
|
||||
|
||||
# Split LXC updates out of the per-item event stream — they get
|
||||
# one grouped notification per cycle instead of one per CT, to
|
||||
# avoid spamming the user when 15 CTs have pending updates the
|
||||
# same day. Non-LXC types keep their existing per-item flow.
|
||||
lxc_updates = [u for u in updates if u.get('type') == 'lxc']
|
||||
other_updates = [u for u in updates if u.get('type') != 'lxc']
|
||||
|
||||
seen_ids: set[str] = set()
|
||||
for item in updates:
|
||||
for item in other_updates:
|
||||
item_id = item.get('id', '')
|
||||
if not item_id:
|
||||
continue
|
||||
@@ -3143,6 +3216,17 @@ class PollingCollector:
|
||||
entity_id=f'managed_{item_id}',
|
||||
))
|
||||
|
||||
# LXC: emit one grouped event with all CTs that have pending
|
||||
# updates. The batch fingerprint is recomputed every cycle and
|
||||
# compared with the last notified one — if the set of CTs or
|
||||
# their per-CT fingerprints changed, we notify again.
|
||||
if lxc_updates:
|
||||
self._emit_lxc_updates_batch(lxc_updates)
|
||||
else:
|
||||
# Empty batch — clear the dedup so a fresh batch later fires
|
||||
# a new notification even with the same CTs/versions.
|
||||
self._notified_lxc_batch = None
|
||||
|
||||
# Forget items that no longer have an update available. If
|
||||
# the user installs the update and then a later release lands,
|
||||
# the dedup state is already cleared so the next notification
|
||||
@@ -3159,6 +3243,67 @@ class PollingCollector:
|
||||
if stale_id not in active_with_update:
|
||||
self._notified_managed_updates.pop(stale_id, None)
|
||||
|
||||
def _emit_lxc_updates_batch(self, items: list[dict]) -> None:
|
||||
"""Build and queue a single ``lxc_updates_available`` event for
|
||||
every running CT that currently has pending apt updates.
|
||||
|
||||
The batch fingerprint combines every CT's per-CT fingerprint
|
||||
(count + security_count + top package names). A new CT entering
|
||||
the set OR an existing CT changing its per-CT fingerprint
|
||||
produces a new batch fingerprint, so the cooldown is broken and
|
||||
the event fires. A truly stable batch is silenced via the
|
||||
equality check below.
|
||||
"""
|
||||
# Stable order so the fingerprint is deterministic
|
||||
items_sorted = sorted(items, key=lambda x: x.get('id', ''))
|
||||
|
||||
ct_lines: list[str] = []
|
||||
per_ct_fps: list[str] = []
|
||||
total_packages = 0
|
||||
total_security = 0
|
||||
|
||||
for idx, it in enumerate(items_sorted):
|
||||
update = it.get('update_check', {}) or {}
|
||||
count = int(update.get('_count') or 0)
|
||||
sec_count = int(update.get('_security_count') or 0)
|
||||
total_packages += count
|
||||
total_security += sec_count
|
||||
|
||||
vmid = it.get('_vmid') or it.get('id', '').removeprefix('lxc:') or '?'
|
||||
name = it.get('name') or f'CT {vmid}'
|
||||
# Each CT renders across two/three lines so the count and the
|
||||
# security count don't compete with the CT label on the same
|
||||
# row — much easier to read in Telegram/Discord at a glance.
|
||||
# A blank line before every CT except the first separates
|
||||
# entries cleanly without a trailing blank at the end.
|
||||
if idx > 0:
|
||||
ct_lines.append("")
|
||||
ct_lines.append(f"🏷️ CT {vmid} ({name}):")
|
||||
ct_lines.append(f" 📦 {count} update(s)")
|
||||
if sec_count:
|
||||
ct_lines.append(f" 🔒 {sec_count} security")
|
||||
per_ct_fps.append(f"{it.get('id', '')}={update.get('latest', '')}")
|
||||
|
||||
batch_fingerprint = '|'.join(per_ct_fps)
|
||||
if self._notified_lxc_batch == batch_fingerprint:
|
||||
return # same batch as last time — silent
|
||||
self._notified_lxc_batch = batch_fingerprint
|
||||
|
||||
data = {
|
||||
'hostname': self._hostname,
|
||||
'count': len(items_sorted),
|
||||
'total_packages': total_packages,
|
||||
'security_count': total_security,
|
||||
'ct_list': '\n'.join(ct_lines),
|
||||
}
|
||||
self._queue.put(NotificationEvent(
|
||||
'lxc_updates_available', 'INFO', data,
|
||||
source='polling',
|
||||
entity='node',
|
||||
# Hash so different batches get distinct cooldown keys
|
||||
entity_id=f'lxc_batch_{abs(hash(batch_fingerprint)) % 10**10}',
|
||||
))
|
||||
|
||||
def _build_managed_install_event(self, item: dict) -> tuple[str, dict]:
|
||||
"""Translate a registry item into a (event_type, template_data)
|
||||
pair. Per-type bodies live here so the registry stays
|
||||
|
||||
@@ -973,7 +973,9 @@ class NotificationManager:
|
||||
cleanup_interval = 3600 # Cleanup cooldowns every hour
|
||||
flush_interval = 5 # Flush aggregation buckets every 5s
|
||||
digest_check_interval = 60 # Re-evaluate digest schedule every minute
|
||||
|
||||
last_quiet_check = 0.0
|
||||
quiet_check_interval = 60 # Re-evaluate per-channel quiet window every minute
|
||||
|
||||
while self._running:
|
||||
try:
|
||||
event = self._event_queue.get(timeout=2)
|
||||
@@ -990,17 +992,36 @@ class NotificationManager:
|
||||
if now_mono - last_digest_check > digest_check_interval:
|
||||
self._maybe_flush_digests()
|
||||
last_digest_check = now_mono
|
||||
# Quiet Hours close → flush buffered sub-CRITICAL events
|
||||
# as a single grouped summary. Has to run even when the
|
||||
# queue is idle, otherwise users who don't generate any
|
||||
# events post-window would never see their summary.
|
||||
if now_mono - last_quiet_check > quiet_check_interval:
|
||||
self._maybe_flush_quiet_hours()
|
||||
last_quiet_check = now_mono
|
||||
continue
|
||||
|
||||
try:
|
||||
self._process_event(event)
|
||||
except Exception as e:
|
||||
print(f"[NotificationManager] Dispatch error: {e}")
|
||||
|
||||
|
||||
# Also flush aggregation after each event
|
||||
if time.monotonic() - last_flush > flush_interval:
|
||||
now_mono = time.monotonic()
|
||||
if now_mono - last_flush > flush_interval:
|
||||
self._flush_aggregation()
|
||||
last_flush = time.monotonic()
|
||||
last_flush = now_mono
|
||||
# Re-check digest schedule after each event too. The idle-only
|
||||
# check above misses the daily flush window when the queue stays
|
||||
# busy through the digest_time minute (rare but real: a burst of
|
||||
# journal events arriving at the same minute as the target). The
|
||||
# 23h guard inside _maybe_flush_digests keeps it idempotent.
|
||||
if now_mono - last_digest_check > digest_check_interval:
|
||||
self._maybe_flush_digests()
|
||||
last_digest_check = now_mono
|
||||
if now_mono - last_quiet_check > quiet_check_interval:
|
||||
self._maybe_flush_quiet_hours()
|
||||
last_quiet_check = now_mono
|
||||
|
||||
def _flush_aggregation(self):
|
||||
"""Flush expired aggregation buckets and dispatch summaries."""
|
||||
@@ -1171,20 +1192,20 @@ class NotificationManager:
|
||||
|
||||
# ── Per-channel quiet hours ──
|
||||
# The user marks a window (e.g. 22:00 → 06:00) during which only
|
||||
# CRITICAL events reach this channel. Anything below CRITICAL is
|
||||
# dropped silently — not buffered, not retried — because the
|
||||
# whole point is "don't wake me up at 3 AM unless the disk
|
||||
# exploded". CRITICAL always wins. The window is configured
|
||||
# per-channel; same channel can have different rules from
|
||||
# another. See _in_quiet_hours() for boundary semantics.
|
||||
# CRITICAL events reach this channel. Sub-CRITICAL events are
|
||||
# **buffered** to `quiet_pending` and flushed as a SINGLE grouped
|
||||
# summary when the window closes — so the user doesn't get
|
||||
# paged at 3 AM but also doesn't lose 8h of activity overnight.
|
||||
# CRITICAL always wins. The window is configured per-channel.
|
||||
# See _in_quiet_hours() for boundary semantics.
|
||||
# `_dispatch_to_channels` does NOT receive the NotificationEvent
|
||||
# object — only the rendered primitives. Using `event.X` here
|
||||
# raised `NameError: name 'event' is not defined` for every
|
||||
# event passing through (silenced by the dispatch loop's broad
|
||||
# except → no notifications EVER delivered after Quiet Hours +
|
||||
# Daily Digest were merged). All community-reported "stopped
|
||||
# receiving notifications after update" cases trace back here.
|
||||
# raised `NameError` for every event passing through, silenced
|
||||
# by the dispatch loop's broad except → no notifications EVER
|
||||
# delivered after Quiet Hours + Daily Digest were merged.
|
||||
if severity != 'CRITICAL' and self._in_quiet_hours(ch_name):
|
||||
self._buffer_quiet_event(ch_name, event_type, event_group,
|
||||
severity, title, body)
|
||||
continue
|
||||
|
||||
# ── Per-channel daily digest ──
|
||||
@@ -1537,6 +1558,126 @@ class NotificationManager:
|
||||
)
|
||||
return '\n'.join(lines).rstrip() + '\n'
|
||||
|
||||
# ─── Quiet Hours buffer + flush ────────────────────────────
|
||||
# Reused infrastructure: `quiet_pending` table (created in
|
||||
# health_persistence) has the same shape as `digest_pending`, so
|
||||
# `_compose_digest_body` renders the summary unchanged. What
|
||||
# differs is the lifecycle — quiet_pending flushes when each
|
||||
# channel's window CLOSES, not at a fixed daily time. We track
|
||||
# that transition via `self._was_in_quiet_hours[ch_name]`.
|
||||
|
||||
def _buffer_quiet_event(self, ch_name: str, event_type: str,
|
||||
event_group: str, severity: str,
|
||||
title: str, body: str) -> None:
|
||||
"""Append a sub-CRITICAL event to the channel's quiet-hours
|
||||
buffer in SQLite. Mirrors `_buffer_digest_event` — same shape,
|
||||
different table.
|
||||
"""
|
||||
try:
|
||||
conn = sqlite3.connect(str(DB_PATH), timeout=10)
|
||||
conn.execute('PRAGMA journal_mode=WAL')
|
||||
conn.execute('PRAGMA busy_timeout=5000')
|
||||
conn.execute(
|
||||
'INSERT INTO quiet_pending '
|
||||
'(channel, event_type, event_group, severity, ts, title, body) '
|
||||
'VALUES (?, ?, ?, ?, ?, ?, ?)',
|
||||
(ch_name, event_type, event_group, severity,
|
||||
int(time.time()), title, body),
|
||||
)
|
||||
conn.commit()
|
||||
conn.close()
|
||||
except Exception as e:
|
||||
print(f"[NotificationManager] quiet_pending write failed: {e}")
|
||||
|
||||
def _maybe_flush_quiet_hours(self) -> None:
|
||||
"""Detect per-channel quiet-hours close (in→out transition) and
|
||||
emit one summary notification with everything buffered during
|
||||
the window. Called every ~60s from the dispatch loop.
|
||||
|
||||
State held in-memory: `self._was_in_quiet_hours[ch_name]`. On
|
||||
first run after restart all channels start as "unknown" — we
|
||||
seed with the current window status WITHOUT firing a summary,
|
||||
so a Monitor restart in the middle of someone's quiet window
|
||||
doesn't trigger a fake close-of-window flush.
|
||||
"""
|
||||
if not hasattr(self, '_was_in_quiet_hours'):
|
||||
self._was_in_quiet_hours = {}
|
||||
|
||||
for ch_name, channel in list(self._channels.items()):
|
||||
currently_in = self._in_quiet_hours(ch_name)
|
||||
previously_in = self._was_in_quiet_hours.get(ch_name)
|
||||
self._was_in_quiet_hours[ch_name] = currently_in
|
||||
|
||||
# Seed run (no prior state) — don't fire anything.
|
||||
if previously_in is None:
|
||||
continue
|
||||
# Still in the window → just buffer.
|
||||
if currently_in:
|
||||
continue
|
||||
# Was in window, now out → close transition → flush.
|
||||
if previously_in and not currently_in:
|
||||
try:
|
||||
self._flush_quiet_for_channel(ch_name, channel)
|
||||
except Exception as e:
|
||||
print(f"[NotificationManager] quiet flush failed for "
|
||||
f"{ch_name}: {e}")
|
||||
|
||||
def _flush_quiet_for_channel(self, ch_name: str, channel: Any) -> None:
|
||||
"""Send a single grouped summary of everything buffered for
|
||||
`ch_name` during the just-closed quiet window, then drop the
|
||||
buffer rows. Reuses `_compose_digest_body` for rendering since
|
||||
the row shape is identical.
|
||||
"""
|
||||
try:
|
||||
conn = sqlite3.connect(str(DB_PATH), timeout=10)
|
||||
conn.execute('PRAGMA journal_mode=WAL')
|
||||
cursor = conn.cursor()
|
||||
cursor.execute(
|
||||
'SELECT id, event_type, event_group, ts, title, body '
|
||||
'FROM quiet_pending WHERE channel = ? ORDER BY ts ASC',
|
||||
(ch_name,),
|
||||
)
|
||||
rows = cursor.fetchall()
|
||||
conn.close()
|
||||
except Exception as e:
|
||||
print(f"[NotificationManager] quiet read failed for {ch_name}: {e}")
|
||||
return
|
||||
|
||||
if not rows:
|
||||
return
|
||||
|
||||
host = _hostname(self._config)
|
||||
summary_title = (
|
||||
f"{host}: {len(rows)} events buffered during Quiet Hours"
|
||||
)
|
||||
summary_body = self._compose_digest_body(rows)
|
||||
|
||||
try:
|
||||
channel.send(summary_title, summary_body, severity='INFO',
|
||||
data={'_quiet_hours_summary': True, '_count': len(rows)})
|
||||
except Exception as e:
|
||||
print(f"[NotificationManager] quiet send failed for "
|
||||
f"{ch_name}: {e}")
|
||||
return
|
||||
|
||||
# Only drop the rows after a successful send so a transient
|
||||
# transport failure (Telegram timeout, SMTP outage) doesn't
|
||||
# lose the user's overnight context.
|
||||
try:
|
||||
ids = [r[0] for r in rows]
|
||||
conn = sqlite3.connect(str(DB_PATH), timeout=10)
|
||||
conn.execute('PRAGMA journal_mode=WAL')
|
||||
placeholders = ','.join('?' * len(ids))
|
||||
conn.execute(
|
||||
f'DELETE FROM quiet_pending WHERE id IN ({placeholders})',
|
||||
ids,
|
||||
)
|
||||
conn.commit()
|
||||
conn.close()
|
||||
except Exception as e:
|
||||
print(f"[NotificationManager] quiet cleanup failed for "
|
||||
f"{ch_name}: {e}")
|
||||
|
||||
def _passes_cooldown(self, event: NotificationEvent) -> bool:
|
||||
"""Check if the event passes cooldown rules WITHOUT stamping.
|
||||
|
||||
@@ -2315,6 +2456,18 @@ class NotificationManager:
|
||||
ch_cfg: Dict[str, Any] = {
|
||||
'enabled': self._config.get(f'{ch_type}.enabled', 'false') == 'true',
|
||||
'rich_format': self._config.get(f'{ch_type}.rich_format', 'false') == 'true',
|
||||
# Quiet Hours + Daily Digest live in the same per-channel
|
||||
# namespace but weren't being projected back to the UI —
|
||||
# the toggles round-tripped through POST but the GET only
|
||||
# returned `enabled`/`rich_format` plus channel-specific
|
||||
# config_keys, so after a reload the user saw the toggle
|
||||
# off even though the DB had it on. Reported on .1.10
|
||||
# along with the post-window delivery bug.
|
||||
'quiet_enabled': self._config.get(f'{ch_type}.quiet_enabled', 'false') == 'true',
|
||||
'quiet_start': self._config.get(f'{ch_type}.quiet_start', '22:00'),
|
||||
'quiet_end': self._config.get(f'{ch_type}.quiet_end', '06:00'),
|
||||
'digest_enabled': self._config.get(f'{ch_type}.digest_enabled', 'false') == 'true',
|
||||
'digest_time': self._config.get(f'{ch_type}.digest_time', '09:00'),
|
||||
}
|
||||
for config_key in info['config_keys']:
|
||||
full_key = f'{ch_type}.{config_key}'
|
||||
|
||||
@@ -484,6 +484,23 @@ TEMPLATES = {
|
||||
},
|
||||
|
||||
# ── VM / CT events ──
|
||||
# Phase 1: apt-based update detection inside running Debian/Ubuntu
|
||||
# LXCs. Grouped — one notification per cycle covers every CT with
|
||||
# pending updates. Opt-in (default_enabled=False) because the check
|
||||
# uses `pct exec` to inspect package state inside the user's CTs.
|
||||
# Phase 2 (community-scripts metadata) will extend this without
|
||||
# changing the event type.
|
||||
'lxc_updates_available': {
|
||||
'title': '{hostname}: {count} LXC(s) with package updates available',
|
||||
'body': (
|
||||
'📊 {count} LXC(s) with pending package updates '
|
||||
'(📦 {total_packages} total, 🔒 {security_count} security):\n\n'
|
||||
'{ct_list}'
|
||||
),
|
||||
'label': 'LXC updates available (experimental)',
|
||||
'group': 'vm_ct',
|
||||
'default_enabled': False,
|
||||
},
|
||||
'vm_start': {
|
||||
'title': '{hostname}: VM {vmname} ({vmid}) started',
|
||||
'body': 'Virtual machine {vmname} (ID: {vmid}) is now running.',
|
||||
@@ -1109,8 +1126,8 @@ TEMPLATES = {
|
||||
'title': '{hostname}: {count} ProxMenux optimization update(s) available',
|
||||
'body': (
|
||||
'{count} optimization update(s) detected on this host.\n\n'
|
||||
'Tools:\n{tool_list}\n\n'
|
||||
'How to apply:\n'
|
||||
'🛠️ Tools:\n{tool_list}\n\n'
|
||||
'💡 How to apply:\n'
|
||||
' • ProxMenux Monitor → Settings → ProxMenux Optimizations\n'
|
||||
' • Or run the post-install menu (option 2) → "Apply available updates"'
|
||||
),
|
||||
@@ -1129,12 +1146,12 @@ TEMPLATES = {
|
||||
'secure_gateway_update_available': {
|
||||
'title': '{hostname}: {app_name} update available — v{latest_version}',
|
||||
'body': (
|
||||
'{app_name} (managed by ProxMenux) has {package_count} package update(s) '
|
||||
'{app_name} (managed by ProxMenux) has 📦 {package_count} package update(s) '
|
||||
'pending in its container.\n'
|
||||
'Current Tailscale: v{current_version} → Latest: v{latest_version}\n\n'
|
||||
'Open ProxMenux Monitor > Settings > Secure Gateway and click '
|
||||
'🔹 Current Tailscale: v{current_version} → 🟢 Latest: v{latest_version}\n\n'
|
||||
'💡 Open ProxMenux Monitor > Settings > Secure Gateway and click '
|
||||
'"Update" to apply.\n\n'
|
||||
'Packages:\n{package_list}'
|
||||
'🗂️ Packages:\n{package_list}'
|
||||
),
|
||||
'label': 'Secure Gateway update available',
|
||||
'group': 'updates',
|
||||
@@ -1147,10 +1164,10 @@ TEMPLATES = {
|
||||
'title': '{hostname}: NVIDIA driver update available — v{latest_version}',
|
||||
'body': (
|
||||
'A newer NVIDIA driver compatible with kernel {kernel} is available.\n'
|
||||
'Currently installed: v{current_version}\n'
|
||||
'Latest available: v{latest_version}\n\n'
|
||||
'🔹 Currently installed: v{current_version}\n'
|
||||
'🟢 Latest available: v{latest_version}\n\n'
|
||||
'{upgrade_reason}\n\n'
|
||||
'To reinstall:\n'
|
||||
'💡 To reinstall:\n'
|
||||
' • From the ProxMenux post-install menu: {menu_label}\n\n'
|
||||
'Reinstalling rebuilds the DKMS module against the running kernel and '
|
||||
'requires a reboot to load the new driver.'
|
||||
@@ -1465,6 +1482,7 @@ CATEGORY_EMOJI = {
|
||||
# Event-specific title icons (override category default when present)
|
||||
EVENT_EMOJI = {
|
||||
# VM / CT
|
||||
'lxc_updates_available': '\U0001F4E6', # package - pending CT updates
|
||||
'vm_start': '\u25B6\uFE0F', # play button
|
||||
'vm_start_warning': '\u26A0\uFE0F', # warning sign - started with warnings
|
||||
'vm_stop': '\u23F9\uFE0F', # stop button
|
||||
@@ -1768,6 +1786,14 @@ Your job: translate alerts into {language} and enrich them with context when pro
|
||||
═══ ABSOLUTE CONSTRAINTS (NO EXCEPTIONS) ═══
|
||||
- NO HALLUCINATIONS: Do not invent causes, solutions, or facts not present in the provided data
|
||||
- NO SPECULATION: If something is unclear, state what IS known, not what MIGHT be
|
||||
- NO FILLER LINES: Every output line must derive from the input message, the journal context,
|
||||
or the known-error database. NEVER add generic statements like "Event detected during normal
|
||||
operation", "No further issues", or padding lines just to fill space. If a field has no evidence,
|
||||
OMIT it — a shorter output is always better than invented content.
|
||||
- 📝 Log lines: ONLY include when the journal context contains an actual relevant log line.
|
||||
Convey its meaning faithfully, do not invent one. If no relevant log exists, OMIT the 📝 line.
|
||||
- ⏱️ Duration/timing lines: ONLY for backup/migration durations explicitly present in the input.
|
||||
NEVER use ⏱️ for vague "event detected at X" filler.
|
||||
- NO CONVERSATIONAL TEXT: Never write "Here is...", "I've translated...", "Let me explain..."
|
||||
- ONLY use information from: the message, journal context, and known error database (if provided)
|
||||
|
||||
@@ -1884,7 +1910,12 @@ Your goal is to maintain the original structure of the message while using emoji
|
||||
ESPECIALLY when adding new context, formatting technical data, or writing tips.
|
||||
|
||||
RULES:
|
||||
1. PRESERVE BASE STRUCTURE: Respect the original fields and layout provided in the input message.
|
||||
1. PRESERVE BASE STRUCTURE AND INPUT EMOJIS: Respect the original fields and layout provided in
|
||||
the input message. **CRITICAL: every emoji already present in the input (📊, 🏷️, 📦, 🔒, 🛠️,
|
||||
💡, ⚠️, ✨, 🌐, 🔥, 💧, 📝, ⏱️, etc.) MUST appear in the output, in the same position relative
|
||||
to its label.** Translating the surrounding words is fine; deleting or relocating the emoji is
|
||||
not. You may add additional context-appropriate emojis from BODY EMOJIS below, but never strip
|
||||
the ones the template already provides.
|
||||
2. ENHANCE WITH ICONS: Place emojis at the START of a line to identify the data type.
|
||||
3. NEW CONTEXT: When adding journal info, SMART data, or known errors, use appropriate icons to make it readable.
|
||||
4. NO SPAM: Do not put emojis in the middle or end of sentences. Use 1-3 emojis at START of lines where they add clarity. Combine when meaningful (💾✅ backup ok).
|
||||
|
||||
@@ -791,7 +791,8 @@ install_normal_version() {
|
||||
fi
|
||||
|
||||
for pkg in "${BASIC_DEPS[@]}"; do
|
||||
if ! dpkg -l | grep -qw "$pkg"; then
|
||||
# Strict per-package check — see comment in install_translation_version().
|
||||
if ! dpkg-query -W -f='${Status}' "$pkg" 2>/dev/null | grep -q "ok installed"; then
|
||||
if apt-get install -y "$pkg" > /dev/null 2>&1; then
|
||||
update_config "$pkg" "installed"
|
||||
else
|
||||
@@ -939,7 +940,12 @@ install_translation_version() {
|
||||
|
||||
DEPS=("dialog" "curl" "git" "python3" "python3-venv" "python3-pip")
|
||||
for pkg in "${DEPS[@]}"; do
|
||||
if ! dpkg -l | grep -qw "$pkg"; then
|
||||
# `dpkg -l | grep -qw "$pkg"` treats `-` as a word boundary, so a
|
||||
# query for `python3` would falsely match `python3-pip` and skip
|
||||
# the real `python3` install. `dpkg-query -W -f='${Status}'` asks
|
||||
# for the EXACT package and reports "install ok installed" only
|
||||
# when truly present. Issue #205 traced back here.
|
||||
if ! dpkg-query -W -f='${Status}' "$pkg" 2>/dev/null | grep -q "ok installed"; then
|
||||
if apt-get install -y "$pkg" > /dev/null 2>&1; then
|
||||
update_config "$pkg" "installed"
|
||||
else
|
||||
|
||||
@@ -569,7 +569,11 @@ install_beta() {
|
||||
fi
|
||||
|
||||
for pkg in "${BASIC_DEPS[@]}"; do
|
||||
if ! dpkg -l | grep -qw "$pkg"; then
|
||||
# Strict per-package check — `dpkg -l | grep -qw python3` falsely
|
||||
# matches `python3-pip` (the `-` is a word boundary), so dpkg-query
|
||||
# for the EXACT package name is the only reliable test.
|
||||
# Issue #205.
|
||||
if ! dpkg-query -W -f='${Status}' "$pkg" 2>/dev/null | grep -q "ok installed"; then
|
||||
if apt-get install -y "$pkg" > /dev/null 2>&1; then
|
||||
update_config "$pkg" "installed"
|
||||
else
|
||||
|
||||
@@ -997,3 +997,207 @@ pmx_ask_permanent_mount() {
|
||||
echo "false"
|
||||
fi
|
||||
}
|
||||
|
||||
|
||||
# ==========================================================
# Private helper: can POSIX ACLs be written on a path in a CT?
#
# Args:
#   $1 = CTID
#   $2 = path inside the CT
#
# When `setfacl` is missing inside the CT, tries (best effort)
# to install the `acl` package first. The probe then rewrites
# the owner entry as `u::rwx` on the path.
# NOTE: the probe is not strictly read-only — if the owning
# user had less than rwx on the path, it is widened to rwx.
#
# Returns:
#   exit status of the probing `setfacl` call (0 = ACLs work).
# ==========================================================
_pmx_probe_acl_support() {
    local ctid="$1"
    local path="$2"

    if ! pct exec "$ctid" -- bash -c "command -v setfacl >/dev/null" 2>/dev/null; then
        pct exec "$ctid" -- bash -c "apt-get install -y -qq acl >/dev/null 2>&1" || true
    fi

    pct exec "$ctid" -- setfacl -m "u::rwx" "$path" >/dev/null 2>&1
}

# ==========================================================
# Inspect the filesystem behind a path inside a CT and report
# which POSIX features it supports. Consumed by
# pmx_setup_share_permissions (and through it by the Samba /
# NFS LXC server scripts) to decide whether traditional
# chown/chmod is enough, ACLs are needed, or the filesystem
# (exFAT, FAT32, NTFS via fuseblk) supports neither — in which
# case the only viable path is configuring the HOST mount with
# `uid=`/`gid=`/`fmask=`/`dmask=` options.
#
# Args:
#   $1 = CTID
#   $2 = path inside the CT (e.g. /mnt/media)
#
# Echoes a single line with 4 tab-separated fields:
#   <fstype>\t<can_chown>\t<can_acl>\t<unprivileged>
# where can_chown / can_acl / unprivileged are "yes" / "no".
# <fstype> is whatever `stat -f -c '%T'` prints inside the CT,
# or "unknown" when stat produced nothing (e.g. missing path).
# GNU stat typically names the whole ext family "ext2/ext3",
# which is why the ext patterns below are prefix matches.
#
# Sample outputs (fields are tab-separated):
#   "ext2/ext3 yes yes no"  → ext4 on privileged CT, full POSIX
#   "zfs yes no no"         → ZFS without acltype=posixacl
#   "exfat no no no"        → exFAT, no POSIX semantics at all
#   "ext2/ext3 yes yes yes" → ext4 on unprivileged CT (caller
#                             must keep in mind chown from
#                             inside is likely to fail anyway)
# ==========================================================
pmx_detect_share_target_caps() {
    local ctid="$1"
    local path="$2"

    # Filesystem reported by the kernel (NOT what fstab claims —
    # the actual mounted FS as seen from inside the CT).
    local fstype
    fstype=$(pct exec "$ctid" -- stat -f -c '%T' "$path" 2>/dev/null)
    fstype="${fstype:-unknown}"

    local can_chown="yes"
    local can_acl="yes"

    case "$fstype" in
        ext2*|ext3*|ext4*|xfs|btrfs|tmpfs|nfs*|cifs*|smb*)
            # Treated as native POSIX without probing.
            # NOTE(review): network filesystems (nfs/cifs/smb) are
            # assumed to honour chown and ACLs here; whether they do
            # depends on the server/export options — confirm.
            ;;
        zfs)
            # ZFS supports chown natively, but POSIX ACL only when
            # acltype=posixacl, so ACL support has to be probed.
            if ! _pmx_probe_acl_support "$ctid" "$path"; then
                can_acl="no"
            fi
            ;;
        msdos|vfat|exfat|ntfs|fuseblk)
            # These filesystems do not carry POSIX ownership / mode
            # / ACL at all. Permissions come exclusively from the
            # mount-time options (uid=, gid=, fmask=, dmask=).
            can_chown="no"
            can_acl="no"
            ;;
        *)
            # Unknown FS — probe both. chown to the current owner is
            # a no-op when it succeeds; an unreadable owner counts as
            # "cannot chown".
            local cur_owner
            cur_owner=$(pct exec "$ctid" -- stat -c '%U:%G' "$path" 2>/dev/null)
            if [[ -z "$cur_owner" ]] || ! pct exec "$ctid" -- chown "$cur_owner" "$path" >/dev/null 2>&1; then
                can_chown="no"
            fi
            if ! _pmx_probe_acl_support "$ctid" "$path"; then
                can_acl="no"
            fi
            ;;
    esac

    # CT type — privileged (unprivileged: 0) lets chown / chmod
    # run as effective host root. Unprivileged CTs have a user
    # namespace mapping and chown from inside the CT typically
    # fails on host-side bind mounts. A missing `unprivileged:`
    # line in the config is reported as "no".
    local unprivileged
    unprivileged=$(pct config "$ctid" 2>/dev/null | awk -F': ' '/^unprivileged:/ {print $2; exit}')
    local unpriv_flag="no"
    [[ "$unprivileged" == "1" ]] && unpriv_flag="yes"

    printf '%s\t%s\t%s\t%s\n' "$fstype" "$can_chown" "$can_acl" "$unpriv_flag"
}
|
||||
|
||||
|
||||
# ==========================================================
# Configure ownership / permissions on a shared mountpoint so
# the given Samba/NFS user can write to it. Branches by the
# filesystem capabilities reported by pmx_detect_share_target_caps.
#
# Args:
#   $1 = CTID
#   $2 = mount point inside the CT
#   $3 = username inside the CT (must already exist); pass an
#        empty string for the NFS path, where only the
#        `sharedfiles` group matters and no write test is run.
#
# Returns:
#   0 when the write test passes, or when no username was given.
#   1 when the write test for the given user fails.
#
# Expects the global helper `sharedfiles` group to already exist
# in the CT (caller is responsible for that — see
# setup_universal_sharedfiles_group).
# ==========================================================
pmx_setup_share_permissions() {
    local ctid="$1"
    local mp="$2"
    local username="$3"

    # Probe filesystem capabilities.
    local caps fstype can_chown can_acl unpriv
    caps=$(pmx_detect_share_target_caps "$ctid" "$mp")
    IFS=$'\t' read -r fstype can_chown can_acl unpriv <<<"$caps"

    msg_info "$(translate "Detected filesystem at $mp:") $fstype (chown=$can_chown, acl=$can_acl, unprivileged_ct=$unpriv)"

    # Always ensure the user is in the sharedfiles group — this
    # is harmless regardless of FS capabilities. Skip when no user
    # was passed (NFS path: only the group matters, no per-user ACL).
    if [[ -n "$username" ]]; then
        pct exec "$ctid" -- usermod -aG sharedfiles "$username" 2>/dev/null || true
    fi

    # ACL spec — include the user only when one is provided.
    local acl_spec="g:sharedfiles:rwx,m::rwx"
    if [[ -n "$username" ]]; then
        acl_spec="u:$username:rwx,$acl_spec"
    fi

    if [[ "$can_chown" == "yes" ]]; then
        # POSIX-friendly filesystem. Set group ownership +
        # setgid bit so new files inherit the group.
        if pct exec "$ctid" -- chown root:sharedfiles "$mp" 2>/dev/null \
            && pct exec "$ctid" -- chmod 2775 "$mp" 2>/dev/null; then
            msg_ok "$(translate "Ownership set to root:sharedfiles with 2775 on:") $mp"
        else
            msg_warn "$(translate "chown/chmod failed — likely unprivileged CT against host bind mount. Falling back to ACL.")"
        fi

        if [[ "$can_acl" == "yes" ]]; then
            # The capability probe can report acl=yes from the
            # filesystem type alone, without checking that the
            # setfacl binary exists in the CT. Install it (best
            # effort) so the calls below do not fail on a missing
            # tool.
            if ! pct exec "$ctid" -- bash -c "command -v setfacl >/dev/null" 2>/dev/null; then
                pct exec "$ctid" -- bash -c "apt-get install -y -qq acl >/dev/null 2>&1" || true
            fi

            # Access + default ACL so new files clients create
            # inherit write permission for the sharedfiles group
            # (and the Samba user, when one is provided). Without
            # `-d` (default ACL) the parent's ACL doesn't propagate
            # to children → new files end up with restrictive 755
            # and clients get "permission denied" on the next write.
            # `m::rwx` keeps the ACL mask from clipping rwx grants.
            # Failures stay non-fatal, but are no longer reported
            # as success.
            local acl_applied="yes"
            pct exec "$ctid" -- setfacl -R -m "$acl_spec" "$mp" 2>/dev/null || acl_applied="no"
            pct exec "$ctid" -- setfacl -R -d -m "$acl_spec" "$mp" 2>/dev/null || acl_applied="no"
            if [[ "$acl_applied" == "yes" ]]; then
                msg_ok "$(translate "POSIX ACLs applied (access + default for inheritance).")"
            else
                msg_warn "$(translate "POSIX ACLs could not be applied completely — relying on the write test below.")"
            fi
        else
            msg_warn "$(translate "Filesystem $fstype does not support POSIX ACLs — relying on group ownership only.")"
            if [[ "$fstype" == "zfs" ]]; then
                msg_warn "$(translate "Tip: zfs set acltype=posixacl xattr=sa <pool>/<dataset> enables full ACL support.")"
            fi
        fi
    else
        # exFAT / FAT32 / NTFS-fuse / similar — permissions live
        # entirely in the host mount options. Don't waste cycles
        # trying chown/chmod/setfacl; tell the user what to do
        # and refuse to silently produce a broken share.
        local uid_in_ct="" gid_in_ct
        # No user on the NFS path: skip the lookup and let the
        # suggested command fall back to uid=1000.
        if [[ -n "$username" ]]; then
            uid_in_ct=$(pct exec "$ctid" -- id -u "$username" 2>/dev/null)
        fi
        gid_in_ct=$(pct exec "$ctid" -- getent group sharedfiles 2>/dev/null | cut -d: -f3)
        msg_warn "$(translate "Filesystem $fstype does NOT support chown/chmod/ACL.")"
        msg_warn "$(translate "On a privileged CT the mount options carry the only permissions.")"
        msg_warn "$(translate "Stop the CT, unmount the disk on the HOST, and remount with:")"
        echo
        echo " mount -o uid=${uid_in_ct:-1000},gid=${gid_in_ct:-100},fmask=0002,dmask=0002 <device> <hostpath>"
        echo
        msg_warn "$(translate "Then update /etc/fstab on the host with the same options.")"
        msg_warn "$(translate "Recommendation: reformat the disk to ext4 for a robust setup — see docs.")"
    fi

    # Verify the user can actually write. `runuser` instead of
    # `su` — `pct exec ... su -` raises 'cannot set groups:
    # Operation not permitted' due to a PAM/cap quirk with the
    # exec entry path; runuser doesn't have that issue.
    # Skipped for the NFS path (no specific user to test as — the
    # NFS server itself decides UID mapping at export time).
    if [[ -z "$username" ]]; then
        msg_ok "$(translate "Directory configured for sharedfiles group access on:") $mp"
        return 0
    fi

    # The mount point is handed to the inner shell as a positional
    # argument instead of being spliced into the script text, so a
    # path containing quotes can neither break the probe nor inject
    # commands into the CT.
    local has_access
    has_access=$(pct exec "$ctid" -- runuser -u "$username" -- \
        bash -c 'test -w "$1" && echo yes || echo no' _ "$mp" 2>/dev/null)
    if [[ "$has_access" == "yes" ]]; then
        msg_ok "$(translate "Write access verified for user:") $username"
        return 0
    else
        msg_error "$(translate "Write access test FAILED for user:") $username"
        msg_warn "$(translate "Samba/NFS clients will likely receive 'permission denied'. Review the steps above.")"
        return 1
    fi
}
|
||||
|
||||
@@ -602,12 +602,9 @@ EOF
|
||||
|
||||
install_log2ram_auto() {
|
||||
local FUNC_VERSION="1.2"
|
||||
|
||||
# description: Install Log2RAM with size auto-tuned to host RAM (128M/256M/512M); SSD/M.2 detection skips on rotational disks.
|
||||
# ── Reinstall detection ─────────────────────────────────────────────────
|
||||
# If log2ram was previously installed by ProxMenux, skip hardware detection
|
||||
# and reinstall directly — no prompts, transparent to user. Sprint 12A:
|
||||
# also matches the new structured form `{"installed": true, ...}` written by
|
||||
# the updated register_tool, in addition to the legacy boolean true entry.
|
||||
|
||||
if [[ -f "$TOOLS_JSON" ]] && jq -e '.log2ram == true or .log2ram.installed == true' "$TOOLS_JSON" >/dev/null 2>&1; then
|
||||
msg_ok "$(translate "Log2RAM already registered — updating to latest configuration")"
|
||||
else
|
||||
@@ -854,6 +851,11 @@ EOF
|
||||
#msg_ok "$(translate "Backup created:") /etc/systemd/journald.conf.bak.$(date +%Y%m%d-%H%M%S)"
|
||||
msg_ok "$(translate "Journald configuration adjusted to") ${USE_MB}M (Log2RAM ${LOG2RAM_SIZE})"
|
||||
|
||||
systemctl daemon-reload >/dev/null 2>&1 || true
|
||||
systemctl restart log2ram >/dev/null 2>&1 || true
|
||||
log2ram clean >/dev/null 2>&1 || true
|
||||
log2ram write >/dev/null 2>&1 || true
|
||||
systemctl restart rsyslog >/dev/null 2>&1 || true
|
||||
|
||||
register_tool "log2ram" true "$FUNC_VERSION"
|
||||
}
|
||||
@@ -933,6 +935,7 @@ enable_zfs_autotrim() {
|
||||
fi
|
||||
|
||||
if ! pool_supports_autotrim "$pool"; then
|
||||
stop_spinner
|
||||
msg_info2 "$(translate "Pool does not appear to use SSD/NVMe devices with discard support. Skipping ZFS autotrim for pool:") $pool"
|
||||
continue
|
||||
fi
|
||||
|
||||
@@ -280,9 +280,13 @@ create_nfs_export() {
|
||||
|
||||
|
||||
msg_info "$(translate "Setting directory ownership and permissions...")"
|
||||
pct exec "$CTID" -- chown root:sharedfiles "$MOUNT_POINT"
|
||||
pct exec "$CTID" -- chmod 2775 "$MOUNT_POINT"
|
||||
msg_ok "$(translate "Directory configured with sharedfiles group ownership")"
|
||||
# Hand off ownership/perm setup to the shared helper. It detects the
|
||||
# underlying filesystem (ext4/xfs/zfs/exfat/ntfs-fuse/…), picks the
|
||||
# right strategy (chown+chmod, ACLs, or just inform the user when
|
||||
# the FS can't carry POSIX permissions), and verifies the result
|
||||
# with `runuser`. Empty username — NFS doesn't authenticate per-user
|
||||
# the way Samba does; the `sharedfiles` group is all we need.
|
||||
pmx_setup_share_permissions "$CTID" "$MOUNT_POINT" ""
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -172,16 +172,15 @@ create_share() {
|
||||
IS_MOUNTED=$(pct exec "$CTID" -- mount | grep "$MOUNT_POINT" || true)
|
||||
if [[ -n "$IS_MOUNTED" ]]; then
|
||||
msg_info "$(translate "Detected a mounted directory from host. Setting up shared group...")"
|
||||
|
||||
# Match the GID `nfs_lxc_server.sh` uses (101000) so the same
|
||||
# `sharedfiles` group bridges Samba- and NFS-served paths. The
|
||||
# previous `999` was inconsistent — files written via Samba were
|
||||
# owned by GID 999 and not visible to NFS clients accessing the
|
||||
# same dataset. Audit Tier 6 — GID inconsistente.
|
||||
|
||||
# The `sharedfiles` group bridges Samba- and NFS-served paths so a
|
||||
# file written by one protocol is writable by the other. Fixed GID
|
||||
# 101000 keeps the group ID consistent across CTs / hosts that
|
||||
# share the same mount.
|
||||
SHARE_GID=101000
|
||||
GROUP_EXISTS=$(pct exec "$CTID" -- getent group sharedfiles || true)
|
||||
GID_IN_USE=$(pct exec "$CTID" -- getent group "$SHARE_GID" | cut -d: -f1 || true)
|
||||
|
||||
|
||||
if [[ -z "$GROUP_EXISTS" ]]; then
|
||||
if [[ -z "$GID_IN_USE" ]]; then
|
||||
pct exec "$CTID" -- groupadd -g "$SHARE_GID" sharedfiles
|
||||
@@ -193,65 +192,23 @@ create_share() {
|
||||
else
|
||||
msg_ok "$(translate "Group 'sharedfiles' already exists inside the CT")"
|
||||
fi
|
||||
|
||||
if pct exec "$CTID" -- getent group sharedfiles >/dev/null; then
|
||||
pct exec "$CTID" -- usermod -aG sharedfiles "$USERNAME"
|
||||
# chown/chmod on a host bind-mount FAIL with "Operation not
|
||||
# permitted" inside an unprivileged CT — the kernel won't let
|
||||
# an unprivileged user namespace change ownership of files
|
||||
# that belong to a different (real-host) UID. The host owns
|
||||
# the directory; we only need write access for $USERNAME and
|
||||
# the `sharedfiles` group, which the ACL block below handles.
|
||||
# Silence the failure so it doesn't look alarming in the log.
|
||||
pct exec "$CTID" -- chown root:sharedfiles "$MOUNT_POINT" 2>/dev/null || true
|
||||
pct exec "$CTID" -- chmod 2775 "$MOUNT_POINT" 2>/dev/null || true
|
||||
else
|
||||
msg_error "$(translate "Group 'sharedfiles' was not created successfully. Skipping chown/usermod.")"
|
||||
fi
|
||||
|
||||
# Apply BOTH access and default POSIX ACLs unconditionally.
|
||||
# Previously this ran only when `test -w` failed for $USERNAME —
|
||||
# but a local `test -w` says nothing about whether Samba can
|
||||
# write through the share. Once Windows creates a *new* file or
|
||||
# subfolder, it inherits the parent's effective ACL; without a
|
||||
# `default:` entry the new entry has no ACL at all and falls
|
||||
# back to the host bind-mount's restrictive 755 → Windows shows
|
||||
# "permission denied" even though the same user can write from
|
||||
# inside the CT shell. The `-d` flag is what fixes that.
|
||||
# `m::rwx` keeps the ACL mask from clipping rwx grants.
|
||||
if pct exec "$CTID" -- bash -c "command -v setfacl >/dev/null"; then
|
||||
pct exec "$CTID" -- setfacl -R \
|
||||
-m "u:$USERNAME:rwx,g:sharedfiles:rwx,m::rwx" \
|
||||
"$MOUNT_POINT" 2>/dev/null || true
|
||||
pct exec "$CTID" -- setfacl -R -d \
|
||||
-m "u:$USERNAME:rwx,g:sharedfiles:rwx,m::rwx" \
|
||||
"$MOUNT_POINT" 2>/dev/null || true
|
||||
else
|
||||
pct exec "$CTID" -- apt-get install -y -qq acl >/dev/null 2>&1 || true
|
||||
pct exec "$CTID" -- setfacl -R \
|
||||
-m "u:$USERNAME:rwx,g:sharedfiles:rwx,m::rwx" \
|
||||
"$MOUNT_POINT" 2>/dev/null || true
|
||||
pct exec "$CTID" -- setfacl -R -d \
|
||||
-m "u:$USERNAME:rwx,g:sharedfiles:rwx,m::rwx" \
|
||||
"$MOUNT_POINT" 2>/dev/null || true
|
||||
fi
|
||||
|
||||
HAS_ACCESS=$(pct exec "$CTID" -- su -s /bin/bash -c "test -w '$MOUNT_POINT' && echo yes || echo no" "$USERNAME" 2>/dev/null)
|
||||
if [ "$HAS_ACCESS" = "no" ]; then
|
||||
msg_warn "$(translate "ACL applied but write test still failed — check host-side permissions of:") $MOUNT_POINT"
|
||||
else
|
||||
msg_ok "$(translate "Write access (incl. default ACL for new files) confirmed for user:") $USERNAME"
|
||||
fi
|
||||
# Hand off ownership/perm setup to the shared helper. It detects
|
||||
# the underlying filesystem (ext4/xfs/zfs/exfat/ntfs-fuse/…), picks
|
||||
# the right strategy (chown+chmod, ACLs, or just inform the user
|
||||
# if the FS can't carry POSIX permissions), and verifies write
|
||||
# access with `runuser` (avoids the `su: cannot set groups`
|
||||
# PAM quirk that hits `pct exec`).
|
||||
pmx_setup_share_permissions "$CTID" "$MOUNT_POINT" "$USERNAME"
|
||||
else
|
||||
msg_ok "$(translate "No shared mount detected. Applying standard local access.")"
|
||||
# Local (CT-internal) path — chown/chmod should normally succeed,
|
||||
# but on rare bind setups (e.g. zfs with acltype=off) they can
|
||||
# still trip. Suppress stderr to keep the log clean; the
|
||||
# write-access probe below is the source of truth.
|
||||
pct exec "$CTID" -- chown -R "$USERNAME:$USERNAME" "$MOUNT_POINT" 2>/dev/null || true
|
||||
pct exec "$CTID" -- chmod -R 755 "$MOUNT_POINT" 2>/dev/null || true
|
||||
# Local (CT-internal) path: rootfs is always POSIX-friendly, so
|
||||
# chown/chmod always succeed. Keep the previous behaviour.
|
||||
pct exec "$CTID" -- chown -R "$USERNAME:$USERNAME" "$MOUNT_POINT"
|
||||
pct exec "$CTID" -- chmod -R 755 "$MOUNT_POINT"
|
||||
|
||||
HAS_ACCESS=$(pct exec "$CTID" -- su -s /bin/bash -c "test -w '$MOUNT_POINT' && echo yes || echo no" "$USERNAME" 2>/dev/null)
|
||||
HAS_ACCESS=$(pct exec "$CTID" -- runuser -u "$USERNAME" -- \
|
||||
bash -c "test -w '$MOUNT_POINT' && echo yes || echo no" 2>/dev/null)
|
||||
if [ "$HAS_ACCESS" = "no" ]; then
|
||||
pct exec "$CTID" -- setfacl -R -m "u:$USERNAME:rwx" "$MOUNT_POINT" 2>/dev/null || true
|
||||
msg_warn "$(translate "ACL permissions applied for local access for user:") $USERNAME"
|
||||
@@ -268,6 +225,14 @@ create_share() {
|
||||
|
||||
SHARE_NAME=$(basename "$MOUNT_POINT")
|
||||
|
||||
# `force user = $USERNAME` makes every Samba file operation happen
|
||||
# under that unix UID regardless of the connecting Windows account.
|
||||
# Combined with `force group = sharedfiles` and the matching
|
||||
# ownership / ACLs applied earlier, this is what keeps writes
|
||||
# consistent on host bind-mounts where the kernel sees Samba's
|
||||
# impersonated UID — without it Windows can authenticate fine but
|
||||
# writes silently fail because Samba ends up writing as some other
|
||||
# mapped UID with no permission on the target.
|
||||
case "$SHARE_OPTIONS" in
|
||||
rw)
|
||||
CONFIG=$(cat <<EOF
|
||||
@@ -279,6 +244,7 @@ create_share() {
|
||||
browseable = yes
|
||||
guest ok = no
|
||||
valid users = $USERNAME
|
||||
force user = $USERNAME
|
||||
force group = sharedfiles
|
||||
create mask = 0664
|
||||
directory mask = 2775
|
||||
@@ -298,6 +264,7 @@ EOF
|
||||
browseable = yes
|
||||
guest ok = no
|
||||
valid users = $USERNAME
|
||||
force user = $USERNAME
|
||||
force group = sharedfiles
|
||||
veto files = /lost+found/
|
||||
EOF
|
||||
@@ -310,6 +277,7 @@ EOF
|
||||
comment = Custom shared folder for $USERNAME
|
||||
path = $MOUNT_POINT
|
||||
valid users = $USERNAME
|
||||
force user = $USERNAME
|
||||
force group = sharedfiles
|
||||
$CUSTOM_CONFIG
|
||||
veto files = /lost+found/
|
||||
@@ -326,6 +294,7 @@ EOF
|
||||
browseable = yes
|
||||
guest ok = no
|
||||
valid users = $USERNAME
|
||||
force user = $USERNAME
|
||||
force group = sharedfiles
|
||||
create mask = 0664
|
||||
directory mask = 2775
|
||||
|
||||
Reference in New Issue
Block a user