Update notification service

This commit is contained in:
MacRimi
2026-03-03 13:40:46 +01:00
parent f0b8ed20a2
commit da3f99a254
5 changed files with 475 additions and 141 deletions

View File

@@ -40,13 +40,18 @@ interface EventTypeInfo {
default_enabled: boolean default_enabled: boolean
} }
/**
 * Filter overrides for a single notification channel.
 *
 * Both maps are sparse: a key that is absent means the channel has no
 * override for it.
 */
interface ChannelOverrides {
  /** Category key -> enabled flag for this channel. */
  categories: Record<string, boolean>;
  /** Event type key -> enabled flag for this channel. */
  events: Record<string, boolean>;
}
interface NotificationConfig { interface NotificationConfig {
enabled: boolean enabled: boolean
channels: Record<string, ChannelConfig> channels: Record<string, ChannelConfig>
severity_filter: string
event_categories: Record<string, boolean> event_categories: Record<string, boolean>
event_toggles: Record<string, boolean> event_toggles: Record<string, boolean>
event_types_by_group: Record<string, EventTypeInfo[]> event_types_by_group: Record<string, EventTypeInfo[]>
channel_overrides: Record<string, ChannelOverrides>
ai_enabled: boolean ai_enabled: boolean
ai_provider: string ai_provider: string
ai_api_key: string ai_api_key: string
@@ -79,23 +84,22 @@ interface HistoryEntry {
error_message: string | null error_message: string | null
} }
const SEVERITY_OPTIONS = [
{ value: "critical", label: "Critical only" },
{ value: "warning", label: "Warning + Critical" },
{ value: "info", label: "All (Info + Warning + Critical)" },
]
const EVENT_CATEGORIES = [ const EVENT_CATEGORIES = [
{ key: "system", label: "System", desc: "Startup, shutdown, kernel events" },
{ key: "vm_ct", label: "VM / CT", desc: "Start, stop, crash, migration" }, { key: "vm_ct", label: "VM / CT", desc: "Start, stop, crash, migration" },
{ key: "backup", label: "Backups", desc: "Backup start, complete, fail" }, { key: "backup", label: "Backups", desc: "Backup start, complete, fail" },
{ key: "resources", label: "Resources", desc: "CPU, memory, temperature" }, { key: "resources", label: "Resources", desc: "CPU, memory, temperature" },
{ key: "storage", label: "Storage", desc: "Disk space, I/O errors, SMART" }, { key: "storage", label: "Storage", desc: "Disk space, I/O, SMART" },
{ key: "network", label: "Network", desc: "Connectivity, bond, latency" }, { key: "network", label: "Network", desc: "Connectivity, bond, latency" },
{ key: "security", label: "Security", desc: "Auth failures, fail2ban, firewall" }, { key: "security", label: "Security", desc: "Auth failures, Fail2Ban, firewall" },
{ key: "cluster", label: "Cluster", desc: "Quorum, split-brain, HA fencing" }, { key: "cluster", label: "Cluster", desc: "Quorum, split-brain, HA fencing" },
{ key: "services", label: "Services", desc: "System services, shutdown, reboot" },
{ key: "health", label: "Health Monitor", desc: "Health checks, degradation, recovery" },
{ key: "updates", label: "Updates", desc: "System and PVE updates" },
{ key: "other", label: "Other", desc: "Uncategorized notifications" },
] ]
const CHANNEL_TYPES = ["telegram", "gotify", "discord", "email"] as const
const AI_PROVIDERS = [ const AI_PROVIDERS = [
{ value: "openai", label: "OpenAI" }, { value: "openai", label: "OpenAI" },
{ value: "groq", label: "Groq" }, { value: "groq", label: "Groq" },
@@ -109,13 +113,19 @@ const DEFAULT_CONFIG: NotificationConfig = {
discord: { enabled: false }, discord: { enabled: false },
email: { enabled: false }, email: { enabled: false },
}, },
severity_filter: "all",
event_categories: { event_categories: {
system: true, vm_ct: true, backup: true, resources: true, vm_ct: true, backup: true, resources: true, storage: true,
storage: true, network: true, security: true, cluster: true, network: true, security: true, cluster: true, services: true,
health: true, updates: true, other: true,
}, },
event_toggles: {}, event_toggles: {},
event_types_by_group: {}, event_types_by_group: {},
channel_overrides: {
telegram: { categories: {}, events: {} },
gotify: { categories: {}, events: {} },
discord: { categories: {}, events: {} },
email: { categories: {}, events: {} },
},
ai_enabled: false, ai_enabled: false,
ai_provider: "openai", ai_provider: "openai",
ai_api_key: "", ai_api_key: "",
@@ -217,7 +227,6 @@ export function NotificationSettings() {
const flattenConfig = (cfg: NotificationConfig): Record<string, string> => { const flattenConfig = (cfg: NotificationConfig): Record<string, string> => {
const flat: Record<string, string> = { const flat: Record<string, string> = {
enabled: String(cfg.enabled), enabled: String(cfg.enabled),
severity_filter: cfg.severity_filter,
ai_enabled: String(cfg.ai_enabled), ai_enabled: String(cfg.ai_enabled),
ai_provider: cfg.ai_provider, ai_provider: cfg.ai_provider,
ai_api_key: cfg.ai_api_key, ai_api_key: cfg.ai_api_key,
@@ -235,20 +244,17 @@ export function NotificationSettings() {
flat[`${chName}.${field}`] = String(value ?? "") flat[`${chName}.${field}`] = String(value ?? "")
} }
} }
// Flatten event_categories: { system: true, backups: false } -> events.system, events.backups // Flatten global event_categories: { vm_ct: true, backup: false } -> events.vm_ct, events.backup
for (const [cat, enabled] of Object.entries(cfg.event_categories)) { for (const [cat, enabled] of Object.entries(cfg.event_categories)) {
flat[`events.${cat}`] = String(enabled) flat[`events.${cat}`] = String(enabled)
} }
// Flatten event_toggles: { vm_start: true, vm_stop: false } -> event.vm_start, event.vm_stop // Flatten global event_toggles: { vm_start: true } -> event.vm_start
// Always write ALL toggles to DB so the backend has an explicit record.
// This ensures default_enabled changes in templates don't get overridden by stale DB values.
if (cfg.event_toggles) { if (cfg.event_toggles) {
for (const [evt, enabled] of Object.entries(cfg.event_toggles)) { for (const [evt, enabled] of Object.entries(cfg.event_toggles)) {
flat[`event.${evt}`] = String(enabled) flat[`event.${evt}`] = String(enabled)
} }
} }
// Also write any events NOT in event_toggles using their template defaults. // Write defaults for events NOT in toggles
// This covers newly added templates whose default_enabled may be false.
if (cfg.event_types_by_group) { if (cfg.event_types_by_group) {
for (const events of Object.values(cfg.event_types_by_group)) { for (const events of Object.values(cfg.event_types_by_group)) {
for (const evt of (events as Array<{type: string, default_enabled: boolean}>)) { for (const evt of (events as Array<{type: string, default_enabled: boolean}>)) {
@@ -259,6 +265,21 @@ export function NotificationSettings() {
} }
} }
} }
// Per-channel overrides are written under channel-prefixed keys:
//   <channel>.events.<category>   e.g. telegram.events.backup
//   <channel>.event.<eventType>   e.g. telegram.event.vm_start
// A missing overrides object or a missing sub-map contributes no keys.
for (const [channel, channelOverrides] of Object.entries(cfg.channel_overrides ?? {})) {
  for (const [category, isOn] of Object.entries(channelOverrides.categories ?? {})) {
    flat[`${channel}.events.${category}`] = String(isOn)
  }
  for (const [eventType, isOn] of Object.entries(channelOverrides.events ?? {})) {
    flat[`${channel}.event.${eventType}`] = String(isOn)
  }
}
return flat return flat
} }
@@ -1052,27 +1073,8 @@ matcher: proxmenux-pbs
<span className="text-xs font-medium text-muted-foreground uppercase tracking-wider">Filters & Events</span> <span className="text-xs font-medium text-muted-foreground uppercase tracking-wider">Filters & Events</span>
</div> </div>
<div className="rounded-lg border border-border/50 bg-muted/20 p-3 space-y-4"> <div className="rounded-lg border border-border/50 bg-muted/20 p-3 space-y-4">
{/* Severity */} {/* Event Categories (global defaults -- per-channel overrides in Channel Filters below) */}
<div className="space-y-1.5"> <div className="space-y-1.5">
<Label className="text-[11px] text-muted-foreground">Severity Filter</Label>
<Select
value={config.severity_filter}
onValueChange={v => updateConfig(p => ({ ...p, severity_filter: v }))}
disabled={!editMode}
>
<SelectTrigger className={`h-8 text-xs ${!editMode ? "opacity-60" : ""}`}>
<SelectValue />
</SelectTrigger>
<SelectContent>
{SEVERITY_OPTIONS.map(opt => (
<SelectItem key={opt.value} value={opt.value}>{opt.label}</SelectItem>
))}
</SelectContent>
</Select>
</div>
{/* Event Categories */}
<div className="space-y-1.5 border-t border-border/30 pt-3">
<Label className="text-[11px] text-muted-foreground">Event Categories</Label> <Label className="text-[11px] text-muted-foreground">Event Categories</Label>
<div className="space-y-1.5"> <div className="space-y-1.5">
{EVENT_CATEGORIES.map(cat => { {EVENT_CATEGORIES.map(cat => {
@@ -1198,6 +1200,118 @@ matcher: proxmenux-pbs
})} })}
</div> </div>
</div> </div>
{/* Per-channel overrides: one collapsible section per ENABLED channel.
    Each category row shows the effective state (the channel override if one
    exists, otherwise the global category toggle) and lets the user set or
    reset the override while in edit mode. */}
<div className="space-y-2 border-t border-border/30 pt-3">
  <Label className="text-[11px] text-muted-foreground">Channel Filters</Label>
  <p className="text-[10px] text-muted-foreground leading-relaxed">
    By default every channel inherits the global settings above. Override specific categories per channel to customize what each destination receives.
  </p>
  <div className="space-y-2">
    {CHANNEL_TYPES.map(chName => {
      const chEnabled = config.channels[chName]?.enabled
      if (!chEnabled) return null
      const overrides = config.channel_overrides?.[chName] || { categories: {}, events: {} }
      // Tolerate an override object that was stored without a `categories` map.
      const categoryOverrides = overrides.categories ?? {}
      const hasOverrides = Object.keys(categoryOverrides).length > 0
      const chLabel = chName === "email" ? "Email" : chName.charAt(0).toUpperCase() + chName.slice(1)
      // Tailwind only emits classes that appear in the source as complete
      // literal strings, so the per-channel colours are spelled out in full
      // instead of being assembled with template interpolation.
      const channelStyles = {
        telegram: { text: "text-blue-400", badge: "bg-blue-500/15 text-blue-400", switchOn: "bg-blue-600" },
        gotify: { text: "text-green-400", badge: "bg-green-500/15 text-green-400", switchOn: "bg-green-600" },
        discord: { text: "text-indigo-400", badge: "bg-indigo-500/15 text-indigo-400", switchOn: "bg-indigo-600" },
        email: { text: "text-amber-400", badge: "bg-amber-500/15 text-amber-400", switchOn: "bg-amber-600" },
      }
      const chStyle = channelStyles[chName]
      return (
        <details key={chName} className="group">
          <summary className={`flex items-center justify-between text-[11px] font-medium cursor-pointer hover:text-foreground transition-colors py-1.5 px-2 rounded-md hover:bg-muted/50 ${
            hasOverrides ? chStyle.text : "text-muted-foreground"
          }`}>
            <div className="flex items-center gap-2">
              <ChevronDown className="h-3 w-3 group-open:rotate-180 transition-transform" />
              <span>{chLabel}</span>
              {hasOverrides && (
                <span className={`text-[9px] px-1.5 py-0.5 rounded-full ${chStyle.badge}`}>
                  customized
                </span>
              )}
            </div>
            {!hasOverrides && (
              <span className="text-[9px] text-muted-foreground/60">inherits global</span>
            )}
          </summary>
          <div className="mt-1.5 ml-5 space-y-1">
            {EVENT_CATEGORIES.map(cat => {
              const globalEnabled = config.event_categories[cat.key] ?? true
              const override = categoryOverrides[cat.key]
              const isCustomized = override !== undefined
              const effectiveEnabled = isCustomized ? override : globalEnabled
              return (
                <div key={cat.key} className="flex items-center justify-between py-1 px-2 rounded hover:bg-muted/30">
                  <div className="flex items-center gap-2">
                    <span className={`text-[11px] ${effectiveEnabled ? "text-foreground" : "text-muted-foreground/50"}`}>
                      {cat.label}
                    </span>
                    {!isCustomized && (
                      <span className="text-[9px] text-muted-foreground/40">global</span>
                    )}
                  </div>
                  <div className="flex items-center gap-1.5">
                    {isCustomized && (
                      <button
                        type="button"
                        className="text-[9px] text-muted-foreground hover:text-foreground px-1"
                        disabled={!editMode}
                        onClick={() => {
                          if (!editMode) return
                          // Drop the override so the row falls back to the global value.
                          updateConfig(p => {
                            const ch = { ...(p.channel_overrides?.[chName] || { categories: {}, events: {} }) }
                            const cats = { ...ch.categories }
                            delete cats[cat.key]
                            return { ...p, channel_overrides: { ...p.channel_overrides, [chName]: { ...ch, categories: cats } } }
                          })
                        }}
                      >
                        reset
                      </button>
                    )}
                    <button
                      type="button"
                      role="switch"
                      aria-checked={effectiveEnabled}
                      aria-label={`${chLabel}: ${cat.label}`}
                      disabled={!editMode}
                      className={`relative inline-flex h-3.5 w-6 shrink-0 items-center rounded-full transition-colors ${
                        !editMode ? "opacity-50 cursor-not-allowed" : "cursor-pointer"
                      } ${effectiveEnabled ? chStyle.switchOn : "bg-muted-foreground/30"}`}
                      onClick={() => {
                        if (!editMode) return
                        // Store an explicit override that is the inverse of what is shown now.
                        updateConfig(p => {
                          const ch = { ...(p.channel_overrides?.[chName] || { categories: {}, events: {} }) }
                          return {
                            ...p,
                            channel_overrides: {
                              ...p.channel_overrides,
                              [chName]: { ...ch, categories: { ...ch.categories, [cat.key]: !effectiveEnabled } }
                            }
                          }
                        })
                      }}
                    >
                      <span className={`pointer-events-none block h-2.5 w-2.5 rounded-full bg-background shadow-sm transition-transform ${
                        effectiveEnabled ? "translate-x-3" : "translate-x-0.5"
                      }`} />
                    </button>
                  </div>
                </div>
              )
            })}
          </div>
        </details>
      )
    })}
    {CHANNEL_TYPES.every(ch => !config.channels[ch]?.enabled) && (
      <p className="text-[10px] text-muted-foreground/50 italic py-2">
        Enable at least one channel above to configure per-channel filters.
      </p>
    )}
  </div>
</div>
</div>{/* close bordered filters container */} </div>{/* close bordered filters container */}
</div> </div>

View File

@@ -575,13 +575,31 @@ def _temperature_collector_loop():
def _health_collector_loop(): def _health_collector_loop():
"""Background thread: run full health checks every 5 minutes. """Background thread: run full health checks every 5 minutes.
Keeps the health cache always fresh and records events/errors in the DB Keeps the health cache always fresh and records events/errors in the DB.
so the future notification service can consume them.""" Also emits notifications when a health category degrades (OK -> WARNING/CRITICAL)."""
from health_monitor import health_monitor from health_monitor import health_monitor
# Wait 30s after startup to let other services initialize # Wait 30s after startup to let other services initialize
time.sleep(30) time.sleep(30)
# Track previous status per category to detect transitions
# (category key -> status string seen on the previous cycle; starts empty).
_prev_statuses = {}
# Severity ranking for comparison
# OK and INFO share rank 0, so a change between them is never a degradation.
# UNKNOWN ranks above OK but below WARNING.
_SEV_RANK = {'OK': 0, 'INFO': 0, 'UNKNOWN': 1, 'WARNING': 2, 'CRITICAL': 3}
# Human-readable category names
# Maps a health category key to the label used in notification text.
_CAT_NAMES = {
'cpu': 'CPU Usage & Temperature',
'memory': 'Memory & Swap',
'storage': 'Storage Mounts & Space',
'disks': 'Disk I/O & Errors',
'network': 'Network Interfaces',
'vms': 'VMs & Containers',
'services': 'PVE Services',
'logs': 'System Logs',
'updates': 'System Updates',
'security': 'Security',
}
while True: while True:
try: try:
# Run full health check (results get cached internally + recorded in DB) # Run full health check (results get cached internally + recorded in DB)
@@ -598,6 +616,64 @@ def _health_collector_loop():
health_monitor.cached_results['_bg_detailed'] = result health_monitor.cached_results['_bg_detailed'] = result
health_monitor.last_check_times['_bg_overall'] = time.time() health_monitor.last_check_times['_bg_overall'] = time.time()
health_monitor.last_check_times['_bg_detailed'] = time.time() health_monitor.last_check_times['_bg_detailed'] = time.time()
# ── Health degradation notifications ──
# Compare each category's current status with the one recorded on the previous
# cycle and collect every category whose severity rank went UP to WARNING or
# CRITICAL (OK->WARNING, WARNING->CRITICAL, ...). The detailed 'reason' is
# carried along so the user knows exactly what triggered the alert.
# NOTE(review): a category with no recorded previous status is compared against
# 'OK', so an issue that already exists on the first cycle is reported as a
# degradation -- confirm this is the intended startup behaviour.
details = result.get('details') or {}
degraded = []
for cat_key, cat_data in details.items():
    if not isinstance(cat_data, dict):
        # Malformed entry: skip it rather than abort the whole cycle.
        continue
    cur_status = cat_data.get('status', 'OK')
    prev_status = _prev_statuses.get(cat_key, 'OK')
    cur_rank = _SEV_RANK.get(cur_status, 0)
    prev_rank = _SEV_RANK.get(prev_status, 0)

    if cur_rank > prev_rank and cur_rank >= 2:  # WARNING or CRITICAL
        # Use `or` instead of a .get() default so that a present-but-empty
        # (or None) reason still produces a meaningful message body.
        reason = cat_data.get('reason') or f'{cat_key} status changed to {cur_status}'
        cat_name = _CAT_NAMES.get(cat_key, cat_key)
        degraded.append({
            'category': cat_name,
            'status': cur_status,
            'reason': reason,
        })

    # Always remember the latest status, degraded or not.
    _prev_statuses[cat_key] = cur_status

# Send ONE grouped notification if any categories degraded
if degraded and notification_manager._enabled:
    hostname = result.get('hostname', '')
    if not hostname:
        import socket as _sock
        hostname = _sock.gethostname()

    if len(degraded) == 1:
        d = degraded[0]
        title = f"{hostname}: Health {d['status']} - {d['category']}"
        body = d['reason']
        severity = d['status']
    else:
        # Multiple categories degraded at once -- group them and report the
        # highest severity among them.
        max_sev = max(degraded, key=lambda x: _SEV_RANK.get(x['status'], 0))['status']
        title = f"{hostname}: {len(degraded)} health checks degraded"
        lines = []
        for d in degraded:
            lines.append(f" [{d['status']}] {d['category']}: {d['reason']}")
        body = '\n'.join(lines)
        severity = max_sev

    try:
        notification_manager.send_notification(
            event_type='health_degraded',
            severity=severity,
            title=title,
            message=body,
            data={'hostname': hostname, 'count': str(len(degraded))},
            source='health_monitor',
        )
    except Exception as e:
        # A failed notification must not stop the health collector.
        print(f"[ProxMenux] Health notification error: {e}")
except Exception as e: except Exception as e:
print(f"[ProxMenux] Health collector error: {e}") print(f"[ProxMenux] Health collector error: {e}")

View File

@@ -2778,24 +2778,41 @@ class HealthMonitor:
return 'INFO' return 'INFO'
return severity return severity
# Detail text for each log check. When a check fired, the matching error
# samples are appended so the user can see exactly WHAT triggered it;
# otherwise a fixed "nothing found" message is used.
cascade_detail = (
    f'{cascade_count} pattern(s) repeating >=15 times: ' + '; '.join(cascade_samples)
    if cascade_count > 0
    else 'No cascading errors'
)
spike_detail = (
    f'{spike_count} pattern(s) with 4x increase: ' + '; '.join(spike_samples)
    if spike_count > 0
    else 'No error spikes'
)
persist_detail = (
    f'{persistent_count} recurring pattern(s) over 15+ min: ' + '; '.join(persist_samples)
    if persistent_count > 0
    else 'No persistent patterns'
)
log_checks = { log_checks = {
'log_error_cascade': { 'log_error_cascade': {
'status': _log_check_status('log_error_cascade', cascade_count > 0, 'WARNING'), 'status': _log_check_status('log_error_cascade', cascade_count > 0, 'WARNING'),
'detail': f'{cascade_count} pattern(s) repeating >=15 times' if cascade_count > 0 else 'No cascading errors', 'detail': cascade_detail,
'dismissable': True, 'dismissable': True,
'dismissed': 'log_error_cascade' in dismissed_keys, 'dismissed': 'log_error_cascade' in dismissed_keys,
'error_key': 'log_error_cascade' 'error_key': 'log_error_cascade'
}, },
'log_error_spike': { 'log_error_spike': {
'status': _log_check_status('log_error_spike', spike_count > 0, 'WARNING'), 'status': _log_check_status('log_error_spike', spike_count > 0, 'WARNING'),
'detail': f'{spike_count} pattern(s) with 4x increase' if spike_count > 0 else 'No error spikes', 'detail': spike_detail,
'dismissable': True, 'dismissable': True,
'dismissed': 'log_error_spike' in dismissed_keys, 'dismissed': 'log_error_spike' in dismissed_keys,
'error_key': 'log_error_spike' 'error_key': 'log_error_spike'
}, },
'log_persistent_errors': { 'log_persistent_errors': {
'status': _log_check_status('log_persistent_errors', persistent_count > 0, 'WARNING'), 'status': _log_check_status('log_persistent_errors', persistent_count > 0, 'WARNING'),
'detail': f'{persistent_count} recurring pattern(s) over 15+ min' if persistent_count > 0 else 'No persistent patterns', 'detail': persist_detail,
'dismissable': True, 'dismissable': True,
'dismissed': 'log_persistent_errors' in dismissed_keys, 'dismissed': 'log_persistent_errors' in dismissed_keys,
'error_key': 'log_persistent_errors' 'error_key': 'log_persistent_errors'

View File

@@ -69,9 +69,15 @@ GROUP_RATE_LIMITS = {
'resources': {'max_per_minute': 3, 'max_per_hour': 20}, 'resources': {'max_per_minute': 3, 'max_per_hour': 20},
'vm_ct': {'max_per_minute': 10, 'max_per_hour': 60}, 'vm_ct': {'max_per_minute': 10, 'max_per_hour': 60},
'backup': {'max_per_minute': 5, 'max_per_hour': 30}, 'backup': {'max_per_minute': 5, 'max_per_hour': 30},
'system': {'max_per_minute': 5, 'max_per_hour': 30}, 'services': {'max_per_minute': 5, 'max_per_hour': 30},
'health': {'max_per_minute': 3, 'max_per_hour': 20},
'updates': {'max_per_minute': 3, 'max_per_hour': 15},
'other': {'max_per_minute': 5, 'max_per_hour': 30},
} }
# Default fallback for unknown groups
_DEFAULT_RATE_LIMIT = {'max_per_minute': 5, 'max_per_hour': 30}
class GroupRateLimiter: class GroupRateLimiter:
"""Rate limiter per event group. Prevents notification storms.""" """Rate limiter per event group. Prevents notification storms."""
@@ -84,7 +90,7 @@ class GroupRateLimiter:
def allow(self, group: str) -> bool: def allow(self, group: str) -> bool:
"""Check if group rate limit allows this event.""" """Check if group rate limit allows this event."""
limits = GROUP_RATE_LIMITS.get(group, GROUP_RATE_LIMITS['system']) limits = GROUP_RATE_LIMITS.get(group, _DEFAULT_RATE_LIMIT)
now = time.time() now = time.time()
# Initialize if needed # Initialize if needed
@@ -554,35 +560,28 @@ class NotificationManager:
print(f"[NotificationManager] Aggregation flush error: {e}") print(f"[NotificationManager] Aggregation flush error: {e}")
def _process_event(self, event: NotificationEvent): def _process_event(self, event: NotificationEvent):
"""Process a single event: filter -> aggregate -> cooldown -> rate limit -> dispatch.""" """Process a single event: filter -> aggregate -> cooldown -> rate limit -> dispatch.
NOTE: Group and per-event filters are checked globally here.
Per-channel overrides are applied later in _dispatch_to_channels().
"""
if not self._enabled: if not self._enabled:
return return
# Check if this event's GROUP is enabled in settings. # Check if this event's GROUP is enabled globally.
# The UI saves categories by group key: events.vm_ct, events.backup, etc.
template = TEMPLATES.get(event.event_type, {}) template = TEMPLATES.get(event.event_type, {})
event_group = template.get('group', 'system') event_group = template.get('group', 'other')
group_setting = f'events.{event_group}' group_setting = f'events.{event_group}'
if self._config.get(group_setting, 'true') == 'false': if self._config.get(group_setting, 'true') == 'false':
return return
# Check if this SPECIFIC event type is enabled (granular per-event toggle). # Check if this SPECIFIC event type is enabled globally.
# Key format: event.{event_type} = "true"/"false"
# Default comes from the template's default_enabled field. # Default comes from the template's default_enabled field.
default_enabled = 'true' if template.get('default_enabled', True) else 'false' default_enabled = 'true' if template.get('default_enabled', True) else 'false'
event_specific = f'event.{event.event_type}' event_specific = f'event.{event.event_type}'
if self._config.get(event_specific, default_enabled) == 'false': if self._config.get(event_specific, default_enabled) == 'false':
return return
# Check severity filter.
# The UI saves severity_filter as: "all", "warning", "critical".
# Map to our internal severity names for comparison.
severity_map = {'all': 'INFO', 'warning': 'WARNING', 'critical': 'CRITICAL'}
raw_filter = self._config.get('severity_filter', 'all')
min_severity = severity_map.get(raw_filter.lower(), 'INFO')
if not self._meets_severity(event.severity, min_severity):
return
# Try aggregation (may buffer the event) # Try aggregation (may buffer the event)
result = self._aggregator.ingest(event) result = self._aggregator.ingest(event)
if result is None: if result is None:
@@ -593,30 +592,23 @@ class NotificationManager:
self._dispatch_event(event) self._dispatch_event(event)
def _process_event_direct(self, event: NotificationEvent): def _process_event_direct(self, event: NotificationEvent):
"""Process a burst summary event. Bypasses aggregator but applies ALL other filters.""" """Process a burst summary event. Bypasses aggregator but applies global filters."""
if not self._enabled: if not self._enabled:
return return
# Check group filter (same as _process_event) # Check group filter
template = TEMPLATES.get(event.event_type, {}) template = TEMPLATES.get(event.event_type, {})
event_group = template.get('group', 'system') event_group = template.get('group', 'other')
group_setting = f'events.{event_group}' group_setting = f'events.{event_group}'
if self._config.get(group_setting, 'true') == 'false': if self._config.get(group_setting, 'true') == 'false':
return return
# Check per-event filter (same as _process_event) # Check per-event filter
default_enabled = 'true' if template.get('default_enabled', True) else 'false' default_enabled = 'true' if template.get('default_enabled', True) else 'false'
event_specific = f'event.{event.event_type}' event_specific = f'event.{event.event_type}'
if self._config.get(event_specific, default_enabled) == 'false': if self._config.get(event_specific, default_enabled) == 'false':
return return
# Check severity filter (same mapping as _process_event)
severity_map = {'all': 'INFO', 'warning': 'WARNING', 'critical': 'CRITICAL'}
raw_filter = self._config.get('severity_filter', 'all')
min_severity = severity_map.get(raw_filter.lower(), 'INFO')
if not self._meets_severity(event.severity, min_severity):
return
self._dispatch_event(event) self._dispatch_event(event)
def _dispatch_event(self, event: NotificationEvent): def _dispatch_event(self, event: NotificationEvent):
@@ -636,7 +628,7 @@ class NotificationManager:
# Check group rate limit # Check group rate limit
template = TEMPLATES.get(event.event_type, {}) template = TEMPLATES.get(event.event_type, {})
group = template.get('group', 'system') group = template.get('group', 'other')
if not self._group_limiter.allow(group): if not self._group_limiter.allow(group):
return return
@@ -674,11 +666,33 @@ class NotificationManager:
def _dispatch_to_channels(self, title: str, body: str, severity: str, def _dispatch_to_channels(self, title: str, body: str, severity: str,
event_type: str, data: Dict, source: str): event_type: str, data: Dict, source: str):
"""Send notification through all configured channels.""" """Send notification through configured channels, respecting per-channel overrides.
Each channel can override global category/event settings:
- {channel}.events.{group} = "true"/"false" (category override)
- {channel}.event.{type} = "true"/"false" (per-event override)
If no override exists, the channel inherits the global setting (already checked).
"""
with self._lock: with self._lock:
channels = dict(self._channels) channels = dict(self._channels)
template = TEMPLATES.get(event_type, {})
event_group = template.get('group', 'other')
for ch_name, channel in channels.items(): for ch_name, channel in channels.items():
# ── Per-channel override check ──
# If the channel has an explicit override for this group or event, respect it.
# If no override, the global filter already passed (checked in _process_event).
# Only the literal value 'false' is acted on; a missing key or any other value
# lets the send proceed. The group key is tested first, so an event-level
# 'true' does not re-enable an event whose group override is 'false'.
# NOTE(review): _process_event returns early when the global group/event
# setting is 'false', so an override of 'true' can never re-enable something
# that is disabled globally -- overrides can only restrict further. Confirm
# this matches the settings UI, which appears to let a channel switch a
# category on while the global toggle is off.
ch_group_key = f'{ch_name}.events.{event_group}'
ch_group_override = self._config.get(ch_group_key)
if ch_group_override == 'false':
continue # Channel explicitly disabled this category
ch_event_key = f'{ch_name}.event.{event_type}'
ch_event_override = self._config.get(ch_event_key)
if ch_event_override == 'false':
continue # Channel explicitly disabled this event
try: try:
result = channel.send(title, body, severity, data) result = channel.send(title, body, severity, data)
self._record_history( self._record_history(
@@ -857,12 +871,6 @@ class NotificationManager:
except Exception: except Exception:
pass pass
@staticmethod
def _meets_severity(event_severity: str, min_severity: str) -> bool:
"""Check if event severity meets the minimum threshold."""
levels = {'INFO': 0, 'WARNING': 1, 'CRITICAL': 2}
return levels.get(event_severity, 0) >= levels.get(min_severity, 0)
# ─── History Recording ────────────────────────────────────── # ─── History Recording ──────────────────────────────────────
def _record_history(self, event_type: str, channel: str, title: str, def _record_history(self, event_type: str, channel: str, title: str,
@@ -1171,7 +1179,7 @@ class NotificationManager:
channels[ch_type] = ch_cfg channels[ch_type] = ch_cfg
# Build event_categories dict (group-level toggle) # Build event_categories dict (group-level toggle)
# EVENT_GROUPS is a dict: { 'system': {...}, 'vm_ct': {...}, ... } # EVENT_GROUPS is a dict: { 'vm_ct': {...}, 'services': {...}, 'health': {...}, ... }
event_categories = {} event_categories = {}
for group_key in EVENT_GROUPS: for group_key in EVENT_GROUPS:
event_categories[group_key] = self._config.get(f'events.{group_key}', 'true') == 'true' event_categories[group_key] = self._config.get(f'events.{group_key}', 'true') == 'true'
@@ -1189,13 +1197,28 @@ class NotificationManager:
# Build event_types_by_group for UI rendering # Build event_types_by_group for UI rendering
event_types_by_group = get_event_types_by_group() event_types_by_group = get_event_types_by_group()
# Collect per-channel overrides from the flat config store.
# Stored keys: {channel}.events.{group} and {channel}.event.{event_type}.
# Only keys that are actually present are reported, so a missing entry means
# "no override"; present values are converted to booleans ('true' -> True).
channel_overrides = {}
for ch_type in CHANNEL_TYPES:
    category_overrides = {}
    for group_key in EVENT_GROUPS:
        stored = self._config.get(f'{ch_type}.events.{group_key}')
        if stored is None:
            continue
        category_overrides[group_key] = (stored == 'true')

    event_overrides = {}
    for event_type_key in TEMPLATES:
        stored = self._config.get(f'{ch_type}.event.{event_type_key}')
        if stored is None:
            continue
        event_overrides[event_type_key] = (stored == 'true')

    channel_overrides[ch_type] = {
        'categories': category_overrides,
        'events': event_overrides,
    }
config = { config = {
'enabled': self._enabled, 'enabled': self._enabled,
'channels': channels, 'channels': channels,
'severity_filter': self._config.get('severity_filter', 'all'),
'event_categories': event_categories, 'event_categories': event_categories,
'event_toggles': event_toggles, 'event_toggles': event_toggles,
'event_types_by_group': event_types_by_group, 'event_types_by_group': event_types_by_group,
'channel_overrides': channel_overrides,
'ai_enabled': self._config.get('ai_enabled', 'false') == 'true', 'ai_enabled': self._config.get('ai_enabled', 'false') == 'true',
'ai_provider': self._config.get('ai_provider', 'openai'), 'ai_provider': self._config.get('ai_provider', 'openai'),
'ai_api_key': self._config.get('ai_api_key', ''), 'ai_api_key': self._config.get('ai_api_key', ''),

View File

@@ -342,25 +342,36 @@ TEMPLATES = {
'state_change': { 'state_change': {
'title': '{hostname}: {category} changed to {current}', 'title': '{hostname}: {category} changed to {current}',
'body': '{category} status changed from {previous} to {current}.\n{reason}', 'body': '{category} status changed from {previous} to {current}.\n{reason}',
'group': 'system', 'label': 'Health state changed',
'group': 'health',
'default_enabled': False, 'default_enabled': False,
}, },
'new_error': { 'new_error': {
'title': '{hostname}: New {severity} - {category}', 'title': '{hostname}: New {severity} - {category}',
'body': '{reason}', 'body': '{reason}',
'group': 'system', 'label': 'New health issue',
'group': 'health',
'default_enabled': True, 'default_enabled': True,
}, },
'error_resolved': { 'error_resolved': {
'title': '{hostname}: Resolved - {category}', 'title': '{hostname}: Resolved - {category}',
'body': '{reason}\nDuration: {duration}', 'body': '{reason}\nDuration: {duration}',
'group': 'system', 'label': 'Health issue resolved',
'group': 'health',
'default_enabled': True, 'default_enabled': True,
}, },
'error_escalated': { 'error_escalated': {
'title': '{hostname}: Escalated to {severity} - {category}', 'title': '{hostname}: Escalated to {severity} - {category}',
'body': '{reason}', 'body': '{reason}',
'group': 'system', 'label': 'Health issue escalated',
'group': 'health',
'default_enabled': True,
},
'health_degraded': {
'title': '{hostname}: Health check degraded',
'body': '{reason}',
'label': 'Health check degraded',
'group': 'health',
'default_enabled': True, 'default_enabled': True,
}, },
@@ -368,90 +379,105 @@ TEMPLATES = {
'vm_start': { 'vm_start': {
'title': '{hostname}: VM {vmid} started', 'title': '{hostname}: VM {vmid} started',
'body': '{vmname} ({vmid}) has been started.', 'body': '{vmname} ({vmid}) has been started.',
'label': 'VM started',
'group': 'vm_ct', 'group': 'vm_ct',
'default_enabled': True, 'default_enabled': True,
}, },
'vm_stop': { 'vm_stop': {
'title': '{hostname}: VM {vmid} stopped', 'title': '{hostname}: VM {vmid} stopped',
'body': '{vmname} ({vmid}) has been stopped.', 'body': '{vmname} ({vmid}) has been stopped.',
'label': 'VM stopped',
'group': 'vm_ct', 'group': 'vm_ct',
'default_enabled': False, 'default_enabled': False,
}, },
'vm_shutdown': { 'vm_shutdown': {
'title': '{hostname}: VM {vmid} shutdown', 'title': '{hostname}: VM {vmid} shutdown',
'body': '{vmname} ({vmid}) has been shut down.', 'body': '{vmname} ({vmid}) has been shut down.',
'label': 'VM shutdown',
'group': 'vm_ct', 'group': 'vm_ct',
'default_enabled': False, 'default_enabled': False,
}, },
'vm_fail': { 'vm_fail': {
'title': '{hostname}: VM {vmid} FAILED', 'title': '{hostname}: VM {vmid} FAILED',
'body': '{vmname} ({vmid}) has failed.\n{reason}', 'body': '{vmname} ({vmid}) has failed.\n{reason}',
'label': 'VM FAILED',
'group': 'vm_ct', 'group': 'vm_ct',
'default_enabled': True, 'default_enabled': True,
}, },
'vm_restart': { 'vm_restart': {
'title': '{hostname}: VM {vmid} restarted', 'title': '{hostname}: VM {vmid} restarted',
'body': '{vmname} ({vmid}) has been restarted.', 'body': '{vmname} ({vmid}) has been restarted.',
'label': 'VM restarted',
'group': 'vm_ct', 'group': 'vm_ct',
'default_enabled': False, 'default_enabled': False,
}, },
'ct_start': { 'ct_start': {
'title': '{hostname}: CT {vmid} started', 'title': '{hostname}: CT {vmid} started',
'body': '{vmname} ({vmid}) has been started.', 'body': '{vmname} ({vmid}) has been started.',
'label': 'CT started',
'group': 'vm_ct', 'group': 'vm_ct',
'default_enabled': True, 'default_enabled': True,
}, },
'ct_stop': { 'ct_stop': {
'title': '{hostname}: CT {vmid} stopped', 'title': '{hostname}: CT {vmid} stopped',
'body': '{vmname} ({vmid}) has been stopped.', 'body': '{vmname} ({vmid}) has been stopped.',
'label': 'CT stopped',
'group': 'vm_ct', 'group': 'vm_ct',
'default_enabled': False, 'default_enabled': False,
}, },
'ct_shutdown': { 'ct_shutdown': {
'title': '{hostname}: CT {vmid} shutdown', 'title': '{hostname}: CT {vmid} shutdown',
'body': '{vmname} ({vmid}) has been shut down.', 'body': '{vmname} ({vmid}) has been shut down.',
'label': 'CT shutdown',
'group': 'vm_ct', 'group': 'vm_ct',
'default_enabled': False, 'default_enabled': False,
}, },
'ct_restart': { 'ct_restart': {
'title': '{hostname}: CT {vmid} restarted', 'title': '{hostname}: CT {vmid} restarted',
'body': '{vmname} ({vmid}) has been restarted.', 'body': '{vmname} ({vmid}) has been restarted.',
'label': 'CT restarted',
'group': 'vm_ct', 'group': 'vm_ct',
'default_enabled': False, 'default_enabled': False,
}, },
'ct_fail': { 'ct_fail': {
'title': '{hostname}: CT {vmid} FAILED', 'title': '{hostname}: CT {vmid} FAILED',
'body': '{vmname} ({vmid}) has failed.\n{reason}', 'body': '{vmname} ({vmid}) has failed.\n{reason}',
'label': 'CT FAILED',
'group': 'vm_ct', 'group': 'vm_ct',
'default_enabled': True, 'default_enabled': True,
}, },
'migration_start': { 'migration_start': {
'title': '{hostname}: Migration started - {vmid}', 'title': '{hostname}: Migration started - {vmid}',
'body': '{vmname} ({vmid}) migration to {target_node} started.', 'body': '{vmname} ({vmid}) migration to {target_node} started.',
'label': 'Migration started',
'group': 'vm_ct', 'group': 'vm_ct',
'default_enabled': True, 'default_enabled': True,
}, },
'migration_complete': { 'migration_complete': {
'title': '{hostname}: Migration complete - {vmid}', 'title': '{hostname}: Migration complete - {vmid}',
'body': '{vmname} ({vmid}) migrated successfully to {target_node}.', 'body': '{vmname} ({vmid}) migrated successfully to {target_node}.',
'label': 'Migration complete',
'group': 'vm_ct', 'group': 'vm_ct',
'default_enabled': True, 'default_enabled': True,
}, },
'migration_fail': { 'migration_fail': {
'title': '{hostname}: Migration FAILED - {vmid}', 'title': '{hostname}: Migration FAILED - {vmid}',
'body': '{vmname} ({vmid}) migration to {target_node} failed.\n{reason}', 'body': '{vmname} ({vmid}) migration to {target_node} failed.\n{reason}',
'label': 'Migration FAILED',
'group': 'vm_ct', 'group': 'vm_ct',
'default_enabled': True, 'default_enabled': True,
}, },
'replication_fail': { 'replication_fail': {
'title': '{hostname}: Replication FAILED - {vmid}', 'title': '{hostname}: Replication FAILED - {vmid}',
'body': 'Replication of {vmname} ({vmid}) has failed.\n{reason}', 'body': 'Replication of {vmname} ({vmid}) has failed.\n{reason}',
'label': 'Replication FAILED',
'group': 'vm_ct', 'group': 'vm_ct',
'default_enabled': True, 'default_enabled': True,
}, },
'replication_complete': { 'replication_complete': {
'title': '{hostname}: Replication complete - {vmid}', 'title': '{hostname}: Replication complete - {vmid}',
'body': 'Replication of {vmname} ({vmid}) completed successfully.', 'body': 'Replication of {vmname} ({vmid}) completed successfully.',
'label': 'Replication complete',
'group': 'vm_ct', 'group': 'vm_ct',
'default_enabled': False, 'default_enabled': False,
}, },
@@ -460,30 +486,35 @@ TEMPLATES = {
'backup_start': { 'backup_start': {
'title': '{hostname}: Backup started', 'title': '{hostname}: Backup started',
'body': '{reason}', 'body': '{reason}',
'label': 'Backup started',
'group': 'backup', 'group': 'backup',
'default_enabled': False, 'default_enabled': False,
}, },
'backup_complete': { 'backup_complete': {
'title': '{hostname}: Backup complete - {vmid}', 'title': '{hostname}: Backup complete - {vmid}',
'body': 'Backup of {vmname} ({vmid}) completed successfully.\nSize: {size}', 'body': 'Backup of {vmname} ({vmid}) completed successfully.\nSize: {size}',
'label': 'Backup complete',
'group': 'backup', 'group': 'backup',
'default_enabled': True, 'default_enabled': True,
}, },
'backup_fail': { 'backup_fail': {
'title': '{hostname}: Backup FAILED - {vmid}', 'title': '{hostname}: Backup FAILED - {vmid}',
'body': 'Backup of {vmname} ({vmid}) has failed.\n{reason}', 'body': 'Backup of {vmname} ({vmid}) has failed.\n{reason}',
'label': 'Backup FAILED',
'group': 'backup', 'group': 'backup',
'default_enabled': True, 'default_enabled': True,
}, },
'snapshot_complete': { 'snapshot_complete': {
'title': '{hostname}: Snapshot created - {vmid}', 'title': '{hostname}: Snapshot created - {vmid}',
'body': 'Snapshot of {vmname} ({vmid}) created: {snapshot_name}', 'body': 'Snapshot of {vmname} ({vmid}) created: {snapshot_name}',
'label': 'Snapshot created',
'group': 'backup', 'group': 'backup',
'default_enabled': False, 'default_enabled': False,
}, },
'snapshot_fail': { 'snapshot_fail': {
'title': '{hostname}: Snapshot FAILED - {vmid}', 'title': '{hostname}: Snapshot FAILED - {vmid}',
'body': 'Snapshot of {vmname} ({vmid}) failed.\n{reason}', 'body': 'Snapshot of {vmname} ({vmid}) failed.\n{reason}',
'label': 'Snapshot FAILED',
'group': 'backup', 'group': 'backup',
'default_enabled': True, 'default_enabled': True,
}, },
@@ -492,42 +523,49 @@ TEMPLATES = {
'cpu_high': { 'cpu_high': {
'title': '{hostname}: High CPU usage ({value}%)', 'title': '{hostname}: High CPU usage ({value}%)',
'body': 'CPU usage is at {value}% on {cores} cores.\n{details}', 'body': 'CPU usage is at {value}% on {cores} cores.\n{details}',
'label': 'High CPU usage',
'group': 'resources', 'group': 'resources',
'default_enabled': True, 'default_enabled': True,
}, },
'ram_high': { 'ram_high': {
'title': '{hostname}: High memory usage ({value}%)', 'title': '{hostname}: High memory usage ({value}%)',
'body': 'Memory usage: {used} / {total} ({value}%).\n{details}', 'body': 'Memory usage: {used} / {total} ({value}%).\n{details}',
'label': 'High memory usage',
'group': 'resources', 'group': 'resources',
'default_enabled': True, 'default_enabled': True,
}, },
'temp_high': { 'temp_high': {
'title': '{hostname}: High temperature ({value}C)', 'title': '{hostname}: High temperature ({value}C)',
'body': 'CPU temperature: {value}C (threshold: {threshold}C).\n{details}', 'body': 'CPU temperature: {value}C (threshold: {threshold}C).\n{details}',
'label': 'High temperature',
'group': 'resources', 'group': 'resources',
'default_enabled': True, 'default_enabled': True,
}, },
'disk_space_low': { 'disk_space_low': {
'title': '{hostname}: Low disk space on {mount}', 'title': '{hostname}: Low disk space on {mount}',
'body': '{mount}: {used}% used ({available} available).', 'body': '{mount}: {used}% used ({available} available).',
'label': 'Low disk space',
'group': 'storage', 'group': 'storage',
'default_enabled': True, 'default_enabled': True,
}, },
'disk_io_error': { 'disk_io_error': {
'title': '{hostname}: Disk failure detected on {device}', 'title': '{hostname}: Disk failure detected on {device}',
'body': '{reason}', 'body': '{reason}',
'label': 'Disk failure / I/O error',
'group': 'storage', 'group': 'storage',
'default_enabled': True, 'default_enabled': True,
}, },
'storage_unavailable': { 'storage_unavailable': {
'title': '{hostname}: Storage unavailable - {storage_name}', 'title': '{hostname}: Storage unavailable - {storage_name}',
'body': 'PVE storage "{storage_name}" ({storage_type}) is not available.\n{reason}', 'body': 'PVE storage "{storage_name}" ({storage_type}) is not available.\n{reason}',
'label': 'Storage unavailable',
'group': 'storage', 'group': 'storage',
'default_enabled': True, 'default_enabled': True,
}, },
'load_high': { 'load_high': {
'title': '{hostname}: High system load ({value})', 'title': '{hostname}: High system load ({value})',
'body': 'System load average: {value} on {cores} cores.\n{details}', 'body': 'System load average: {value} on {cores} cores.\n{details}',
'label': 'High system load',
'group': 'resources', 'group': 'resources',
'default_enabled': True, 'default_enabled': True,
}, },
@@ -536,12 +574,14 @@ TEMPLATES = {
'network_down': { 'network_down': {
'title': '{hostname}: Network connectivity lost', 'title': '{hostname}: Network connectivity lost',
'body': 'Network connectivity check failed.\n{reason}', 'body': 'Network connectivity check failed.\n{reason}',
'label': 'Network connectivity lost',
'group': 'network', 'group': 'network',
'default_enabled': True, 'default_enabled': True,
}, },
'network_latency': { 'network_latency': {
'title': '{hostname}: High network latency ({value}ms)', 'title': '{hostname}: High network latency ({value}ms)',
'body': 'Latency to gateway: {value}ms (threshold: {threshold}ms).', 'body': 'Latency to gateway: {value}ms (threshold: {threshold}ms).',
'label': 'High network latency',
'group': 'network', 'group': 'network',
'default_enabled': False, 'default_enabled': False,
}, },
@@ -550,24 +590,28 @@ TEMPLATES = {
'auth_fail': { 'auth_fail': {
'title': '{hostname}: Authentication failure', 'title': '{hostname}: Authentication failure',
'body': 'Failed login attempt from {source_ip}.\nUser: {username}\nService: {service}', 'body': 'Failed login attempt from {source_ip}.\nUser: {username}\nService: {service}',
'label': 'Authentication failure',
'group': 'security', 'group': 'security',
'default_enabled': True, 'default_enabled': True,
}, },
'ip_block': { 'ip_block': {
'title': '{hostname}: IP blocked by Fail2Ban', 'title': '{hostname}: IP blocked by Fail2Ban',
'body': 'IP {source_ip} has been banned.\nJail: {jail}\nFailures: {failures}', 'body': 'IP {source_ip} has been banned.\nJail: {jail}\nFailures: {failures}',
'label': 'IP blocked by Fail2Ban',
'group': 'security', 'group': 'security',
'default_enabled': True, 'default_enabled': True,
}, },
'firewall_issue': { 'firewall_issue': {
'title': '{hostname}: Firewall issue detected', 'title': '{hostname}: Firewall issue detected',
'body': '{reason}', 'body': '{reason}',
'label': 'Firewall issue detected',
'group': 'security', 'group': 'security',
'default_enabled': True, 'default_enabled': True,
}, },
'user_permission_change': { 'user_permission_change': {
'title': '{hostname}: User permission changed', 'title': '{hostname}: User permission changed',
'body': 'User: {username}\nChange: {change_details}', 'body': 'User: {username}\nChange: {change_details}',
'label': 'User permission changed',
'group': 'security', 'group': 'security',
'default_enabled': True, 'default_enabled': True,
}, },
@@ -576,101 +620,128 @@ TEMPLATES = {
'split_brain': { 'split_brain': {
'title': '{hostname}: SPLIT-BRAIN detected', 'title': '{hostname}: SPLIT-BRAIN detected',
'body': 'Cluster split-brain condition detected.\nQuorum status: {quorum}', 'body': 'Cluster split-brain condition detected.\nQuorum status: {quorum}',
'label': 'SPLIT-BRAIN detected',
'group': 'cluster', 'group': 'cluster',
'default_enabled': True, 'default_enabled': True,
}, },
'node_disconnect': { 'node_disconnect': {
'title': '{hostname}: Node disconnected', 'title': '{hostname}: Node disconnected',
'body': 'Node {node_name} has disconnected from the cluster.', 'body': 'Node {node_name} has disconnected from the cluster.',
'label': 'Node disconnected',
'group': 'cluster', 'group': 'cluster',
'default_enabled': True, 'default_enabled': True,
}, },
'node_reconnect': { 'node_reconnect': {
'title': '{hostname}: Node reconnected', 'title': '{hostname}: Node reconnected',
'body': 'Node {node_name} has reconnected to the cluster.', 'body': 'Node {node_name} has reconnected to the cluster.',
'label': 'Node reconnected',
'group': 'cluster', 'group': 'cluster',
'default_enabled': True, 'default_enabled': True,
}, },
# ── System events ── # ── Services events ──
'system_shutdown': { 'system_shutdown': {
'title': '{hostname}: System shutting down', 'title': '{hostname}: System shutting down',
'body': '{reason}', 'body': '{reason}',
'group': 'system', 'label': 'System shutting down',
'group': 'services',
'default_enabled': True, 'default_enabled': True,
}, },
'system_reboot': { 'system_reboot': {
'title': '{hostname}: System rebooting', 'title': '{hostname}: System rebooting',
'body': '{reason}', 'body': '{reason}',
'group': 'system', 'label': 'System rebooting',
'group': 'services',
'default_enabled': True, 'default_enabled': True,
}, },
'system_problem': { 'system_problem': {
'title': '{hostname}: System problem detected', 'title': '{hostname}: System problem detected',
'body': '{reason}', 'body': '{reason}',
'group': 'system', 'label': 'System problem detected',
'group': 'services',
'default_enabled': True, 'default_enabled': True,
}, },
'service_fail': { 'service_fail': {
'title': '{hostname}: Service failed - {service_name}', 'title': '{hostname}: Service failed - {service_name}',
'body': '{reason}', 'body': '{reason}',
'group': 'system', 'label': 'Service failed',
'group': 'services',
'default_enabled': True, 'default_enabled': True,
}, },
'oom_kill': {
'title': '{hostname}: OOM Kill - {process}',
'body': '{reason}',
'label': 'Out of memory kill',
'group': 'services',
'default_enabled': True,
},
# ── Hidden internal templates (not shown in UI) ──
'service_fail_batch': { 'service_fail_batch': {
'title': '{hostname}: {service_count} services failed', 'title': '{hostname}: {service_count} services failed',
'body': '{reason}', 'body': '{reason}',
'group': 'system', 'label': 'Service fail batch',
'group': 'services',
'default_enabled': True, 'default_enabled': True,
'hidden': True,
}, },
'system_mail': { 'system_mail': {
'title': '{hostname}: {pve_title}', 'title': '{hostname}: {pve_title}',
'body': '{reason}', 'body': '{reason}',
'group': 'system', 'label': 'PVE system mail',
'group': 'other',
'default_enabled': True, 'default_enabled': True,
'hidden': True,
},
'webhook_test': {
'title': '{hostname}: Webhook test received',
'body': 'PVE webhook connectivity test successful.\n{reason}',
'label': 'Webhook test',
'group': 'other',
'default_enabled': True,
'hidden': True,
}, },
'update_available': { 'update_available': {
'title': '{hostname}: Updates available', 'title': '{hostname}: Updates available',
'body': 'Total updates: {total_count}\nSecurity: {security_count}\nProxmox: {pve_count}\nKernel: {kernel_count}\nImportant: {important_list}', 'body': 'Total updates: {total_count}\nSecurity: {security_count}\nProxmox: {pve_count}\nKernel: {kernel_count}\nImportant: {important_list}',
'group': 'system', 'label': 'Updates available (legacy)',
'default_enabled': False, # Superseded by update_summary 'group': 'updates',
},
'update_complete': {
'title': '{hostname}: Update completed',
'body': '{details}',
'group': 'system',
'default_enabled': False, 'default_enabled': False,
'hidden': True,
}, },
# ── Unknown persistent (from health monitor) ──
'unknown_persistent': { 'unknown_persistent': {
'title': '{hostname}: Check unavailable - {category}', 'title': '{hostname}: Check unavailable - {category}',
'body': 'Health check for {category} has been unavailable for 3+ cycles.\n{reason}', 'body': 'Health check for {category} has been unavailable for 3+ cycles.\n{reason}',
'group': 'system', 'label': 'Check unavailable',
'group': 'health',
'default_enabled': False, 'default_enabled': False,
'hidden': True,
}, },
# ── Persistent Health Issues (daily digest) ── # ── Health Monitor events ──
'health_persistent': { 'health_persistent': {
'title': '{hostname}: {count} active health issue(s)', 'title': '{hostname}: {count} active health issue(s)',
'body': 'The following health issues remain active:\n{issue_list}\n\nThis digest is sent once every 24 hours while issues persist.', 'body': 'The following health issues remain active:\n{issue_list}\n\nThis digest is sent once every 24 hours while issues persist.',
'group': 'system', 'label': 'Active health issues (daily)',
'group': 'health',
'default_enabled': True, 'default_enabled': True,
}, },
'health_issue_new': { 'health_issue_new': {
'title': '{hostname}: New health issue - {category}', 'title': '{hostname}: New health issue - {category}',
'body': 'New {severity} issue detected:\n{reason}', 'body': 'New {severity} issue detected:\n{reason}',
'group': 'system', 'label': 'New health issue',
'group': 'health',
'default_enabled': True, 'default_enabled': True,
}, },
'health_issue_resolved': { 'health_issue_resolved': {
'title': '{hostname}: Resolved - {category}', 'title': '{hostname}: Resolved - {category}',
'body': '{category} issue has been resolved.\n{reason}\nDuration: {duration}', 'body': '{category} issue has been resolved.\n{reason}\nDuration: {duration}',
'group': 'system', 'label': 'Health issue resolved',
'group': 'health',
'default_enabled': True, 'default_enabled': True,
}, },
# ── Update notifications (enriched) ── # ── Update notifications ──
'update_summary': { 'update_summary': {
'title': '{hostname}: Updates available', 'title': '{hostname}: Updates available',
'body': ( 'body': (
@@ -680,80 +751,99 @@ TEMPLATES = {
'Kernel updates: {kernel_count}\n' 'Kernel updates: {kernel_count}\n'
'Important packages: {important_list}' 'Important packages: {important_list}'
), ),
'group': 'system', 'label': 'Updates available',
'group': 'updates',
'default_enabled': True, 'default_enabled': True,
}, },
'pve_update': { 'pve_update': {
'title': '{hostname}: Proxmox VE {new_version} available', 'title': '{hostname}: Proxmox VE {new_version} available',
'body': 'Proxmox VE {current_version} -> {new_version}\n{details}', 'body': 'Proxmox VE {current_version} -> {new_version}\n{details}',
'group': 'system', 'label': 'Proxmox VE update available',
'group': 'updates',
'default_enabled': True, 'default_enabled': True,
}, },
'update_complete': {
# ── PVE webhook test ── 'title': '{hostname}: Update completed',
'webhook_test': { 'body': '{details}',
'title': '{hostname}: Webhook test received', 'label': 'Update completed',
'body': 'PVE webhook connectivity test successful.\n{reason}', 'group': 'updates',
'group': 'system', 'default_enabled': False,
'default_enabled': True,
}, },
# ── Burst aggregation summaries ── # ── Burst aggregation summaries (hidden -- auto-generated by BurstAggregator) ──
# These inherit enabled state from their parent event type at dispatch time.
'burst_auth_fail': { 'burst_auth_fail': {
'title': '{hostname}: {count} auth failures in {window}', 'title': '{hostname}: {count} auth failures in {window}',
'body': '{count} authentication failures detected in {window}.\nSources: {entity_list}', 'body': '{count} authentication failures detected in {window}.\nSources: {entity_list}',
'label': 'Auth failures burst',
'group': 'security', 'group': 'security',
'default_enabled': True, 'default_enabled': True,
'hidden': True,
}, },
'burst_ip_block': { 'burst_ip_block': {
'title': '{hostname}: Fail2Ban banned {count} IPs in {window}', 'title': '{hostname}: Fail2Ban banned {count} IPs in {window}',
'body': '{count} IPs banned by Fail2Ban in {window}.\nIPs: {entity_list}', 'body': '{count} IPs banned by Fail2Ban in {window}.\nIPs: {entity_list}',
'label': 'IP block burst',
'group': 'security', 'group': 'security',
'default_enabled': True, 'default_enabled': True,
'hidden': True,
}, },
'burst_disk_io': { 'burst_disk_io': {
'title': '{hostname}: {count} disk I/O errors on {entity_list}', 'title': '{hostname}: {count} disk I/O errors on {entity_list}',
'body': '{count} I/O errors detected in {window}.\nDevices: {entity_list}', 'body': '{count} I/O errors detected in {window}.\nDevices: {entity_list}',
'label': 'Disk I/O burst',
'group': 'storage', 'group': 'storage',
'default_enabled': True, 'default_enabled': True,
'hidden': True,
}, },
'burst_cluster': { 'burst_cluster': {
'title': '{hostname}: Cluster flapping detected ({count} changes)', 'title': '{hostname}: Cluster flapping detected ({count} changes)',
'body': 'Cluster state changed {count} times in {window}.\nNodes: {entity_list}', 'body': 'Cluster state changed {count} times in {window}.\nNodes: {entity_list}',
'label': 'Cluster flapping burst',
'group': 'cluster', 'group': 'cluster',
'default_enabled': True, 'default_enabled': True,
'hidden': True,
}, },
'burst_service_fail': { 'burst_service_fail': {
'title': '{hostname}: {count} services failed in {window}', 'title': '{hostname}: {count} services failed in {window}',
'body': '{count} service failures detected in {window}.\nThis typically indicates a node reboot or PVE service restart.\n\nAdditional failures:\n{details}', 'body': '{count} service failures detected in {window}.\nThis typically indicates a node reboot or PVE service restart.\n\nAdditional failures:\n{details}',
'group': 'system', 'label': 'Service fail burst',
'group': 'services',
'default_enabled': True, 'default_enabled': True,
'hidden': True,
}, },
'burst_system': { 'burst_system': {
'title': '{hostname}: {count} system problems in {window}', 'title': '{hostname}: {count} system problems in {window}',
'body': '{count} system problems detected in {window}.\n\nAdditional issues:\n{details}', 'body': '{count} system problems detected in {window}.\n\nAdditional issues:\n{details}',
'group': 'system', 'label': 'System problems burst',
'group': 'services',
'default_enabled': True, 'default_enabled': True,
'hidden': True,
}, },
'burst_generic': { 'burst_generic': {
'title': '{hostname}: {count} {event_type} events in {window}', 'title': '{hostname}: {count} {event_type} events in {window}',
'body': '{count} events of type {event_type} in {window}.\n\nAdditional events:\n{details}', 'body': '{count} events of type {event_type} in {window}.\n\nAdditional events:\n{details}',
'group': 'system', 'label': 'Generic burst',
'group': 'other',
'default_enabled': True, 'default_enabled': True,
'hidden': True,
}, },
} }
# ─── Event Groups (for UI filtering) ───────────────────────────── # ─── Event Groups (for UI filtering) ─────────────────────────────
EVENT_GROUPS = { EVENT_GROUPS = {
'system': {'label': 'System', 'description': 'System health, services, updates'}, 'vm_ct': {'label': 'VM / CT', 'description': 'Start, stop, crash, migration'},
'vm_ct': {'label': 'VM / CT', 'description': 'Virtual machines and containers'}, 'backup': {'label': 'Backups', 'description': 'Backup start, complete, fail'},
'backup': {'label': 'Backup', 'description': 'Backups and snapshots'}, 'resources': {'label': 'Resources', 'description': 'CPU, memory, temperature'},
'resources': {'label': 'Resources', 'description': 'CPU, memory, temperature, load'}, 'storage': {'label': 'Storage', 'description': 'Disk space, I/O, SMART'},
'storage': {'label': 'Storage', 'description': 'Disk space and I/O'}, 'network': {'label': 'Network', 'description': 'Connectivity, bond, latency'},
'network': {'label': 'Network', 'description': 'Connectivity and latency'}, 'security': {'label': 'Security', 'description': 'Auth failures, Fail2Ban, firewall'},
'security': {'label': 'Security', 'description': 'Authentication, firewall, bans'}, 'cluster': {'label': 'Cluster', 'description': 'Quorum, split-brain, HA fencing'},
'cluster': {'label': 'Cluster', 'description': 'Cluster health and quorum'}, 'services': {'label': 'Services', 'description': 'System services, shutdown, reboot'},
'health': {'label': 'Health Monitor', 'description': 'Health checks, degradation, recovery'},
'updates': {'label': 'Updates', 'description': 'System and PVE updates'},
'other': {'label': 'Other', 'description': 'Uncategorized notifications'},
} }
@@ -777,14 +867,16 @@ def render_template(event_type: str, data: Dict[str, Any]) -> Dict[str, Any]:
template = TEMPLATES.get(event_type) template = TEMPLATES.get(event_type)
if not template: if not template:
# Catch-all: unknown event types always get delivered (group 'other')
# so no Proxmox notification is ever silently dropped.
fallback_body = data.get('message', data.get('reason', str(data))) fallback_body = data.get('message', data.get('reason', str(data)))
severity = data.get('severity', 'INFO') severity = data.get('severity', 'INFO')
return { return {
'title': f"{_get_hostname()}: {event_type}", 'title': f"{_get_hostname()}: {event_type}",
'body': fallback_body, 'body_text': fallback_body, 'body': fallback_body, 'body_text': fallback_body,
'body_html': f'<p>{html_mod.escape(str(fallback_body))}</p>', 'body_html': f'<p>{html_mod.escape(str(fallback_body))}</p>',
'fields': [], 'tags': [severity, 'system', event_type], 'fields': [], 'tags': [severity, 'other', event_type],
'severity': severity, 'group': 'system', 'severity': severity, 'group': 'other',
} }
# Ensure hostname is always available # Ensure hostname is always available
@@ -883,24 +975,36 @@ def render_template(event_type: str, data: Dict[str, Any]) -> Dict[str, Any]:
def get_event_types_by_group() -> Dict[str, list]: def get_event_types_by_group() -> Dict[str, list]:
"""Get all event types organized by group, for UI rendering. """Get all event types organized by group, for UI rendering.
Hidden templates (burst aggregations, internal types) are excluded
from the UI. They still work in the backend and inherit enabled
state from their parent event type.
Returns: Returns:
{group_key: [{'type': event_type, 'title': template_title, {group_key: [{'type': event_type, 'title': label,
'default_enabled': bool}, ...]} 'default_enabled': bool}, ...]}
""" """
result = {} result = {}
for event_type, template in TEMPLATES.items(): for event_type, template in TEMPLATES.items():
group = template.get('group', 'system') # Skip hidden templates (bursts, internal, deprecated)
if template.get('hidden', False):
continue
group = template.get('group', 'other')
if group not in result: if group not in result:
result[group] = [] result[group] = []
import re
# Clean title: remove {hostname}: prefix and any remaining {placeholders} # Use explicit label if available, otherwise derive from title
title = template['title'].replace('{hostname}', '').strip(': ') label = template.get('label', '')
title = re.sub(r'\s*\{[^}]+\}', '', title).strip(' -:') if not label:
if not title: import re
title = event_type.replace('_', ' ').title() label = template['title'].replace('{hostname}', '').strip(': ')
label = re.sub(r'\s*\{[^}]+\}', '', label).strip(' -:')
if not label:
label = event_type.replace('_', ' ').title()
result[group].append({ result[group].append({
'type': event_type, 'type': event_type,
'title': title, 'title': label,
'default_enabled': template.get('default_enabled', True), 'default_enabled': template.get('default_enabled', True),
}) })
return result return result