Update notification service

This commit is contained in:
MacRimi
2026-03-06 12:06:53 +01:00
parent 925fe1cce0
commit ea2763c48c
3 changed files with 146 additions and 54 deletions

View File

@@ -286,8 +286,12 @@ export function StorageOverview() {
if (!iso) return 'N/A' if (!iso) return 'N/A'
try { try {
const d = new Date(iso) const d = new Date(iso)
return d.toLocaleDateString(undefined, { month: 'short', day: 'numeric', year: 'numeric' }) const day = d.getDate().toString().padStart(2, '0')
+ ' ' + d.toLocaleTimeString(undefined, { hour: '2-digit', minute: '2-digit' }) const month = (d.getMonth() + 1).toString().padStart(2, '0')
const year = d.getFullYear()
const hours = d.getHours().toString().padStart(2, '0')
const mins = d.getMinutes().toString().padStart(2, '0')
return `${day}/${month}/${year} ${hours}:${mins}`
} catch { return iso } } catch { return iso }
} }
@@ -1287,13 +1291,16 @@ export function StorageOverview() {
{/* Observations Section */} {/* Observations Section */}
{(diskObservations.length > 0 || loadingObservations) && ( {(diskObservations.length > 0 || loadingObservations) && (
<div className="border-t pt-4"> <div className="border-t pt-4">
<h4 className="font-semibold mb-3 flex items-center gap-2"> <h4 className="font-semibold mb-2 flex items-center gap-2">
<Info className="h-4 w-4 text-blue-400" /> <Info className="h-4 w-4 text-blue-400" />
Observations Observations
<Badge className="bg-blue-500/10 text-blue-400 border-blue-500/20 text-[10px] px-1.5 py-0"> <Badge className="bg-blue-500/10 text-blue-400 border-blue-500/20 text-[10px] px-1.5 py-0">
{diskObservations.length} {diskObservations.length}
</Badge> </Badge>
</h4> </h4>
<p className="text-xs text-muted-foreground mb-3">
The following observations have been recorded for this disk:
</p>
{loadingObservations ? ( {loadingObservations ? (
<div className="flex items-center gap-2 text-sm text-muted-foreground py-2"> <div className="flex items-center gap-2 text-sm text-muted-foreground py-2">
<div className="h-4 w-4 rounded-full border-2 border-transparent border-t-blue-400 animate-spin" /> <div className="h-4 w-4 rounded-full border-2 border-transparent border-t-blue-400 animate-spin" />
@@ -1310,36 +1317,37 @@ export function StorageOverview() {
: 'bg-blue-500/5 border-blue-500/20' : 'bg-blue-500/5 border-blue-500/20'
}`} }`}
> >
<div className="flex items-start justify-between gap-2"> {/* Header with type badge */}
<div className="flex items-center gap-2 flex-wrap"> <div className="flex items-center gap-2 flex-wrap mb-2">
<Badge className={`text-[10px] px-1.5 py-0 ${ <Badge className={`text-[10px] px-1.5 py-0 ${
obs.severity === 'critical' obs.severity === 'critical'
? 'bg-red-500/10 text-red-400 border-red-500/20' ? 'bg-red-500/10 text-red-400 border-red-500/20'
: 'bg-blue-500/10 text-blue-400 border-blue-500/20' : 'bg-blue-500/10 text-blue-400 border-blue-500/20'
}`}> }`}>
{obsTypeLabel(obs.error_type)} {obsTypeLabel(obs.error_type)}
</Badge> </Badge>
{obs.occurrence_count > 1 && (
<span className="text-xs text-muted-foreground">
{'Occurred ' + obs.occurrence_count + 'x'}
</span>
)}
</div>
</div> </div>
<p className="mt-1.5 text-xs whitespace-pre-line opacity-90 font-mono leading-relaxed">
{/* Error message - responsive text wrap */}
<p className="text-xs whitespace-pre-wrap break-words opacity-90 font-mono leading-relaxed mb-3">
{obs.raw_message} {obs.raw_message}
</p> </p>
<div className="flex items-center gap-3 mt-2 text-[10px] text-muted-foreground">
{/* Dates - stacked on mobile, inline on desktop */}
<div className="flex flex-col sm:flex-row sm:items-center gap-1 sm:gap-3 text-[10px] text-muted-foreground border-t border-white/5 pt-2">
<span className="flex items-center gap-1"> <span className="flex items-center gap-1">
<Clock className="h-3 w-3" /> <Clock className="h-3 w-3 flex-shrink-0" />
{'First: ' + formatObsDate(obs.first_occurrence)} <span className="break-words">First: {formatObsDate(obs.first_occurrence)}</span>
</span> </span>
{obs.occurrence_count > 1 && ( <span className="flex items-center gap-1">
<span className="flex items-center gap-1"> <Clock className="h-3 w-3 flex-shrink-0" />
<Clock className="h-3 w-3" /> <span className="break-words">Last: {formatObsDate(obs.last_occurrence)}</span>
{'Last: ' + formatObsDate(obs.last_occurrence)} </span>
</span> </div>
)}
{/* Occurrences count */}
<div className="text-[10px] text-muted-foreground mt-1">
Occurrences: <span className="font-medium text-foreground">{obs.occurrence_count}</span>
</div> </div>
</div> </div>
))} ))}

View File

@@ -129,10 +129,64 @@ class JournalWatcher:
if self._running: if self._running:
return return
self._running = True self._running = True
self._load_disk_io_notified() # Restore 24h dedup timestamps from DB
self._thread = threading.Thread(target=self._watch_loop, daemon=True, self._thread = threading.Thread(target=self._watch_loop, daemon=True,
name='journal-watcher') name='journal-watcher')
self._thread.start() self._thread.start()
def _load_disk_io_notified(self):
    """Load disk I/O notification timestamps from DB to survive restarts.

    Reads the ``notification_last_sent`` table (creating it if missing) and
    copies every row whose fingerprint matches ``diskio_%`` or ``fs_%`` into
    ``self._disk_io_notified``, provided the stored timestamp is still
    inside the ``self._DISK_IO_COOLDOWN`` window.

    Best-effort: if the database file does not exist nothing is loaded, and
    any error is printed rather than raised.

    NOTE(review): only fingerprints with the ``diskio_`` / ``fs_`` prefixes
    are restored. Confirm that every key handed to
    ``_save_disk_io_notified`` carries one of these prefixes; a key without
    one would be persisted but never reloaded here.
    """
    try:
        db_path = Path('/usr/local/share/proxmenux/health_monitor.db')
        if not db_path.exists():
            return
        conn = sqlite3.connect(str(db_path), timeout=10)
        try:
            conn.execute('PRAGMA journal_mode=WAL')
            cursor = conn.cursor()
            # Ensure table exists
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS notification_last_sent (
                    fingerprint TEXT PRIMARY KEY,
                    last_sent_ts REAL NOT NULL
                )
            ''')
            conn.commit()
            cursor.execute(
                "SELECT fingerprint, last_sent_ts FROM notification_last_sent "
                "WHERE fingerprint LIKE 'diskio_%' OR fingerprint LIKE 'fs_%'"
            )
            now = time.time()
            for fp, ts in cursor.fetchall():
                # Only load entries still inside the cooldown window
                # (stale entries would never suppress anything anyway).
                if now - ts < self._DISK_IO_COOLDOWN:
                    self._disk_io_notified[fp] = ts
        finally:
            # Always release the handle, even when a statement above raises;
            # previously the connection leaked on any error after connect().
            conn.close()
    except Exception as e:
        print(f"[JournalWatcher] Failed to load disk_io_notified: {e}")
def _save_disk_io_notified(self, key: str, ts: float):
    """Persist a disk I/O notification timestamp to DB.

    Upserts ``(key, ts)`` into the ``notification_last_sent`` table,
    creating the parent directory and the table when they do not exist.

    Args:
        key: Dedup fingerprint to store (primary key of the table).
        ts: Timestamp of the notification, stored as ``last_sent_ts``.

    Best-effort: any error is printed rather than raised, so a failing
    database never blocks the notification itself.
    """
    try:
        db_path = Path('/usr/local/share/proxmenux/health_monitor.db')
        db_path.parent.mkdir(parents=True, exist_ok=True)
        conn = sqlite3.connect(str(db_path), timeout=10)
        try:
            conn.execute('PRAGMA journal_mode=WAL')
            cursor = conn.cursor()
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS notification_last_sent (
                    fingerprint TEXT PRIMARY KEY,
                    last_sent_ts REAL NOT NULL
                )
            ''')
            cursor.execute(
                "INSERT OR REPLACE INTO notification_last_sent (fingerprint, last_sent_ts) VALUES (?, ?)",
                (key, ts)
            )
            conn.commit()
        finally:
            # Always release the handle, even when execute/commit raises;
            # previously the connection leaked on any error after connect().
            conn.close()
    except Exception as e:
        print(f"[JournalWatcher] Failed to save disk_io_notified: {e}")
def stop(self): def stop(self):
"""Stop the journal watcher.""" """Stop the journal watcher."""
self._running = False self._running = False
@@ -377,8 +431,9 @@ class JournalWatcher:
# UNKNOWN -- can't verify, be conservative # UNKNOWN -- can't verify, be conservative
severity = 'WARNING' severity = 'WARNING'
# Mark dedup timestamp now that we'll send # Mark dedup timestamp now that we'll send (persist to DB)
self._disk_io_notified[fs_dedup_key] = now_fs self._disk_io_notified[fs_dedup_key] = now_fs
self._save_disk_io_notified(fs_dedup_key, now_fs)
# Identify what this device is (model, type, mountpoint) # Identify what this device is (model, type, mountpoint)
device_info = self._identify_block_device(device) device_info = self._identify_block_device(device)
@@ -612,11 +667,12 @@ class JournalWatcher:
# ── Gate 2: 24-hour dedup per device ── # ── Gate 2: 24-hour dedup per device ──
now = time.time() now = time.time()
last_notified = self._disk_io_notified.get(resolved, 0) last_notified = self._disk_io_notified.get(resolved, 0)
if now - last_notified < self._DISK_IO_COOLDOWN: if now - last_notified < self._DISK_IO_COOLDOWN:
return # Already notified for this disk recently return # Already notified for this disk recently
self._disk_io_notified[resolved] = now self._disk_io_notified[resolved] = now
self._save_disk_io_notified(resolved, now)
# ── Build enriched notification ──
# ── Build enriched notification ──
device_info = self._identify_block_device(resolved) device_info = self._identify_block_device(resolved)
parts = [] parts = []
@@ -887,13 +943,11 @@ class JournalWatcher:
details = [] details = []
if storage: if storage:
details.append(f'Storage: {storage}') details.append(f'\U0001F5C4\uFE0F Storage: {storage}')
if mode: if mode:
details.append(f'Mode: {mode}') details.append(f'\u2699\uFE0F Mode: {mode}')
if compress:
details.append(f'Compression: {compress}')
if details: if details:
reason_parts.append(' | '.join(details)) reason_parts.append(' | '.join(details))
reason = '\n'.join(reason_parts) if reason_parts else 'Backup job started' reason = '\n'.join(reason_parts) if reason_parts else 'Backup job started'
@@ -913,6 +967,7 @@ class JournalWatcher:
'hostname': self._hostname, 'hostname': self._hostname,
'user': '', 'user': '',
'reason': reason, 'reason': reason,
'storage': storage or 'local',
}, entity='backup', entity_id=f'vzdump_{guest_key}') }, entity='backup', entity_id=f'vzdump_{guest_key}')
def _resolve_vm_name(self, vmid: str) -> str: def _resolve_vm_name(self, vmid: str) -> str:
@@ -1766,10 +1821,35 @@ class PollingCollector:
entity, eid = self._ENTITY_MAP.get(category, ('node', '')) entity, eid = self._ENTITY_MAP.get(category, ('node', ''))
# For resolved notifications, use only the first line of reason
# (the title/summary) to avoid repeating verbose details.
# Also extract a clean device identifier if present.
reason_lines = (reason or '').split('\n')
reason_summary = reason_lines[0] if reason_lines else ''
# Try to extract device info for a clean "Device: xxx (recovered)" line
device_line = ''
for line in reason_lines:
if 'Device:' in line or 'Device not currently' in line or '/dev/' in line:
# Extract the most useful device description
if 'not currently detected' in line.lower():
device_line = 'Device not currently detected -- may be a disconnected USB or temporary device'
break
elif 'Device:' in line:
device_line = line.strip()
break
if reason_summary and device_line:
clean_reason = f'{reason_summary}\n{device_line} (recovered)'
elif reason_summary:
clean_reason = f'{reason_summary} (recovered)'
else:
clean_reason = 'Condition resolved'
data = { data = {
'hostname': self._hostname, 'hostname': self._hostname,
'category': category, 'category': category,
'reason': f'{reason} (recovered)' if reason else 'Condition resolved', 'reason': clean_reason,
'error_key': key, 'error_key': key,
'severity': 'OK', 'severity': 'OK',
'original_severity': old_meta.get('severity', 'WARNING'), 'original_severity': old_meta.get('severity', 'WARNING'),

View File

@@ -268,26 +268,30 @@ def _format_vzdump_body(parsed: Dict[str, Any], is_success: bool) -> str:
else: else:
parts.append(f"{icon} ID {vmid}") parts.append(f"{icon} ID {vmid}")
# Size and Duration on same line # Size and Duration on same line with icons
detail_line = [] detail_line = []
if vm.get('size'): if vm.get('size'):
detail_line.append(f"Size: {vm['size']}") detail_line.append(f"\U0001F4CF Size: {vm['size']}")
if vm.get('time'): if vm.get('time'):
detail_line.append(f"Duration: {vm['time']}") detail_line.append(f"\u23F1\uFE0F Duration: {vm['time']}")
if detail_line: if detail_line:
parts.append(' | '.join(detail_line)) parts.append(' | '.join(detail_line))
# PBS/File on separate line # PBS/File on separate line with icon
if vm.get('filename'): if vm.get('filename'):
fname = vm['filename'] fname = vm['filename']
if re.match(r'^(?:ct|vm)/\d+/', fname): if re.match(r'^(?:ct|vm)/\d+/', fname):
parts.append(f"PBS: {fname}") parts.append(f"\U0001F5C4\uFE0F PBS: {fname}")
else: else:
parts.append(f"File: {fname}") parts.append(f"\U0001F4C1 File: {fname}")
# Error reason if failed
if status != 'ok' and vm.get('error'):
parts.append(f"\u26A0\uFE0F {vm['error']}")
parts.append('') # blank line between VMs parts.append('') # blank line between VMs
# Summary # Summary line with icons
vm_count = parsed.get('vm_count', 0) vm_count = parsed.get('vm_count', 0)
if vm_count > 0 or parsed.get('total_size'): if vm_count > 0 or parsed.get('total_size'):
ok_count = sum(1 for v in parsed.get('vms', []) ok_count = sum(1 for v in parsed.get('vms', [])
@@ -296,16 +300,16 @@ def _format_vzdump_body(parsed: Dict[str, Any], is_success: bool) -> str:
summary_parts = [] summary_parts = []
if vm_count: if vm_count:
summary_parts.append(f"{vm_count} backup(s)") summary_parts.append(f"\U0001F4CA {vm_count} backups")
if fail_count: if fail_count:
summary_parts.append(f"{fail_count} failed") summary_parts.append(f"\u274C {fail_count} failed")
if parsed.get('total_size'): if parsed.get('total_size'):
summary_parts.append(f"Total: {parsed['total_size']}") summary_parts.append(f"\U0001F4E6 Total: {parsed['total_size']}")
if parsed.get('total_time'): if parsed.get('total_time'):
summary_parts.append(f"Time: {parsed['total_time']}") summary_parts.append(f"\u23F1\uFE0F Time: {parsed['total_time']}")
if summary_parts: if summary_parts:
parts.append('--- ' + ' | '.join(summary_parts)) parts.append(' | '.join(summary_parts))
return '\n'.join(parts) return '\n'.join(parts)
@@ -355,7 +359,7 @@ TEMPLATES = {
}, },
'error_resolved': { 'error_resolved': {
'title': '{hostname}: Resolved - {category}', 'title': '{hostname}: Resolved - {category}',
'body': 'The {category} issue has been resolved.\n{reason}\nPrevious severity: {original_severity}\nDuration: {duration}', 'body': 'The {category} issue has been resolved.\n{reason}\n\U0001F6A6 Previous severity: {original_severity}\n\u23F1\uFE0F Duration: {duration}',
'label': 'Recovery notification', 'label': 'Recovery notification',
'group': 'health', 'group': 'health',
'default_enabled': True, 'default_enabled': True,
@@ -484,7 +488,7 @@ TEMPLATES = {
# ── Backup / Snapshot events ── # ── Backup / Snapshot events ──
'backup_start': { 'backup_start': {
'title': '{hostname}: Backup started', 'title': '{hostname}: Backup started [{storage}]',
'body': '{reason}', 'body': '{reason}',
'label': 'Backup started', 'label': 'Backup started',
'group': 'backup', 'group': 'backup',