Update notification service

This commit is contained in:
MacRimi
2026-03-06 12:06:53 +01:00
parent 925fe1cce0
commit ea2763c48c
3 changed files with 146 additions and 54 deletions

View File

@@ -286,8 +286,12 @@ export function StorageOverview() {
if (!iso) return 'N/A'
try {
const d = new Date(iso)
return d.toLocaleDateString(undefined, { month: 'short', day: 'numeric', year: 'numeric' })
+ ' ' + d.toLocaleTimeString(undefined, { hour: '2-digit', minute: '2-digit' })
const day = d.getDate().toString().padStart(2, '0')
const month = (d.getMonth() + 1).toString().padStart(2, '0')
const year = d.getFullYear()
const hours = d.getHours().toString().padStart(2, '0')
const mins = d.getMinutes().toString().padStart(2, '0')
return `${day}/${month}/${year} ${hours}:${mins}`
} catch { return iso }
}
@@ -1287,13 +1291,16 @@ export function StorageOverview() {
{/* Observations Section */}
{(diskObservations.length > 0 || loadingObservations) && (
<div className="border-t pt-4">
<h4 className="font-semibold mb-3 flex items-center gap-2">
<h4 className="font-semibold mb-2 flex items-center gap-2">
<Info className="h-4 w-4 text-blue-400" />
Observations
<Badge className="bg-blue-500/10 text-blue-400 border-blue-500/20 text-[10px] px-1.5 py-0">
{diskObservations.length}
</Badge>
</h4>
<p className="text-xs text-muted-foreground mb-3">
The following observations have been recorded for this disk:
</p>
{loadingObservations ? (
<div className="flex items-center gap-2 text-sm text-muted-foreground py-2">
<div className="h-4 w-4 rounded-full border-2 border-transparent border-t-blue-400 animate-spin" />
@@ -1310,36 +1317,37 @@ export function StorageOverview() {
: 'bg-blue-500/5 border-blue-500/20'
}`}
>
<div className="flex items-start justify-between gap-2">
<div className="flex items-center gap-2 flex-wrap">
<Badge className={`text-[10px] px-1.5 py-0 ${
obs.severity === 'critical'
? 'bg-red-500/10 text-red-400 border-red-500/20'
: 'bg-blue-500/10 text-blue-400 border-blue-500/20'
}`}>
{obsTypeLabel(obs.error_type)}
</Badge>
{obs.occurrence_count > 1 && (
<span className="text-xs text-muted-foreground">
{'Occurred ' + obs.occurrence_count + 'x'}
</span>
)}
</div>
{/* Header with type badge */}
<div className="flex items-center gap-2 flex-wrap mb-2">
<Badge className={`text-[10px] px-1.5 py-0 ${
obs.severity === 'critical'
? 'bg-red-500/10 text-red-400 border-red-500/20'
: 'bg-blue-500/10 text-blue-400 border-blue-500/20'
}`}>
{obsTypeLabel(obs.error_type)}
</Badge>
</div>
<p className="mt-1.5 text-xs whitespace-pre-line opacity-90 font-mono leading-relaxed">
{/* Error message - responsive text wrap */}
<p className="text-xs whitespace-pre-wrap break-words opacity-90 font-mono leading-relaxed mb-3">
{obs.raw_message}
</p>
<div className="flex items-center gap-3 mt-2 text-[10px] text-muted-foreground">
{/* Dates - stacked on mobile, inline on desktop */}
<div className="flex flex-col sm:flex-row sm:items-center gap-1 sm:gap-3 text-[10px] text-muted-foreground border-t border-white/5 pt-2">
<span className="flex items-center gap-1">
<Clock className="h-3 w-3" />
{'First: ' + formatObsDate(obs.first_occurrence)}
<Clock className="h-3 w-3 flex-shrink-0" />
<span className="break-words">First: {formatObsDate(obs.first_occurrence)}</span>
</span>
{obs.occurrence_count > 1 && (
<span className="flex items-center gap-1">
<Clock className="h-3 w-3" />
{'Last: ' + formatObsDate(obs.last_occurrence)}
</span>
)}
<span className="flex items-center gap-1">
<Clock className="h-3 w-3 flex-shrink-0" />
<span className="break-words">Last: {formatObsDate(obs.last_occurrence)}</span>
</span>
</div>
{/* Occurrences count */}
<div className="text-[10px] text-muted-foreground mt-1">
Occurrences: <span className="font-medium text-foreground">{obs.occurrence_count}</span>
</div>
</div>
))}

View File

@@ -129,10 +129,64 @@ class JournalWatcher:
if self._running:
return
self._running = True
self._load_disk_io_notified() # Restore 24h dedup timestamps from DB
self._thread = threading.Thread(target=self._watch_loop, daemon=True,
name='journal-watcher')
self._thread.start()
def _load_disk_io_notified(self):
"""Load disk I/O notification timestamps from DB to survive restarts."""
try:
db_path = Path('/usr/local/share/proxmenux/health_monitor.db')
if not db_path.exists():
return
conn = sqlite3.connect(str(db_path), timeout=10)
conn.execute('PRAGMA journal_mode=WAL')
cursor = conn.cursor()
# Ensure table exists
cursor.execute('''
CREATE TABLE IF NOT EXISTS notification_last_sent (
fingerprint TEXT PRIMARY KEY,
last_sent_ts REAL NOT NULL
)
''')
conn.commit()
cursor.execute(
"SELECT fingerprint, last_sent_ts FROM notification_last_sent "
"WHERE fingerprint LIKE 'diskio_%' OR fingerprint LIKE 'fs_%'"
)
now = time.time()
for fp, ts in cursor.fetchall():
# Only load if within the 24h window (don't load stale entries)
if now - ts < self._DISK_IO_COOLDOWN:
self._disk_io_notified[fp] = ts
conn.close()
except Exception as e:
print(f"[JournalWatcher] Failed to load disk_io_notified: {e}")
def _save_disk_io_notified(self, key: str, ts: float):
    """Persist a disk I/O notification timestamp to DB.

    Creates the database directory and the ``notification_last_sent`` table
    when missing, then inserts or replaces the row for ``key``.

    Args:
        key: Fingerprint stored in the ``fingerprint`` primary-key column.
        ts: Timestamp stored in ``last_sent_ts``.

    This is a best-effort write: any failure is printed and swallowed so
    the caller is never interrupted by a database problem.
    """
    try:
        db_path = Path('/usr/local/share/proxmenux/health_monitor.db')
        db_path.parent.mkdir(parents=True, exist_ok=True)
        conn = sqlite3.connect(str(db_path), timeout=10)
        try:
            conn.execute('PRAGMA journal_mode=WAL')
            cursor = conn.cursor()
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS notification_last_sent (
                    fingerprint TEXT PRIMARY KEY,
                    last_sent_ts REAL NOT NULL
                )
            ''')
            cursor.execute(
                "INSERT OR REPLACE INTO notification_last_sent (fingerprint, last_sent_ts) VALUES (?, ?)",
                (key, ts)
            )
            conn.commit()
        finally:
            # Close the connection even when a statement above raises.
            conn.close()
    except Exception as e:
        print(f"[JournalWatcher] Failed to save disk_io_notified: {e}")
def stop(self):
"""Stop the journal watcher."""
self._running = False
@@ -377,8 +431,9 @@ class JournalWatcher:
# UNKNOWN -- can't verify, be conservative
severity = 'WARNING'
# Mark dedup timestamp now that we'll send
# Mark dedup timestamp now that we'll send (persist to DB)
self._disk_io_notified[fs_dedup_key] = now_fs
self._save_disk_io_notified(fs_dedup_key, now_fs)
# Identify what this device is (model, type, mountpoint)
device_info = self._identify_block_device(device)
@@ -612,11 +667,12 @@ class JournalWatcher:
# ── Gate 2: 24-hour dedup per device ──
now = time.time()
last_notified = self._disk_io_notified.get(resolved, 0)
if now - last_notified < self._DISK_IO_COOLDOWN:
return # Already notified for this disk recently
self._disk_io_notified[resolved] = now
# ── Build enriched notification ──
if now - last_notified < self._DISK_IO_COOLDOWN:
return # Already notified for this disk recently
self._disk_io_notified[resolved] = now
self._save_disk_io_notified(resolved, now)
# ── Build enriched notification ──
device_info = self._identify_block_device(resolved)
parts = []
@@ -887,13 +943,11 @@ class JournalWatcher:
details = []
if storage:
details.append(f'Storage: {storage}')
details.append(f'\U0001F5C4\uFE0F Storage: {storage}')
if mode:
details.append(f'Mode: {mode}')
if compress:
details.append(f'Compression: {compress}')
details.append(f'\u2699\uFE0F Mode: {mode}')
if details:
reason_parts.append(' | '.join(details))
reason_parts.append(' | '.join(details))
reason = '\n'.join(reason_parts) if reason_parts else 'Backup job started'
@@ -913,6 +967,7 @@ class JournalWatcher:
'hostname': self._hostname,
'user': '',
'reason': reason,
'storage': storage or 'local',
}, entity='backup', entity_id=f'vzdump_{guest_key}')
def _resolve_vm_name(self, vmid: str) -> str:
@@ -1766,10 +1821,35 @@ class PollingCollector:
entity, eid = self._ENTITY_MAP.get(category, ('node', ''))
# For resolved notifications, use only the first line of reason
# (the title/summary) to avoid repeating verbose details.
# Also extract a clean device identifier if present.
reason_lines = (reason or '').split('\n')
reason_summary = reason_lines[0] if reason_lines else ''
# Try to extract device info for a clean "Device: xxx (recovered)" line
device_line = ''
for line in reason_lines:
if 'Device:' in line or 'Device not currently' in line or '/dev/' in line:
# Extract the most useful device description
if 'not currently detected' in line.lower():
device_line = 'Device not currently detected -- may be a disconnected USB or temporary device'
break
elif 'Device:' in line:
device_line = line.strip()
break
if reason_summary and device_line:
clean_reason = f'{reason_summary}\n{device_line} (recovered)'
elif reason_summary:
clean_reason = f'{reason_summary} (recovered)'
else:
clean_reason = 'Condition resolved'
data = {
'hostname': self._hostname,
'category': category,
'reason': f'{reason} (recovered)' if reason else 'Condition resolved',
'reason': clean_reason,
'error_key': key,
'severity': 'OK',
'original_severity': old_meta.get('severity', 'WARNING'),

View File

@@ -268,26 +268,30 @@ def _format_vzdump_body(parsed: Dict[str, Any], is_success: bool) -> str:
else:
parts.append(f"{icon} ID {vmid}")
# Size and Duration on same line
# Size and Duration on same line with icons
detail_line = []
if vm.get('size'):
detail_line.append(f"Size: {vm['size']}")
detail_line.append(f"\U0001F4CF Size: {vm['size']}")
if vm.get('time'):
detail_line.append(f"Duration: {vm['time']}")
detail_line.append(f"\u23F1\uFE0F Duration: {vm['time']}")
if detail_line:
parts.append(' | '.join(detail_line))
# PBS/File on separate line
# PBS/File on separate line with icon
if vm.get('filename'):
fname = vm['filename']
if re.match(r'^(?:ct|vm)/\d+/', fname):
parts.append(f"PBS: {fname}")
parts.append(f"\U0001F5C4\uFE0F PBS: {fname}")
else:
parts.append(f"File: {fname}")
parts.append(f"\U0001F4C1 File: {fname}")
# Error reason if failed
if status != 'ok' and vm.get('error'):
parts.append(f"\u26A0\uFE0F {vm['error']}")
parts.append('') # blank line between VMs
# Summary
# Summary line with icons
vm_count = parsed.get('vm_count', 0)
if vm_count > 0 or parsed.get('total_size'):
ok_count = sum(1 for v in parsed.get('vms', [])
@@ -296,16 +300,16 @@ def _format_vzdump_body(parsed: Dict[str, Any], is_success: bool) -> str:
summary_parts = []
if vm_count:
summary_parts.append(f"{vm_count} backup(s)")
summary_parts.append(f"\U0001F4CA {vm_count} backups")
if fail_count:
summary_parts.append(f"{fail_count} failed")
summary_parts.append(f"\u274C {fail_count} failed")
if parsed.get('total_size'):
summary_parts.append(f"Total: {parsed['total_size']}")
summary_parts.append(f"\U0001F4E6 Total: {parsed['total_size']}")
if parsed.get('total_time'):
summary_parts.append(f"Time: {parsed['total_time']}")
summary_parts.append(f"\u23F1\uFE0F Time: {parsed['total_time']}")
if summary_parts:
parts.append('--- ' + ' | '.join(summary_parts))
parts.append(' | '.join(summary_parts))
return '\n'.join(parts)
@@ -355,7 +359,7 @@ TEMPLATES = {
},
'error_resolved': {
'title': '{hostname}: Resolved - {category}',
'body': 'The {category} issue has been resolved.\n{reason}\nPrevious severity: {original_severity}\nDuration: {duration}',
'body': 'The {category} issue has been resolved.\n{reason}\n\U0001F6A6 Previous severity: {original_severity}\n\u23F1\uFE0F Duration: {duration}',
'label': 'Recovery notification',
'group': 'health',
'default_enabled': True,
@@ -484,7 +488,7 @@ TEMPLATES = {
# ── Backup / Snapshot events ──
'backup_start': {
'title': '{hostname}: Backup started',
'title': '{hostname}: Backup started [{storage}]',
'body': '{reason}',
'label': 'Backup started',
'group': 'backup',