mirror of
https://github.com/MacRimi/ProxMenux.git
synced 2026-04-17 17:42:19 +00:00
Update notification service
This commit is contained in:
@@ -129,10 +129,64 @@ class JournalWatcher:
|
||||
if self._running:
|
||||
return
|
||||
self._running = True
|
||||
self._load_disk_io_notified() # Restore 24h dedup timestamps from DB
|
||||
self._thread = threading.Thread(target=self._watch_loop, daemon=True,
|
||||
name='journal-watcher')
|
||||
self._thread.start()
|
||||
|
||||
def _load_disk_io_notified(self):
|
||||
"""Load disk I/O notification timestamps from DB to survive restarts."""
|
||||
try:
|
||||
db_path = Path('/usr/local/share/proxmenux/health_monitor.db')
|
||||
if not db_path.exists():
|
||||
return
|
||||
conn = sqlite3.connect(str(db_path), timeout=10)
|
||||
conn.execute('PRAGMA journal_mode=WAL')
|
||||
cursor = conn.cursor()
|
||||
# Ensure table exists
|
||||
cursor.execute('''
|
||||
CREATE TABLE IF NOT EXISTS notification_last_sent (
|
||||
fingerprint TEXT PRIMARY KEY,
|
||||
last_sent_ts REAL NOT NULL
|
||||
)
|
||||
''')
|
||||
conn.commit()
|
||||
cursor.execute(
|
||||
"SELECT fingerprint, last_sent_ts FROM notification_last_sent "
|
||||
"WHERE fingerprint LIKE 'diskio_%' OR fingerprint LIKE 'fs_%'"
|
||||
)
|
||||
now = time.time()
|
||||
for fp, ts in cursor.fetchall():
|
||||
# Only load if within the 24h window (don't load stale entries)
|
||||
if now - ts < self._DISK_IO_COOLDOWN:
|
||||
self._disk_io_notified[fp] = ts
|
||||
conn.close()
|
||||
except Exception as e:
|
||||
print(f"[JournalWatcher] Failed to load disk_io_notified: {e}")
|
||||
|
||||
def _save_disk_io_notified(self, key: str, ts: float):
|
||||
"""Persist a disk I/O notification timestamp to DB."""
|
||||
try:
|
||||
db_path = Path('/usr/local/share/proxmenux/health_monitor.db')
|
||||
db_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
conn = sqlite3.connect(str(db_path), timeout=10)
|
||||
conn.execute('PRAGMA journal_mode=WAL')
|
||||
cursor = conn.cursor()
|
||||
cursor.execute('''
|
||||
CREATE TABLE IF NOT EXISTS notification_last_sent (
|
||||
fingerprint TEXT PRIMARY KEY,
|
||||
last_sent_ts REAL NOT NULL
|
||||
)
|
||||
''')
|
||||
cursor.execute(
|
||||
"INSERT OR REPLACE INTO notification_last_sent (fingerprint, last_sent_ts) VALUES (?, ?)",
|
||||
(key, ts)
|
||||
)
|
||||
conn.commit()
|
||||
conn.close()
|
||||
except Exception as e:
|
||||
print(f"[JournalWatcher] Failed to save disk_io_notified: {e}")
|
||||
|
||||
def stop(self):
|
||||
"""Stop the journal watcher."""
|
||||
self._running = False
|
||||
@@ -377,8 +431,9 @@ class JournalWatcher:
|
||||
# UNKNOWN -- can't verify, be conservative
|
||||
severity = 'WARNING'
|
||||
|
||||
# Mark dedup timestamp now that we'll send
|
||||
# Mark dedup timestamp now that we'll send (persist to DB)
|
||||
self._disk_io_notified[fs_dedup_key] = now_fs
|
||||
self._save_disk_io_notified(fs_dedup_key, now_fs)
|
||||
|
||||
# Identify what this device is (model, type, mountpoint)
|
||||
device_info = self._identify_block_device(device)
|
||||
@@ -612,11 +667,12 @@ class JournalWatcher:
|
||||
# ── Gate 2: 24-hour dedup per device ──
|
||||
now = time.time()
|
||||
last_notified = self._disk_io_notified.get(resolved, 0)
|
||||
if now - last_notified < self._DISK_IO_COOLDOWN:
|
||||
return # Already notified for this disk recently
|
||||
self._disk_io_notified[resolved] = now
|
||||
|
||||
# ── Build enriched notification ──
|
||||
if now - last_notified < self._DISK_IO_COOLDOWN:
|
||||
return # Already notified for this disk recently
|
||||
self._disk_io_notified[resolved] = now
|
||||
self._save_disk_io_notified(resolved, now)
|
||||
|
||||
# ── Build enriched notification ──
|
||||
device_info = self._identify_block_device(resolved)
|
||||
|
||||
parts = []
|
||||
@@ -887,13 +943,11 @@ class JournalWatcher:
|
||||
|
||||
details = []
|
||||
if storage:
|
||||
details.append(f'Storage: {storage}')
|
||||
details.append(f'\U0001F5C4\uFE0F Storage: {storage}')
|
||||
if mode:
|
||||
details.append(f'Mode: {mode}')
|
||||
if compress:
|
||||
details.append(f'Compression: {compress}')
|
||||
details.append(f'\u2699\uFE0F Mode: {mode}')
|
||||
if details:
|
||||
reason_parts.append(' | '.join(details))
|
||||
reason_parts.append(' | '.join(details))
|
||||
|
||||
reason = '\n'.join(reason_parts) if reason_parts else 'Backup job started'
|
||||
|
||||
@@ -913,6 +967,7 @@ class JournalWatcher:
|
||||
'hostname': self._hostname,
|
||||
'user': '',
|
||||
'reason': reason,
|
||||
'storage': storage or 'local',
|
||||
}, entity='backup', entity_id=f'vzdump_{guest_key}')
|
||||
|
||||
def _resolve_vm_name(self, vmid: str) -> str:
|
||||
@@ -1766,10 +1821,35 @@ class PollingCollector:
|
||||
|
||||
entity, eid = self._ENTITY_MAP.get(category, ('node', ''))
|
||||
|
||||
# For resolved notifications, use only the first line of reason
|
||||
# (the title/summary) to avoid repeating verbose details.
|
||||
# Also extract a clean device identifier if present.
|
||||
reason_lines = (reason or '').split('\n')
|
||||
reason_summary = reason_lines[0] if reason_lines else ''
|
||||
|
||||
# Try to extract device info for a clean "Device: xxx (recovered)" line
|
||||
device_line = ''
|
||||
for line in reason_lines:
|
||||
if 'Device:' in line or 'Device not currently' in line or '/dev/' in line:
|
||||
# Extract the most useful device description
|
||||
if 'not currently detected' in line.lower():
|
||||
device_line = 'Device not currently detected -- may be a disconnected USB or temporary device'
|
||||
break
|
||||
elif 'Device:' in line:
|
||||
device_line = line.strip()
|
||||
break
|
||||
|
||||
if reason_summary and device_line:
|
||||
clean_reason = f'{reason_summary}\n{device_line} (recovered)'
|
||||
elif reason_summary:
|
||||
clean_reason = f'{reason_summary} (recovered)'
|
||||
else:
|
||||
clean_reason = 'Condition resolved'
|
||||
|
||||
data = {
|
||||
'hostname': self._hostname,
|
||||
'category': category,
|
||||
'reason': f'{reason} (recovered)' if reason else 'Condition resolved',
|
||||
'reason': clean_reason,
|
||||
'error_key': key,
|
||||
'severity': 'OK',
|
||||
'original_severity': old_meta.get('severity', 'WARNING'),
|
||||
|
||||
@@ -268,26 +268,30 @@ def _format_vzdump_body(parsed: Dict[str, Any], is_success: bool) -> str:
|
||||
else:
|
||||
parts.append(f"{icon} ID {vmid}")
|
||||
|
||||
# Size and Duration on same line
|
||||
# Size and Duration on same line with icons
|
||||
detail_line = []
|
||||
if vm.get('size'):
|
||||
detail_line.append(f"Size: {vm['size']}")
|
||||
detail_line.append(f"\U0001F4CF Size: {vm['size']}")
|
||||
if vm.get('time'):
|
||||
detail_line.append(f"Duration: {vm['time']}")
|
||||
detail_line.append(f"\u23F1\uFE0F Duration: {vm['time']}")
|
||||
if detail_line:
|
||||
parts.append(' | '.join(detail_line))
|
||||
|
||||
# PBS/File on separate line
|
||||
# PBS/File on separate line with icon
|
||||
if vm.get('filename'):
|
||||
fname = vm['filename']
|
||||
if re.match(r'^(?:ct|vm)/\d+/', fname):
|
||||
parts.append(f"PBS: {fname}")
|
||||
parts.append(f"\U0001F5C4\uFE0F PBS: {fname}")
|
||||
else:
|
||||
parts.append(f"File: {fname}")
|
||||
parts.append(f"\U0001F4C1 File: {fname}")
|
||||
|
||||
# Error reason if failed
|
||||
if status != 'ok' and vm.get('error'):
|
||||
parts.append(f"\u26A0\uFE0F {vm['error']}")
|
||||
|
||||
parts.append('') # blank line between VMs
|
||||
|
||||
# Summary
|
||||
# Summary line with icons
|
||||
vm_count = parsed.get('vm_count', 0)
|
||||
if vm_count > 0 or parsed.get('total_size'):
|
||||
ok_count = sum(1 for v in parsed.get('vms', [])
|
||||
@@ -296,16 +300,16 @@ def _format_vzdump_body(parsed: Dict[str, Any], is_success: bool) -> str:
|
||||
|
||||
summary_parts = []
|
||||
if vm_count:
|
||||
summary_parts.append(f"{vm_count} backup(s)")
|
||||
summary_parts.append(f"\U0001F4CA {vm_count} backups")
|
||||
if fail_count:
|
||||
summary_parts.append(f"{fail_count} failed")
|
||||
summary_parts.append(f"\u274C {fail_count} failed")
|
||||
if parsed.get('total_size'):
|
||||
summary_parts.append(f"Total: {parsed['total_size']}")
|
||||
summary_parts.append(f"\U0001F4E6 Total: {parsed['total_size']}")
|
||||
if parsed.get('total_time'):
|
||||
summary_parts.append(f"Time: {parsed['total_time']}")
|
||||
summary_parts.append(f"\u23F1\uFE0F Time: {parsed['total_time']}")
|
||||
|
||||
if summary_parts:
|
||||
parts.append('--- ' + ' | '.join(summary_parts))
|
||||
parts.append(' | '.join(summary_parts))
|
||||
|
||||
return '\n'.join(parts)
|
||||
|
||||
@@ -355,7 +359,7 @@ TEMPLATES = {
|
||||
},
|
||||
'error_resolved': {
|
||||
'title': '{hostname}: Resolved - {category}',
|
||||
'body': 'The {category} issue has been resolved.\n{reason}\nPrevious severity: {original_severity}\nDuration: {duration}',
|
||||
'body': 'The {category} issue has been resolved.\n{reason}\n\U0001F6A6 Previous severity: {original_severity}\n\u23F1\uFE0F Duration: {duration}',
|
||||
'label': 'Recovery notification',
|
||||
'group': 'health',
|
||||
'default_enabled': True,
|
||||
@@ -484,7 +488,7 @@ TEMPLATES = {
|
||||
|
||||
# ── Backup / Snapshot events ──
|
||||
'backup_start': {
|
||||
'title': '{hostname}: Backup started',
|
||||
'title': '{hostname}: Backup started [{storage}]',
|
||||
'body': '{reason}',
|
||||
'label': 'Backup started',
|
||||
'group': 'backup',
|
||||
|
||||
Reference in New Issue
Block a user