From d6282339828ba2190de39ff941f731b9be5fbcf5 Mon Sep 17 00:00:00 2001 From: MacRimi Date: Sat, 28 Mar 2026 15:50:30 +0100 Subject: [PATCH] Update notification service --- AppImage/scripts/flask_notification_routes.py | 2 +- AppImage/scripts/flask_server.py | 4 ++- AppImage/scripts/health_monitor.py | 35 +++++++++++++------ AppImage/scripts/notification_channels.py | 2 +- AppImage/scripts/notification_events.py | 2 +- 5 files changed, 31 insertions(+), 14 deletions(-) diff --git a/AppImage/scripts/flask_notification_routes.py b/AppImage/scripts/flask_notification_routes.py index 7c3294b7..065f3435 100644 --- a/AppImage/scripts/flask_notification_routes.py +++ b/AppImage/scripts/flask_notification_routes.py @@ -13,7 +13,7 @@ from flask import Blueprint, jsonify, request from notification_manager import notification_manager -# ─── Webhook Hardening Helpers ─────────────────────────────────── +# ─── Webhook Hardening Helpers ──────────────────────────────────��� class WebhookRateLimiter: """Simple sliding-window rate limiter for the webhook endpoint.""" diff --git a/AppImage/scripts/flask_server.py b/AppImage/scripts/flask_server.py index dfeca440..0e53c4aa 100644 --- a/AppImage/scripts/flask_server.py +++ b/AppImage/scripts/flask_server.py @@ -876,7 +876,9 @@ def _capture_health_journal_context(categories: list, reason: str = '') -> str: 'services': ['pveproxy', 'pvedaemon', 'pvestatd', 'corosync', 'ceph', 'systemd', 'failed', 'service', 'unit', 'start', 'stop'], 'vms': ['qemu', 'kvm', 'lxc', 'vzdump', 'qm', 'pct', 'guest agent', - 'qemu-ga', 'migration', 'snapshot'], + 'qemu-ga', 'migration', 'snapshot', 'pve-container', 'vzstart', + 'failed to start', 'start error', 'activation failed', 'cannot start', + 'qemu-server', 'CT ', 'VM '], 'memory': ['oom', 'out of memory', 'killed process', 'swap', 'memory'], 'cpu': ['thermal', 'temperature', 'throttl', 'mce', 'machine check'], 'updates': ['apt', 'dpkg', 'upgrade', 'update', 'package'], diff --git a/AppImage/scripts/health_monitor.py b/AppImage/scripts/health_monitor.py index eb08f9f8..4d09ae49 100644 --- a/AppImage/scripts/health_monitor.py +++ b/AppImage/scripts/health_monitor.py @@ -2682,21 +2682,28 @@ class HealthMonitor: ctid = vzstart_match.group(1) key = f'ct_{ctid}' if key not in vm_details: - # Extraer mensaje de error + # Resolve CT name for better context + ct_name = self._resolve_vm_name(ctid) + ct_display = f"CT {ctid} ({ct_name})" if ct_name else f"CT {ctid}" + + # Extract specific error reason if 'device' in line_lower and 'does not exist' in line_lower: device_match = re.search(r'device\s+([/\w\d]+)\s+does not exist', line_lower) if device_match: - reason = f'Device {device_match.group(1)} missing' + error_detail = f'Device {device_match.group(1)} missing' else: - reason = 'Device error' + error_detail = 'Device error' else: - reason = 'Startup error' + error_detail = 'Startup error' - issues.append(f'CT {ctid}: {reason}') + # Include CT ID in reason for clarity in notifications + reason = f'{ct_display}: {error_detail}' + issues.append(reason) vm_details[key] = { 'status': 'WARNING', 'reason': reason, 'id': ctid, + 'vmname': ct_name, 'type': 'CT' } continue @@ -2830,14 +2837,21 @@ class HealthMonitor: error_key = f'ct_{ctid}' if error_key not in vm_details: + # Resolve CT name for better context + ct_name = self._resolve_vm_name(ctid) + ct_display = f"CT {ctid} ({ct_name})" if ct_name else f"CT {ctid}" + if 'device' in line_lower and 'does not exist' in line_lower: device_match = re.search(r'device\s+([/\w\d]+)\s+does not exist', line_lower) if device_match: - reason = f'Device {device_match.group(1)} missing' + error_detail = f'Device {device_match.group(1)} missing' else: - reason = 'Device error' + error_detail = 'Device error' else: - reason = 'Startup error' + error_detail = 'Startup error' + + # Include CT ID in reason for clarity + reason = f'{ct_display}: {error_detail}' # Record persistent error rec_result = health_persistence.record_error( @@ -2845,14 +2859,15 @@ class HealthMonitor: category='vms', severity='WARNING', reason=reason, - details={'id': ctid, 'type': 'CT'} + details={'id': ctid, 'vmname': ct_name, 'type': 'CT'} ) if not rec_result or rec_result.get('type') != 'skipped_acknowledged': - issues.append(f'CT {ctid}: {reason}') + issues.append(reason) vm_details[error_key] = { 'status': 'WARNING', 'reason': reason, 'id': ctid, + 'vmname': ct_name, 'type': 'CT' } diff --git a/AppImage/scripts/notification_channels.py b/AppImage/scripts/notification_channels.py index 6d535092..53ff059d 100644 --- a/AppImage/scripts/notification_channels.py +++ b/AppImage/scripts/notification_channels.py @@ -251,7 +251,7 @@ class TelegramChannel(NotificationChannel): .replace('>', '>')) -# ─── Gotify ───────────────────────────────────��────────────────── +# ─── Gotify ────────────────────────────────────────────────────── class GotifyChannel(NotificationChannel): """Gotify push notification channel with priority mapping.""" diff --git a/AppImage/scripts/notification_events.py b/AppImage/scripts/notification_events.py index 4a251309..39cdf376 100644 --- a/AppImage/scripts/notification_events.py +++ b/AppImage/scripts/notification_events.py @@ -197,7 +197,7 @@ def capture_journal_context(keywords: list, lines: int = 30, return "" -# ─── Journal Watcher (Real-time) ───────────────��───────────────── +# ─── Journal Watcher (Real-time) ───────────────────────────────── class JournalWatcher: """Watches journald in real-time for critical system events.