Update notification service

This commit is contained in:
MacRimi
2026-03-28 15:50:30 +01:00
parent f4740916f5
commit d628233982
5 changed files with 31 additions and 14 deletions

View File

@@ -13,7 +13,7 @@ from flask import Blueprint, jsonify, request
from notification_manager import notification_manager from notification_manager import notification_manager
# ─── Webhook Hardening Helpers ────────────────────────────────── # ─── Webhook Hardening Helpers ───────────────────────────────────
class WebhookRateLimiter: class WebhookRateLimiter:
"""Simple sliding-window rate limiter for the webhook endpoint.""" """Simple sliding-window rate limiter for the webhook endpoint."""

View File

@@ -876,7 +876,9 @@ def _capture_health_journal_context(categories: list, reason: str = '') -> str:
'services': ['pveproxy', 'pvedaemon', 'pvestatd', 'corosync', 'ceph', 'services': ['pveproxy', 'pvedaemon', 'pvestatd', 'corosync', 'ceph',
'systemd', 'failed', 'service', 'unit', 'start', 'stop'], 'systemd', 'failed', 'service', 'unit', 'start', 'stop'],
'vms': ['qemu', 'kvm', 'lxc', 'vzdump', 'qm', 'pct', 'guest agent', 'vms': ['qemu', 'kvm', 'lxc', 'vzdump', 'qm', 'pct', 'guest agent',
'qemu-ga', 'migration', 'snapshot'], 'qemu-ga', 'migration', 'snapshot', 'pve-container', 'vzstart',
'failed to start', 'start error', 'activation failed', 'cannot start',
'qemu-server', 'CT ', 'VM '],
'memory': ['oom', 'out of memory', 'killed process', 'swap', 'memory'], 'memory': ['oom', 'out of memory', 'killed process', 'swap', 'memory'],
'cpu': ['thermal', 'temperature', 'throttl', 'mce', 'machine check'], 'cpu': ['thermal', 'temperature', 'throttl', 'mce', 'machine check'],
'updates': ['apt', 'dpkg', 'upgrade', 'update', 'package'], 'updates': ['apt', 'dpkg', 'upgrade', 'update', 'package'],

View File

@@ -2682,21 +2682,28 @@ class HealthMonitor:
ctid = vzstart_match.group(1) ctid = vzstart_match.group(1)
key = f'ct_{ctid}' key = f'ct_{ctid}'
if key not in vm_details: if key not in vm_details:
# Extraer mensaje de error # Resolve CT name for better context
ct_name = self._resolve_vm_name(ctid)
ct_display = f"CT {ctid} ({ct_name})" if ct_name else f"CT {ctid}"
# Extract specific error reason
if 'device' in line_lower and 'does not exist' in line_lower: if 'device' in line_lower and 'does not exist' in line_lower:
device_match = re.search(r'device\s+([/\w\d]+)\s+does not exist', line_lower) device_match = re.search(r'device\s+([/\w\d]+)\s+does not exist', line_lower)
if device_match: if device_match:
reason = f'Device {device_match.group(1)} missing' error_detail = f'Device {device_match.group(1)} missing'
else: else:
reason = 'Device error' error_detail = 'Device error'
else: else:
reason = 'Startup error' error_detail = 'Startup error'
issues.append(f'CT {ctid}: {reason}') # Include CT ID in reason for clarity in notifications
reason = f'{ct_display}: {error_detail}'
issues.append(reason)
vm_details[key] = { vm_details[key] = {
'status': 'WARNING', 'status': 'WARNING',
'reason': reason, 'reason': reason,
'id': ctid, 'id': ctid,
'vmname': ct_name,
'type': 'CT' 'type': 'CT'
} }
continue continue
@@ -2830,14 +2837,21 @@ class HealthMonitor:
error_key = f'ct_{ctid}' error_key = f'ct_{ctid}'
if error_key not in vm_details: if error_key not in vm_details:
# Resolve CT name for better context
ct_name = self._resolve_vm_name(ctid)
ct_display = f"CT {ctid} ({ct_name})" if ct_name else f"CT {ctid}"
if 'device' in line_lower and 'does not exist' in line_lower: if 'device' in line_lower and 'does not exist' in line_lower:
device_match = re.search(r'device\s+([/\w\d]+)\s+does not exist', line_lower) device_match = re.search(r'device\s+([/\w\d]+)\s+does not exist', line_lower)
if device_match: if device_match:
reason = f'Device {device_match.group(1)} missing' error_detail = f'Device {device_match.group(1)} missing'
else: else:
reason = 'Device error' error_detail = 'Device error'
else: else:
reason = 'Startup error' error_detail = 'Startup error'
# Include CT ID in reason for clarity
reason = f'{ct_display}: {error_detail}'
# Record persistent error # Record persistent error
rec_result = health_persistence.record_error( rec_result = health_persistence.record_error(
@@ -2845,14 +2859,15 @@ class HealthMonitor:
category='vms', category='vms',
severity='WARNING', severity='WARNING',
reason=reason, reason=reason,
details={'id': ctid, 'type': 'CT'} details={'id': ctid, 'vmname': ct_name, 'type': 'CT'}
) )
if not rec_result or rec_result.get('type') != 'skipped_acknowledged': if not rec_result or rec_result.get('type') != 'skipped_acknowledged':
issues.append(f'CT {ctid}: {reason}') issues.append(reason)
vm_details[error_key] = { vm_details[error_key] = {
'status': 'WARNING', 'status': 'WARNING',
'reason': reason, 'reason': reason,
'id': ctid, 'id': ctid,
'vmname': ct_name,
'type': 'CT' 'type': 'CT'
} }

View File

@@ -251,7 +251,7 @@ class TelegramChannel(NotificationChannel):
.replace('>', '&gt;')) .replace('>', '&gt;'))
# ─── Gotify ────────────────────────────────────────────────────── # ─── Gotify ─────────────────────────────────────────────────────
class GotifyChannel(NotificationChannel): class GotifyChannel(NotificationChannel):
"""Gotify push notification channel with priority mapping.""" """Gotify push notification channel with priority mapping."""

View File

@@ -197,7 +197,7 @@ def capture_journal_context(keywords: list, lines: int = 30,
return "" return ""
# ─── Journal Watcher (Real-time) ───────────────────────────────── # ─── Journal Watcher (Real-time) ────────────────────────────────
class JournalWatcher: class JournalWatcher:
"""Watches journald in real-time for critical system events. """Watches journald in real-time for critical system events.