update notification_templates.py

This commit is contained in:
MacRimi
2026-04-02 08:38:01 +02:00
parent 5f5dc171be
commit 007e3d1c0e
6 changed files with 121 additions and 13 deletions

View File

@@ -304,19 +304,25 @@ def enrich_context_for_ai(
context_parts = []
combined_text = f"{title} {body} {journal_context}"
# 1. System uptime - only relevant for errors, not informational notifications
# 1. System uptime - only relevant for failure/error events, not informational
# Uptime helps distinguish startup issues from runtime failures
# Only include uptime when something FAILED or has CRITICAL/WARNING status
uptime_relevant_types = [
'error', 'critical', 'warning', 'service', 'system',
'disk', 'smart', 'storage', 'io_error', 'network',
'cluster', 'ha', 'vm', 'ct', 'container', 'backup'
'fail', 'error', 'critical', 'crash', 'panic', 'oom',
'disk_error', 'smart_error', 'io_error', 'service_fail',
'split_brain', 'quorum_lost', 'node_offline'
]
# Exclude informational events (success, start, stop, complete, etc.)
informational_types = [
'update', 'upgrade', 'available', 'info', 'resolved',
'start', 'stop', 'shutdown', 'restart', 'complete',
'backup_complete', 'backup_start', 'migration'
]
# Exclude purely informational events
informational_types = ['update', 'upgrade', 'available', 'info', 'resolved']
is_uptime_relevant = any(t in event_type.lower() for t in uptime_relevant_types)
is_informational = any(t in event_type.lower() for t in informational_types)
# Only add uptime for actual failures, not routine operations
if is_uptime_relevant and not is_informational:
uptime = get_system_uptime()
if uptime and uptime != "unknown":

View File

@@ -953,7 +953,7 @@ def proxmox_webhook():
return jsonify({'accepted': False, 'error': 'internal_error', 'detail': str(e)}), 200
# ─── Internal Shutdown Event Endpoint ────────────────────────────
# ─── Internal Shutdown Event Endpoint ────────────────────────────
@notification_bp.route('/api/internal/shutdown-event', methods=['POST'])
def internal_shutdown_event():

View File

@@ -620,7 +620,7 @@ class EmailChannel(NotificationChannel):
<td colspan="2" style="padding:8px 12px;font-size:13px;color:#1f2937;border-bottom:1px solid #e5e7eb;">{value}</td>
</tr>'''
# ── Reason / details block (long text, displayed separately)
# ── Reason / details block (long text, displayed separately)
reason = data.get('reason', '')
reason_html = ''
if reason and len(reason) > 80:

View File

@@ -1319,6 +1319,55 @@ class TaskWatcher:
"""
TASK_LOG = '/var/log/pve/tasks/index'
TASK_DIR = '/var/log/pve/tasks'
def _get_task_log_reason(self, upid: str, status: str) -> str:
    """Extract a human-readable error/warning reason from a PVE task log.

    The per-task log file is located from the UPID and scanned for lines
    that look like warnings or errors. Up to three such lines are joined
    with ``'; '`` and returned.

    Args:
        upid: Task identifier in the form
            ``UPID:node:pid:pstart:starttime:type:id:user:``.
        status: Task status string; used verbatim as the fallback result.

    Returns:
        The extracted reason, or ``status`` when the UPID cannot be parsed,
        the log file cannot be read, or no matching line is found.
    """
    if not isinstance(upid, str):
        return status

    parts = upid.split(':')
    if len(parts) < 5 or not parts[4]:
        return status
    starttime_hex = parts[4]

    # PVE files task logs under TASK_DIR/<X>/<UPID>, where <X> is the LAST
    # hex digit of the start time and the file name is the full UPID
    # (trailing colon included). The first-digit / stripped-colon layout
    # probed by the previous implementation is kept as a fallback so that
    # anything which happened to resolve before still resolves.
    bare_upid = upid.rstrip(':')
    candidates = []
    for subdir in (starttime_hex[-1].upper(), starttime_hex[0].upper()):
        for name in (upid, bare_upid):
            path = os.path.join(self.TASK_DIR, subdir, name)
            if path not in candidates:
                candidates.append(path)

    # 'warn:' covers the "WARN: ..." prefix PVE uses for task warnings;
    # the remaining keywords are the ones matched originally.
    keywords = ('warn:', 'warning:', 'error:', 'failed', 'unable to', 'cannot')
    max_reasons = 3
    max_line_len = 200

    for log_path in candidates:
        reasons = []
        try:
            with open(log_path, 'r', errors='replace') as f:
                for line in f:
                    # Skip the trailing status line ("TASK OK", "TASK ERROR: ...").
                    if line.startswith('TASK '):
                        continue
                    lowered = line.lower()
                    if not any(kw in lowered for kw in keywords):
                        continue
                    clean_line = line.strip()
                    # Ignore empty or unreasonably long lines.
                    if clean_line and len(clean_line) < max_line_len:
                        reasons.append(clean_line)
                        if len(reasons) == max_reasons:
                            break
        except (OSError, ValueError):
            # Missing/unreadable file or an invalid path: try the next candidate.
            continue
        # A log was found: report what it contained, or fall back to status.
        return '; '.join(reasons) if reasons else status

    return status
# Map PVE task types to our event types
TASK_MAP = {
@@ -1559,16 +1608,31 @@ class TaskWatcher:
# Backup just finished -- start grace period for VM restarts
self._vzdump_running_since = time.time() # will expire via grace_period
# Check if task failed
is_error = status and status != 'OK' and status != ''
# Check if task failed or completed with warnings
# WARNINGS means the task completed but with non-fatal issues (e.g., EFI cert warnings)
# The VM/CT DID start successfully, just with caveats
is_warning = status and status.upper() == 'WARNINGS'
is_error = status and status not in ('OK', 'WARNINGS', '')
if is_error:
# Override to failure event
# Override to failure event - task actually failed
if 'start' in event_type:
event_type = event_type.replace('_start', '_fail')
elif 'complete' in event_type:
event_type = event_type.replace('_complete', '_fail')
severity = 'CRITICAL'
elif is_warning:
# Task completed with warnings - VM/CT started but has issues
# Use specific warning event types for better messaging
if event_type == 'vm_start':
event_type = 'vm_start_warning'
elif event_type == 'ct_start':
event_type = 'ct_start_warning'
elif event_type == 'backup_start':
event_type = 'backup_warning' # Backup finished with warnings
elif event_type == 'migration_start':
event_type = 'migration_warning' # Migration finished with warnings
severity = 'WARNING'
elif status == 'OK':
# Task completed successfully
if event_type == 'backup_start':
@@ -1580,12 +1644,18 @@ class TaskWatcher:
# Task just started (no status yet)
severity = default_severity
# Get the actual reason from task log if error or warning
if is_error or is_warning:
reason = self._get_task_log_reason(upid, status)
else:
reason = ''
data = {
'vmid': vmid,
'vmname': vmname or f'ID {vmid}',
'hostname': self._hostname,
'user': user,
'reason': status if is_error else '',
'reason': reason,
'target_node': '',
'size': '',
'snapshot_name': '',

View File

@@ -471,6 +471,13 @@ TEMPLATES = {
'group': 'vm_ct',
'default_enabled': True,
},
'vm_start_warning': {
'title': '{hostname}: VM {vmname} ({vmid}) started with warnings',
'body': 'Virtual machine {vmname} (ID: {vmid}) started successfully but has warnings.\nWarnings: {reason}',
'label': 'VM started (warnings)',
'group': 'vm_ct',
'default_enabled': True,
},
'vm_stop': {
'title': '{hostname}: VM {vmname} ({vmid}) stopped',
'body': 'Virtual machine {vmname} (ID: {vmid}) has been stopped.',
@@ -506,6 +513,13 @@ TEMPLATES = {
'group': 'vm_ct',
'default_enabled': True,
},
'ct_start_warning': {
'title': '{hostname}: CT {vmname} ({vmid}) started with warnings',
'body': 'Container {vmname} (ID: {vmid}) started successfully but has warnings.\nWarnings: {reason}',
'label': 'CT started (warnings)',
'group': 'vm_ct',
'default_enabled': True,
},
'ct_stop': {
'title': '{hostname}: CT {vmname} ({vmid}) stopped',
'body': 'Container {vmname} (ID: {vmid}) has been stopped.',
@@ -548,6 +562,13 @@ TEMPLATES = {
'group': 'vm_ct',
'default_enabled': True,
},
'migration_warning': {
'title': '{hostname}: Migration complete with warnings — {vmname} ({vmid})',
'body': '{vmname} (ID: {vmid}) migrated to node {target_node} but encountered warnings.\nWarnings: {reason}',
'label': 'Migration (warnings)',
'group': 'vm_ct',
'default_enabled': True,
},
'migration_fail': {
'title': '{hostname}: Migration FAILED — {vmname} ({vmid})',
'body': 'Migration of {vmname} (ID: {vmid}) to node {target_node} failed.\nReason: {reason}',
@@ -585,6 +606,13 @@ TEMPLATES = {
'group': 'backup',
'default_enabled': True,
},
'backup_warning': {
'title': '{hostname}: Backup complete with warnings — {vmname} ({vmid})',
'body': 'Backup of {vmname} (ID: {vmid}) completed but encountered warnings.\nWarnings: {reason}',
'label': 'Backup (warnings)',
'group': 'backup',
'default_enabled': True,
},
'backup_fail': {
'title': '{hostname}: Backup FAILED — {vmname} ({vmid})',
'body': 'Backup of {vmname} (ID: {vmid}) failed.\nReason: {reason}',
@@ -1182,23 +1210,27 @@ CATEGORY_EMOJI = {
EVENT_EMOJI = {
# VM / CT
'vm_start': '\u25B6\uFE0F', # play button
'vm_start_warning': '\u26A0\uFE0F', # warning sign - started with warnings
'vm_stop': '\u23F9\uFE0F', # stop button
'vm_shutdown': '\u23CF\uFE0F', # eject
'vm_fail': '\U0001F4A5', # collision (crash)
'vm_restart': '\U0001F504', # cycle
'ct_start': '\u25B6\uFE0F',
'ct_start_warning': '\u26A0\uFE0F', # warning sign - started with warnings
'ct_stop': '\u23F9\uFE0F',
'ct_shutdown': '\u23CF\uFE0F',
'ct_restart': '\U0001F504',
'ct_fail': '\U0001F4A5',
'migration_start': '\U0001F69A', # moving truck
'migration_complete': '\u2705', # check mark
'migration_warning': '\U0001F69A\u26A0\uFE0F', # 🚚⚠️ truck + warning
'migration_fail': '\u274C', # cross mark
'replication_fail': '\u274C',
'replication_complete': '\u2705',
# Backups
'backup_start': '\U0001F4BE\U0001F680', # 💾🚀 floppy + rocket
'backup_complete': '\U0001F4BE\u2705', # 💾✅ floppy + check
'backup_warning': '\U0001F4BE\u26A0\uFE0F', # 💾⚠️ floppy + warning
'backup_fail': '\U0001F4BE\u274C', # 💾❌ floppy + cross
'snapshot_complete': '\U0001F4F8', # camera with flash
'snapshot_fail': '\u274C',

View File

@@ -684,7 +684,7 @@ show_menu() {
echo -ne " Select: "
}
# ── Main ───────────────────────────────────────────────────────
# ── Main ───────────────────────────────────────────────────────
main() {
local mode="${1:-menu}"