From e11daa0b362a0ad78802b6a96f51b24bda0372d4 Mon Sep 17 00:00:00 2001 From: MacRimi Date: Thu, 2 Apr 2026 16:59:09 +0200 Subject: [PATCH] update ai_context_enrichment.py --- AppImage/scripts/ai_context_enrichment.py | 28 +++--- AppImage/scripts/flask_server.py | 101 ++++++++++++++------- AppImage/scripts/notification_templates.py | 16 ++-- 3 files changed, 89 insertions(+), 56 deletions(-) diff --git a/AppImage/scripts/ai_context_enrichment.py b/AppImage/scripts/ai_context_enrichment.py index 64fc78f7..f07d3ffa 100644 --- a/AppImage/scripts/ai_context_enrichment.py +++ b/AppImage/scripts/ai_context_enrichment.py @@ -304,26 +304,22 @@ def enrich_context_for_ai( context_parts = [] combined_text = f"{title} {body} {journal_context}" - # 1. System uptime - only relevant for failure/error events, not informational + # 1. System uptime - ONLY for critical system-level failures # Uptime helps distinguish startup issues from runtime failures - # Only include uptime when something FAILED or has CRITICAL/WARNING status - uptime_relevant_types = [ - 'fail', 'error', 'critical', 'crash', 'panic', 'oom', - 'disk_error', 'smart_error', 'io_error', 'service_fail', - 'split_brain', 'quorum_lost', 'node_offline' - ] - # Exclude informational events (success, start, stop, complete, etc.) 
- informational_types = [ - 'update', 'upgrade', 'available', 'info', 'resolved', - 'start', 'stop', 'shutdown', 'restart', 'complete', - 'backup_complete', 'backup_start', 'migration' + # BUT it's noise for disk errors, warnings, or routine operations + # Only include for: system crash, kernel panic, OOM, cluster failures + uptime_critical_types = [ + 'crash', 'panic', 'oom', 'kernel', + 'split_brain', 'quorum_lost', 'node_offline', 'node_fail', + 'system_fail', 'boot_fail' ] - is_uptime_relevant = any(t in event_type.lower() for t in uptime_relevant_types) - is_informational = any(t in event_type.lower() for t in informational_types) + # Check if this is a critical system-level event (not disk/service/hardware) + event_lower = event_type.lower() + is_critical_system_event = any(t in event_lower for t in uptime_critical_types) - # Only add uptime for actual failures, not routine operations - if is_uptime_relevant and not is_informational: + # Only add uptime for critical system failures, nothing else + if is_critical_system_event: uptime = get_system_uptime() if uptime and uptime != "unknown": context_parts.append(f"System uptime: {uptime}") diff --git a/AppImage/scripts/flask_server.py b/AppImage/scripts/flask_server.py index 0e53c4aa..a6c90f15 100644 --- a/AppImage/scripts/flask_server.py +++ b/AppImage/scripts/flask_server.py @@ -6192,6 +6192,8 @@ def api_network_interface_metrics(interface_name): rrd_data = [] + rrd_error = None + if interface_type == 'vm_lxc': # For VM/LXC interfaces, get data from the VM/LXC RRD vmid, vm_type = extract_vmid_from_interface(interface_name) @@ -6202,19 +6204,20 @@ def api_network_interface_metrics(interface_name): capture_output=True, text=True, timeout=10) if rrd_result.returncode == 0: - all_data = json.loads(rrd_result.stdout) - # Filter to only network-related fields - for point in all_data: - filtered_point = {'time': point.get('time')} - # Add network fields if they exist - for key in ['netin', 'netout']: - if key in 
point: - filtered_point[key] = point[key] - rrd_data.append(filtered_point) - + try: + all_data = json.loads(rrd_result.stdout) + # Filter to only network-related fields + for point in all_data: + filtered_point = {'time': point.get('time')} + # Add network fields if they exist + for key in ['netin', 'netout']: + if key in point: + filtered_point[key] = point[key] + rrd_data.append(filtered_point) + except json.JSONDecodeError: + rrd_error = f'RRD data for {vm_type.upper()} {vmid} is empty or corrupted' else: - # print(f"[v0] ERROR: Failed to get RRD data for VM/LXC") - pass + rrd_error = f'Failed to get RRD data: {rrd_result.stderr}' else: # For physical/bridge interfaces, get data from node RRD @@ -6223,38 +6226,42 @@ def api_network_interface_metrics(interface_name): capture_output=True, text=True, timeout=10) if rrd_result.returncode == 0: - all_data = json.loads(rrd_result.stdout) - # Filter to only network-related fields for this interface - for point in all_data: - filtered_point = {'time': point.get('time')} - # Add network fields if they exist - for key in ['netin', 'netout']: - if key in point: - filtered_point[key] = point[key] - rrd_data.append(filtered_point) - + try: + all_data = json.loads(rrd_result.stdout) + # Filter to only network-related fields for this interface + for point in all_data: + filtered_point = {'time': point.get('time')} + # Add network fields if they exist + for key in ['netin', 'netout']: + if key in point: + filtered_point[key] = point[key] + rrd_data.append(filtered_point) + except json.JSONDecodeError: + rrd_error = 'Node RRD data is empty or corrupted' else: - # print(f"[v0] ERROR: Failed to get RRD data for node") - pass + rrd_error = f'Failed to get RRD data: {rrd_result.stderr}' + # If there was an RRD error and no data collected, return error with details + if rrd_error and not rrd_data: + return jsonify({ + 'error': 'RRD data not available', + 'details': rrd_error, + 'suggestion': 'The RRD database may be empty or 
corrupted. Try: systemctl restart rrdcached' + }), 503 + return jsonify({ 'interface': interface_name, 'type': interface_type, 'timeframe': timeframe, - 'data': rrd_data + 'data': rrd_data, + 'warning': rrd_error if rrd_error else None # Include warning if there was an error but some data exists }) except Exception as e: return jsonify({'error': str(e)}), 500 -@app.route('/api/vms', methods=['GET']) -@require_auth -def api_vms(): - """Get virtual machine information""" - return jsonify(get_proxmox_vms()) - @app.route('/api/vms//metrics', methods=['GET']) @require_auth def api_vm_metrics(vmid): @@ -6316,9 +6323,22 @@ def api_vm_metrics(vmid): 'data': rrd_data }) else: - + # Check if RRD file is empty or corrupted + stderr_lower = rrd_result.stderr.lower() if rrd_result.stderr else '' + if 'rrd' in stderr_lower or 'no such file' in stderr_lower or 'empty' in stderr_lower: + return jsonify({ + 'error': 'RRD data not available', + 'details': f'The RRD database for {vm_type.upper()} {vmid} may be empty or corrupted.', + 'suggestion': 'Try restarting rrdcached: systemctl restart rrdcached' + }), 503 return jsonify({'error': f'Failed to get RRD data: {rrd_result.stderr}'}), 500 - + + except json.JSONDecodeError: + return jsonify({ + 'error': 'RRD data not available', + 'details': f'Unable to parse metrics data for VM/LXC {vmid}.', + 'suggestion': 'Try restarting rrdcached: systemctl restart rrdcached' + }), 503 except Exception as e: return jsonify({'error': str(e)}), 500 @@ -6381,8 +6401,23 @@ def api_node_metrics(): 'data': rrd_data }) else: + # Check if RRD file is empty or corrupted + stderr_lower = rrd_result.stderr.lower() if rrd_result.stderr else '' + if 'rrd' in stderr_lower or 'no such file' in stderr_lower or 'empty' in stderr_lower: + return jsonify({ + 'error': 'RRD data not available', + 'details': 'The RRD database file may be empty or corrupted. 
This can happen if rrdcached was not running properly after Proxmox installation.', + 'suggestion': 'Try restarting rrdcached: systemctl restart rrdcached' + }), 503 # Service Unavailable - more appropriate than 500 return jsonify({'error': f'Failed to get RRD data: {rrd_result.stderr}'}), 500 + except json.JSONDecodeError: + # pvesh returned invalid JSON - likely empty RRD + return jsonify({ + 'error': 'RRD data not available', + 'details': 'Unable to parse metrics data. The RRD database may be empty or corrupted.', + 'suggestion': 'Try restarting rrdcached: systemctl restart rrdcached' + }), 503 except Exception as e: return jsonify({'error': str(e)}), 500 diff --git a/AppImage/scripts/notification_templates.py b/AppImage/scripts/notification_templates.py index 8a75ac86..b1c5b79a 100644 --- a/AppImage/scripts/notification_templates.py +++ b/AppImage/scripts/notification_templates.py @@ -1001,7 +1001,7 @@ EVENT_GROUPS = { } -# ─── Template Renderer ─────────────────────────────────────────── +# ─── Template Renderer ─────────────────────────────────────────── def _get_hostname() -> str: """Get short hostname for message titles.""" @@ -1622,9 +1622,11 @@ BLANK LINES: Insert between logical sections (VM entries, before summary, before ═══ EXAMPLES (follow these formats) ═══ +IMPORTANT: {hostname} is a placeholder. Always use the ACTUAL hostname from the original message. + BACKUP START: [TITLE] -💾🚀 pve01: Backup started +💾🚀 {hostname}: Backup started [BODY] Backup job starting on storage PBS. 🏷️ VMs: web01 (100) @@ -1633,7 +1635,7 @@ Backup job starting on storage PBS. BACKUP COMPLETE: [TITLE] -💾✅ pve01: Backup complete +💾✅ {hostname}: Backup complete [BODY] Backup job finished on storage local-bak. @@ -1647,7 +1649,7 @@ Backup job finished on storage local-bak. BACKUP PARTIAL FAIL: [TITLE] -💾❌ pve01: Backup partially failed +💾❌ {hostname}: Backup partially failed [BODY] Backup job finished with errors. @@ -1662,7 +1664,7 @@ 
UPDATES: [TITLE] -📦 amd: Updates available +📦 {hostname}: Updates available [BODY] 📦 Total updates: 24 🔒 Security updates: 6 @@ -1673,14 +1675,14 @@ UPDATES: VM/CT START: [TITLE] -🚀 pve01: VM arch-linux (100) started +🚀 {hostname}: VM arch-linux (100) started [BODY] 🏷️ Virtual machine arch-linux (ID: 100) ✔️ Now running HEALTH DEGRADED: [TITLE] -⚠️ amd: Health warning — Disk I/O +⚠️ {hostname}: Health warning — Disk I/O [BODY] 💿 Device: /dev/sda ⚠️ 1 sector unreadable (pending)