mirror of
https://github.com/MacRimi/ProxMenux.git
synced 2026-04-18 10:02:16 +00:00
update ai_context_enrichment.py
This commit is contained in:
@@ -304,26 +304,22 @@ def enrich_context_for_ai(
|
|||||||
context_parts = []
|
context_parts = []
|
||||||
combined_text = f"{title} {body} {journal_context}"
|
combined_text = f"{title} {body} {journal_context}"
|
||||||
|
|
||||||
# 1. System uptime - only relevant for failure/error events, not informational
|
# 1. System uptime - ONLY for critical system-level failures
|
||||||
# Uptime helps distinguish startup issues from runtime failures
|
# Uptime helps distinguish startup issues from runtime failures
|
||||||
# Only include uptime when something FAILED or has CRITICAL/WARNING status
|
# BUT it's noise for disk errors, warnings, or routine operations
|
||||||
uptime_relevant_types = [
|
# Only include for: system crash, kernel panic, OOM, cluster failures
|
||||||
'fail', 'error', 'critical', 'crash', 'panic', 'oom',
|
uptime_critical_types = [
|
||||||
'disk_error', 'smart_error', 'io_error', 'service_fail',
|
'crash', 'panic', 'oom', 'kernel',
|
||||||
'split_brain', 'quorum_lost', 'node_offline'
|
'split_brain', 'quorum_lost', 'node_offline', 'node_fail',
|
||||||
]
|
'system_fail', 'boot_fail'
|
||||||
# Exclude informational events (success, start, stop, complete, etc.)
|
|
||||||
informational_types = [
|
|
||||||
'update', 'upgrade', 'available', 'info', 'resolved',
|
|
||||||
'start', 'stop', 'shutdown', 'restart', 'complete',
|
|
||||||
'backup_complete', 'backup_start', 'migration'
|
|
||||||
]
|
]
|
||||||
|
|
||||||
is_uptime_relevant = any(t in event_type.lower() for t in uptime_relevant_types)
|
# Check if this is a critical system-level event (not disk/service/hardware)
|
||||||
is_informational = any(t in event_type.lower() for t in informational_types)
|
event_lower = event_type.lower()
|
||||||
|
is_critical_system_event = any(t in event_lower for t in uptime_critical_types)
|
||||||
|
|
||||||
# Only add uptime for actual failures, not routine operations
|
# Only add uptime for critical system failures, nothing else
|
||||||
if is_uptime_relevant and not is_informational:
|
if is_critical_system_event:
|
||||||
uptime = get_system_uptime()
|
uptime = get_system_uptime()
|
||||||
if uptime and uptime != "unknown":
|
if uptime and uptime != "unknown":
|
||||||
context_parts.append(f"System uptime: {uptime}")
|
context_parts.append(f"System uptime: {uptime}")
|
||||||
|
|||||||
@@ -6192,6 +6192,8 @@ def api_network_interface_metrics(interface_name):
|
|||||||
|
|
||||||
rrd_data = []
|
rrd_data = []
|
||||||
|
|
||||||
|
rrd_error = None
|
||||||
|
|
||||||
if interface_type == 'vm_lxc':
|
if interface_type == 'vm_lxc':
|
||||||
# For VM/LXC interfaces, get data from the VM/LXC RRD
|
# For VM/LXC interfaces, get data from the VM/LXC RRD
|
||||||
vmid, vm_type = extract_vmid_from_interface(interface_name)
|
vmid, vm_type = extract_vmid_from_interface(interface_name)
|
||||||
@@ -6202,19 +6204,20 @@ def api_network_interface_metrics(interface_name):
|
|||||||
capture_output=True, text=True, timeout=10)
|
capture_output=True, text=True, timeout=10)
|
||||||
|
|
||||||
if rrd_result.returncode == 0:
|
if rrd_result.returncode == 0:
|
||||||
all_data = json.loads(rrd_result.stdout)
|
try:
|
||||||
# Filter to only network-related fields
|
all_data = json.loads(rrd_result.stdout)
|
||||||
for point in all_data:
|
# Filter to only network-related fields
|
||||||
filtered_point = {'time': point.get('time')}
|
for point in all_data:
|
||||||
# Add network fields if they exist
|
filtered_point = {'time': point.get('time')}
|
||||||
for key in ['netin', 'netout']:
|
# Add network fields if they exist
|
||||||
if key in point:
|
for key in ['netin', 'netout']:
|
||||||
filtered_point[key] = point[key]
|
if key in point:
|
||||||
rrd_data.append(filtered_point)
|
filtered_point[key] = point[key]
|
||||||
|
rrd_data.append(filtered_point)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
rrd_error = f'RRD data for {vm_type.upper()} {vmid} is empty or corrupted'
|
||||||
else:
|
else:
|
||||||
# print(f"[v0] ERROR: Failed to get RRD data for VM/LXC")
|
rrd_error = f'Failed to get RRD data: {rrd_result.stderr}'
|
||||||
pass
|
|
||||||
else:
|
else:
|
||||||
# For physical/bridge interfaces, get data from node RRD
|
# For physical/bridge interfaces, get data from node RRD
|
||||||
|
|
||||||
@@ -6223,38 +6226,42 @@ def api_network_interface_metrics(interface_name):
|
|||||||
capture_output=True, text=True, timeout=10)
|
capture_output=True, text=True, timeout=10)
|
||||||
|
|
||||||
if rrd_result.returncode == 0:
|
if rrd_result.returncode == 0:
|
||||||
all_data = json.loads(rrd_result.stdout)
|
try:
|
||||||
# Filter to only network-related fields for this interface
|
all_data = json.loads(rrd_result.stdout)
|
||||||
for point in all_data:
|
# Filter to only network-related fields for this interface
|
||||||
filtered_point = {'time': point.get('time')}
|
for point in all_data:
|
||||||
# Add network fields if they exist
|
filtered_point = {'time': point.get('time')}
|
||||||
for key in ['netin', 'netout']:
|
# Add network fields if they exist
|
||||||
if key in point:
|
for key in ['netin', 'netout']:
|
||||||
filtered_point[key] = point[key]
|
if key in point:
|
||||||
rrd_data.append(filtered_point)
|
filtered_point[key] = point[key]
|
||||||
|
rrd_data.append(filtered_point)
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
rrd_error = 'Node RRD data is empty or corrupted'
|
||||||
else:
|
else:
|
||||||
# print(f"[v0] ERROR: Failed to get RRD data for node")
|
rrd_error = f'Failed to get RRD data: {rrd_result.stderr}'
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
|
# If there was an RRD error and no data collected, return error with details
|
||||||
|
if rrd_error and not rrd_data:
|
||||||
|
return jsonify({
|
||||||
|
'error': 'RRD data not available',
|
||||||
|
'details': rrd_error,
|
||||||
|
'suggestion': 'The RRD database may be empty or corrupted. Try: systemctl restart rrdcached'
|
||||||
|
}), 503
|
||||||
|
|
||||||
return jsonify({
|
return jsonify({
|
||||||
'interface': interface_name,
|
'interface': interface_name,
|
||||||
'type': interface_type,
|
'type': interface_type,
|
||||||
'timeframe': timeframe,
|
'timeframe': timeframe,
|
||||||
'data': rrd_data
|
'data': rrd_data,
|
||||||
|
'warning': rrd_error if rrd_error else None # Include warning if there was an error but some data exists
|
||||||
})
|
})
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
||||||
return jsonify({'error': str(e)}), 500
|
return jsonify({'error': str(e)}), 500
|
||||||
|
|
||||||
@app.route('/api/vms', methods=['GET'])
|
|
||||||
@require_auth
|
|
||||||
def api_vms():
|
|
||||||
"""Get virtual machine information"""
|
|
||||||
return jsonify(get_proxmox_vms())
|
|
||||||
|
|
||||||
@app.route('/api/vms/<int:vmid>/metrics', methods=['GET'])
|
@app.route('/api/vms/<int:vmid>/metrics', methods=['GET'])
|
||||||
@require_auth
|
@require_auth
|
||||||
def api_vm_metrics(vmid):
|
def api_vm_metrics(vmid):
|
||||||
@@ -6316,9 +6323,22 @@ def api_vm_metrics(vmid):
|
|||||||
'data': rrd_data
|
'data': rrd_data
|
||||||
})
|
})
|
||||||
else:
|
else:
|
||||||
|
# Check if RRD file is empty or corrupted
|
||||||
|
stderr_lower = rrd_result.stderr.lower() if rrd_result.stderr else ''
|
||||||
|
if 'rrd' in stderr_lower or 'no such file' in stderr_lower or 'empty' in stderr_lower:
|
||||||
|
return jsonify({
|
||||||
|
'error': 'RRD data not available',
|
||||||
|
'details': f'The RRD database for {vm_type.upper()} {vmid} may be empty or corrupted.',
|
||||||
|
'suggestion': 'Try restarting rrdcached: systemctl restart rrdcached'
|
||||||
|
}), 503
|
||||||
return jsonify({'error': f'Failed to get RRD data: {rrd_result.stderr}'}), 500
|
return jsonify({'error': f'Failed to get RRD data: {rrd_result.stderr}'}), 500
|
||||||
|
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
return jsonify({
|
||||||
|
'error': 'RRD data not available',
|
||||||
|
'details': f'Unable to parse metrics data for VM/LXC {vmid}.',
|
||||||
|
'suggestion': 'Try restarting rrdcached: systemctl restart rrdcached'
|
||||||
|
}), 503
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
||||||
return jsonify({'error': str(e)}), 500
|
return jsonify({'error': str(e)}), 500
|
||||||
@@ -6381,8 +6401,23 @@ def api_node_metrics():
|
|||||||
'data': rrd_data
|
'data': rrd_data
|
||||||
})
|
})
|
||||||
else:
|
else:
|
||||||
|
# Check if RRD file is empty or corrupted
|
||||||
|
stderr_lower = rrd_result.stderr.lower() if rrd_result.stderr else ''
|
||||||
|
if 'rrd' in stderr_lower or 'no such file' in stderr_lower or 'empty' in stderr_lower:
|
||||||
|
return jsonify({
|
||||||
|
'error': 'RRD data not available',
|
||||||
|
'details': 'The RRD database file may be empty or corrupted. This can happen if rrdcached was not running properly after Proxmox installation.',
|
||||||
|
'suggestion': 'Try restarting rrdcached: systemctl restart rrdcached'
|
||||||
|
}), 503 # Service Unavailable - more appropriate than 500
|
||||||
return jsonify({'error': f'Failed to get RRD data: {rrd_result.stderr}'}), 500
|
return jsonify({'error': f'Failed to get RRD data: {rrd_result.stderr}'}), 500
|
||||||
|
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
# pvesh returned invalid JSON - likely empty RRD
|
||||||
|
return jsonify({
|
||||||
|
'error': 'RRD data not available',
|
||||||
|
'details': 'Unable to parse metrics data. The RRD database may be empty or corrupted.',
|
||||||
|
'suggestion': 'Try restarting rrdcached: systemctl restart rrdcached'
|
||||||
|
}), 503
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
||||||
return jsonify({'error': str(e)}), 500
|
return jsonify({'error': str(e)}), 500
|
||||||
|
|||||||
@@ -1001,7 +1001,7 @@ EVENT_GROUPS = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
# ─── Template Renderer ───────────────────────────────────────────
|
# ─── Template Renderer ───────────────────────────────────────────
|
||||||
|
|
||||||
def _get_hostname() -> str:
|
def _get_hostname() -> str:
|
||||||
"""Get short hostname for message titles."""
|
"""Get short hostname for message titles."""
|
||||||
@@ -1622,9 +1622,11 @@ BLANK LINES: Insert between logical sections (VM entries, before summary, before
|
|||||||
|
|
||||||
═══ EXAMPLES (follow these formats) ═══
|
═══ EXAMPLES (follow these formats) ═══
|
||||||
|
|
||||||
|
IMPORTANT: {hostname} is a placeholder. Always use the ACTUAL hostname from the original message.
|
||||||
|
|
||||||
BACKUP START:
|
BACKUP START:
|
||||||
[TITLE]
|
[TITLE]
|
||||||
💾🚀 pve01: Backup started
|
💾🚀 {hostname}: Backup started
|
||||||
[BODY]
|
[BODY]
|
||||||
Backup job starting on storage PBS.
|
Backup job starting on storage PBS.
|
||||||
🏷️ VMs: web01 (100)
|
🏷️ VMs: web01 (100)
|
||||||
@@ -1633,7 +1635,7 @@ Backup job starting on storage PBS.
|
|||||||
|
|
||||||
BACKUP COMPLETE:
|
BACKUP COMPLETE:
|
||||||
[TITLE]
|
[TITLE]
|
||||||
💾✅ pve01: Backup complete
|
💾✅ {hostname}: Backup complete
|
||||||
[BODY]
|
[BODY]
|
||||||
Backup job finished on storage local-bak.
|
Backup job finished on storage local-bak.
|
||||||
|
|
||||||
@@ -1647,7 +1649,7 @@ Backup job finished on storage local-bak.
|
|||||||
|
|
||||||
BACKUP PARTIAL FAIL:
|
BACKUP PARTIAL FAIL:
|
||||||
[TITLE]
|
[TITLE]
|
||||||
💾❌ pve01: Backup partially failed
|
💾❌ {hostname}: Backup partially failed
|
||||||
[BODY]
|
[BODY]
|
||||||
Backup job finished with errors.
|
Backup job finished with errors.
|
||||||
|
|
||||||
@@ -1662,7 +1664,7 @@ Backup job finished with errors.
|
|||||||
|
|
||||||
UPDATES:
|
UPDATES:
|
||||||
[TITLE]
|
[TITLE]
|
||||||
📦 amd: Updates available
|
📦 {hostname}: Updates available
|
||||||
[BODY]
|
[BODY]
|
||||||
📦 Total updates: 24
|
📦 Total updates: 24
|
||||||
🔒 Security updates: 6
|
🔒 Security updates: 6
|
||||||
@@ -1673,14 +1675,14 @@ UPDATES:
|
|||||||
|
|
||||||
VM/CT START:
|
VM/CT START:
|
||||||
[TITLE]
|
[TITLE]
|
||||||
🚀 pve01: VM arch-linux (100) started
|
🚀 {hostname}: VM arch-linux (100) started
|
||||||
[BODY]
|
[BODY]
|
||||||
🏷️ Virtual machine arch-linux (ID: 100)
|
🏷️ Virtual machine arch-linux (ID: 100)
|
||||||
✔️ Now running
|
✔️ Now running
|
||||||
|
|
||||||
HEALTH DEGRADED:
|
HEALTH DEGRADED:
|
||||||
[TITLE]
|
[TITLE]
|
||||||
⚠️ amd: Health warning — Disk I/O
|
⚠️ {hostname}: Health warning — Disk I/O
|
||||||
[BODY]
|
[BODY]
|
||||||
💿 Device: /dev/sda
|
💿 Device: /dev/sda
|
||||||
⚠️ 1 sector unreadable (pending)
|
⚠️ 1 sector unreadable (pending)
|
||||||
|
|||||||
Reference in New Issue
Block a user