mirror of
https://github.com/MacRimi/ProxMenux.git
synced 2026-04-05 20:03:48 +00:00
update ai_context_enrichment.py
This commit is contained in:
@@ -304,26 +304,22 @@ def enrich_context_for_ai(
|
||||
context_parts = []
|
||||
combined_text = f"{title} {body} {journal_context}"
|
||||
|
||||
# 1. System uptime - only relevant for failure/error events, not informational
|
||||
# 1. System uptime - ONLY for critical system-level failures
|
||||
# Uptime helps distinguish startup issues from runtime failures
|
||||
# Only include uptime when something FAILED or has CRITICAL/WARNING status
|
||||
uptime_relevant_types = [
|
||||
'fail', 'error', 'critical', 'crash', 'panic', 'oom',
|
||||
'disk_error', 'smart_error', 'io_error', 'service_fail',
|
||||
'split_brain', 'quorum_lost', 'node_offline'
|
||||
]
|
||||
# Exclude informational events (success, start, stop, complete, etc.)
|
||||
informational_types = [
|
||||
'update', 'upgrade', 'available', 'info', 'resolved',
|
||||
'start', 'stop', 'shutdown', 'restart', 'complete',
|
||||
'backup_complete', 'backup_start', 'migration'
|
||||
# BUT it's noise for disk errors, warnings, or routine operations
|
||||
# Only include for: system crash, kernel panic, OOM, cluster failures
|
||||
uptime_critical_types = [
|
||||
'crash', 'panic', 'oom', 'kernel',
|
||||
'split_brain', 'quorum_lost', 'node_offline', 'node_fail',
|
||||
'system_fail', 'boot_fail'
|
||||
]
|
||||
|
||||
is_uptime_relevant = any(t in event_type.lower() for t in uptime_relevant_types)
|
||||
is_informational = any(t in event_type.lower() for t in informational_types)
|
||||
# Check if this is a critical system-level event (not disk/service/hardware)
|
||||
event_lower = event_type.lower()
|
||||
is_critical_system_event = any(t in event_lower for t in uptime_critical_types)
|
||||
|
||||
# Only add uptime for actual failures, not routine operations
|
||||
if is_uptime_relevant and not is_informational:
|
||||
# Only add uptime for critical system failures, nothing else
|
||||
if is_critical_system_event:
|
||||
uptime = get_system_uptime()
|
||||
if uptime and uptime != "unknown":
|
||||
context_parts.append(f"System uptime: {uptime}")
|
||||
|
||||
@@ -6192,6 +6192,8 @@ def api_network_interface_metrics(interface_name):
|
||||
|
||||
rrd_data = []
|
||||
|
||||
rrd_error = None
|
||||
|
||||
if interface_type == 'vm_lxc':
|
||||
# For VM/LXC interfaces, get data from the VM/LXC RRD
|
||||
vmid, vm_type = extract_vmid_from_interface(interface_name)
|
||||
@@ -6202,19 +6204,20 @@ def api_network_interface_metrics(interface_name):
|
||||
capture_output=True, text=True, timeout=10)
|
||||
|
||||
if rrd_result.returncode == 0:
|
||||
all_data = json.loads(rrd_result.stdout)
|
||||
# Filter to only network-related fields
|
||||
for point in all_data:
|
||||
filtered_point = {'time': point.get('time')}
|
||||
# Add network fields if they exist
|
||||
for key in ['netin', 'netout']:
|
||||
if key in point:
|
||||
filtered_point[key] = point[key]
|
||||
rrd_data.append(filtered_point)
|
||||
|
||||
try:
|
||||
all_data = json.loads(rrd_result.stdout)
|
||||
# Filter to only network-related fields
|
||||
for point in all_data:
|
||||
filtered_point = {'time': point.get('time')}
|
||||
# Add network fields if they exist
|
||||
for key in ['netin', 'netout']:
|
||||
if key in point:
|
||||
filtered_point[key] = point[key]
|
||||
rrd_data.append(filtered_point)
|
||||
except json.JSONDecodeError:
|
||||
rrd_error = f'RRD data for {vm_type.upper()} {vmid} is empty or corrupted'
|
||||
else:
|
||||
# print(f"[v0] ERROR: Failed to get RRD data for VM/LXC")
|
||||
pass
|
||||
rrd_error = f'Failed to get RRD data: {rrd_result.stderr}'
|
||||
else:
|
||||
# For physical/bridge interfaces, get data from node RRD
|
||||
|
||||
@@ -6223,38 +6226,42 @@ def api_network_interface_metrics(interface_name):
|
||||
capture_output=True, text=True, timeout=10)
|
||||
|
||||
if rrd_result.returncode == 0:
|
||||
all_data = json.loads(rrd_result.stdout)
|
||||
# Filter to only network-related fields for this interface
|
||||
for point in all_data:
|
||||
filtered_point = {'time': point.get('time')}
|
||||
# Add network fields if they exist
|
||||
for key in ['netin', 'netout']:
|
||||
if key in point:
|
||||
filtered_point[key] = point[key]
|
||||
rrd_data.append(filtered_point)
|
||||
|
||||
try:
|
||||
all_data = json.loads(rrd_result.stdout)
|
||||
# Filter to only network-related fields for this interface
|
||||
for point in all_data:
|
||||
filtered_point = {'time': point.get('time')}
|
||||
# Add network fields if they exist
|
||||
for key in ['netin', 'netout']:
|
||||
if key in point:
|
||||
filtered_point[key] = point[key]
|
||||
rrd_data.append(filtered_point)
|
||||
except json.JSONDecodeError:
|
||||
rrd_error = 'Node RRD data is empty or corrupted'
|
||||
else:
|
||||
# print(f"[v0] ERROR: Failed to get RRD data for node")
|
||||
pass
|
||||
rrd_error = f'Failed to get RRD data: {rrd_result.stderr}'
|
||||
|
||||
|
||||
# If there was an RRD error and no data collected, return error with details
|
||||
if rrd_error and not rrd_data:
|
||||
return jsonify({
|
||||
'error': 'RRD data not available',
|
||||
'details': rrd_error,
|
||||
'suggestion': 'The RRD database may be empty or corrupted. Try: systemctl restart rrdcached'
|
||||
}), 503
|
||||
|
||||
return jsonify({
|
||||
'interface': interface_name,
|
||||
'type': interface_type,
|
||||
'timeframe': timeframe,
|
||||
'data': rrd_data
|
||||
'data': rrd_data,
|
||||
'warning': rrd_error if rrd_error else None # Include warning if there was an error but some data exists
|
||||
})
|
||||
|
||||
except Exception as e:
|
||||
|
||||
return jsonify({'error': str(e)}), 500
|
||||
|
||||
@app.route('/api/vms', methods=['GET'])
@require_auth
def api_vms():
    """Handle GET /api/vms.

    Returns:
        The value produced by ``get_proxmox_vms()`` wrapped in a JSON
        response via ``jsonify``.
    """
    vm_info = get_proxmox_vms()
    return jsonify(vm_info)
|
||||
|
||||
@app.route('/api/vms/<int:vmid>/metrics', methods=['GET'])
|
||||
@require_auth
|
||||
def api_vm_metrics(vmid):
|
||||
@@ -6316,9 +6323,22 @@ def api_vm_metrics(vmid):
|
||||
'data': rrd_data
|
||||
})
|
||||
else:
|
||||
|
||||
# Check if RRD file is empty or corrupted
|
||||
stderr_lower = rrd_result.stderr.lower() if rrd_result.stderr else ''
|
||||
if 'rrd' in stderr_lower or 'no such file' in stderr_lower or 'empty' in stderr_lower:
|
||||
return jsonify({
|
||||
'error': 'RRD data not available',
|
||||
'details': f'The RRD database for {vm_type.upper()} {vmid} may be empty or corrupted.',
|
||||
'suggestion': 'Try restarting rrdcached: systemctl restart rrdcached'
|
||||
}), 503
|
||||
return jsonify({'error': f'Failed to get RRD data: {rrd_result.stderr}'}), 500
|
||||
|
||||
|
||||
except json.JSONDecodeError:
|
||||
return jsonify({
|
||||
'error': 'RRD data not available',
|
||||
'details': f'Unable to parse metrics data for VM/LXC {vmid}.',
|
||||
'suggestion': 'Try restarting rrdcached: systemctl restart rrdcached'
|
||||
}), 503
|
||||
except Exception as e:
|
||||
|
||||
return jsonify({'error': str(e)}), 500
|
||||
@@ -6381,8 +6401,23 @@ def api_node_metrics():
|
||||
'data': rrd_data
|
||||
})
|
||||
else:
|
||||
# Check if RRD file is empty or corrupted
|
||||
stderr_lower = rrd_result.stderr.lower() if rrd_result.stderr else ''
|
||||
if 'rrd' in stderr_lower or 'no such file' in stderr_lower or 'empty' in stderr_lower:
|
||||
return jsonify({
|
||||
'error': 'RRD data not available',
|
||||
'details': 'The RRD database file may be empty or corrupted. This can happen if rrdcached was not running properly after Proxmox installation.',
|
||||
'suggestion': 'Try restarting rrdcached: systemctl restart rrdcached'
|
||||
}), 503 # Service Unavailable - more appropriate than 500
|
||||
return jsonify({'error': f'Failed to get RRD data: {rrd_result.stderr}'}), 500
|
||||
|
||||
except json.JSONDecodeError:
|
||||
# pvesh returned invalid JSON - likely empty RRD
|
||||
return jsonify({
|
||||
'error': 'RRD data not available',
|
||||
'details': 'Unable to parse metrics data. The RRD database may be empty or corrupted.',
|
||||
'suggestion': 'Try restarting rrdcached: systemctl restart rrdcached'
|
||||
}), 503
|
||||
except Exception as e:
|
||||
|
||||
return jsonify({'error': str(e)}), 500
|
||||
|
||||
@@ -1001,7 +1001,7 @@ EVENT_GROUPS = {
|
||||
}
|
||||
|
||||
|
||||
# ─── Template Renderer ───────────────────────────────────────────
|
||||
# ─── Template Renderer ───────────────────────────────────────────
|
||||
|
||||
def _get_hostname() -> str:
|
||||
"""Get short hostname for message titles."""
|
||||
@@ -1622,9 +1622,11 @@ BLANK LINES: Insert between logical sections (VM entries, before summary, before
|
||||
|
||||
═══ EXAMPLES (follow these formats) ═══
|
||||
|
||||
IMPORTANT: {hostname} is a placeholder. Always use the ACTUAL hostname from the original message.
|
||||
|
||||
BACKUP START:
|
||||
[TITLE]
|
||||
💾🚀 pve01: Backup started
|
||||
💾🚀 {hostname}: Backup started
|
||||
[BODY]
|
||||
Backup job starting on storage PBS.
|
||||
🏷️ VMs: web01 (100)
|
||||
@@ -1633,7 +1635,7 @@ Backup job starting on storage PBS.
|
||||
|
||||
BACKUP COMPLETE:
|
||||
[TITLE]
|
||||
💾✅ pve01: Backup complete
|
||||
💾✅ {hostname}: Backup complete
|
||||
[BODY]
|
||||
Backup job finished on storage local-bak.
|
||||
|
||||
@@ -1647,7 +1649,7 @@ Backup job finished on storage local-bak.
|
||||
|
||||
BACKUP PARTIAL FAIL:
|
||||
[TITLE]
|
||||
💾❌ pve01: Backup partially failed
|
||||
💾❌ {hostname}: Backup partially failed
|
||||
[BODY]
|
||||
Backup job finished with errors.
|
||||
|
||||
@@ -1662,7 +1664,7 @@ Backup job finished with errors.
|
||||
|
||||
UPDATES:
|
||||
[TITLE]
|
||||
📦 amd: Updates available
|
||||
📦 {hostname}: Updates available
|
||||
[BODY]
|
||||
📦 Total updates: 24
|
||||
🔒 Security updates: 6
|
||||
@@ -1673,14 +1675,14 @@ UPDATES:
|
||||
|
||||
VM/CT START:
|
||||
[TITLE]
|
||||
🚀 pve01: VM arch-linux (100) started
|
||||
🚀 {hostname}: VM arch-linux (100) started
|
||||
[BODY]
|
||||
🏷️ Virtual machine arch-linux (ID: 100)
|
||||
✔️ Now running
|
||||
|
||||
HEALTH DEGRADED:
|
||||
[TITLE]
|
||||
⚠️ amd: Health warning — Disk I/O
|
||||
⚠️ {hostname}: Health warning — Disk I/O
|
||||
[BODY]
|
||||
💿 Device: /dev/sda
|
||||
⚠️ 1 sector unreadable (pending)
|
||||
|
||||
Reference in New Issue
Block a user