mirror of
https://github.com/MacRimi/ProxMenux.git
synced 2026-04-18 10:02:16 +00:00
Update health_monitor.py
This commit is contained in:
@@ -2863,40 +2863,40 @@ class HealthMonitor:
|
|||||||
for line in journalctl_output.split('\n'):
|
for line in journalctl_output.split('\n'):
|
||||||
line_lower = line.lower()
|
line_lower = line.lower()
|
||||||
|
|
||||||
# VM QMP errors (skip during active backup -- normal behavior)
|
# VM QMP errors (skip during active backup -- normal behavior)
|
||||||
vm_qmp_match = re.search(r'vm\s+(\d+)\s+qmp\s+command.*(?:failed|unable|timeout)', line_lower)
|
vm_qmp_match = re.search(r'vm\s+(\d+)\s+qmp\s+command.*(?:failed|unable|timeout)', line_lower)
|
||||||
if vm_qmp_match:
|
if vm_qmp_match:
|
||||||
if _vzdump_running:
|
if _vzdump_running:
|
||||||
continue # Normal during backup
|
continue # Normal during backup
|
||||||
vmid = vm_qmp_match.group(1)
|
vmid = vm_qmp_match.group(1)
|
||||||
|
|
||||||
# Skip if VM no longer exists (deleted after error occurred)
|
# Skip if VM no longer exists (deleted after error occurred)
|
||||||
if not self._vm_ct_exists(vmid):
|
if not self._vm_ct_exists(vmid):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Skip if VM is now running - the QMP error is stale/resolved
|
# Skip if VM is now running - the QMP error is stale/resolved
|
||||||
# This prevents re-detecting old journal entries after VM recovery
|
# This prevents re-detecting old journal entries after VM recovery
|
||||||
if self._is_vm_running(vmid):
|
if self._is_vm_running(vmid):
|
||||||
# Auto-resolve any existing error for this VM
|
# Auto-resolve any existing error for this VM
|
||||||
health_persistence.check_vm_running(vmid)
|
health_persistence.check_vm_running(vmid)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
vm_name = self._resolve_vm_name(vmid)
|
vm_name = self._resolve_vm_name(vmid)
|
||||||
display = f"VM {vmid} ({vm_name})" if vm_name else f"VM {vmid}"
|
display = f"VM {vmid} ({vm_name})" if vm_name else f"VM {vmid}"
|
||||||
error_key = f'vm_{vmid}'
|
error_key = f'vm_{vmid}'
|
||||||
if error_key not in vm_details:
|
if error_key not in vm_details:
|
||||||
rec_result = health_persistence.record_error(
|
rec_result = health_persistence.record_error(
|
||||||
error_key=error_key,
|
error_key=error_key,
|
||||||
category='vms',
|
category='vms',
|
||||||
severity='WARNING',
|
severity='WARNING',
|
||||||
reason=f'{display}: QMP command failed or timed out.\n{line.strip()[:200]}',
|
reason=f'{display}: QMP command failed or timed out.\n{line.strip()[:200]}',
|
||||||
details={'id': vmid, 'vmname': vm_name, 'type': 'VM'}
|
details={'id': vmid, 'vmname': vm_name, 'type': 'VM'}
|
||||||
)
|
)
|
||||||
if not rec_result or rec_result.get('type') != 'skipped_acknowledged':
|
if not rec_result or rec_result.get('type') != 'skipped_acknowledged':
|
||||||
issues.append(f'{display}: QMP communication issue')
|
issues.append(f'{display}: QMP communication issue')
|
||||||
vm_details[error_key] = {
|
vm_details[error_key] = {
|
||||||
'status': 'WARNING',
|
'status': 'WARNING',
|
||||||
'reason': f'{display}: QMP command failed or timed out',
|
'reason': f'{display}: QMP command failed or timed out',
|
||||||
'id': vmid,
|
'id': vmid,
|
||||||
'vmname': vm_name,
|
'vmname': vm_name,
|
||||||
'type': 'VM'
|
'type': 'VM'
|
||||||
|
|||||||
Reference in New Issue
Block a user