mirror of
https://github.com/MacRimi/ProxMenux.git
synced 2026-04-06 04:13:48 +00:00
Update notification service
This commit is contained in:
@@ -1108,6 +1108,52 @@ class HealthMonitor:
|
||||
# Get physical disks list for UI display
|
||||
physical_disks = self._get_physical_disks_list()
|
||||
|
||||
# Collect disk error entries (SMART, I/O, etc.) from checks that should be merged with disk entries
|
||||
# These have keys like '/Dev/Sda', '/dev/sda', 'sda', etc.
|
||||
disk_errors_by_device = {}
|
||||
keys_to_remove = []
|
||||
for key, val in checks.items():
|
||||
# Skip non-disk error entries (like lvm_check, root_fs, etc.)
|
||||
key_lower = key.lower()
|
||||
|
||||
# Check if this looks like a disk error entry
|
||||
is_disk_error = False
|
||||
device_name = None
|
||||
|
||||
if key_lower.startswith('/dev/') or key_lower.startswith('dev/'):
|
||||
# Keys like '/Dev/Sda', '/dev/sda'
|
||||
device_name = key_lower.replace('/dev/', '').replace('dev/', '').strip('/')
|
||||
is_disk_error = True
|
||||
elif key_lower.startswith('sd') or key_lower.startswith('nvme') or key_lower.startswith('hd'):
|
||||
# Keys like 'sda', 'nvme0n1'
|
||||
device_name = key_lower
|
||||
is_disk_error = True
|
||||
|
||||
if is_disk_error and device_name and len(device_name) <= 15:
|
||||
# Store the error info, merging if we already have an error for this device
|
||||
if device_name not in disk_errors_by_device:
|
||||
disk_errors_by_device[device_name] = {
|
||||
'status': val.get('status', 'WARNING'),
|
||||
'detail': val.get('detail', val.get('reason', '')),
|
||||
'error_key': val.get('error_key'),
|
||||
'dismissable': val.get('dismissable', True),
|
||||
'dismissed': val.get('dismissed', False),
|
||||
}
|
||||
else:
|
||||
# Merge: keep the worst status
|
||||
existing = disk_errors_by_device[device_name]
|
||||
if val.get('status') == 'CRITICAL':
|
||||
existing['status'] = 'CRITICAL'
|
||||
# Append details
|
||||
new_detail = val.get('detail', val.get('reason', ''))
|
||||
if new_detail and new_detail not in existing.get('detail', ''):
|
||||
existing['detail'] = f"{existing.get('detail', '')}; {new_detail}".strip('; ')
|
||||
keys_to_remove.append(key)
|
||||
|
||||
# Remove the old disk error entries - they'll be merged into disk entries
|
||||
for key in keys_to_remove:
|
||||
del checks[key]
|
||||
|
||||
# Add individual disk checks for UI display (like Network interfaces)
|
||||
for disk in physical_disks:
|
||||
device = disk.get('device', '')
|
||||
@@ -1120,21 +1166,32 @@ class HealthMonitor:
|
||||
# Format check key - use device path for uniqueness
|
||||
check_key = device.lower().replace('/', '_') # e.g., _dev_sda
|
||||
|
||||
# Determine status
|
||||
if final_health == 'critical':
|
||||
# Check if there's a disk error (SMART, I/O, etc.) for this disk
|
||||
disk_error = disk_errors_by_device.get(name.lower())
|
||||
|
||||
# Determine status - use disk error status if present, otherwise use final_health
|
||||
if disk_error and disk_error.get('status') in ('WARNING', 'CRITICAL'):
|
||||
status = disk_error['status']
|
||||
error_detail = disk_error.get('detail', '')
|
||||
elif final_health == 'critical':
|
||||
status = 'CRITICAL'
|
||||
error_detail = ''
|
||||
elif final_health == 'warning':
|
||||
status = 'WARNING'
|
||||
error_detail = ''
|
||||
else:
|
||||
status = 'OK'
|
||||
error_detail = ''
|
||||
|
||||
# Build detail string
|
||||
disk_type = 'USB' if is_usb else ('NVMe' if disk.get('is_nvme') else 'SATA')
|
||||
detail = f'{serial}' if serial else 'Unknown serial'
|
||||
if final_reason:
|
||||
detail += f' - {final_reason}'
|
||||
elif error_detail:
|
||||
detail += f' - {error_detail}'
|
||||
|
||||
# Only add to checks if not already present (avoid duplicating error entries)
|
||||
# Only add to checks if not already present
|
||||
if check_key not in checks:
|
||||
checks[check_key] = {
|
||||
'status': status,
|
||||
@@ -1150,7 +1207,15 @@ class HealthMonitor:
|
||||
|
||||
# If disk has issues, it needs an error_key for dismiss functionality
|
||||
if status != 'OK':
|
||||
checks[check_key]['error_key'] = f'disk_{name}_{serial}' if serial else f'disk_{name}'
|
||||
# Use disk error_key if available, otherwise generate one
|
||||
if disk_error and disk_error.get('error_key'):
|
||||
checks[check_key]['error_key'] = disk_error['error_key']
|
||||
else:
|
||||
checks[check_key]['error_key'] = f'disk_{name}_{serial}' if serial else f'disk_{name}'
|
||||
checks[check_key]['dismissable'] = True
|
||||
# Preserve dismissed state from disk error
|
||||
if disk_error and disk_error.get('dismissed'):
|
||||
checks[check_key]['dismissed'] = True
|
||||
|
||||
if not issues:
|
||||
return {'status': 'OK', 'checks': checks, 'physical_disks': physical_disks}
|
||||
|
||||
@@ -1231,11 +1231,26 @@ class HealthPersistence:
|
||||
# a different device_name (e.g. 'ata8' instead of 'sdh'),
|
||||
# update that entry's device_name so observations carry over.
|
||||
if serial:
|
||||
# Try exact match first
|
||||
cursor.execute('''
|
||||
SELECT id, device_name FROM disk_registry
|
||||
WHERE serial = ? AND serial != '' AND device_name != ?
|
||||
''', (serial, device_name))
|
||||
old_rows = cursor.fetchall()
|
||||
|
||||
# If no exact match, try normalized match (for USB disks with special chars)
|
||||
if not old_rows:
|
||||
normalized = self._normalize_serial(serial)
|
||||
if normalized and normalized != serial:
|
||||
cursor.execute(
|
||||
'SELECT id, device_name, serial FROM disk_registry '
|
||||
'WHERE serial != "" AND device_name != ?', (device_name,))
|
||||
for row in cursor.fetchall():
|
||||
db_normalized = self._normalize_serial(row[2])
|
||||
if db_normalized == normalized or normalized in db_normalized or db_normalized in normalized:
|
||||
old_rows.append((row[0], row[1]))
|
||||
break
|
||||
|
||||
for old_id, old_dev in old_rows:
|
||||
# Only consolidate ATA names -> block device names
|
||||
if old_dev.startswith('ata') and not device_name.startswith('ata'):
|
||||
@@ -1273,6 +1288,23 @@ class HealthPersistence:
|
||||
except Exception as e:
|
||||
print(f"[HealthPersistence] Error registering disk {device_name}: {e}")
|
||||
|
||||
def _normalize_serial(self, serial: str) -> str:
|
||||
"""Normalize serial number for comparison.
|
||||
|
||||
USB disks can have serials with escape sequences like \\x06\\x18
|
||||
or non-printable characters. This normalizes them for matching.
|
||||
"""
|
||||
if not serial:
|
||||
return ''
|
||||
import re
|
||||
# Remove escape sequences like \x06, \x18
|
||||
normalized = re.sub(r'\\x[0-9a-fA-F]{2}', '', serial)
|
||||
# Remove non-printable characters
|
||||
normalized = ''.join(c for c in normalized if c.isprintable())
|
||||
# Remove common prefixes that vary
|
||||
normalized = normalized.strip()
|
||||
return normalized
|
||||
|
||||
def _get_disk_registry_id(self, cursor, device_name: str,
|
||||
serial: Optional[str] = None) -> Optional[int]:
|
||||
"""Find disk_registry.id, matching by serial first, then device_name.
|
||||
@@ -1281,12 +1313,25 @@ class HealthPersistence:
|
||||
checks entries with ATA names that share the same serial.
|
||||
"""
|
||||
if serial:
|
||||
# Try exact match first
|
||||
cursor.execute(
|
||||
'SELECT id FROM disk_registry WHERE serial = ? AND serial != "" ORDER BY last_seen DESC LIMIT 1',
|
||||
(serial,))
|
||||
row = cursor.fetchone()
|
||||
if row:
|
||||
return row[0]
|
||||
|
||||
# Try normalized serial match (for USB disks with special chars)
|
||||
normalized = self._normalize_serial(serial)
|
||||
if normalized and normalized != serial:
|
||||
# Search for serials that start with or contain the normalized version
|
||||
cursor.execute(
|
||||
'SELECT id, serial FROM disk_registry WHERE serial != "" ORDER BY last_seen DESC')
|
||||
for row in cursor.fetchall():
|
||||
db_normalized = self._normalize_serial(row[1])
|
||||
if db_normalized == normalized or normalized in db_normalized or db_normalized in normalized:
|
||||
return row[0]
|
||||
|
||||
# Fallback: match by device_name (strip /dev/ prefix)
|
||||
clean_dev = device_name.replace('/dev/', '')
|
||||
cursor.execute(
|
||||
@@ -1295,6 +1340,7 @@ class HealthPersistence:
|
||||
row = cursor.fetchone()
|
||||
if row:
|
||||
return row[0]
|
||||
|
||||
# Last resort: search for ATA-named entries that might refer to this device
|
||||
# This handles cases where observations were recorded under 'ata8'
|
||||
# but we're querying for 'sdh'
|
||||
|
||||
@@ -749,6 +749,7 @@ class JournalWatcher:
|
||||
"""Extract device info from a smartd system-mail and record as disk observation."""
|
||||
try:
|
||||
import re as _re
|
||||
import subprocess
|
||||
from health_persistence import health_persistence
|
||||
|
||||
# Extract device path: "Device: /dev/sdh [SAT]" or "Device: /dev/sda"
|
||||
@@ -769,6 +770,21 @@ class JournalWatcher:
|
||||
if model_match:
|
||||
model = model_match.group(1).strip()
|
||||
|
||||
# If no serial from message, try to get it from smartctl (important for USB disks)
|
||||
if not serial or len(serial) < 3:
|
||||
try:
|
||||
result = subprocess.run(
|
||||
['smartctl', '-i', '-j', f'/dev/{base_dev}'],
|
||||
capture_output=True, text=True, timeout=5
|
||||
)
|
||||
import json as _json
|
||||
data = _json.loads(result.stdout)
|
||||
serial = data.get('serial_number', '') or serial
|
||||
if not model:
|
||||
model = data.get('model_name', '') or data.get('model_family', '')
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Extract error signature from title: "SMART error (FailedReadSmartSelfTestLog)"
|
||||
sig_match = _re.search(r'SMART error\s*\((\w+)\)', title)
|
||||
if sig_match:
|
||||
|
||||
Reference in New Issue
Block a user