Update notification service

This commit is contained in:
MacRimi
2026-03-08 20:01:02 +01:00
parent d1d44afc9d
commit b8cff3e699
3 changed files with 131 additions and 4 deletions

View File

@@ -1108,6 +1108,52 @@ class HealthMonitor:
# Get physical disks list for UI display
physical_disks = self._get_physical_disks_list()
# Collect disk error entries (SMART, I/O, etc.) from checks that should be merged with disk entries
# These have keys like '/Dev/Sda', '/dev/sda', 'sda', etc.
disk_errors_by_device = {}
keys_to_remove = []
for key, val in checks.items():
# Skip non-disk error entries (like lvm_check, root_fs, etc.)
key_lower = key.lower()
# Check if this looks like a disk error entry
is_disk_error = False
device_name = None
if key_lower.startswith('/dev/') or key_lower.startswith('dev/'):
# Keys like '/Dev/Sda', '/dev/sda'
device_name = key_lower.replace('/dev/', '').replace('dev/', '').strip('/')
is_disk_error = True
elif key_lower.startswith('sd') or key_lower.startswith('nvme') or key_lower.startswith('hd'):
# Keys like 'sda', 'nvme0n1'
device_name = key_lower
is_disk_error = True
if is_disk_error and device_name and len(device_name) <= 15:
# Store the error info, merging if we already have an error for this device
if device_name not in disk_errors_by_device:
disk_errors_by_device[device_name] = {
'status': val.get('status', 'WARNING'),
'detail': val.get('detail', val.get('reason', '')),
'error_key': val.get('error_key'),
'dismissable': val.get('dismissable', True),
'dismissed': val.get('dismissed', False),
}
else:
# Merge: keep the worst status
existing = disk_errors_by_device[device_name]
if val.get('status') == 'CRITICAL':
existing['status'] = 'CRITICAL'
# Append details
new_detail = val.get('detail', val.get('reason', ''))
if new_detail and new_detail not in existing.get('detail', ''):
existing['detail'] = f"{existing.get('detail', '')}; {new_detail}".strip('; ')
keys_to_remove.append(key)
# Remove the old disk error entries - they'll be merged into disk entries
for key in keys_to_remove:
del checks[key]
# Add individual disk checks for UI display (like Network interfaces)
for disk in physical_disks:
device = disk.get('device', '')
@@ -1120,21 +1166,32 @@ class HealthMonitor:
# Format check key - use device path for uniqueness
check_key = device.lower().replace('/', '_') # e.g., _dev_sda
# Determine status
if final_health == 'critical':
# Check if there's a disk error (SMART, I/O, etc.) for this disk
disk_error = disk_errors_by_device.get(name.lower())
# Determine status - use disk error status if present, otherwise use final_health
if disk_error and disk_error.get('status') in ('WARNING', 'CRITICAL'):
status = disk_error['status']
error_detail = disk_error.get('detail', '')
elif final_health == 'critical':
status = 'CRITICAL'
error_detail = ''
elif final_health == 'warning':
status = 'WARNING'
error_detail = ''
else:
status = 'OK'
error_detail = ''
# Build detail string
disk_type = 'USB' if is_usb else ('NVMe' if disk.get('is_nvme') else 'SATA')
detail = f'{serial}' if serial else 'Unknown serial'
if final_reason:
detail += f' - {final_reason}'
elif error_detail:
detail += f' - {error_detail}'
# Only add to checks if not already present (avoid duplicating error entries)
# Only add to checks if not already present
if check_key not in checks:
checks[check_key] = {
'status': status,
@@ -1150,7 +1207,15 @@ class HealthMonitor:
# If disk has issues, it needs an error_key for dismiss functionality
if status != 'OK':
checks[check_key]['error_key'] = f'disk_{name}_{serial}' if serial else f'disk_{name}'
# Use disk error_key if available, otherwise generate one
if disk_error and disk_error.get('error_key'):
checks[check_key]['error_key'] = disk_error['error_key']
else:
checks[check_key]['error_key'] = f'disk_{name}_{serial}' if serial else f'disk_{name}'
checks[check_key]['dismissable'] = True
# Preserve dismissed state from disk error
if disk_error and disk_error.get('dismissed'):
checks[check_key]['dismissed'] = True
if not issues:
return {'status': 'OK', 'checks': checks, 'physical_disks': physical_disks}

View File

@@ -1231,11 +1231,26 @@ class HealthPersistence:
# a different device_name (e.g. 'ata8' instead of 'sdh'),
# update that entry's device_name so observations carry over.
if serial:
# Try exact match first
cursor.execute('''
SELECT id, device_name FROM disk_registry
WHERE serial = ? AND serial != '' AND device_name != ?
''', (serial, device_name))
old_rows = cursor.fetchall()
# If no exact match, try normalized match (for USB disks with special chars)
if not old_rows:
normalized = self._normalize_serial(serial)
if normalized and normalized != serial:
cursor.execute(
'SELECT id, device_name, serial FROM disk_registry '
'WHERE serial != "" AND device_name != ?', (device_name,))
for row in cursor.fetchall():
db_normalized = self._normalize_serial(row[2])
if db_normalized == normalized or normalized in db_normalized or db_normalized in normalized:
old_rows.append((row[0], row[1]))
break
for old_id, old_dev in old_rows:
# Only consolidate ATA names -> block device names
if old_dev.startswith('ata') and not device_name.startswith('ata'):
@@ -1273,6 +1288,23 @@ class HealthPersistence:
except Exception as e:
print(f"[HealthPersistence] Error registering disk {device_name}: {e}")
def _normalize_serial(self, serial: str) -> str:
"""Normalize serial number for comparison.
USB disks can have serials with escape sequences like \\x06\\x18
or non-printable characters. This normalizes them for matching.
"""
if not serial:
return ''
import re
# Remove escape sequences like \x06, \x18
normalized = re.sub(r'\\x[0-9a-fA-F]{2}', '', serial)
# Remove non-printable characters
normalized = ''.join(c for c in normalized if c.isprintable())
# Remove common prefixes that vary
normalized = normalized.strip()
return normalized
def _get_disk_registry_id(self, cursor, device_name: str,
serial: Optional[str] = None) -> Optional[int]:
"""Find disk_registry.id, matching by serial first, then device_name.
@@ -1281,12 +1313,25 @@ class HealthPersistence:
checks entries with ATA names that share the same serial.
"""
if serial:
# Try exact match first
cursor.execute(
'SELECT id FROM disk_registry WHERE serial = ? AND serial != "" ORDER BY last_seen DESC LIMIT 1',
(serial,))
row = cursor.fetchone()
if row:
return row[0]
# Try normalized serial match (for USB disks with special chars)
normalized = self._normalize_serial(serial)
if normalized and normalized != serial:
# Scan stored serials (most recently seen first); match when the normalized forms are equal or one contains the other
cursor.execute(
'SELECT id, serial FROM disk_registry WHERE serial != "" ORDER BY last_seen DESC')
for row in cursor.fetchall():
db_normalized = self._normalize_serial(row[1])
if db_normalized == normalized or normalized in db_normalized or db_normalized in normalized:
return row[0]
# Fallback: match by device_name (strip /dev/ prefix)
clean_dev = device_name.replace('/dev/', '')
cursor.execute(
@@ -1295,6 +1340,7 @@ class HealthPersistence:
row = cursor.fetchone()
if row:
return row[0]
# Last resort: search for ATA-named entries that might refer to this device
# This handles cases where observations were recorded under 'ata8'
# but we're querying for 'sdh'

View File

@@ -749,6 +749,7 @@ class JournalWatcher:
"""Extract device info from a smartd system-mail and record as disk observation."""
try:
import re as _re
import subprocess
from health_persistence import health_persistence
# Extract device path: "Device: /dev/sdh [SAT]" or "Device: /dev/sda"
@@ -769,6 +770,21 @@ class JournalWatcher:
if model_match:
model = model_match.group(1).strip()
# If no serial from message, try to get it from smartctl (important for USB disks)
if not serial or len(serial) < 3:
try:
result = subprocess.run(
['smartctl', '-i', '-j', f'/dev/{base_dev}'],
capture_output=True, text=True, timeout=5
)
import json as _json
data = _json.loads(result.stdout)
serial = data.get('serial_number', '') or serial
if not model:
model = data.get('model_name', '') or data.get('model_family', '')
except Exception:
pass
# Extract error signature from title: "SMART error (FailedReadSmartSelfTestLog)"
sig_match = _re.search(r'SMART error\s*\((\w+)\)', title)
if sig_match: