mirror of
https://github.com/MacRimi/ProxMenux.git
synced 2026-04-18 10:02:16 +00:00
Update notification service
This commit is contained in:
@@ -1108,6 +1108,52 @@ class HealthMonitor:
|
|||||||
# Get physical disks list for UI display
|
# Get physical disks list for UI display
|
||||||
physical_disks = self._get_physical_disks_list()
|
physical_disks = self._get_physical_disks_list()
|
||||||
|
|
||||||
|
# Collect disk error entries (SMART, I/O, etc.) from checks that should be merged with disk entries
|
||||||
|
# These have keys like '/Dev/Sda', '/dev/sda', 'sda', etc.
|
||||||
|
disk_errors_by_device = {}
|
||||||
|
keys_to_remove = []
|
||||||
|
for key, val in checks.items():
|
||||||
|
# Skip non-disk error entries (like lvm_check, root_fs, etc.)
|
||||||
|
key_lower = key.lower()
|
||||||
|
|
||||||
|
# Check if this looks like a disk error entry
|
||||||
|
is_disk_error = False
|
||||||
|
device_name = None
|
||||||
|
|
||||||
|
if key_lower.startswith('/dev/') or key_lower.startswith('dev/'):
|
||||||
|
# Keys like '/Dev/Sda', '/dev/sda'
|
||||||
|
device_name = key_lower.replace('/dev/', '').replace('dev/', '').strip('/')
|
||||||
|
is_disk_error = True
|
||||||
|
elif key_lower.startswith('sd') or key_lower.startswith('nvme') or key_lower.startswith('hd'):
|
||||||
|
# Keys like 'sda', 'nvme0n1'
|
||||||
|
device_name = key_lower
|
||||||
|
is_disk_error = True
|
||||||
|
|
||||||
|
if is_disk_error and device_name and len(device_name) <= 15:
|
||||||
|
# Store the error info, merging if we already have an error for this device
|
||||||
|
if device_name not in disk_errors_by_device:
|
||||||
|
disk_errors_by_device[device_name] = {
|
||||||
|
'status': val.get('status', 'WARNING'),
|
||||||
|
'detail': val.get('detail', val.get('reason', '')),
|
||||||
|
'error_key': val.get('error_key'),
|
||||||
|
'dismissable': val.get('dismissable', True),
|
||||||
|
'dismissed': val.get('dismissed', False),
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
# Merge: keep the worst status
|
||||||
|
existing = disk_errors_by_device[device_name]
|
||||||
|
if val.get('status') == 'CRITICAL':
|
||||||
|
existing['status'] = 'CRITICAL'
|
||||||
|
# Append details
|
||||||
|
new_detail = val.get('detail', val.get('reason', ''))
|
||||||
|
if new_detail and new_detail not in existing.get('detail', ''):
|
||||||
|
existing['detail'] = f"{existing.get('detail', '')}; {new_detail}".strip('; ')
|
||||||
|
keys_to_remove.append(key)
|
||||||
|
|
||||||
|
# Remove the old disk error entries - they'll be merged into disk entries
|
||||||
|
for key in keys_to_remove:
|
||||||
|
del checks[key]
|
||||||
|
|
||||||
# Add individual disk checks for UI display (like Network interfaces)
|
# Add individual disk checks for UI display (like Network interfaces)
|
||||||
for disk in physical_disks:
|
for disk in physical_disks:
|
||||||
device = disk.get('device', '')
|
device = disk.get('device', '')
|
||||||
@@ -1120,21 +1166,32 @@ class HealthMonitor:
|
|||||||
# Format check key - use device path for uniqueness
|
# Format check key - use device path for uniqueness
|
||||||
check_key = device.lower().replace('/', '_') # e.g., _dev_sda
|
check_key = device.lower().replace('/', '_') # e.g., _dev_sda
|
||||||
|
|
||||||
# Determine status
|
# Check if there's a disk error (SMART, I/O, etc.) for this disk
|
||||||
if final_health == 'critical':
|
disk_error = disk_errors_by_device.get(name.lower())
|
||||||
|
|
||||||
|
# Determine status - use disk error status if present, otherwise use final_health
|
||||||
|
if disk_error and disk_error.get('status') in ('WARNING', 'CRITICAL'):
|
||||||
|
status = disk_error['status']
|
||||||
|
error_detail = disk_error.get('detail', '')
|
||||||
|
elif final_health == 'critical':
|
||||||
status = 'CRITICAL'
|
status = 'CRITICAL'
|
||||||
|
error_detail = ''
|
||||||
elif final_health == 'warning':
|
elif final_health == 'warning':
|
||||||
status = 'WARNING'
|
status = 'WARNING'
|
||||||
|
error_detail = ''
|
||||||
else:
|
else:
|
||||||
status = 'OK'
|
status = 'OK'
|
||||||
|
error_detail = ''
|
||||||
|
|
||||||
# Build detail string
|
# Build detail string
|
||||||
disk_type = 'USB' if is_usb else ('NVMe' if disk.get('is_nvme') else 'SATA')
|
disk_type = 'USB' if is_usb else ('NVMe' if disk.get('is_nvme') else 'SATA')
|
||||||
detail = f'{serial}' if serial else 'Unknown serial'
|
detail = f'{serial}' if serial else 'Unknown serial'
|
||||||
if final_reason:
|
if final_reason:
|
||||||
detail += f' - {final_reason}'
|
detail += f' - {final_reason}'
|
||||||
|
elif error_detail:
|
||||||
|
detail += f' - {error_detail}'
|
||||||
|
|
||||||
# Only add to checks if not already present (avoid duplicating error entries)
|
# Only add to checks if not already present
|
||||||
if check_key not in checks:
|
if check_key not in checks:
|
||||||
checks[check_key] = {
|
checks[check_key] = {
|
||||||
'status': status,
|
'status': status,
|
||||||
@@ -1150,7 +1207,15 @@ class HealthMonitor:
|
|||||||
|
|
||||||
# If disk has issues, it needs an error_key for dismiss functionality
|
# If disk has issues, it needs an error_key for dismiss functionality
|
||||||
if status != 'OK':
|
if status != 'OK':
|
||||||
checks[check_key]['error_key'] = f'disk_{name}_{serial}' if serial else f'disk_{name}'
|
# Use disk error_key if available, otherwise generate one
|
||||||
|
if disk_error and disk_error.get('error_key'):
|
||||||
|
checks[check_key]['error_key'] = disk_error['error_key']
|
||||||
|
else:
|
||||||
|
checks[check_key]['error_key'] = f'disk_{name}_{serial}' if serial else f'disk_{name}'
|
||||||
|
checks[check_key]['dismissable'] = True
|
||||||
|
# Preserve dismissed state from disk error
|
||||||
|
if disk_error and disk_error.get('dismissed'):
|
||||||
|
checks[check_key]['dismissed'] = True
|
||||||
|
|
||||||
if not issues:
|
if not issues:
|
||||||
return {'status': 'OK', 'checks': checks, 'physical_disks': physical_disks}
|
return {'status': 'OK', 'checks': checks, 'physical_disks': physical_disks}
|
||||||
|
|||||||
@@ -1231,11 +1231,26 @@ class HealthPersistence:
|
|||||||
# a different device_name (e.g. 'ata8' instead of 'sdh'),
|
# a different device_name (e.g. 'ata8' instead of 'sdh'),
|
||||||
# update that entry's device_name so observations carry over.
|
# update that entry's device_name so observations carry over.
|
||||||
if serial:
|
if serial:
|
||||||
|
# Try exact match first
|
||||||
cursor.execute('''
|
cursor.execute('''
|
||||||
SELECT id, device_name FROM disk_registry
|
SELECT id, device_name FROM disk_registry
|
||||||
WHERE serial = ? AND serial != '' AND device_name != ?
|
WHERE serial = ? AND serial != '' AND device_name != ?
|
||||||
''', (serial, device_name))
|
''', (serial, device_name))
|
||||||
old_rows = cursor.fetchall()
|
old_rows = cursor.fetchall()
|
||||||
|
|
||||||
|
# If no exact match, try normalized match (for USB disks with special chars)
|
||||||
|
if not old_rows:
|
||||||
|
normalized = self._normalize_serial(serial)
|
||||||
|
if normalized and normalized != serial:
|
||||||
|
cursor.execute(
|
||||||
|
'SELECT id, device_name, serial FROM disk_registry '
|
||||||
|
'WHERE serial != "" AND device_name != ?', (device_name,))
|
||||||
|
for row in cursor.fetchall():
|
||||||
|
db_normalized = self._normalize_serial(row[2])
|
||||||
|
if db_normalized == normalized or normalized in db_normalized or db_normalized in normalized:
|
||||||
|
old_rows.append((row[0], row[1]))
|
||||||
|
break
|
||||||
|
|
||||||
for old_id, old_dev in old_rows:
|
for old_id, old_dev in old_rows:
|
||||||
# Only consolidate ATA names -> block device names
|
# Only consolidate ATA names -> block device names
|
||||||
if old_dev.startswith('ata') and not device_name.startswith('ata'):
|
if old_dev.startswith('ata') and not device_name.startswith('ata'):
|
||||||
@@ -1273,6 +1288,23 @@ class HealthPersistence:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[HealthPersistence] Error registering disk {device_name}: {e}")
|
print(f"[HealthPersistence] Error registering disk {device_name}: {e}")
|
||||||
|
|
||||||
|
def _normalize_serial(self, serial: str) -> str:
|
||||||
|
"""Normalize serial number for comparison.
|
||||||
|
|
||||||
|
USB disks can have serials with escape sequences like \\x06\\x18
|
||||||
|
or non-printable characters. This normalizes them for matching.
|
||||||
|
"""
|
||||||
|
if not serial:
|
||||||
|
return ''
|
||||||
|
import re
|
||||||
|
# Remove escape sequences like \x06, \x18
|
||||||
|
normalized = re.sub(r'\\x[0-9a-fA-F]{2}', '', serial)
|
||||||
|
# Remove non-printable characters
|
||||||
|
normalized = ''.join(c for c in normalized if c.isprintable())
|
||||||
|
# Remove common prefixes that vary
|
||||||
|
normalized = normalized.strip()
|
||||||
|
return normalized
|
||||||
|
|
||||||
def _get_disk_registry_id(self, cursor, device_name: str,
|
def _get_disk_registry_id(self, cursor, device_name: str,
|
||||||
serial: Optional[str] = None) -> Optional[int]:
|
serial: Optional[str] = None) -> Optional[int]:
|
||||||
"""Find disk_registry.id, matching by serial first, then device_name.
|
"""Find disk_registry.id, matching by serial first, then device_name.
|
||||||
@@ -1281,12 +1313,25 @@ class HealthPersistence:
|
|||||||
checks entries with ATA names that share the same serial.
|
checks entries with ATA names that share the same serial.
|
||||||
"""
|
"""
|
||||||
if serial:
|
if serial:
|
||||||
|
# Try exact match first
|
||||||
cursor.execute(
|
cursor.execute(
|
||||||
'SELECT id FROM disk_registry WHERE serial = ? AND serial != "" ORDER BY last_seen DESC LIMIT 1',
|
'SELECT id FROM disk_registry WHERE serial = ? AND serial != "" ORDER BY last_seen DESC LIMIT 1',
|
||||||
(serial,))
|
(serial,))
|
||||||
row = cursor.fetchone()
|
row = cursor.fetchone()
|
||||||
if row:
|
if row:
|
||||||
return row[0]
|
return row[0]
|
||||||
|
|
||||||
|
# Try normalized serial match (for USB disks with special chars)
|
||||||
|
normalized = self._normalize_serial(serial)
|
||||||
|
if normalized and normalized != serial:
|
||||||
|
# Search for serials that start with or contain the normalized version
|
||||||
|
cursor.execute(
|
||||||
|
'SELECT id, serial FROM disk_registry WHERE serial != "" ORDER BY last_seen DESC')
|
||||||
|
for row in cursor.fetchall():
|
||||||
|
db_normalized = self._normalize_serial(row[1])
|
||||||
|
if db_normalized == normalized or normalized in db_normalized or db_normalized in normalized:
|
||||||
|
return row[0]
|
||||||
|
|
||||||
# Fallback: match by device_name (strip /dev/ prefix)
|
# Fallback: match by device_name (strip /dev/ prefix)
|
||||||
clean_dev = device_name.replace('/dev/', '')
|
clean_dev = device_name.replace('/dev/', '')
|
||||||
cursor.execute(
|
cursor.execute(
|
||||||
@@ -1295,6 +1340,7 @@ class HealthPersistence:
|
|||||||
row = cursor.fetchone()
|
row = cursor.fetchone()
|
||||||
if row:
|
if row:
|
||||||
return row[0]
|
return row[0]
|
||||||
|
|
||||||
# Last resort: search for ATA-named entries that might refer to this device
|
# Last resort: search for ATA-named entries that might refer to this device
|
||||||
# This handles cases where observations were recorded under 'ata8'
|
# This handles cases where observations were recorded under 'ata8'
|
||||||
# but we're querying for 'sdh'
|
# but we're querying for 'sdh'
|
||||||
|
|||||||
@@ -749,6 +749,7 @@ class JournalWatcher:
|
|||||||
"""Extract device info from a smartd system-mail and record as disk observation."""
|
"""Extract device info from a smartd system-mail and record as disk observation."""
|
||||||
try:
|
try:
|
||||||
import re as _re
|
import re as _re
|
||||||
|
import subprocess
|
||||||
from health_persistence import health_persistence
|
from health_persistence import health_persistence
|
||||||
|
|
||||||
# Extract device path: "Device: /dev/sdh [SAT]" or "Device: /dev/sda"
|
# Extract device path: "Device: /dev/sdh [SAT]" or "Device: /dev/sda"
|
||||||
@@ -769,6 +770,21 @@ class JournalWatcher:
|
|||||||
if model_match:
|
if model_match:
|
||||||
model = model_match.group(1).strip()
|
model = model_match.group(1).strip()
|
||||||
|
|
||||||
|
# If no serial from message, try to get it from smartctl (important for USB disks)
|
||||||
|
if not serial or len(serial) < 3:
|
||||||
|
try:
|
||||||
|
result = subprocess.run(
|
||||||
|
['smartctl', '-i', '-j', f'/dev/{base_dev}'],
|
||||||
|
capture_output=True, text=True, timeout=5
|
||||||
|
)
|
||||||
|
import json as _json
|
||||||
|
data = _json.loads(result.stdout)
|
||||||
|
serial = data.get('serial_number', '') or serial
|
||||||
|
if not model:
|
||||||
|
model = data.get('model_name', '') or data.get('model_family', '')
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
# Extract error signature from title: "SMART error (FailedReadSmartSelfTestLog)"
|
# Extract error signature from title: "SMART error (FailedReadSmartSelfTestLog)"
|
||||||
sig_match = _re.search(r'SMART error\s*\((\w+)\)', title)
|
sig_match = _re.search(r'SMART error\s*\((\w+)\)', title)
|
||||||
if sig_match:
|
if sig_match:
|
||||||
|
|||||||
Reference in New Issue
Block a user