Update notification service

This commit is contained in:
MacRimi
2026-02-27 19:47:36 +01:00
parent be119a69af
commit 171e7ddcae
4 changed files with 84 additions and 43 deletions

View File

@@ -408,10 +408,10 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu
key={checkKey}
className="flex items-center justify-between gap-1.5 sm:gap-2 text-[10px] sm:text-xs py-1.5 px-2 sm:px-3 rounded-md hover:bg-muted/40 transition-colors"
>
<div className="flex items-center gap-1.5 sm:gap-2 min-w-0 flex-1 overflow-hidden">
{getStatusIcon(checkData.status, "sm")}
<div className="flex items-start gap-1.5 sm:gap-2 min-w-0 flex-1">
<span className="mt-0.5 shrink-0">{getStatusIcon(checkData.status, "sm")}</span>
<span className="font-medium shrink-0">{formatCheckLabel(checkKey)}</span>
<span className="text-muted-foreground truncate block">{checkData.detail}</span>
<span className="text-muted-foreground break-words whitespace-pre-wrap min-w-0">{checkData.detail}</span>
{checkData.dismissed && (
<Badge variant="outline" className="text-[9px] px-1 py-0 h-4 shrink-0 text-blue-400 border-blue-400/30">
Dismissed
@@ -520,8 +520,8 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu
</div>
{healthData.summary && healthData.summary !== "All systems operational" && (
<div className="text-sm p-3 rounded-lg bg-muted/20 border overflow-hidden max-w-full">
<p className="font-medium text-foreground truncate" title={healthData.summary}>{healthData.summary}</p>
<div className="text-xs sm:text-sm p-3 rounded-lg bg-muted/20 border overflow-hidden max-w-full">
<p className="font-medium text-foreground break-words whitespace-pre-wrap">{healthData.summary}</p>
</div>
)}
@@ -559,7 +559,7 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu
)}
</div>
{reason && !isExpanded && (
<p className="text-[10px] sm:text-xs text-muted-foreground mt-0.5 truncate" title={reason}>{reason}</p>
<p className="text-[10px] sm:text-xs text-muted-foreground mt-0.5 line-clamp-2 break-words">{reason}</p>
)}
</div>
<div className="flex items-center gap-1 sm:gap-2 shrink-0">
@@ -578,7 +578,7 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu
{isExpanded && (
<div className="border-t border-border/50 bg-muted/5 px-1.5 sm:px-2 py-1.5 overflow-hidden">
{reason && (
<p className="text-xs text-muted-foreground px-3 py-1.5 mb-1 break-words">{reason}</p>
<p className="text-xs text-muted-foreground px-3 py-1.5 mb-1 break-words whitespace-pre-wrap">{reason}</p>
)}
{hasChecks ? (
renderChecks(checks, key)

View File

@@ -34,6 +34,12 @@ interface DiskInfo {
wear_leveling_count?: number // SSD: Wear Leveling Count
total_lbas_written?: number // SSD/NVMe: Total LBAs Written (GB)
ssd_life_left?: number // SSD: SSD Life Left percentage
io_errors?: {
count: number
severity: string
sample: string
reason: string
}
}
interface ZFSPool {
@@ -776,6 +782,17 @@ export function StorageOverview() {
</div>
</div>
{disk.io_errors && disk.io_errors.count > 0 && (
<div className={`flex items-start gap-2 p-2 rounded text-xs ${
disk.io_errors.severity === 'CRITICAL'
? 'bg-red-500/10 text-red-400 border border-red-500/20'
: 'bg-yellow-500/10 text-yellow-400 border border-yellow-500/20'
}`}>
<AlertTriangle className="h-3.5 w-3.5 flex-shrink-0 mt-0.5" />
<span>{disk.io_errors.count} I/O error{disk.io_errors.count !== 1 ? 's' : ''} in 5 min</span>
</div>
)}
<div className="grid grid-cols-2 gap-4 text-sm">
{disk.size_formatted && (
<div>
@@ -841,6 +858,22 @@ export function StorageOverview() {
</div>
</div>
{disk.io_errors && disk.io_errors.count > 0 && (
<div className={`flex items-start gap-2 p-2 rounded text-xs ${
disk.io_errors.severity === 'CRITICAL'
? 'bg-red-500/10 text-red-400 border border-red-500/20'
: 'bg-yellow-500/10 text-yellow-400 border border-yellow-500/20'
}`}>
<AlertTriangle className="h-3.5 w-3.5 flex-shrink-0 mt-0.5" />
<div>
<span className="font-medium">{disk.io_errors.count} I/O error{disk.io_errors.count !== 1 ? 's' : ''} in 5 min</span>
{disk.io_errors.sample && (
<p className="mt-0.5 opacity-80 font-mono truncate max-w-md">{disk.io_errors.sample}</p>
)}
</div>
</div>
)}
<div className="grid grid-cols-2 md:grid-cols-4 gap-4 text-sm">
{disk.size_formatted && (
<div>

View File

@@ -324,7 +324,8 @@ class HealthMonitor:
Returns JSON structure with ALL 10 categories always present.
Now includes persistent error tracking.
"""
# Run cleanup on every status check to auto-resolve stale errors
# Run cleanup on every status check so stale errors are auto-resolved
# using the user-configured Suppression Duration (single source of truth).
try:
health_persistence.cleanup_old_errors()
except Exception:
@@ -2157,18 +2158,18 @@ class HealthMonitor:
# Get a representative critical error reason
representative_error = next(iter(critical_errors_found.values()))
reason = f'Critical error detected: {representative_error[:100]}'
elif cascade_count > 0:
status = 'WARNING'
samples = _get_samples(cascading_errors, 3)
reason = f'Error cascade ({cascade_count} patterns repeating):\n' + '\n'.join(f' - {s}' for s in samples)
elif spike_count > 0:
status = 'WARNING'
samples = _get_samples(spike_errors, 3)
reason = f'Error spike ({spike_count} patterns with 4x increase):\n' + '\n'.join(f' - {s}' for s in samples)
elif persistent_count > 0:
status = 'WARNING'
samples = _get_samples(persistent_errors, 3)
reason = f'Persistent errors ({persistent_count} patterns over 15+ min):\n' + '\n'.join(f' - {s}' for s in samples)
elif cascade_count > 0:
status = 'WARNING'
samples = _get_samples(cascading_errors, 3)
reason = f'Error cascade ({cascade_count} patterns repeating):\n' + '\n'.join(f' - {s}' for s in samples)
elif spike_count > 0:
status = 'WARNING'
samples = _get_samples(spike_errors, 3)
reason = f'Error spike ({spike_count} patterns with 4x increase):\n' + '\n'.join(f' - {s}' for s in samples)
elif persistent_count > 0:
status = 'WARNING'
samples = _get_samples(persistent_errors, 3)
reason = f'Persistent errors ({persistent_count} patterns over 15+ min):\n' + '\n'.join(f' - {s}' for s in samples)
else:
# No significant issues found
status = 'OK'
@@ -2189,23 +2190,23 @@ class HealthMonitor:
'log_critical_errors': {'active': unique_critical_count > 0, 'severity': 'CRITICAL',
'reason': f'{unique_critical_count} critical error(s) found', 'dismissable': False},
}
# Track which sub-checks were dismissed
dismissed_keys = set()
for err_key, info in log_sub_checks.items():
if info['active']:
is_dismissable = info.get('dismissable', True)
result = health_persistence.record_error(
error_key=err_key,
category='logs',
severity=info['severity'],
reason=info['reason'],
details={'dismissable': is_dismissable}
)
if result and result.get('type') == 'skipped_acknowledged':
dismissed_keys.add(err_key)
elif health_persistence.is_error_active(err_key):
health_persistence.clear_error(err_key)
# Track which sub-checks were dismissed
dismissed_keys = set()
for err_key, info in log_sub_checks.items():
if info['active']:
is_dismissable = info.get('dismissable', True)
result = health_persistence.record_error(
error_key=err_key,
category='logs',
severity=info['severity'],
reason=info['reason'],
details={'dismissable': is_dismissable}
)
if result and result.get('type') == 'skipped_acknowledged':
dismissed_keys.add(err_key)
elif health_persistence.is_error_active(err_key):
health_persistence.clear_error(err_key)
# Build checks dict - downgrade dismissed items to INFO
def _log_check_status(key, active, severity):

View File

@@ -26,7 +26,7 @@ class HealthPersistence:
"""Manages persistent health error tracking"""
# Default suppression duration when no user setting exists for a category.
# Users can override per-category via the Suppression Duration settings.
# Users override per-category via the Suppression Duration settings UI.
DEFAULT_SUPPRESSION_HOURS = 24
# Mapping from error categories to settings keys
@@ -498,13 +498,16 @@ class HealthPersistence:
cutoff_resolved = (now - timedelta(days=7)).isoformat()
cursor.execute('DELETE FROM errors WHERE resolved_at < ?', (cutoff_resolved,))
# ── Auto-resolve stale errors using user-configured Suppression Duration ──
# Read the per-category suppression hours from user_settings.
# If the user hasn't configured a category, fall back to DEFAULT_SUPPRESSION_HOURS.
# ── Auto-resolve stale errors using Suppression Duration settings ──
# Read per-category suppression hours from user_settings.
# If the user hasn't configured a value, use DEFAULT_SUPPRESSION_HOURS.
# This is the SINGLE source of truth for auto-resolution timing.
user_settings = {}
try:
cursor.execute('SELECT setting_key, setting_value FROM user_settings WHERE setting_key LIKE ?', ('suppress_%',))
cursor.execute(
'SELECT setting_key, setting_value FROM user_settings WHERE setting_key LIKE ?',
('suppress_%',)
)
for row in cursor.fetchall():
user_settings[row[0]] = row[1]
except Exception:
@@ -517,6 +520,10 @@ class HealthPersistence:
except (ValueError, TypeError):
hours = self.DEFAULT_SUPPRESSION_HOURS
# -1 means permanently suppressed -- skip auto-resolve
if hours < 0:
continue
cutoff = (now - timedelta(hours=hours)).isoformat()
cursor.execute('''
UPDATE errors
@@ -527,7 +534,7 @@ class HealthPersistence:
AND acknowledged = 0
''', (now_iso, category, cutoff))
# Catch-all: auto-resolve ANY error from an unmapped category
# Catch-all: auto-resolve any error from an unmapped category
# whose last_seen exceeds DEFAULT_SUPPRESSION_HOURS.
fallback_cutoff = (now - timedelta(hours=self.DEFAULT_SUPPRESSION_HOURS)).isoformat()
cursor.execute('''