mirror of
https://github.com/MacRimi/ProxMenux.git
synced 2026-04-17 17:42:19 +00:00
Update notification service
This commit is contained in:
@@ -175,7 +175,7 @@ class HealthMonitor:
|
||||
r'proxmenux-monitor.*failed at step exec',
|
||||
r'proxmenux-monitor\.appimage',
|
||||
|
||||
# ─<EFBFBD><EFBFBD><EFBFBD> PVE scheduler operational noise ──
|
||||
# ── PVE scheduler operational noise ──
|
||||
# pvescheduler emits "could not update job state" every minute
|
||||
# when a scheduled job reference is stale. This is cosmetic,
|
||||
# not a system problem.
|
||||
@@ -2118,7 +2118,7 @@ class HealthMonitor:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# ── Record disk observation (always, even if transient) <EFBFBD><EFBFBD><EFBFBD>─
|
||||
# ── Record disk observation (always, even if transient) ──
|
||||
# Signature must be stable across cycles: strip volatile
|
||||
# data (hex values, counts, timestamps) to dedup properly.
|
||||
# e.g. "ata8.00: exception Emask 0x1 SAct 0xc1000000"
|
||||
|
||||
@@ -79,7 +79,7 @@ class _SharedState:
|
||||
_shared_state = _SharedState()
|
||||
|
||||
|
||||
# ─── Event Object ──────────────<EFBFBD><EFBFBD>──────────────────────────────────
|
||||
# ─── Event Object ─────────────────────────────────────────────────
|
||||
|
||||
class NotificationEvent:
|
||||
"""Represents a detected event ready for notification dispatch.
|
||||
@@ -2154,6 +2154,9 @@ class PollingCollector:
|
||||
- Journal errors (for AI enrichment)
|
||||
|
||||
Emits a single "system_startup" notification with full report data.
|
||||
|
||||
IMPORTANT: Only emits if this is a REAL system boot, not a service restart.
|
||||
Checks system uptime to distinguish between the two cases.
|
||||
"""
|
||||
# Wait until health grace period is over (5 min) for complete picture
|
||||
if startup_grace.is_startup_health_grace():
|
||||
@@ -2163,6 +2166,14 @@ class PollingCollector:
|
||||
if startup_grace.was_startup_aggregated():
|
||||
return
|
||||
|
||||
# CRITICAL: Check if this is a real system boot
|
||||
# If the system was already running for > 10 min when service started,
|
||||
# this is just a service restart, not a system boot - skip notification
|
||||
if not startup_grace.is_real_system_boot():
|
||||
# Mark as aggregated to prevent future checks, but don't send notification
|
||||
startup_grace.mark_startup_aggregated()
|
||||
return
|
||||
|
||||
# Collect comprehensive startup report
|
||||
report = startup_grace.collect_startup_report()
|
||||
|
||||
@@ -2332,7 +2343,7 @@ class PollingCollector:
|
||||
for pkg in all_pkgs:
|
||||
if pkg['name'] in self._IMPORTANT_PKGS and pkg['cur']:
|
||||
important_lines.append(
|
||||
f"{pkg['name']} ({pkg['cur']} -> {pkg['new']})"
|
||||
f"{pkg['name']} ({pkg['cur']} → {pkg['new']})"
|
||||
)
|
||||
|
||||
# ── Emit structured update_summary ─────────────────────
|
||||
@@ -2358,7 +2369,7 @@ class PollingCollector:
|
||||
'current_version': pve_manager_info['cur'],
|
||||
'new_version': pve_manager_info['new'],
|
||||
'version': pve_manager_info['new'],
|
||||
'details': f"pve-manager {pve_manager_info['cur']} -> {pve_manager_info['new']}",
|
||||
'details': f"pve-manager {pve_manager_info['cur']} → {pve_manager_info['new']}",
|
||||
}
|
||||
self._queue.put(NotificationEvent(
|
||||
'pve_update', 'INFO', pve_data,
|
||||
|
||||
@@ -28,6 +28,22 @@ STARTUP_VM_GRACE_SECONDS = 180 # 3 minutes for VM/CT start aggregation
|
||||
STARTUP_HEALTH_GRACE_SECONDS = 300  # 5 minutes for health warning suppression
SHUTDOWN_GRACE_SECONDS = 120  # 2 minutes for VM/CT stop suppression

# Maximum system uptime (in seconds) for a service start to count as a real
# server boot. If the system has already been up longer than this when the
# service starts, the start is treated as a service restart and the startup
# notification is skipped.
MAX_BOOT_UPTIME_SECONDS = 600  # 10 minutes
|
||||
|
||||
|
||||
def _get_system_uptime(path: str = '/proc/uptime') -> float:
    """
    Get the system uptime in seconds.

    The first whitespace-separated field of the first line of ``path`` is
    parsed as a float.

    Args:
        path: File to read the uptime from. Defaults to ``/proc/uptime``.

    Returns:
        The uptime in seconds, or ``0.0`` if the file cannot be opened, is
        empty, or does not start with a number. A zero uptime is below any
        positive boot threshold, so a failed read is treated as a new boot.
    """
    try:
        with open(path, 'r') as f:
            return float(f.readline().split()[0])
    except (OSError, ValueError, IndexError):
        # OSError: file missing or unreadable; IndexError: empty first line;
        # ValueError: first field is not a number (or bytes fail to decode).
        return 0.0
|
||||
|
||||
# Categories to suppress during startup grace period
|
||||
# These categories typically have transient issues during boot
|
||||
STARTUP_GRACE_CATEGORIES: Set[str] = {
|
||||
@@ -68,6 +84,11 @@ class _StartupGraceState:
|
||||
# Startup time = when service started (module load time)
|
||||
self._startup_time: float = time.time()
|
||||
|
||||
# Check if this is a REAL system boot or just a service restart
|
||||
# by comparing system uptime to our threshold
|
||||
system_uptime = _get_system_uptime()
|
||||
self._is_real_boot: bool = system_uptime < MAX_BOOT_UPTIME_SECONDS
|
||||
|
||||
# Shutdown tracking
|
||||
self._shutdown_time: float = 0
|
||||
|
||||
@@ -115,6 +136,19 @@ class _StartupGraceState:
|
||||
return self.is_startup_health_grace()
|
||||
return False
|
||||
|
||||
def is_real_system_boot(self) -> bool:
    """
    Check if the service started during a real system boot.

    Returns the flag stored in ``self._is_real_boot``, read while holding
    ``self._lock``. It is False if the system had already been running for
    more than 10 minutes when the service started, which indicates a
    service restart rather than a system boot.

    This prevents sending "System startup completed" notifications when
    just restarting the ProxMenux Monitor service.
    """
    with self._lock:
        return self._is_real_boot
|
||||
|
||||
def get_startup_elapsed(self) -> float:
|
||||
"""Get seconds elapsed since service startup."""
|
||||
with self._lock:
|
||||
@@ -230,6 +264,19 @@ def was_startup_aggregated() -> bool:
|
||||
"""Check if startup aggregation has already been processed."""
|
||||
return _state.was_startup_aggregated()
|
||||
|
||||
def is_real_system_boot() -> bool:
    """
    Check if this is a real system boot (not just a service restart).

    Thin module-level wrapper that delegates to
    ``_state.is_real_system_boot()``.

    Returns True if the system uptime was less than 10 minutes when the
    service started. Returns False if the system was already running
    longer (indicates the service was restarted, not the whole system).

    Use this to prevent sending "System startup completed" notifications
    when just restarting the ProxMenux Monitor service.
    """
    return _state.is_real_system_boot()
|
||||
|
||||
|
||||
# ─── Startup Report Collection ───────────────────────────────────────────────
|
||||
|
||||
|
||||
Reference in New Issue
Block a user