Update notification service

2026-04-18 10:02:16 +00:00 · 2026-03-21 22:27:54 +01:00
parent 18aa9a77dd
commit 41fa6f4b10
2 changed files with 93 additions and 14 deletions
--- a/AppImage/scripts/flask_server.py
+++ b/AppImage/scripts/flask_server.py
@@ -585,17 +585,51 @@ def get_temperature_history(timeframe="hour"):
        return {"data": [], "stats": {"min": 0, "max": 0, "avg": 0, "current": 0}}
 def _temperature_collector_loop():
-    """Background thread: collect temperature every 60s, cleanup every hour."""
+    """Background thread: collect temperature and latency with staggered timing.
-    cleanup_counter = 0
+    
    Staggered schedule to avoid CPU spikes:
    - Temperature record: every 60s at offset 40s
    - Latency pings: every 60s at offset 25s  
    - Cleanup: every 60 min at offset 120s
    """
    import time as _time
    RECORD_INTERVAL = 60
    TEMP_OFFSET = 40      # Record temp at :40 of each minute
    LATENCY_OFFSET = 25   # Record latency at :25 of each minute
    CLEANUP_INTERVAL = 3600  # 60 minutes
    CLEANUP_OFFSET = 120  # Cleanup at 2 min after the hour mark
    # Initial delays to stagger from other collectors
    _time.sleep(LATENCY_OFFSET)  # Start latency first
    last_temp = _time.monotonic()
    last_latency = _time.monotonic()
    last_cleanup = _time.monotonic() - CLEANUP_INTERVAL + CLEANUP_OFFSET  # First cleanup after offset
    while True:
        now = _time.monotonic()
        # Latency pings (offset 25s - runs first in each cycle)
        if now - last_latency >= RECORD_INTERVAL:
            _record_latency()
            last_latency = now
        # Temperature record (offset 40s - 15s after latency)
        _time.sleep(15)
        _record_temperature()
-        _record_latency()  # Also record latency in the same loop
+        last_temp = _time.monotonic()
-        cleanup_counter += 1
+        
-        if cleanup_counter >= 60:  # Every 60 iterations = 60 minutes
+        # Cleanup check (every hour, offset from main cycles)
        if _time.monotonic() - last_cleanup >= CLEANUP_INTERVAL:
            _cleanup_old_temperature_data()
            _cleanup_old_latency_data()
-            cleanup_counter = 0
+            last_cleanup = _time.monotonic()
-        time.sleep(60)
+        
        # Sleep remaining time until next cycle
        elapsed = _time.monotonic() - last_latency
        remaining = max(RECORD_INTERVAL - elapsed, 1)
        _time.sleep(remaining)
 # ── Latency History (SQLite) ──────────────────────────────────────────────────
@@ -817,11 +851,13 @@ def get_current_latency(target='gateway'):
 def _health_collector_loop():
    """Background thread: run full health checks every 5 minutes.
    Keeps the health cache always fresh and records events/errors in the DB.
-    Also emits notifications when a health category degrades (OK -> WARNING/CRITICAL)."""
+    Also emits notifications when a health category degrades (OK -> WARNING/CRITICAL).
    Staggered: starts at 55s offset to avoid collision with other collectors."""
    from health_monitor import health_monitor
-    # Wait 30s after startup to let other services initialize
+    # Wait 55s after startup (staggered from other collectors: temp=40s, latency=25s)
-    time.sleep(30)
+    time.sleep(55)
    # Track previous status per category to detect transitions
    _prev_statuses = {}
@@ -950,19 +986,22 @@ def _vital_signs_sampler():
    Runs independently of the 5-min health collector loop.
    - CPU usage:   sampled every 30s  (3 samples in 1.5 min for hysteresis)
-    - Temperature:  sampled every 10s  (18 samples in 3 min for temporal logic)
+    - Temperature: sampled every 15s  (12 samples in 3 min for temporal logic)
    Uses time.monotonic() to avoid drift.
    Staggered intervals: CPU at offset 0, Temp at offset 7s to avoid collision.
    """
    from health_monitor import health_monitor
    # Wait 15s after startup for sensors to be ready
    time.sleep(15)
-    TEMP_INTERVAL = 10   # seconds
+    TEMP_INTERVAL = 15   # seconds (was 10s - reduced frequency by 33%)
    CPU_INTERVAL  = 30   # seconds
-    next_temp = time.monotonic()
+    # Stagger: CPU starts immediately, Temp starts after 7s offset
    next_cpu  = time.monotonic()
    next_temp = time.monotonic() + 7
    print("[ProxMenux] Vital signs sampler started (CPU: 30s, Temp: 10s)")
--- a/AppImage/scripts/notification_events.py
+++ b/AppImage/scripts/notification_events.py
@@ -1697,6 +1697,13 @@ class PollingCollector:
    def stop(self):
        self._running = False
    def _sleep_until_offset(self, cycle_start: float, offset: float):
        """Sleep until the specified offset within the current cycle."""
        target = cycle_start + offset
        now = time.time()
        if now < target:
            time.sleep(target - now)
    # ── Main loop ──────────────────────────────────────────────
    def _poll_loop(self):
@@ -1707,16 +1714,49 @@ class PollingCollector:
                return
            time.sleep(1)
        # Staggered execution: spread checks across the polling interval
        # to avoid CPU spikes when multiple checks run simultaneously.
        # Schedule: health=10s, updates=30s, proxmenux=45s, ai_model=50s
        STAGGER_HEALTH = 10
        STAGGER_UPDATES = 30
        STAGGER_PROXMENUX = 45
        STAGGER_AI_MODEL = 50
        while self._running:
            cycle_start = time.time()
            try:
                # Health check at offset 10s
                self._sleep_until_offset(cycle_start, STAGGER_HEALTH)
                if not self._running:
                    return
                self._check_persistent_health()
                # Updates check at offset 30s
                self._sleep_until_offset(cycle_start, STAGGER_UPDATES)
                if not self._running:
                    return
                self._check_updates()
                # ProxMenux check at offset 45s
                self._sleep_until_offset(cycle_start, STAGGER_PROXMENUX)
                if not self._running:
                    return
                self._check_proxmenux_updates()
                # AI model check at offset 50s
                self._sleep_until_offset(cycle_start, STAGGER_AI_MODEL)
                if not self._running:
                    return
                self._check_ai_model_availability()
            except Exception as e:
                print(f"[PollingCollector] Error: {e}")
-            for _ in range(self._poll_interval):
+            # Sleep remaining time until next cycle
            elapsed = time.time() - cycle_start
            remaining = max(self._poll_interval - elapsed, 1)
            for _ in range(int(remaining)):
                if not self._running:
                    return
                time.sleep(1)