From 41fa6f4b10dd2f3bf583dda4fccd69d5e9aa826c Mon Sep 17 00:00:00 2001
From: MacRimi <ricoextincion@gmail.com>
Date: Sat, 21 Mar 2026 22:27:54 +0100
Subject: [PATCH] Update notification service

---
 AppImage/scripts/flask_server.py        | 65 ++++++++++++++++++++-----
 AppImage/scripts/notification_events.py | 42 +++++++++++++++-
 2 files changed, 93 insertions(+), 14 deletions(-)

diff --git a/AppImage/scripts/flask_server.py b/AppImage/scripts/flask_server.py
index 826c7226..ecf6ef5f 100644
--- a/AppImage/scripts/flask_server.py
+++ b/AppImage/scripts/flask_server.py
@@ -585,17 +585,51 @@ def get_temperature_history(timeframe="hour"):
         return {"data": [], "stats": {"min": 0, "max": 0, "avg": 0, "current": 0}}
 
 def _temperature_collector_loop():
-    """Background thread: collect temperature every 60s, cleanup every hour."""
-    cleanup_counter = 0
+    """Background thread: collect temperature and latency with staggered timing.
+    
+    Staggered schedule to avoid CPU spikes:
+    - Temperature record: every 60s at offset 40s
+    - Latency pings: every 60s at offset 25s  
+    - Cleanup: every 60 min at offset 120s
+    """
+    import time as _time
+    
+    RECORD_INTERVAL = 60
+    TEMP_OFFSET = 40      # Record temp at :40 of each minute
+    LATENCY_OFFSET = 25   # Record latency at :25 of each minute
+    CLEANUP_INTERVAL = 3600  # 60 minutes
+    CLEANUP_OFFSET = 120  # Cleanup at 2 min after the hour mark
+    
+    # Initial delays to stagger from other collectors
+    _time.sleep(LATENCY_OFFSET)  # Start latency first
+    
+    last_temp = _time.monotonic()
+    last_latency = _time.monotonic()
+    last_cleanup = _time.monotonic() - CLEANUP_INTERVAL + CLEANUP_OFFSET  # First cleanup after offset
+    
     while True:
+        now = _time.monotonic()
+        
+        # Latency pings (offset 25s - runs first in each cycle)
+        if now - last_latency >= RECORD_INTERVAL:
+            _record_latency()
+            last_latency = now
+        
+        # Temperature record (offset 40s - 15s after latency)
+        _time.sleep(15)
         _record_temperature()
-        _record_latency()  # Also record latency in the same loop
-        cleanup_counter += 1
-        if cleanup_counter >= 60:  # Every 60 iterations = 60 minutes
+        last_temp = _time.monotonic()
+        
+        # Cleanup check (every hour, offset from main cycles)
+        if _time.monotonic() - last_cleanup >= CLEANUP_INTERVAL:
             _cleanup_old_temperature_data()
             _cleanup_old_latency_data()
-            cleanup_counter = 0
-        time.sleep(60)
+            last_cleanup = _time.monotonic()
+        
+        # Sleep remaining time until next cycle
+        elapsed = _time.monotonic() - last_latency
+        remaining = max(RECORD_INTERVAL - elapsed, 1)
+        _time.sleep(remaining)
 
 
 # ── Latency History (SQLite) ──────────────────────────────────────────────────
@@ -817,11 +851,13 @@ def get_current_latency(target='gateway'):
 def _health_collector_loop():
     """Background thread: run full health checks every 5 minutes.
     Keeps the health cache always fresh and records events/errors in the DB.
-    Also emits notifications when a health category degrades (OK -> WARNING/CRITICAL)."""
+    Also emits notifications when a health category degrades (OK -> WARNING/CRITICAL).
+    
+    Staggered: starts at 55s offset to avoid collision with other collectors."""
     from health_monitor import health_monitor
     
-    # Wait 30s after startup to let other services initialize
-    time.sleep(30)
+    # Wait 55s after startup (staggered from other collectors: temp=40s, latency=25s)
+    time.sleep(55)
     
     # Track previous status per category to detect transitions
     _prev_statuses = {}
@@ -950,19 +986,22 @@ def _vital_signs_sampler():
     
     Runs independently of the 5-min health collector loop.
     - CPU usage:   sampled every 30s  (3 samples in 1.5 min for hysteresis)
-    - Temperature:  sampled every 10s  (18 samples in 3 min for temporal logic)
+    - Temperature: sampled every 15s  (12 samples in 3 min for temporal logic)
     Uses time.monotonic() to avoid drift.
+    
+    Staggered intervals: CPU at offset 0, Temp at offset 7s to avoid collision.
     """
     from health_monitor import health_monitor
     
     # Wait 15s after startup for sensors to be ready
     time.sleep(15)
     
-    TEMP_INTERVAL = 10   # seconds
+    TEMP_INTERVAL = 15   # seconds (was 10s - reduced frequency by 33%)
     CPU_INTERVAL  = 30   # seconds
     
-    next_temp = time.monotonic()
+    # Stagger: CPU starts immediately, Temp starts after 7s offset
     next_cpu  = time.monotonic()
+    next_temp = time.monotonic() + 7
     
     print("[ProxMenux] Vital signs sampler started (CPU: 30s, Temp: 10s)")
     
diff --git a/AppImage/scripts/notification_events.py b/AppImage/scripts/notification_events.py
index 246915cf..a52e4a3e 100644
--- a/AppImage/scripts/notification_events.py
+++ b/AppImage/scripts/notification_events.py
@@ -1697,6 +1697,13 @@ class PollingCollector:
     def stop(self):
         self._running = False
     
+    def _sleep_until_offset(self, cycle_start: float, offset: float):
+        """Sleep until the specified offset within the current cycle."""
+        target = cycle_start + offset
+        now = time.time()
+        if now < target:
+            time.sleep(target - now)
+    
     # ── Main loop ──────────────────────────────────────────────
     
     def _poll_loop(self):
@@ -1707,16 +1714,49 @@ class PollingCollector:
                 return
             time.sleep(1)
         
+        # Staggered execution: spread checks across the polling interval
+        # to avoid CPU spikes when multiple checks run simultaneously.
+        # Schedule: health=10s, updates=30s, proxmenux=45s, ai_model=50s
+        STAGGER_HEALTH = 10
+        STAGGER_UPDATES = 30
+        STAGGER_PROXMENUX = 45
+        STAGGER_AI_MODEL = 50
+        
         while self._running:
+            cycle_start = time.time()
+            
             try:
+                # Health check at offset 10s
+                self._sleep_until_offset(cycle_start, STAGGER_HEALTH)
+                if not self._running:
+                    return
                 self._check_persistent_health()
+                
+                # Updates check at offset 30s
+                self._sleep_until_offset(cycle_start, STAGGER_UPDATES)
+                if not self._running:
+                    return
                 self._check_updates()
+                
+                # ProxMenux check at offset 45s
+                self._sleep_until_offset(cycle_start, STAGGER_PROXMENUX)
+                if not self._running:
+                    return
                 self._check_proxmenux_updates()
+                
+                # AI model check at offset 50s
+                self._sleep_until_offset(cycle_start, STAGGER_AI_MODEL)
+                if not self._running:
+                    return
                 self._check_ai_model_availability()
+                
             except Exception as e:
                 print(f"[PollingCollector] Error: {e}")
             
-            for _ in range(self._poll_interval):
+            # Sleep remaining time until next cycle
+            elapsed = time.time() - cycle_start
+            remaining = max(self._poll_interval - elapsed, 1)
+            for _ in range(int(remaining)):
                 if not self._running:
                     return
                 time.sleep(1)