Update flask_server.py

This commit is contained in:
MacRimi
2026-05-25 18:01:24 +02:00
parent b299227da2
commit a3aa5d9c1a
+28 -11
View File
@@ -1296,7 +1296,8 @@ def _vital_signs_sampler():
"""Dedicated thread for rapid CPU, memory & temperature sampling. """Dedicated thread for rapid CPU, memory & temperature sampling.
Runs independently of the 5-min health collector loop. Runs independently of the 5-min health collector loop.
- CPU usage: sampled every 30s (10 samples in 5 min for sustained detection) - CPU usage: sampled every 5s (matches dashboard refresh; also feeds /api/system + /api/prometheus
through state_history cache to avoid gevent races with psutil.cpu_percent)
- Memory: sampled every 30s (10 samples in 5 min for sustained detection) - Memory: sampled every 30s (10 samples in 5 min for sustained detection)
- Temperature: sampled every 15s (12 samples in 3 min for temporal logic) - Temperature: sampled every 15s (12 samples in 3 min for temporal logic)
Uses time.monotonic() to avoid drift. Uses time.monotonic() to avoid drift.
@@ -1309,15 +1310,16 @@ def _vital_signs_sampler():
time.sleep(15) time.sleep(15)
TEMP_INTERVAL = 15 # seconds (was 10s - reduced frequency by 33%) TEMP_INTERVAL = 15 # seconds (was 10s - reduced frequency by 33%)
CPU_INTERVAL = 30 # seconds CPU_INTERVAL = 5 # seconds — exclusive owner of psutil.cpu_percent baseline;
MEM_INTERVAL = 30 # seconds (aligned with CPU for sustained-RAM detection) # API handlers read the cached value to avoid 0% reads under gevent.
MEM_INTERVAL = 30 # seconds (aligned with original CPU cadence for sustained-RAM detection)
# Stagger: CPU starts immediately, Temp after 7s, Mem after 15s # Stagger: CPU starts immediately, Temp after 7s, Mem after 15s
next_cpu = time.monotonic() next_cpu = time.monotonic()
next_temp = time.monotonic() + 7 next_temp = time.monotonic() + 7
next_mem = time.monotonic() + 15 next_mem = time.monotonic() + 15
print("[ProxMenux] Vital signs sampler started (CPU: 30s, Mem: 30s, Temp: 15s)") print("[ProxMenux] Vital signs sampler started (CPU: 5s, Mem: 30s, Temp: 15s)")
while True: while True:
try: try:
@@ -7601,10 +7603,19 @@ def _get_hardware_info_uncached():
def api_system(): def api_system():
"""Get system information including CPU, memory, and temperature""" """Get system information including CPU, memory, and temperature"""
try: try:
# Non-blocking: returns %CPU since the last psutil call (sampler or prior API hit). # Read from the vital-signs sampler cache. The sampler is the *only*
# The background vital-signs sampler keeps psutil's internal state primed. # consumer of psutil.cpu_percent under gevent, so the API handler never
cpu_usage = psutil.cpu_percent(interval=0) # races against it (calling psutil.cpu_percent here under monkey-patched
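# gevent would return 0% whenever a concurrent greenlet had primed the
# baseline less than one /proc/stat tick ago — typical for the dashboard's
# parallel system+vms+storage+network requests every 5s).
# Only when the cache is empty or cannot be read does the handler fall back
# to a short blocking 100 ms psutil sample (cold start).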
try:
from health_monitor import health_monitor
_hist = health_monitor.state_history.get('cpu_usage') or []
cpu_usage = _hist[-1]['value'] if _hist else psutil.cpu_percent(interval=0.1)
except Exception:
cpu_usage = psutil.cpu_percent(interval=0.1)
memory = psutil.virtual_memory() memory = psutil.virtual_memory()
memory_used_gb = memory.used / (1024 ** 3) memory_used_gb = memory.used / (1024 ** 3)
memory_total_gb = memory.total / (1024 ** 3) memory_total_gb = memory.total / (1024 ** 3)
@@ -10707,9 +10718,15 @@ def api_prometheus():
timestamp = int(datetime.now().timestamp() * 1000) timestamp = int(datetime.now().timestamp() * 1000)
node = socket.gethostname() node = socket.gethostname()
# Non-blocking: returns %CPU since the last psutil call (sampler keeps state primed). # Read from the vital-signs sampler cache to avoid the gevent race
# Avoids 500ms worker block on each Prometheus scrape. # that turns concurrent psutil.cpu_percent(interval=0) calls into 0%.
cpu_usage = psutil.cpu_percent(interval=0) # Falls back to a 100ms sample only if the cache is empty (cold start).
try:
from health_monitor import health_monitor
_hist = health_monitor.state_history.get('cpu_usage') or []
cpu_usage = _hist[-1]['value'] if _hist else psutil.cpu_percent(interval=0.1)
except Exception:
cpu_usage = psutil.cpu_percent(interval=0.1)
memory = psutil.virtual_memory() memory = psutil.virtual_memory()
load_avg = os.getloadavg() load_avg = os.getloadavg()
uptime_seconds = time.time() - psutil.boot_time() uptime_seconds = time.time() - psutil.boot_time()