mirror of
https://github.com/MacRimi/ProxMenux.git
synced 2026-05-20 15:55:00 +00:00
1.2.1.1-beta: notification + LXC + post-install fixes
- flask_notification_routes: PVE webhook X-Webhook-Secret written in standard base64 so PVE can decode it (GH #198)
- notification_channels: Gmail SMTP App Password handling — normalize tls_mode (None/empty → starttls), reject creds without host (false-positive sendmail delivery), surface "AUTH not advertised" hint
- notification_events: is_vzdump_active_on_host() reads /var/log/pve/tasks/active directly so backup_start fallback and vm_shutdown suppression survive a Monitor restart mid-backup
- notification_templates: extract --storage flag from vzdump log → "PBS-Cloud: vm/104/…" instead of generic "PBS:" prefix when multiple PBS endpoints exist
- health_monitor: pve_storage_capacity + zfs_pool_capacity respect per-item dismiss (don't keep category WARNING/CRITICAL after user dismisses); updates_check cache invalidated when /var/log/apt/history.log mtime advances
- lxc_mount_points: PVE volume size from subvol quota (df via /proc/<host_pid>/root/<target> + lxc.conf size=NNNG fallback); host_source_state detects "host detached" zombie binds; per-mount subprocess work parallelised via ThreadPoolExecutor so a CT with many bind mounts doesn't trip the Caddy 3s reverse-proxy timeout
- virtual-machines: "host detached" badge on bind mounts whose host source path disappeared
- auto/customizable_post_install: log2ram FUNC_VERSION 1.1 → 1.2; new log2ram-check.sh vacuums journal + truncates non-rotating logs (pveproxy/access.log, pveam.log) instead of only calling `log2ram write` (which leaves the tmpfs full); auto flow gains the missing SystemMaxUse in /etc/systemd/journald.conf

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -292,6 +292,61 @@ def _record_smartd_observation_impl(title: str, message: str):
|
||||
print(f"[smartd_observation] Error recording smartd observation: {e}")
|
||||
|
||||
|
||||
# ─── Vzdump activity detector (shared, restart-tolerant) ─────────
|
||||
#
|
||||
# A single source of truth for "is a vzdump backup job running on this
|
||||
# host RIGHT NOW", consultable from any watcher and surviving Monitor
|
||||
# restarts. Reads `/var/log/pve/tasks/active` directly — PVE writes the
|
||||
# active UPID there at backup start and removes it on completion, so
|
||||
# it persists across our process restarts.
|
||||
#
|
||||
# Without this, JournalWatcher's in-memory `_last_backup_job_ts` got
|
||||
# reset by every Monitor restart, and any `Starting Backup of VM X`
|
||||
# log lines arriving after that point were treated as standalone
|
||||
# backups — emitting one `backup_start` per guest with `storage=local`
|
||||
# (the fallback path that doesn't see the parent job's --storage flag).
|
||||
# Reported by JC Miñarro 18/05 after a Monitor redeploy mid-job.
|
||||
# Path of the PVE task list that is scanned for running vzdump UPIDs.
_VZDUMP_ACTIVE_FILE = '/var/log/pve/tasks/active'
# Wall-clock time (time.time()) of the last real scan; 0 means "never".
_vzdump_active_cache_ts: float = 0
# Result of the last real scan, served while the cache is still fresh.
_vzdump_active_cache_value: bool = False
_VZDUMP_ACTIVE_CACHE_TTL = 5  # seconds


def _vzdump_pid_alive(pid: int) -> bool:
    """Return True if a process with this PID currently exists.

    Probes with signal 0, which performs the existence/permission check
    without delivering anything to the target.
    """
    # pid 0 and negative pids address process groups / "every process"
    # in kill(2); probing them always succeeds and would turn a malformed
    # UPID line into a false positive.
    if pid <= 0:
        return False
    try:
        os.kill(pid, 0)
    except PermissionError:
        # EPERM: the process exists, we are just not allowed to signal it.
        return True
    except (OSError, OverflowError):
        # ESRCH (no such process), any other kill failure, or a hex value
        # too large to be a pid.
        return False
    return True


def is_vzdump_active_on_host() -> bool:
    """Return True if `/var/log/pve/tasks/active` lists a vzdump UPID
    whose process is still alive (i.e. a backup is currently running).

    The result is cached for `_VZDUMP_ACTIVE_CACHE_TTL` seconds to avoid
    hammering the file on every notification.

    Caller-safe: returns False on any I/O / parse error, including an
    unreadable file, undecodable bytes and malformed UPID lines.
    """
    global _vzdump_active_cache_ts, _vzdump_active_cache_value

    now = time.time()
    if now - _vzdump_active_cache_ts < _VZDUMP_ACTIVE_CACHE_TTL:
        return _vzdump_active_cache_value

    found = False
    try:
        # errors='replace' keeps a stray non-UTF-8 byte from raising
        # UnicodeDecodeError mid-iteration; that is a ValueError, which
        # the OSError handler below would not catch.
        with open(_VZDUMP_ACTIVE_FILE, 'r', encoding='utf-8',
                  errors='replace') as f:
            for line in f:
                # UPID format: UPID:node:pid:pstart:starttime:type:id:user:
                if ':vzdump:' not in line:
                    continue
                parts = line.strip().split(':')
                if len(parts) < 3:
                    continue
                try:
                    pid = int(parts[2], 16)  # PID in UPID is hex
                except ValueError:
                    continue
                if _vzdump_pid_alive(pid):
                    found = True
                    break
    except OSError:
        # Missing or unreadable task file: report "no backup running".
        pass

    _vzdump_active_cache_ts = now
    _vzdump_active_cache_value = found
    return found
|
||||
|
||||
|
||||
# ─── Journal Watcher (Real-time) ─────────────────────────────────
|
||||
|
||||
class JournalWatcher:
|
||||
@@ -1238,6 +1293,14 @@ class JournalWatcher:
|
||||
now = time.time()
|
||||
if now - self._last_backup_job_ts < self._BACKUP_JOB_SUPPRESS_WINDOW:
|
||||
return # Part of an active job -- already notified
|
||||
# Restart-tolerant fallback: if the in-memory timestamp was
|
||||
# cleared (Monitor restarted mid-job) but PVE still has an
|
||||
# active vzdump UPID, this per-guest line is part of that
|
||||
# job — drop it instead of emitting a wrong "Backup started
|
||||
# on local" with storage default. Reported by JC Miñarro 18/05
|
||||
# after a Monitor redeploy during an active PBS backup.
|
||||
if is_vzdump_active_on_host():
|
||||
return
|
||||
fallback_guest = fb.group(1)
|
||||
else:
|
||||
return
|
||||
@@ -1893,10 +1956,15 @@ class TaskWatcher:
|
||||
# Suppress VM/CT start/stop/shutdown while a vzdump is active.
|
||||
# These are backup-induced operations (mode=stop), not user actions.
|
||||
# Exception: if a VM/CT FAILS or has WARNINGS, that IS important.
|
||||
# We check BOTH our in-memory tracking (`_is_vzdump_active`) AND
|
||||
# `tasks/active` on disk (`is_vzdump_active_on_host`). The disk
|
||||
# check survives Monitor restarts mid-backup, which otherwise
|
||||
# cleared `_vzdump_running_since` and exposed the post-restart
|
||||
# shutdown notifications to the user (JC Miñarro 18/05).
|
||||
_BACKUP_NOISE = {'vm_start', 'vm_stop', 'vm_shutdown', 'vm_restart',
|
||||
'ct_start', 'ct_stop', 'ct_shutdown', 'ct_restart'}
|
||||
if event_type in _BACKUP_NOISE and not is_error and not is_warning:
|
||||
if self._is_vzdump_active():
|
||||
if self._is_vzdump_active() or is_vzdump_active_on_host():
|
||||
return
|
||||
|
||||
# Suppress VM/CT stop/shutdown during host shutdown/reboot.
|
||||
|
||||
Reference in New Issue
Block a user