mirror of
https://github.com/MacRimi/ProxMenux.git
synced 2026-05-23 17:14:44 +00:00
1.2.1.1-beta: notification + LXC + post-install fixes
- flask_notification_routes: PVE webhook X-Webhook-Secret written in standard base64 so PVE can decode it (GH #198) - notification_channels: Gmail SMTP App Password handling — normalize tls_mode (None/empty → starttls), reject creds without host (false-positive sendmail delivery), surface "AUTH not advertised" hint - notification_events: is_vzdump_active_on_host() reads /var/log/pve/tasks/active directly so backup_start fallback and vm_shutdown suppression survive a Monitor restart mid-backup - notification_templates: extract --storage flag from vzdump log → "PBS-Cloud: vm/104/…" instead of generic "PBS:" prefix when multiple PBS endpoints exist - health_monitor: pve_storage_capacity + zfs_pool_capacity respect per-item dismiss (don't keep category WARNING/CRITICAL after user dismisses); updates_check cache invalidated when /var/log/apt/history.log mtime advances - lxc_mount_points: PVE volume size from subvol quota (df via /proc/<host_pid>/root/<target> + lxc.conf size=NNNG fallback); host_source_state detects "host detached" zombie binds; per-mount subprocess work parallelised via ThreadPoolExecutor so a CT with many bind mounts doesn't trip the Caddy 3s reverse-proxy timeout - virtual-machines: "host detached" badge on bind mounts whose host source path disappeared - auto/customizable_post_install: log2ram FUNC_VERSION 1.1 → 1.2; new log2ram-check.sh vacuums journal + truncates non-rotating logs (pveproxy/access.log, pveam.log) instead of only calling `log2ram write` (which leaves the tmpfs full); auto flow gains the missing SystemMaxUse in /etc/systemd/journald.conf Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Binary file not shown.
@@ -1 +1 @@
|
|||||||
6249ae8d51e0d7dbd3035ba49f4244ff035c2c6d97d5c55f69ab0dac6a4ea021 ProxMenux-1.2.1.1-beta.AppImage
|
70a510025df81652319d16e0d36e77bea95a965163608232e9aca60ada9c9fbf ProxMenux-1.2.1.1-beta.AppImage
|
||||||
|
|||||||
@@ -170,6 +170,12 @@ interface LxcMountPoint {
|
|||||||
runtime_readonly?: boolean
|
runtime_readonly?: boolean
|
||||||
runtime_reachable?: boolean
|
runtime_reachable?: boolean
|
||||||
runtime_error?: string | null
|
runtime_error?: string | null
|
||||||
|
// Sprint 14.x: host-side bind source state. Detects the case where the
|
||||||
|
// CT still reports a bind as mounted even though the host already
|
||||||
|
// umounted the source (Ignacio Seijo 11/05). Null = N/A (PVE volume,
|
||||||
|
// not a host path).
|
||||||
|
host_source_exists?: boolean | null
|
||||||
|
host_source_is_mountpoint?: boolean | null
|
||||||
}
|
}
|
||||||
|
|
||||||
const fetcher = async (url: string) => {
|
const fetcher = async (url: string) => {
|
||||||
@@ -321,9 +327,18 @@ function MountPointCard({ mp }: { mp: LxcMountPoint }) {
|
|||||||
const isStale = mp.runtime_reachable === false
|
const isStale = mp.runtime_reachable === false
|
||||||
const isReadonly = !isStale && mp.runtime_readonly === true
|
const isReadonly = !isStale && mp.runtime_readonly === true
|
||||||
const isDivergent = mp.runtime_mounted === false // configured but not actually mounted
|
const isDivergent = mp.runtime_mounted === false // configured but not actually mounted
|
||||||
|
// "Zombie bind": the host removed the source (e.g. USB pulled, manual
|
||||||
|
// umount) but the CT mount namespace still shows the bind as mounted.
|
||||||
|
// Reported by Ignacio Seijo (11/05). Only flag host_bind /
|
||||||
|
// pve_storage_bind sources — PVE volume sources have no host path
|
||||||
|
// and `host_source_exists` comes back null for them.
|
||||||
|
const isHostDetached =
|
||||||
|
mp.runtime_mounted === true &&
|
||||||
|
(mp.type === "host_bind" || mp.type === "pve_storage_bind") &&
|
||||||
|
mp.host_source_exists === false
|
||||||
const cardClasses = isStale
|
const cardClasses = isStale
|
||||||
? "border-red-500/50 bg-red-500/5"
|
? "border-red-500/50 bg-red-500/5"
|
||||||
: isDivergent
|
: isDivergent || isHostDetached
|
||||||
? "border-amber-500/40 bg-amber-500/5"
|
? "border-amber-500/40 bg-amber-500/5"
|
||||||
: isReadonly
|
: isReadonly
|
||||||
? "border-amber-500/30 bg-amber-500/5"
|
? "border-amber-500/30 bg-amber-500/5"
|
||||||
@@ -395,7 +410,7 @@ function MountPointCard({ mp }: { mp: LxcMountPoint }) {
|
|||||||
className={
|
className={
|
||||||
isStale
|
isStale
|
||||||
? "bg-red-500/10 text-red-500 border-red-500/20"
|
? "bg-red-500/10 text-red-500 border-red-500/20"
|
||||||
: isDivergent
|
: isDivergent || isHostDetached
|
||||||
? "bg-amber-500/10 text-amber-500 border-amber-500/20"
|
? "bg-amber-500/10 text-amber-500 border-amber-500/20"
|
||||||
: isReadonly
|
: isReadonly
|
||||||
? "bg-amber-500/10 text-amber-500 border-amber-500/20"
|
? "bg-amber-500/10 text-amber-500 border-amber-500/20"
|
||||||
@@ -408,11 +423,13 @@ function MountPointCard({ mp }: { mp: LxcMountPoint }) {
|
|||||||
? "stale"
|
? "stale"
|
||||||
: isDivergent
|
: isDivergent
|
||||||
? "not mounted"
|
? "not mounted"
|
||||||
: isReadonly
|
: isHostDetached
|
||||||
? "read-only"
|
? "host detached"
|
||||||
: mp.runtime_mounted === null
|
: isReadonly
|
||||||
? "stopped"
|
? "read-only"
|
||||||
: "mounted"}
|
: mp.runtime_mounted === null
|
||||||
|
? "stopped"
|
||||||
|
: "mounted"}
|
||||||
</Badge>
|
</Badge>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|||||||
@@ -191,6 +191,24 @@ def _bad_request(msg: str):
|
|||||||
return jsonify({'error': msg}), 400
|
return jsonify({'error': msg}), 400
|
||||||
|
|
||||||
|
|
||||||
|
def _is_loopback_addr(value: str) -> bool:
|
||||||
|
"""Return True for IPv4, IPv6 and IPv4-mapped loopback addresses.
|
||||||
|
|
||||||
|
When Flask is bound to ``::`` for dual-stack support, an HTTP request
|
||||||
|
sent to ``127.0.0.1`` can be reported as ``::ffff:127.0.0.1``. Treat it
|
||||||
|
as local so the PVE webhook keeps the intended localhost trust path.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
import ipaddress
|
||||||
|
addr = ipaddress.ip_address(value)
|
||||||
|
if addr.is_loopback:
|
||||||
|
return True
|
||||||
|
ipv4_mapped = getattr(addr, 'ipv4_mapped', None)
|
||||||
|
return bool(ipv4_mapped and ipv4_mapped.is_loopback)
|
||||||
|
except ValueError:
|
||||||
|
return value == 'localhost'
|
||||||
|
|
||||||
|
|
||||||
def _validate_event_type(value: str) -> bool:
|
def _validate_event_type(value: str) -> bool:
|
||||||
return isinstance(value, str) and bool(_EVENT_TYPE_RE.match(value))
|
return isinstance(value, str) and bool(_EVENT_TYPE_RE.match(value))
|
||||||
|
|
||||||
@@ -983,9 +1001,15 @@ def setup_pve_webhook_core() -> dict:
|
|||||||
# endpoint depends entirely on the localhost-bypass and any move
|
# endpoint depends entirely on the localhost-bypass and any move
|
||||||
# to a non-loopback bind silently breaks auth. Audit Tier 3.1 —
|
# to a non-loopback bind silently breaks auth. Audit Tier 3.1 —
|
||||||
# `setup_pve_webhook_core` no escribe secret en priv cfg.
|
# `setup_pve_webhook_core` no escribe secret en priv cfg.
|
||||||
|
#
|
||||||
|
# PVE stores `secret value=` in STANDARD base64 and decodes it
|
||||||
|
# before emitting the header. Writing the raw token here triggered
|
||||||
|
# `could not decode UTF8 string from base64, key 'X-Webhook-Secret' (500)`
|
||||||
|
# whenever `token_urlsafe` produced `-` or `_` chars (GH #198).
|
||||||
|
secret_b64 = base64.b64encode(secret.encode()).decode()
|
||||||
priv_block = (
|
priv_block = (
|
||||||
f"webhook: {_PVE_ENDPOINT_ID}\n"
|
f"webhook: {_PVE_ENDPOINT_ID}\n"
|
||||||
f" secret name=X-Webhook-Secret,value={secret}\n"
|
f" secret name=X-Webhook-Secret,value={secret_b64}\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
if priv_text is not None:
|
if priv_text is not None:
|
||||||
@@ -1225,7 +1249,7 @@ def proxmox_webhook():
|
|||||||
_reject = lambda code, error, status: (jsonify({'accepted': False, 'error': error}), status)
|
_reject = lambda code, error, status: (jsonify({'accepted': False, 'error': error}), status)
|
||||||
|
|
||||||
client_ip = request.remote_addr or ''
|
client_ip = request.remote_addr or ''
|
||||||
is_localhost = client_ip in ('127.0.0.1', '::1')
|
is_localhost = _is_loopback_addr(client_ip)
|
||||||
|
|
||||||
# CSRF defence-in-depth: reject `application/x-www-form-urlencoded`
|
# CSRF defence-in-depth: reject `application/x-www-form-urlencoded`
|
||||||
# bodies. PVE always sends `application/json`; form-encoded bodies
|
# bodies. PVE always sends `application/json`; form-encoded bodies
|
||||||
|
|||||||
@@ -4197,22 +4197,37 @@ class HealthMonitor:
|
|||||||
"""
|
"""
|
||||||
cache_key = 'updates_check'
|
cache_key = 'updates_check'
|
||||||
current_time = time.time()
|
current_time = time.time()
|
||||||
|
apt_history_path = '/var/log/apt/history.log'
|
||||||
# Cache for 10 minutes
|
|
||||||
if cache_key in self.last_check_times:
|
# Detect a manual `apt install/upgrade` since the last check by
|
||||||
if current_time - self.last_check_times[cache_key] < 600:
|
# comparing /var/log/apt/history.log's mtime against the cache
|
||||||
return self.cached_results.get(cache_key)
|
# timestamp. apt appends to this file on every transaction, so a
|
||||||
|
# newer mtime means the local package state changed and the cached
|
||||||
|
# pending-updates list is stale. Reported by Alberto (14/5): the
|
||||||
|
# dashboard tile kept showing pending updates ~hours after he ran
|
||||||
|
# `apt upgrade` manually. Cheap stat call; runs at most once per
|
||||||
|
# /api/health/full request.
|
||||||
|
history_mtime = None
|
||||||
|
try:
|
||||||
|
if os.path.exists(apt_history_path):
|
||||||
|
history_mtime = os.path.getmtime(apt_history_path)
|
||||||
|
except Exception:
|
||||||
|
history_mtime = None
|
||||||
|
|
||||||
|
if cache_key in self.last_check_times:
|
||||||
|
cache_ts = self.last_check_times[cache_key]
|
||||||
|
history_changed = (history_mtime is not None and history_mtime > cache_ts)
|
||||||
|
if not history_changed and current_time - cache_ts < 600:
|
||||||
|
return self.cached_results.get(cache_key)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
apt_history_path = '/var/log/apt/history.log'
|
|
||||||
last_update_days = None
|
last_update_days = None
|
||||||
sec_result = None
|
sec_result = None
|
||||||
age_result = None
|
age_result = None
|
||||||
|
|
||||||
if os.path.exists(apt_history_path):
|
if history_mtime is not None:
|
||||||
try:
|
try:
|
||||||
mtime = os.path.getmtime(apt_history_path)
|
days_since_update = (current_time - history_mtime) / 86400
|
||||||
days_since_update = (current_time - mtime) / 86400
|
|
||||||
last_update_days = int(days_since_update)
|
last_update_days = int(days_since_update)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
@@ -5775,12 +5790,24 @@ class HealthMonitor:
|
|||||||
'used_bytes': used,
|
'used_bytes': used,
|
||||||
}
|
}
|
||||||
error_key = f'pve_storage_full_{name}'
|
error_key = f'pve_storage_full_{name}'
|
||||||
|
# If the user already dismissed this exact error (within the
|
||||||
|
# suppression window), don't count it toward the category
|
||||||
|
# severity badge. Without this guard the storage section stayed
|
||||||
|
# WARNING/CRITICAL forever even after dismiss because the
|
||||||
|
# underlying % is unchanged — `record_error` correctly returned
|
||||||
|
# `skipped_acknowledged` to silence the notification side, but
|
||||||
|
# the dashboard counter ignored that signal and the user saw
|
||||||
|
# "Storage: 1 Warning" with no way to clear it. Reported on
|
||||||
|
# the community channel re: PBS-lleno (17-18/05).
|
||||||
|
is_dismissed = health_persistence.is_error_acknowledged(error_key)
|
||||||
if pct >= crit_pct:
|
if pct >= crit_pct:
|
||||||
entry['status'] = 'CRITICAL'
|
entry['status'] = 'CRITICAL' if not is_dismissed else 'INFO'
|
||||||
entry['error_key'] = error_key
|
entry['error_key'] = error_key
|
||||||
entry['dismissable'] = True
|
entry['dismissable'] = True
|
||||||
|
entry['dismissed'] = is_dismissed
|
||||||
checks[label] = entry
|
checks[label] = entry
|
||||||
critical_labels.append(label)
|
if not is_dismissed:
|
||||||
|
critical_labels.append(label)
|
||||||
emitted_keys.add(error_key)
|
emitted_keys.add(error_key)
|
||||||
health_persistence.record_error(
|
health_persistence.record_error(
|
||||||
error_key=error_key,
|
error_key=error_key,
|
||||||
@@ -5790,11 +5817,13 @@ class HealthMonitor:
|
|||||||
details=entry,
|
details=entry,
|
||||||
)
|
)
|
||||||
elif pct >= warn_pct:
|
elif pct >= warn_pct:
|
||||||
entry['status'] = 'WARNING'
|
entry['status'] = 'WARNING' if not is_dismissed else 'INFO'
|
||||||
entry['error_key'] = error_key
|
entry['error_key'] = error_key
|
||||||
entry['dismissable'] = True
|
entry['dismissable'] = True
|
||||||
|
entry['dismissed'] = is_dismissed
|
||||||
checks[label] = entry
|
checks[label] = entry
|
||||||
warning_labels.append(label)
|
if not is_dismissed:
|
||||||
|
warning_labels.append(label)
|
||||||
emitted_keys.add(error_key)
|
emitted_keys.add(error_key)
|
||||||
health_persistence.record_error(
|
health_persistence.record_error(
|
||||||
error_key=error_key,
|
error_key=error_key,
|
||||||
@@ -5877,12 +5906,18 @@ class HealthMonitor:
|
|||||||
'pool_name': name,
|
'pool_name': name,
|
||||||
}
|
}
|
||||||
error_key = f'zfs_pool_full_{name}'
|
error_key = f'zfs_pool_full_{name}'
|
||||||
|
# Same dismiss-respect as `_check_pve_storage_capacity`. A pool
|
||||||
|
# that the user dismissed keeps its underlying % but should no
|
||||||
|
# longer flip the category badge to WARNING/CRITICAL.
|
||||||
|
is_dismissed = health_persistence.is_error_acknowledged(error_key)
|
||||||
if pct >= crit_pct:
|
if pct >= crit_pct:
|
||||||
entry['status'] = 'CRITICAL'
|
entry['status'] = 'CRITICAL' if not is_dismissed else 'INFO'
|
||||||
entry['error_key'] = error_key
|
entry['error_key'] = error_key
|
||||||
entry['dismissable'] = True
|
entry['dismissable'] = True
|
||||||
|
entry['dismissed'] = is_dismissed
|
||||||
checks[name] = entry
|
checks[name] = entry
|
||||||
critical_labels.append(name)
|
if not is_dismissed:
|
||||||
|
critical_labels.append(name)
|
||||||
emitted_keys.add(error_key)
|
emitted_keys.add(error_key)
|
||||||
health_persistence.record_error(
|
health_persistence.record_error(
|
||||||
error_key=error_key,
|
error_key=error_key,
|
||||||
@@ -5892,11 +5927,13 @@ class HealthMonitor:
|
|||||||
details=entry,
|
details=entry,
|
||||||
)
|
)
|
||||||
elif pct >= warn_pct:
|
elif pct >= warn_pct:
|
||||||
entry['status'] = 'WARNING'
|
entry['status'] = 'WARNING' if not is_dismissed else 'INFO'
|
||||||
entry['error_key'] = error_key
|
entry['error_key'] = error_key
|
||||||
entry['dismissable'] = True
|
entry['dismissable'] = True
|
||||||
|
entry['dismissed'] = is_dismissed
|
||||||
checks[name] = entry
|
checks[name] = entry
|
||||||
warning_labels.append(name)
|
if not is_dismissed:
|
||||||
|
warning_labels.append(name)
|
||||||
emitted_keys.add(error_key)
|
emitted_keys.add(error_key)
|
||||||
health_persistence.record_error(
|
health_persistence.record_error(
|
||||||
error_key=error_key,
|
error_key=error_key,
|
||||||
|
|||||||
@@ -231,17 +231,134 @@ def _df_path(path: str) -> dict[str, Optional[int]]:
|
|||||||
return empty
|
return empty
|
||||||
|
|
||||||
|
|
||||||
|
_SIZE_UNIT_TO_BYTES = {
|
||||||
|
"": 1, "B": 1,
|
||||||
|
"K": 1024, "KB": 1024, "KIB": 1024,
|
||||||
|
"M": 1024 ** 2, "MB": 1024 ** 2, "MIB": 1024 ** 2,
|
||||||
|
"G": 1024 ** 3, "GB": 1024 ** 3, "GIB": 1024 ** 3,
|
||||||
|
"T": 1024 ** 4, "TB": 1024 ** 4, "TIB": 1024 ** 4,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_pve_size(value: str) -> Optional[int]:
|
||||||
|
"""Convert PVE-style sizes (``150G``, ``32M``, ``2T``) to bytes.
|
||||||
|
|
||||||
|
PVE stores volume sizes in lxc.conf as ``size=<num><unit>`` where
|
||||||
|
unit is a single letter from {K,M,G,T} (powers of 1024). Returns
|
||||||
|
None for empty/unparseable input — callers fall through to
|
||||||
|
pvesm-based totals.
|
||||||
|
"""
|
||||||
|
if value is None:
|
||||||
|
return None
|
||||||
|
s = str(value).strip().upper()
|
||||||
|
if not s:
|
||||||
|
return None
|
||||||
|
m = re.match(r"^(\d+(?:\.\d+)?)\s*([KMGT]?I?B?)$", s)
|
||||||
|
if not m:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
magnitude = float(m.group(1))
|
||||||
|
except ValueError:
|
||||||
|
return None
|
||||||
|
unit = m.group(2) or ""
|
||||||
|
multiplier = _SIZE_UNIT_TO_BYTES.get(unit)
|
||||||
|
if multiplier is None:
|
||||||
|
return None
|
||||||
|
return int(magnitude * multiplier)
|
||||||
|
|
||||||
|
|
||||||
|
def _df_via_host_pid(host_pid: str, ct_target: str) -> dict[str, Optional[int]]:
|
||||||
|
"""``df`` the CT-internal path via ``/proc/<pid>/root`` so we get
|
||||||
|
the filesystem as the container sees it, including ZFS dataset
|
||||||
|
quotas. Used for ``pve_volume`` mounts whose ``pvesm status``
|
||||||
|
numbers reflect the whole storage pool instead of the per-subvol
|
||||||
|
quota — without this the UI showed 851 GB total for a 150 GB ZFS
|
||||||
|
subvol because pvesm reports the rpool's free space.
|
||||||
|
"""
|
||||||
|
empty = {"total_bytes": None, "used_bytes": None, "available_bytes": None}
|
||||||
|
if not host_pid or not ct_target:
|
||||||
|
return empty
|
||||||
|
full = f"/proc/{host_pid}/root{ct_target}"
|
||||||
|
try:
|
||||||
|
proc = subprocess.run(
|
||||||
|
["df", "-B1", "--output=size,used,avail", full],
|
||||||
|
capture_output=True, text=True, timeout=_STAT_TIMEOUT,
|
||||||
|
)
|
||||||
|
if proc.returncode != 0:
|
||||||
|
return empty
|
||||||
|
lines = [ln for ln in proc.stdout.strip().splitlines() if ln.strip()]
|
||||||
|
if len(lines) < 2:
|
||||||
|
return empty
|
||||||
|
parts = lines[-1].split()
|
||||||
|
if len(parts) < 3:
|
||||||
|
return empty
|
||||||
|
return {
|
||||||
|
"total_bytes": int(parts[0]),
|
||||||
|
"used_bytes": int(parts[1]),
|
||||||
|
"available_bytes": int(parts[2]),
|
||||||
|
}
|
||||||
|
except (subprocess.TimeoutExpired, OSError, ValueError):
|
||||||
|
return empty
|
||||||
|
|
||||||
|
|
||||||
def _capacity_for(source: str, classification: dict[str, Any],
|
def _capacity_for(source: str, classification: dict[str, Any],
|
||||||
pve_storages: dict[str, dict[str, Any]]) -> dict[str, Optional[int]]:
|
pve_storages: dict[str, dict[str, Any]],
|
||||||
|
config_options: Optional[dict[str, Any]] = None,
|
||||||
|
host_pid: str = "",
|
||||||
|
target: str = "") -> dict[str, Optional[int]]:
|
||||||
"""Return total/used/available bytes for the *source* of a mount.
|
"""Return total/used/available bytes for the *source* of a mount.
|
||||||
|
|
||||||
``pve_volume`` and ``pve_storage_bind`` reuse the numbers from
|
``pve_volume`` quota handling (Sprint 14.x — Ignacio Seijo 10/05):
|
||||||
``pvesm status`` (already loaded once). ``host_bind`` falls back to
|
A ``mp6: local-zfs:subvol-310-disk-1,size=150G,...`` line carved
|
||||||
``df`` of the host path. None values mean the lookup didn't
|
out a 150 GB subvol from a 1 TB pool. The previous code read
|
||||||
succeed and the UI will render n/a.
|
``pvesm status local-zfs`` and reported 851 GB total / 19% used —
|
||||||
|
reflecting the whole pool, not the subvol. We now prefer, in
|
||||||
|
order:
|
||||||
|
1) ``df`` of ``/proc/<host_pid>/root/<target>`` when the CT is
|
||||||
|
up — gives the correct view-from-inside numbers including
|
||||||
|
the quota.
|
||||||
|
2) ``size=<N>`` from lxc.conf as the total; usage is unknown
|
||||||
|
when the CT isn't running, so the UI shows total only.
|
||||||
|
3) Fallback to ``pvesm status`` (pool numbers) when the entry
|
||||||
|
has no declared size — that's the legacy behaviour for
|
||||||
|
sizeless block volumes (lvm raw, rbd).
|
||||||
|
|
||||||
|
``pve_storage_bind`` mounts (NFS, CIFS at ``/mnt/pve/...``) keep
|
||||||
|
the pvesm-based numbers because the storage IS the source of truth
|
||||||
|
for those.
|
||||||
|
|
||||||
|
``host_bind`` falls back to ``df`` of the host path. None values
|
||||||
|
mean the lookup didn't succeed and the UI will render n/a.
|
||||||
"""
|
"""
|
||||||
ctype = classification.get("type")
|
ctype = classification.get("type")
|
||||||
if ctype in ("pve_volume", "pve_storage_bind"):
|
config_options = config_options or {}
|
||||||
|
declared_size_bytes = _parse_pve_size(config_options.get("size"))
|
||||||
|
|
||||||
|
if ctype == "pve_volume":
|
||||||
|
# 1) Live numbers from inside the CT (respects quota).
|
||||||
|
if host_pid and target:
|
||||||
|
live = _df_via_host_pid(host_pid, target)
|
||||||
|
if live.get("total_bytes") is not None:
|
||||||
|
return live
|
||||||
|
# 2) CT down (or df failed): expose declared quota as total.
|
||||||
|
if declared_size_bytes is not None:
|
||||||
|
return {
|
||||||
|
"total_bytes": declared_size_bytes,
|
||||||
|
"used_bytes": None,
|
||||||
|
"available_bytes": None,
|
||||||
|
}
|
||||||
|
# 3) No quota declared: legacy pool-level numbers.
|
||||||
|
sid = classification.get("origin_storage", "")
|
||||||
|
st = pve_storages.get(sid)
|
||||||
|
if not st:
|
||||||
|
return {"total_bytes": None, "used_bytes": None, "available_bytes": None}
|
||||||
|
return {
|
||||||
|
"total_bytes": st["total_kib"] * 1024 if st.get("total_kib") is not None else None,
|
||||||
|
"used_bytes": st["used_kib"] * 1024 if st.get("used_kib") is not None else None,
|
||||||
|
"available_bytes": st["avail_kib"] * 1024 if st.get("avail_kib") is not None else None,
|
||||||
|
}
|
||||||
|
|
||||||
|
if ctype == "pve_storage_bind":
|
||||||
sid = classification.get("origin_storage", "")
|
sid = classification.get("origin_storage", "")
|
||||||
st = pve_storages.get(sid)
|
st = pve_storages.get(sid)
|
||||||
if not st:
|
if not st:
|
||||||
@@ -312,6 +429,45 @@ def _read_ct_proc_mounts(host_pid: str) -> list[dict[str, Any]]:
|
|||||||
return out
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
def _host_source_state(source: str) -> dict[str, Any]:
|
||||||
|
"""Inspect a host-side bind source to detect 'zombie' binds.
|
||||||
|
|
||||||
|
Reported by Ignacio Seijo (11/05): when the host unmounted
|
||||||
|
``/mnt/nas1_con_backup`` the CT kept reporting it as ``mounted``
|
||||||
|
because the bind into the CT's mount namespace was still live —
|
||||||
|
the kernel doesn't propagate the host-side umount to the child
|
||||||
|
namespace. The CT's view becomes a frozen snapshot of whatever
|
||||||
|
was under the path at bind time (usually an empty dir).
|
||||||
|
|
||||||
|
Returns ``{exists, is_mountpoint, error}``. ``exists=False`` means
|
||||||
|
the source path is gone entirely (e.g. a USB drive that was
|
||||||
|
physically removed). ``is_mountpoint=False`` while ``exists=True``
|
||||||
|
is the zombie-bind case the UI flags.
|
||||||
|
|
||||||
|
Only meaningful for absolute host paths. Storage-id sources
|
||||||
|
(``local-zfs:subvol-...``) return ``{None, None, None}`` since
|
||||||
|
there is no host path to inspect.
|
||||||
|
"""
|
||||||
|
empty = {"exists": None, "is_mountpoint": None, "error": None}
|
||||||
|
if not source or not source.startswith("/"):
|
||||||
|
return empty
|
||||||
|
try:
|
||||||
|
st_exists = os.path.exists(source)
|
||||||
|
except OSError as e:
|
||||||
|
return {"exists": None, "is_mountpoint": None, "error": str(e)}
|
||||||
|
if not st_exists:
|
||||||
|
return {"exists": False, "is_mountpoint": False, "error": "path missing"}
|
||||||
|
try:
|
||||||
|
proc = subprocess.run(
|
||||||
|
["mountpoint", "-q", source],
|
||||||
|
capture_output=True, text=True, timeout=_STAT_TIMEOUT,
|
||||||
|
)
|
||||||
|
is_mp = (proc.returncode == 0)
|
||||||
|
return {"exists": True, "is_mountpoint": is_mp, "error": None}
|
||||||
|
except (subprocess.TimeoutExpired, OSError) as e:
|
||||||
|
return {"exists": True, "is_mountpoint": None, "error": str(e)}
|
||||||
|
|
||||||
|
|
||||||
def _stat_via_host(host_pid: str, ct_target: str,
|
def _stat_via_host(host_pid: str, ct_target: str,
|
||||||
timeout: int = _STAT_TIMEOUT) -> dict[str, Any]:
|
timeout: int = _STAT_TIMEOUT) -> dict[str, Any]:
|
||||||
"""Stat the container-internal target through /proc/<pid>/root —
|
"""Stat the container-internal target through /proc/<pid>/root —
|
||||||
@@ -366,11 +522,37 @@ def get_lxc_mount_points(vmid: str) -> dict[str, Any]:
|
|||||||
out: list[dict[str, Any]] = []
|
out: list[dict[str, Any]] = []
|
||||||
matched_targets: set[str] = set()
|
matched_targets: set[str] = set()
|
||||||
|
|
||||||
for entry in config_entries:
|
# Pre-compute per-entry subprocess work in parallel so a CT with
|
||||||
|
# many mountpoints doesn't pay N×(_STAT_TIMEOUT + _STAT_TIMEOUT)
|
||||||
|
# serialised cost. The previous serial path tripped Caddy's 3s
|
||||||
|
# reverse-proxy timeout (Ignacio Seijo 11/05: "/api/lxc/210/
|
||||||
|
# mount-points → 502 (3.00s)") on hosts with 5+ binds. ThreadPool
|
||||||
|
# is the right primitive — these are all I/O-bound `df`/`stat`
|
||||||
|
# calls hitting independent paths.
|
||||||
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
|
||||||
|
def _gather_one(entry):
|
||||||
|
src = entry.get("source", "")
|
||||||
|
tgt = entry.get("target", "")
|
||||||
|
classification = _classify(src, pve_storages)
|
||||||
|
capacity = _capacity_for(
|
||||||
|
src, classification, pve_storages,
|
||||||
|
config_options=entry.get("config_options", {}),
|
||||||
|
host_pid=host_pid if running else "",
|
||||||
|
target=tgt,
|
||||||
|
)
|
||||||
|
host_src = _host_source_state(src)
|
||||||
|
live_target = bool(running and tgt and tgt in rt_by_target)
|
||||||
|
health = _stat_via_host(host_pid, tgt) if live_target else None
|
||||||
|
return entry, classification, capacity, host_src, live_target, health
|
||||||
|
|
||||||
|
max_workers = max(2, min(8, len(config_entries) or 1))
|
||||||
|
with ThreadPoolExecutor(max_workers=max_workers) as pool:
|
||||||
|
gathered = list(pool.map(_gather_one, config_entries))
|
||||||
|
|
||||||
|
for entry, cls, cap, host_src, live_target, health in gathered:
|
||||||
source = entry.get("source", "")
|
source = entry.get("source", "")
|
||||||
target = entry.get("target", "")
|
target = entry.get("target", "")
|
||||||
cls = _classify(source, pve_storages)
|
|
||||||
cap = _capacity_for(source, cls, pve_storages)
|
|
||||||
|
|
||||||
item: dict[str, Any] = {
|
item: dict[str, Any] = {
|
||||||
"mp_index": entry.get("mp_index", ""),
|
"mp_index": entry.get("mp_index", ""),
|
||||||
@@ -382,13 +564,14 @@ def get_lxc_mount_points(vmid: str) -> dict[str, Any]:
|
|||||||
"origin_label": cls.get("origin_label", source),
|
"origin_label": cls.get("origin_label", source),
|
||||||
"config_options": entry.get("config_options", {}),
|
"config_options": entry.get("config_options", {}),
|
||||||
"config_flags": entry.get("config_flags", []),
|
"config_flags": entry.get("config_flags", []),
|
||||||
|
"host_source_exists": host_src["exists"],
|
||||||
|
"host_source_is_mountpoint": host_src["is_mountpoint"],
|
||||||
**cap,
|
**cap,
|
||||||
}
|
}
|
||||||
|
|
||||||
# Runtime enrichment when CT is up.
|
# Runtime enrichment when CT is up.
|
||||||
if running and target and target in rt_by_target:
|
if live_target:
|
||||||
rt = rt_by_target[target]
|
rt = rt_by_target[target]
|
||||||
health = _stat_via_host(host_pid, target)
|
|
||||||
item.update({
|
item.update({
|
||||||
"runtime_mounted": True,
|
"runtime_mounted": True,
|
||||||
"runtime_source": rt["rt_source"],
|
"runtime_source": rt["rt_source"],
|
||||||
@@ -416,34 +599,42 @@ def get_lxc_mount_points(vmid: str) -> dict[str, Any]:
|
|||||||
# original Sprint 13.24 issue revolves around catching them.
|
# original Sprint 13.24 issue revolves around catching them.
|
||||||
ad_hoc: list[dict[str, Any]] = []
|
ad_hoc: list[dict[str, Any]] = []
|
||||||
if running:
|
if running:
|
||||||
for rt in rt_mounts:
|
ad_hoc_candidates = [
|
||||||
target = rt["rt_target"]
|
rt for rt in rt_mounts
|
||||||
if target in matched_targets:
|
if rt["rt_target"] not in matched_targets
|
||||||
continue
|
and _REMOTE_FS_RE.match(rt["rt_fstype"])
|
||||||
if not _REMOTE_FS_RE.match(rt["rt_fstype"]):
|
]
|
||||||
continue
|
# Same parallelisation as the configured-mp loop: stat'ing
|
||||||
health = _stat_via_host(host_pid, target)
|
# stale NFS exports serially can dominate the request and
|
||||||
ad_hoc.append({
|
# push it past the proxy timeout.
|
||||||
"mp_index": "",
|
if ad_hoc_candidates:
|
||||||
"source": rt["rt_source"],
|
with ThreadPoolExecutor(max_workers=max_workers) as pool:
|
||||||
"target": target,
|
healths = list(pool.map(
|
||||||
"type": "ad_hoc",
|
lambda rt: _stat_via_host(host_pid, rt["rt_target"]),
|
||||||
"origin_storage": "",
|
ad_hoc_candidates,
|
||||||
"origin_storage_type": "",
|
))
|
||||||
"origin_label": rt["rt_source"],
|
for rt, health in zip(ad_hoc_candidates, healths):
|
||||||
"config_options": {},
|
ad_hoc.append({
|
||||||
"config_flags": [],
|
"mp_index": "",
|
||||||
"total_bytes": None,
|
"source": rt["rt_source"],
|
||||||
"used_bytes": None,
|
"target": rt["rt_target"],
|
||||||
"available_bytes": None,
|
"type": "ad_hoc",
|
||||||
"runtime_mounted": True,
|
"origin_storage": "",
|
||||||
"runtime_source": rt["rt_source"],
|
"origin_storage_type": "",
|
||||||
"runtime_fstype": rt["rt_fstype"],
|
"origin_label": rt["rt_source"],
|
||||||
"runtime_options": rt["rt_options"],
|
"config_options": {},
|
||||||
"runtime_readonly": rt["rt_readonly"],
|
"config_flags": [],
|
||||||
"runtime_reachable": health["reachable"],
|
"total_bytes": None,
|
||||||
"runtime_error": health["error"],
|
"used_bytes": None,
|
||||||
})
|
"available_bytes": None,
|
||||||
|
"runtime_mounted": True,
|
||||||
|
"runtime_source": rt["rt_source"],
|
||||||
|
"runtime_fstype": rt["rt_fstype"],
|
||||||
|
"runtime_options": rt["rt_options"],
|
||||||
|
"runtime_readonly": rt["rt_readonly"],
|
||||||
|
"runtime_reachable": health["reachable"],
|
||||||
|
"runtime_error": health["error"],
|
||||||
|
})
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"ok": True,
|
"ok": True,
|
||||||
|
|||||||
@@ -508,14 +508,22 @@ class EmailChannel(NotificationChannel):
|
|||||||
|
|
||||||
def __init__(self, config: Dict[str, str]):
|
def __init__(self, config: Dict[str, str]):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.host = config.get('host', '')
|
self.host = (config.get('host', '') or '').strip()
|
||||||
self.port = int(config.get('port', 587) or 587)
|
self.port = int(config.get('port', 587) or 587)
|
||||||
self.username = config.get('username', '')
|
self.username = config.get('username', '') or ''
|
||||||
self.password = config.get('password', '')
|
self.password = config.get('password', '') or ''
|
||||||
self.tls_mode = config.get('tls_mode', 'starttls') # none | starttls | ssl
|
# `dict.get(k, default)` only returns default when the key is MISSING;
|
||||||
self.from_address = config.get('from_address', '')
|
# if the user previously saved an empty string or null, we'd end up
|
||||||
|
# with `tls_mode=''` and silently skip STARTTLS — which causes
|
||||||
|
# `SMTPNotSupportedError: SMTP AUTH extension not supported by server`
|
||||||
|
# on Gmail/Outlook because they only advertise AUTH post-STARTTLS.
|
||||||
|
tls_raw = (config.get('tls_mode') or 'starttls').strip().lower()
|
||||||
|
if tls_raw not in ('none', 'starttls', 'ssl'):
|
||||||
|
tls_raw = 'starttls'
|
||||||
|
self.tls_mode = tls_raw
|
||||||
|
self.from_address = config.get('from_address', '') or ''
|
||||||
self.to_addresses = self._parse_recipients(config.get('to_addresses', ''))
|
self.to_addresses = self._parse_recipients(config.get('to_addresses', ''))
|
||||||
self.subject_prefix = config.get('subject_prefix', '[ProxMenux]')
|
self.subject_prefix = config.get('subject_prefix', '[ProxMenux]') or '[ProxMenux]'
|
||||||
self.timeout = int(config.get('timeout', 10) or 10)
|
self.timeout = int(config.get('timeout', 10) or 10)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@@ -529,6 +537,17 @@ class EmailChannel(NotificationChannel):
|
|||||||
return False, 'No recipients configured'
|
return False, 'No recipients configured'
|
||||||
if not self.from_address:
|
if not self.from_address:
|
||||||
return False, 'No from address configured'
|
return False, 'No from address configured'
|
||||||
|
# Credentials without an explicit SMTP host would silently fall back to
|
||||||
|
# `/usr/sbin/sendmail`, which ignores username/password entirely — the
|
||||||
|
# test returns OK because Postfix queued the message, but the relay is
|
||||||
|
# never authenticated and the mail rots in the local mailq. Reported by
|
||||||
|
# Ignacio Seijo: "dejando host/puerto en blanco el test pasa pero el
|
||||||
|
# correo nunca llega".
|
||||||
|
if (self.username or self.password) and not self.host:
|
||||||
|
return False, ('SMTP credentials provided but no host configured. '
|
||||||
|
'Set host (e.g. smtp.gmail.com) and port (587) — '
|
||||||
|
'without a host the message goes to the local MTA '
|
||||||
|
'and your username/password are ignored.')
|
||||||
# Must have SMTP host OR local sendmail available
|
# Must have SMTP host OR local sendmail available
|
||||||
if not self.host:
|
if not self.host:
|
||||||
import os
|
import os
|
||||||
@@ -591,8 +610,33 @@ class EmailChannel(NotificationChannel):
|
|||||||
server.ehlo() # Re-identify after TLS -- server re-announces AUTH
|
server.ehlo() # Re-identify after TLS -- server re-announces AUTH
|
||||||
|
|
||||||
if self.username and self.password:
|
if self.username and self.password:
|
||||||
|
# If the server doesn't advertise AUTH after our EHLO sequence,
|
||||||
|
# smtplib's `login()` raises `SMTPNotSupportedError` with the
|
||||||
|
# opaque message "SMTP AUTH extension not supported by server".
|
||||||
|
# That fired for users who left tls_mode blank or pointed at
|
||||||
|
# port 587 without STARTTLS — Gmail only advertises AUTH after
|
||||||
|
# the TLS handshake. Surface the real reason here.
|
||||||
|
if not server.has_extn('auth'):
|
||||||
|
hint = (
|
||||||
|
f"server={self.host}:{self.port} tls_mode={self.tls_mode}"
|
||||||
|
)
|
||||||
|
if self.tls_mode == 'none':
|
||||||
|
return 0, (
|
||||||
|
'SMTP server did not advertise AUTH after EHLO. '
|
||||||
|
'TLS is disabled — most providers (Gmail, Outlook, '
|
||||||
|
'Office365) only allow login after STARTTLS or SSL. '
|
||||||
|
f'Switch TLS Mode to STARTTLS (port 587) or SSL/TLS '
|
||||||
|
f'(port 465). [{hint}]'
|
||||||
|
)
|
||||||
|
return 0, (
|
||||||
|
'SMTP server did not advertise AUTH after EHLO. '
|
||||||
|
'Verify the host/port/TLS combination. For Gmail use '
|
||||||
|
'smtp.gmail.com:587 with STARTTLS and an App Password '
|
||||||
|
'(https://myaccount.google.com/apppasswords); for '
|
||||||
|
f'Outlook use smtp.office365.com:587 with STARTTLS. [{hint}]'
|
||||||
|
)
|
||||||
server.login(self.username, self.password)
|
server.login(self.username, self.password)
|
||||||
|
|
||||||
server.send_message(msg)
|
server.send_message(msg)
|
||||||
server.quit()
|
server.quit()
|
||||||
server = None
|
server = None
|
||||||
@@ -601,8 +645,10 @@ class EmailChannel(NotificationChannel):
|
|||||||
return 0, f'SMTP authentication failed (check username/password or app-specific password): {e}'
|
return 0, f'SMTP authentication failed (check username/password or app-specific password): {e}'
|
||||||
except smtplib.SMTPNotSupportedError as e:
|
except smtplib.SMTPNotSupportedError as e:
|
||||||
return 0, (f'SMTP AUTH not supported by server. '
|
return 0, (f'SMTP AUTH not supported by server. '
|
||||||
f'This may mean the server requires OAuth2 or an App Password '
|
f'TLS mode: {self.tls_mode}, port: {self.port}. '
|
||||||
f'instead of regular credentials: {e}')
|
f'Gmail/Outlook require STARTTLS on 587 or SSL/TLS on 465. '
|
||||||
|
f'For Gmail, generate an App Password at '
|
||||||
|
f'https://myaccount.google.com/apppasswords. Detail: {e}')
|
||||||
except smtplib.SMTPConnectError as e:
|
except smtplib.SMTPConnectError as e:
|
||||||
return 0, f'SMTP connection failed: {e}'
|
return 0, f'SMTP connection failed: {e}'
|
||||||
except smtplib.SMTPException as e:
|
except smtplib.SMTPException as e:
|
||||||
|
|||||||
@@ -292,6 +292,61 @@ def _record_smartd_observation_impl(title: str, message: str):
|
|||||||
print(f"[smartd_observation] Error recording smartd observation: {e}")
|
print(f"[smartd_observation] Error recording smartd observation: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Vzdump activity detector (shared, restart-tolerant) ─────────
|
||||||
|
#
|
||||||
|
# A single source of truth for "is a vzdump backup job running on this
|
||||||
|
# host RIGHT NOW", consultable from any watcher and surviving Monitor
|
||||||
|
# restarts. Reads `/var/log/pve/tasks/active` directly — PVE writes the
|
||||||
|
# active UPID there at backup start and removes it on completion, so
|
||||||
|
# it persists across our process restarts.
|
||||||
|
#
|
||||||
|
# Without this, JournalWatcher's in-memory `_last_backup_job_ts` got
|
||||||
|
# reset by every Monitor restart, and any `Starting Backup of VM X`
|
||||||
|
# log lines arriving after that point were treated as standalone
|
||||||
|
# backups — emitting one `backup_start` per guest with `storage=local`
|
||||||
|
# (the fallback path that doesn't see the parent job's --storage flag).
|
||||||
|
# Reported by JC Miñarro 18/05 after a Monitor redeploy mid-job.
|
||||||
|
_VZDUMP_ACTIVE_FILE = '/var/log/pve/tasks/active'
|
||||||
|
_vzdump_active_cache_ts: float = 0
|
||||||
|
_vzdump_active_cache_value: bool = False
|
||||||
|
_VZDUMP_ACTIVE_CACHE_TTL = 5 # seconds
|
||||||
|
|
||||||
|
|
||||||
|
def is_vzdump_active_on_host() -> bool:
|
||||||
|
"""Return True if `/var/log/pve/tasks/active` contains an active
|
||||||
|
vzdump UPID (i.e. backup currently running). Cached 5s to avoid
|
||||||
|
hammering the file on every notification.
|
||||||
|
|
||||||
|
Caller-safe: returns False on any I/O / parse error.
|
||||||
|
"""
|
||||||
|
global _vzdump_active_cache_ts, _vzdump_active_cache_value
|
||||||
|
now = time.time()
|
||||||
|
if now - _vzdump_active_cache_ts < _VZDUMP_ACTIVE_CACHE_TTL:
|
||||||
|
return _vzdump_active_cache_value
|
||||||
|
found = False
|
||||||
|
try:
|
||||||
|
with open(_VZDUMP_ACTIVE_FILE, 'r') as f:
|
||||||
|
for line in f:
|
||||||
|
# UPID format: UPID:node:pid:pstart:starttime:type:id:user:
|
||||||
|
if ':vzdump:' not in line:
|
||||||
|
continue
|
||||||
|
parts = line.strip().split(':')
|
||||||
|
if len(parts) < 3:
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
pid = int(parts[2], 16) # PID in UPID is hex
|
||||||
|
os.kill(pid, 0)
|
||||||
|
found = True
|
||||||
|
break
|
||||||
|
except (ValueError, ProcessLookupError, PermissionError):
|
||||||
|
continue
|
||||||
|
except (OSError, IOError):
|
||||||
|
pass
|
||||||
|
_vzdump_active_cache_ts = now
|
||||||
|
_vzdump_active_cache_value = found
|
||||||
|
return found
|
||||||
|
|
||||||
|
|
||||||
# ─── Journal Watcher (Real-time) ─────────────────────────────────
|
# ─── Journal Watcher (Real-time) ─────────────────────────────────
|
||||||
|
|
||||||
class JournalWatcher:
|
class JournalWatcher:
|
||||||
@@ -1238,6 +1293,14 @@ class JournalWatcher:
|
|||||||
now = time.time()
|
now = time.time()
|
||||||
if now - self._last_backup_job_ts < self._BACKUP_JOB_SUPPRESS_WINDOW:
|
if now - self._last_backup_job_ts < self._BACKUP_JOB_SUPPRESS_WINDOW:
|
||||||
return # Part of an active job -- already notified
|
return # Part of an active job -- already notified
|
||||||
|
# Restart-tolerant fallback: if the in-memory timestamp was
|
||||||
|
# cleared (Monitor restarted mid-job) but PVE still has an
|
||||||
|
# active vzdump UPID, this per-guest line is part of that
|
||||||
|
# job — drop it instead of emitting a wrong "Backup started
|
||||||
|
# on local" with storage default. Reported by JC Miñarro 18/05
|
||||||
|
# after a Monitor redeploy during an active PBS backup.
|
||||||
|
if is_vzdump_active_on_host():
|
||||||
|
return
|
||||||
fallback_guest = fb.group(1)
|
fallback_guest = fb.group(1)
|
||||||
else:
|
else:
|
||||||
return
|
return
|
||||||
@@ -1893,10 +1956,15 @@ class TaskWatcher:
|
|||||||
# Suppress VM/CT start/stop/shutdown while a vzdump is active.
|
# Suppress VM/CT start/stop/shutdown while a vzdump is active.
|
||||||
# These are backup-induced operations (mode=stop), not user actions.
|
# These are backup-induced operations (mode=stop), not user actions.
|
||||||
# Exception: if a VM/CT FAILS or has WARNINGS, that IS important.
|
# Exception: if a VM/CT FAILS or has WARNINGS, that IS important.
|
||||||
|
# We check BOTH our in-memory tracking (`_is_vzdump_active`) AND
|
||||||
|
# `tasks/active` on disk (`is_vzdump_active_on_host`). The disk
|
||||||
|
# check survives Monitor restarts mid-backup, which otherwise
|
||||||
|
# cleared `_vzdump_running_since` and exposed the post-restart
|
||||||
|
# shutdown notifications to the user (JC Miñarro 18/05).
|
||||||
_BACKUP_NOISE = {'vm_start', 'vm_stop', 'vm_shutdown', 'vm_restart',
|
_BACKUP_NOISE = {'vm_start', 'vm_stop', 'vm_shutdown', 'vm_restart',
|
||||||
'ct_start', 'ct_stop', 'ct_shutdown', 'ct_restart'}
|
'ct_start', 'ct_stop', 'ct_shutdown', 'ct_restart'}
|
||||||
if event_type in _BACKUP_NOISE and not is_error and not is_warning:
|
if event_type in _BACKUP_NOISE and not is_error and not is_warning:
|
||||||
if self._is_vzdump_active():
|
if self._is_vzdump_active() or is_vzdump_active_on_host():
|
||||||
return
|
return
|
||||||
|
|
||||||
# Suppress VM/CT stop/shutdown during host shutdown/reboot.
|
# Suppress VM/CT stop/shutdown during host shutdown/reboot.
|
||||||
|
|||||||
@@ -223,14 +223,28 @@ def _parse_vzdump_message(message: str) -> Optional[Dict[str, Any]]:
|
|||||||
else:
|
else:
|
||||||
total_time = f"{secs}s"
|
total_time = f"{secs}s"
|
||||||
|
|
||||||
|
# ── Extract the storage target name (PBS, PBS-Cloud, local, …) ──
|
||||||
|
# PVE logs the full command on the first line:
|
||||||
|
# "INFO: starting new backup job: vzdump 104 105 --storage PBS-Cloud --mode stop"
|
||||||
|
# We surface it so the notification body can say "PBS-Cloud: vm/104/…"
|
||||||
|
# instead of the generic "PBS:" prefix when multiple PBS endpoints
|
||||||
|
# are configured. Reported by JC Miñarro 18/05.
|
||||||
|
storage_name = ''
|
||||||
|
for line in lines:
|
||||||
|
m_storage = re.search(r'--storage\s+(\S+)', line)
|
||||||
|
if m_storage:
|
||||||
|
storage_name = m_storage.group(1).strip()
|
||||||
|
break
|
||||||
|
|
||||||
if not vms and not total_size:
|
if not vms and not total_size:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'vms': vms,
|
'vms': vms,
|
||||||
'total_time': total_time,
|
'total_time': total_time,
|
||||||
'total_size': total_size,
|
'total_size': total_size,
|
||||||
'vm_count': len(vms),
|
'vm_count': len(vms),
|
||||||
|
'storage_name': storage_name,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -277,13 +291,19 @@ def _format_vzdump_body(parsed: Dict[str, Any], is_success: bool) -> str:
|
|||||||
if detail_line:
|
if detail_line:
|
||||||
parts.append(' | '.join(detail_line))
|
parts.append(' | '.join(detail_line))
|
||||||
|
|
||||||
# PBS/File on separate line with icon
|
# PBS/File on separate line with icon. When we know the
|
||||||
|
# storage name (e.g. "PBS-Cloud", "PBS-Office") prefix it so
|
||||||
|
# the user can tell which destination this archive lives in \u2014
|
||||||
|
# critical when there are multiple PBS endpoints configured.
|
||||||
if vm.get('filename'):
|
if vm.get('filename'):
|
||||||
fname = vm['filename']
|
fname = vm['filename']
|
||||||
|
storage_name = parsed.get('storage_name', '') or ''
|
||||||
if re.match(r'^(?:ct|vm)/\d+/', fname):
|
if re.match(r'^(?:ct|vm)/\d+/', fname):
|
||||||
parts.append(f"\U0001F5C4\uFE0F PBS: {fname}")
|
label = storage_name if storage_name else 'PBS'
|
||||||
|
parts.append(f"\U0001F5C4\uFE0F {label}: {fname}")
|
||||||
else:
|
else:
|
||||||
parts.append(f"\U0001F4C1 File: {fname}")
|
label = storage_name if storage_name else 'File'
|
||||||
|
parts.append(f"\U0001F4C1 {label}: {fname}")
|
||||||
|
|
||||||
# Error reason if failed
|
# Error reason if failed
|
||||||
if status != 'ok' and vm.get('error'):
|
if status != 'ok' and vm.get('error'):
|
||||||
|
|||||||
@@ -601,7 +601,7 @@ EOF
|
|||||||
|
|
||||||
|
|
||||||
install_log2ram_auto() {
|
install_log2ram_auto() {
|
||||||
local FUNC_VERSION="1.1"
|
local FUNC_VERSION="1.2"
|
||||||
# description: Install Log2RAM with size auto-tuned to host RAM (128M/256M/512M); SSD/M.2 detection skips on rotational disks.
|
# description: Install Log2RAM with size auto-tuned to host RAM (128M/256M/512M); SSD/M.2 detection skips on rotational disks.
|
||||||
# ── Reinstall detection ─────────────────────────────────────────────────
|
# ── Reinstall detection ─────────────────────────────────────────────────
|
||||||
# If log2ram was previously installed by ProxMenux, skip hardware detection
|
# If log2ram was previously installed by ProxMenux, skip hardware detection
|
||||||
@@ -732,6 +732,13 @@ EOF
|
|||||||
|
|
||||||
cat > /usr/local/bin/log2ram-check.sh <<'EOF'
|
cat > /usr/local/bin/log2ram-check.sh <<'EOF'
|
||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
|
# v1.2 — `log2ram write` only copies tmpfs→disk; it does NOT shrink
|
||||||
|
# the tmpfs. When journald or pveproxy/access.log grow past their
|
||||||
|
# limits the tmpfs hit 100% and PVE crashed with "No space left on
|
||||||
|
# device" on Shell open (community-reported: JC Miñarro, Nicolás P.
|
||||||
|
# de A., 17-18/05). We now vacuum the journal and truncate the
|
||||||
|
# non-rotating logs that actually consume the tmpfs before calling
|
||||||
|
# `log2ram write`.
|
||||||
PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
|
PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
|
||||||
|
|
||||||
CONF_FILE="/etc/log2ram.conf"
|
CONF_FILE="/etc/log2ram.conf"
|
||||||
@@ -742,7 +749,8 @@ L2R_BIN="$(command -v log2ram || true)"
|
|||||||
SIZE_MiB="$(grep -E '^SIZE=' "$CONF_FILE" 2>/dev/null | cut -d'=' -f2 | tr -dc '0-9')"
|
SIZE_MiB="$(grep -E '^SIZE=' "$CONF_FILE" 2>/dev/null | cut -d'=' -f2 | tr -dc '0-9')"
|
||||||
[[ -z "$SIZE_MiB" ]] && SIZE_MiB=128
|
[[ -z "$SIZE_MiB" ]] && SIZE_MiB=128
|
||||||
LIMIT_BYTES=$(( SIZE_MiB * 1024 * 1024 ))
|
LIMIT_BYTES=$(( SIZE_MiB * 1024 * 1024 ))
|
||||||
THRESHOLD_BYTES=$(( LIMIT_BYTES * 95 / 100 ))
|
WARN_BYTES=$(( LIMIT_BYTES * 80 / 100 ))
|
||||||
|
EMERGENCY_BYTES=$(( LIMIT_BYTES * 92 / 100 ))
|
||||||
|
|
||||||
USED_BYTES="$(df -B1 --output=used /var/log 2>/dev/null | tail -1 | tr -dc '0-9')"
|
USED_BYTES="$(df -B1 --output=used /var/log 2>/dev/null | tail -1 | tr -dc '0-9')"
|
||||||
[[ -z "$USED_BYTES" ]] && exit 0
|
[[ -z "$USED_BYTES" ]] && exit 0
|
||||||
@@ -751,8 +759,24 @@ LOCK="/run/log2ram-check.lock"
|
|||||||
exec 9>"$LOCK" 2>/dev/null || exit 0
|
exec 9>"$LOCK" 2>/dev/null || exit 0
|
||||||
flock -n 9 || exit 0
|
flock -n 9 || exit 0
|
||||||
|
|
||||||
if (( USED_BYTES > THRESHOLD_BYTES )); then
|
# `log2ram write` alone leaves the tmpfs full. Real recovery requires:
|
||||||
"$L2R_BIN" write 2>/dev/null || true
|
# (a) journal vacuum — journald respects --vacuum-size unconditionally,
|
||||||
|
# unlike SystemMaxUse which only enforces on rotation boundaries;
|
||||||
|
# (b) truncating logs that aren't rotated by logrotate (pveproxy, pveam);
|
||||||
|
# (c) THEN syncing to disk so the persistent copy reflects reality.
|
||||||
|
if (( USED_BYTES > EMERGENCY_BYTES )); then
|
||||||
|
SAFE_JOURNAL_MB=$(( SIZE_MiB * 5 / 100 ))
|
||||||
|
[[ "$SAFE_JOURNAL_MB" -lt 16 ]] && SAFE_JOURNAL_MB=16
|
||||||
|
journalctl --vacuum-size="${SAFE_JOURNAL_MB}M" >/dev/null 2>&1 || true
|
||||||
|
: > /var/log/pveproxy/access.log 2>/dev/null || true
|
||||||
|
: > /var/log/pveproxy/error.log 2>/dev/null || true
|
||||||
|
: > /var/log/pveam.log 2>/dev/null || true
|
||||||
|
"$L2R_BIN" write 2>/dev/null || true
|
||||||
|
elif (( USED_BYTES > WARN_BYTES )); then
|
||||||
|
SOFT_JOURNAL_MB=$(( SIZE_MiB * 30 / 100 ))
|
||||||
|
[[ "$SOFT_JOURNAL_MB" -lt 32 ]] && SOFT_JOURNAL_MB=32
|
||||||
|
journalctl --vacuum-size="${SOFT_JOURNAL_MB}M" >/dev/null 2>&1 || true
|
||||||
|
"$L2R_BIN" write 2>/dev/null || true
|
||||||
fi
|
fi
|
||||||
EOF
|
EOF
|
||||||
chmod +x /usr/local/bin/log2ram-check.sh
|
chmod +x /usr/local/bin/log2ram-check.sh
|
||||||
@@ -770,7 +794,7 @@ EOF
|
|||||||
chown root:root /etc/cron.d/log2ram-auto-sync
|
chown root:root /etc/cron.d/log2ram-auto-sync
|
||||||
|
|
||||||
systemctl restart cron >/dev/null 2>&1 || true
|
systemctl restart cron >/dev/null 2>&1 || true
|
||||||
msg_ok "$(translate "Auto-sync enabled when /var/log exceeds 95% of") $LOG2RAM_SIZE"
|
msg_ok "$(translate "Auto-sync enabled when /var/log exceeds 80% of") $LOG2RAM_SIZE"
|
||||||
|
|
||||||
|
|
||||||
msg_info "$(translate "Adjusting systemd-journald limits to match Log2RAM size...")"
|
msg_info "$(translate "Adjusting systemd-journald limits to match Log2RAM size...")"
|
||||||
@@ -801,6 +825,11 @@ Storage=persistent
|
|||||||
SplitMode=none
|
SplitMode=none
|
||||||
RateLimitIntervalSec=30s
|
RateLimitIntervalSec=30s
|
||||||
RateLimitBurst=1000
|
RateLimitBurst=1000
|
||||||
|
ForwardToSyslog=no
|
||||||
|
ForwardToWall=no
|
||||||
|
Seal=no
|
||||||
|
Compress=yes
|
||||||
|
SystemMaxUse=${USE_MB}M
|
||||||
SystemKeepFree=${KEEP_MB}M
|
SystemKeepFree=${KEEP_MB}M
|
||||||
RuntimeMaxUse=${RUNTIME_MB}M
|
RuntimeMaxUse=${RUNTIME_MB}M
|
||||||
# MaxLevelStore=info: required for ProxMenux Monitor log display and Fail2Ban detection.
|
# MaxLevelStore=info: required for ProxMenux Monitor log display and Fail2Ban detection.
|
||||||
|
|||||||
@@ -2350,7 +2350,7 @@ update_pve_appliance_manager() {
|
|||||||
|
|
||||||
|
|
||||||
configure_log2ram() {
|
configure_log2ram() {
|
||||||
local FUNC_VERSION="1.1"
|
local FUNC_VERSION="1.2"
|
||||||
# description: Install Log2RAM with user-chosen RAM size; prompts for size and SSD/M.2 awareness before applying.
|
# description: Install Log2RAM with user-chosen RAM size; prompts for size and SSD/M.2 awareness before applying.
|
||||||
msg_info2 "$(translate "Preparing Log2RAM configuration")"
|
msg_info2 "$(translate "Preparing Log2RAM configuration")"
|
||||||
sleep 1
|
sleep 1
|
||||||
@@ -2477,6 +2477,13 @@ EOF
|
|||||||
if [[ "$ENABLE_AUTOSYNC" == true ]]; then
|
if [[ "$ENABLE_AUTOSYNC" == true ]]; then
|
||||||
cat > /usr/local/bin/log2ram-check.sh <<'EOF'
|
cat > /usr/local/bin/log2ram-check.sh <<'EOF'
|
||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
|
# v1.2 — `log2ram write` only copies tmpfs→disk; it does NOT shrink
|
||||||
|
# the tmpfs. When journald or pveproxy/access.log grow past their
|
||||||
|
# limits the tmpfs hit 100% and PVE crashed with "No space left on
|
||||||
|
# device" on Shell open (community-reported: JC Miñarro, Nicolás P.
|
||||||
|
# de A., 17-18/05). We now vacuum the journal and truncate the
|
||||||
|
# non-rotating logs that actually consume the tmpfs before calling
|
||||||
|
# `log2ram write`.
|
||||||
PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
|
PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
|
||||||
CONF_FILE="/etc/log2ram.conf"
|
CONF_FILE="/etc/log2ram.conf"
|
||||||
L2R_BIN="$(command -v log2ram || true)"
|
L2R_BIN="$(command -v log2ram || true)"
|
||||||
@@ -2486,7 +2493,8 @@ L2R_BIN="$(command -v log2ram || true)"
|
|||||||
SIZE_MiB="$(grep -E '^SIZE=' "$CONF_FILE" 2>/dev/null | cut -d'=' -f2 | tr -dc '0-9')"
|
SIZE_MiB="$(grep -E '^SIZE=' "$CONF_FILE" 2>/dev/null | cut -d'=' -f2 | tr -dc '0-9')"
|
||||||
[[ -z "$SIZE_MiB" ]] && SIZE_MiB=128
|
[[ -z "$SIZE_MiB" ]] && SIZE_MiB=128
|
||||||
LIMIT_BYTES=$(( SIZE_MiB * 1024 * 1024 ))
|
LIMIT_BYTES=$(( SIZE_MiB * 1024 * 1024 ))
|
||||||
THRESHOLD_BYTES=$(( LIMIT_BYTES * 90 / 100 ))
|
WARN_BYTES=$(( LIMIT_BYTES * 80 / 100 ))
|
||||||
|
EMERGENCY_BYTES=$(( LIMIT_BYTES * 92 / 100 ))
|
||||||
|
|
||||||
USED_BYTES="$(df -B1 --output=used /var/log 2>/dev/null | tail -1 | tr -dc '0-9')"
|
USED_BYTES="$(df -B1 --output=used /var/log 2>/dev/null | tail -1 | tr -dc '0-9')"
|
||||||
[[ -z "$USED_BYTES" ]] && exit 0
|
[[ -z "$USED_BYTES" ]] && exit 0
|
||||||
@@ -2495,8 +2503,24 @@ LOCK="/run/log2ram-check.lock"
|
|||||||
exec 9>"$LOCK" 2>/dev/null || exit 0
|
exec 9>"$LOCK" 2>/dev/null || exit 0
|
||||||
flock -n 9 || exit 0
|
flock -n 9 || exit 0
|
||||||
|
|
||||||
if (( USED_BYTES > THRESHOLD_BYTES )); then
|
# `log2ram write` alone leaves the tmpfs full. Real recovery requires:
|
||||||
"$L2R_BIN" write 2>/dev/null || true
|
# (a) journal vacuum — journald respects --vacuum-size unconditionally,
|
||||||
|
# unlike SystemMaxUse which only enforces on rotation boundaries;
|
||||||
|
# (b) truncating logs that aren't rotated by logrotate (pveproxy, pveam);
|
||||||
|
# (c) THEN syncing to disk so the persistent copy reflects reality.
|
||||||
|
if (( USED_BYTES > EMERGENCY_BYTES )); then
|
||||||
|
SAFE_JOURNAL_MB=$(( SIZE_MiB * 5 / 100 ))
|
||||||
|
[[ "$SAFE_JOURNAL_MB" -lt 16 ]] && SAFE_JOURNAL_MB=16
|
||||||
|
journalctl --vacuum-size="${SAFE_JOURNAL_MB}M" >/dev/null 2>&1 || true
|
||||||
|
: > /var/log/pveproxy/access.log 2>/dev/null || true
|
||||||
|
: > /var/log/pveproxy/error.log 2>/dev/null || true
|
||||||
|
: > /var/log/pveam.log 2>/dev/null || true
|
||||||
|
"$L2R_BIN" write 2>/dev/null || true
|
||||||
|
elif (( USED_BYTES > WARN_BYTES )); then
|
||||||
|
SOFT_JOURNAL_MB=$(( SIZE_MiB * 30 / 100 ))
|
||||||
|
[[ "$SOFT_JOURNAL_MB" -lt 32 ]] && SOFT_JOURNAL_MB=32
|
||||||
|
journalctl --vacuum-size="${SOFT_JOURNAL_MB}M" >/dev/null 2>&1 || true
|
||||||
|
"$L2R_BIN" write 2>/dev/null || true
|
||||||
fi
|
fi
|
||||||
EOF
|
EOF
|
||||||
chmod +x /usr/local/bin/log2ram-check.sh
|
chmod +x /usr/local/bin/log2ram-check.sh
|
||||||
@@ -2510,7 +2534,7 @@ MAILTO=""
|
|||||||
EOF
|
EOF
|
||||||
chmod 0644 /etc/cron.d/log2ram-auto-sync
|
chmod 0644 /etc/cron.d/log2ram-auto-sync
|
||||||
chown root:root /etc/cron.d/log2ram-auto-sync
|
chown root:root /etc/cron.d/log2ram-auto-sync
|
||||||
msg_ok "$(translate "Auto-sync enabled when /var/log exceeds 90% of") $LOG2RAM_SIZE"
|
msg_ok "$(translate "Auto-sync enabled when /var/log exceeds 80% of") $LOG2RAM_SIZE"
|
||||||
else
|
else
|
||||||
rm -f /usr/local/bin/log2ram-check.sh /etc/cron.d/log2ram-auto-sync 2>/dev/null || true
|
rm -f /usr/local/bin/log2ram-check.sh /etc/cron.d/log2ram-auto-sync 2>/dev/null || true
|
||||||
msg_info2 "$(translate "Auto-sync was not enabled")"
|
msg_info2 "$(translate "Auto-sync was not enabled")"
|
||||||
|
|||||||
Reference in New Issue
Block a user