Update AppImage

This commit is contained in:
MacRimi
2026-05-20 18:14:32 +02:00
parent 1087a87ea2
commit 4112323961
20 changed files with 1638 additions and 261 deletions
+122 -7
View File
@@ -1019,10 +1019,16 @@ def _capture_health_journal_context(categories: list, reason: str = '') -> str:
if not pattern:
return ""
# Capture recent journal entries matching keywords
# Use -b 0 to only include logs from the current boot
# Capture recent journal entries matching keywords.
# Use -b 0 to only include logs from the current boot.
# Filter out the Monitor's own stdout (AppRun, [HealthPersistence],
# proxmenux-auth, etc.) BEFORE keyword matching — otherwise a startup
# line like "[HealthPersistence] Database initialized with 13 tables"
# leaks into the AI context because grep -iE 'ata' matches the
# substring "ata" in "dATAbase". Self-logs are never system evidence.
cmd = (
f"journalctl -b 0 --since='10 minutes ago' --no-pager -n 500 2>/dev/null | "
f"grep -vE 'AppRun\\[|proxmenux-auth|\\[HealthPersistence\\]|\\[ProxMenux\\]|\\[NotificationManager\\]|\\[AIEnhancer\\]' | "
f"grep -iE '{pattern}' | tail -n 30"
)
@@ -1131,12 +1137,28 @@ def _health_collector_loop():
'updates': 'update_summary',
}
# Sub-categories already rolled up into details['storage']
# by _check_proxmox_storage_status. Emitting them as their
# own health_degraded entries duplicates the same warning
# (e.g. "Storage Mounts & Space" + "PVE Storage Capacity"
# both saying "PBS-Cloud (pbs) usage ≥70%"). Skip them at
# the notification layer — they still update _prev_statuses
# so a future degradation transition is detected normally.
_STORAGE_SUBCATEGORIES = {
'pve_storage_capacity', 'zfs_pool_capacity',
'lxc_disk', 'lxc_mounts', 'remote_mounts',
}
for cat_key, cat_data in details.items():
cur_status = cat_data.get('status', 'OK')
prev_status = _prev_statuses.get(cat_key, 'OK')
cur_rank = _SEV_RANK.get(cur_status, 0)
prev_rank = _SEV_RANK.get(prev_status, 0)
if cat_key in _STORAGE_SUBCATEGORIES:
_prev_statuses[cat_key] = cur_status
continue
if cur_rank > prev_rank and cur_rank >= 2: # WARNING or CRITICAL
reason = cat_data.get('reason', f'{cat_key} status changed to {cur_status}')
reason_lower = reason.lower()
@@ -4676,16 +4698,56 @@ def get_network_info():
'vm_lxc_total_count': 0
}
def _get_lxc_update_status_map() -> dict:
    """Project the managed_installs registry into a per-CT update lookup.

    Returns ``{vmid: {available, count, security_count, last_check,
    latest, error, packages}}`` keyed by the VMID as a string. Used to
    decorate ``/api/vms`` output without forcing the frontend to fetch a
    second endpoint.

    Returns an empty dict if the registry module isn't available, the
    registry can't be read, or nothing is registered — callers must
    treat absence as "no info". Malformed registry rows (non-dict items,
    a missing ``id``, a non-dict ``update_check``, non-numeric counts)
    are skipped or defaulted instead of raising, so one bad row cannot
    break the whole VM listing.
    """
    try:
        import managed_installs
        active = managed_installs.get_active_items() or []
    except Exception:
        # Best-effort decoration: any registry problem means "no info".
        return {}

    def _as_int(value) -> int:
        # Counts come from a persisted registry; tolerate junk values.
        try:
            return int(value or 0)
        except (TypeError, ValueError):
            return 0

    out: dict = {}
    for it in active:
        if not isinstance(it, dict) or it.get('type') != 'lxc':
            continue
        # `id` may be present but None, so coerce before removeprefix().
        vmid = it.get('_vmid') or str(it.get('id') or '').removeprefix('lxc:')
        if not vmid:
            continue
        update = it.get('update_check')
        if not isinstance(update, dict):
            update = {}
        packages = update.get('_packages')
        if not isinstance(packages, list):
            packages = []
        out[str(vmid)] = {
            'available': bool(update.get('available')),
            'count': _as_int(update.get('_count')),
            'security_count': _as_int(update.get('_security_count')),
            'last_check': update.get('last_check'),
            'latest': update.get('latest'),
            'error': update.get('error'),
            # Cap packages list shipped to UI — modal uses first 30 max
            'packages': packages[:30],
        }
    return out
def get_proxmox_vms():
"""Get Proxmox VM and LXC information (requires pvesh command) - only from local node"""
try:
all_vms = []
lxc_updates_map = _get_lxc_update_status_map()
try:
# local_node = socket.gethostname()
local_node = get_proxmox_node_name()
# print(f"[v0] Local node detected: {local_node}")
resources = get_cached_pvesh_cluster_resources_vm()
if resources:
for resource in resources:
@@ -4693,12 +4755,13 @@ def get_proxmox_vms():
if node != local_node:
# print(f"[v0] Skipping VM {resource.get('vmid')} from remote node: {node}")
continue
vm_type = 'lxc' if resource.get('type') == 'lxc' else 'qemu'
vm_data = {
'vmid': resource.get('vmid'),
'name': resource.get('name', f"VM-{resource.get('vmid')}"),
'status': resource.get('status', 'unknown'),
'type': 'lxc' if resource.get('type') == 'lxc' else 'qemu',
'type': vm_type,
'cpu': resource.get('cpu', 0),
'mem': resource.get('mem', 0),
'maxmem': resource.get('maxmem', 0),
@@ -4710,6 +4773,14 @@ def get_proxmox_vms():
'diskread': resource.get('diskread', 0),
'diskwrite': resource.get('diskwrite', 0)
}
# Decorate LXC rows with the apt update status if the
# managed_installs registry has it. Absent key means
# either the user hasn't enabled the feature or the
# CT isn't running / isn't Debian/Ubuntu.
if vm_type == 'lxc':
upd = lxc_updates_map.get(str(resource.get('vmid')))
if upd is not None:
vm_data['update_check'] = upd
all_vms.append(vm_data)
return all_vms
@@ -11035,9 +11106,53 @@ def api_vm_control(vmid):
'message': f'Successfully executed {action} on {vm_info.get("name")}'
})
else:
# `pvesh` failed → fire the matching vm_fail / ct_fail
# notification so the user gets paged on their channels
# too, not just an in-dashboard alert. Previously this
# path silently returned a 500 to the browser and lost
# the event entirely (reported on .1.10: tried to start
# VM 106 while log2ram tmpfs was full → 500 in the UI
# but no Telegram message). The stderr is the most
# useful single line we have — `pvesh` reliably prints
# the underlying daemon failure there (e.g.
# "start failed: command '/usr/bin/kvm …' failed with
# exit code 1: no space left on device").
err_text = (control_result.stderr or '').strip() \
or (control_result.stdout or '').strip() \
or f'{action} returned exit code {control_result.returncode}'
# Truncate runaway stderr (some pvesh failures dump
# multi-KB tracebacks) — keep the notification readable.
if len(err_text) > 500:
err_text = err_text[:500] + ''
try:
from notification_manager import notification_manager as _nm
import socket as _sock
_host = _sock.gethostname()
event_type = 'ct_fail' if vm_type == 'lxc' else 'vm_fail'
_nm.emit_event(
event_type=event_type,
severity='CRITICAL',
data={
'hostname': _host,
'vmid': str(vmid),
'vmname': vm_info.get('name') or f'{vm_type}-{vmid}',
'reason': f'{action} failed: {err_text}',
'action': action,
},
source='dashboard',
entity='vm',
entity_id=str(vmid),
)
except Exception as _emit_err:
print(f"[api_vm_control] failed to emit {vm_type}_fail "
f"notification: {type(_emit_err).__name__}: {_emit_err}")
return jsonify({
'success': False,
'error': control_result.stderr
'vmid': vmid,
'action': action,
'error': err_text,
}), 500
else:
return jsonify({'error': 'Failed to get VM details'}), 500
+41 -7
View File
@@ -92,7 +92,15 @@ class HealthPersistence:
self.data_dir.mkdir(parents=True, exist_ok=True)
self.db_path = self.data_dir / 'health_monitor.db'
self._db_lock = threading.Lock()
# Reentrant lock: `record_disk_observation` acquires this and then
# calls `register_disk` which acquires it again on the same thread.
# With a plain `threading.Lock` that second acquire deadlocks and the
# caller hangs forever — visible symptom on RimegraVE (Pedro Rico
# 19/05): no disk_observation update since the day a thread first
# walked that path. `RLock` allows re-entry from the same thread
# while still serialising cross-thread writes, which is what the
# serialisation rationale (race-free UPSERT dedup) actually wants.
self._db_lock = threading.RLock()
self._init_database()
def _get_conn(self) -> sqlite3.Connection:
@@ -228,6 +236,29 @@ class HealthPersistence:
'CREATE INDEX IF NOT EXISTS idx_digest_pending_channel '
'ON digest_pending(channel, ts)'
)
# Sibling table for events buffered DURING Quiet Hours. Same
# shape as digest_pending so the existing summary renderer can
# be reused. Kept separate because the lifecycle is different:
# digest_pending flushes once per day at digest_time, while
# quiet_pending flushes once per Quiet Hours close (an arbitrary
# time that depends on the user's window settings).
cursor.execute('''
CREATE TABLE IF NOT EXISTS quiet_pending (
id INTEGER PRIMARY KEY AUTOINCREMENT,
channel TEXT NOT NULL,
event_type TEXT NOT NULL,
event_group TEXT NOT NULL,
severity TEXT NOT NULL,
ts INTEGER NOT NULL,
title TEXT NOT NULL,
body TEXT NOT NULL
)
''')
cursor.execute(
'CREATE INDEX IF NOT EXISTS idx_quiet_pending_channel '
'ON quiet_pending(channel, ts)'
)
# Migration: add missing columns to errors table for existing DBs
cursor.execute("PRAGMA table_info(errors)")
@@ -2289,11 +2320,15 @@ class HealthPersistence:
# Upsert observation: if same (disk, type, signature), bump count + update last timestamp.
# IMPORTANT: Do NOT reset dismissed — if the user dismissed this observation,
# re-detecting the same journal entry must not un-dismiss it. Also do not
# increment the occurrence_count on dismissed rows (audit Tier 5 — once
# the user has dismissed, we don't want the counter to keep growing for
# journal events that no longer interest them; this also stops the badge
# from drifting upward for dismissed conditions).
# re-detecting the same journal entry must not un-dismiss it. BUT we DO
# keep counting + updating last_occurrence even when dismissed, because the
# responsible-monitoring contract is: every error counts toward the
# accumulated total shown in the disk modal ("324 connection errors"),
# even errors of the same signature the user already saw once. Dismissed
# only mutes notifications, NOT the per-disk error history surfaced in the
# UI. Reverting the earlier "WHERE dismissed=0" gate that froze the
# counter and last_occurrence for /dev/sdh on 2026-05-09, leaving 10
# silent days of unreported ATA errors (Pedro Rico, 19/05).
cursor.execute(f'''
INSERT INTO disk_observations
(disk_registry_id, {type_col}, error_signature, {first_col},
@@ -2303,7 +2338,6 @@ class HealthPersistence:
{last_col} = excluded.{last_col},
occurrence_count = occurrence_count + 1,
severity = CASE WHEN excluded.severity = 'critical' THEN 'critical' ELSE severity END
WHERE dismissed = 0
''', (disk_id, error_type, error_signature, now, now, raw_message, severity))
conn.commit()
+68 -9
View File
@@ -274,6 +274,12 @@ def _df_via_host_pid(host_pid: str, ct_target: str) -> dict[str, Optional[int]]:
numbers reflect the whole storage pool instead of the per-subvol
quota — without this the UI showed 851 GB total for a 150 GB ZFS
subvol because pvesm reports the rpool's free space.
Note: this path does NOT measure NFS/CIFS mounts that were set up
from INSIDE the CT (`mount -t nfs` / `/etc/fstab` inside the
container). Those live in the CT's own mount namespace and aren't
visible to the host's `df` even through `/proc/<pid>/root`. Use
`_df_via_pct_exec` for ad-hoc mounts.
"""
empty = {"total_bytes": None, "used_bytes": None, "available_bytes": None}
if not host_pid or not ct_target:
@@ -301,6 +307,44 @@ def _df_via_host_pid(host_pid: str, ct_target: str) -> dict[str, Optional[int]]:
return empty
def _df_via_pct_exec(vmid: str, ct_target: str,
                     timeout: int = 6) -> dict[str, Optional[int]]:
    """Measure a path's capacity by running ``df`` inside the container.

    Runs ``pct exec <vmid> -- df -B1 --output=size,used,avail <path>``
    and reads the last output row. This covers ad-hoc NFS/CIFS mounts
    that exist only in the CT's own mount namespace, where the
    host-side ``_df_via_host_pid`` has nothing to report.

    A full ``pct exec`` round-trip is slower than a host-side ``df``,
    so callers should reserve this for ad-hoc mounts. ``timeout``
    (seconds) bounds the subprocess.

    Returns a dict with ``total_bytes``, ``used_bytes`` and
    ``available_bytes``. All three are ``None`` when an argument is
    empty, the command fails or times out, or the output can't be
    parsed.
    """
    unknown = {"total_bytes": None, "used_bytes": None, "available_bytes": None}
    if not (vmid and ct_target):
        return unknown
    try:
        result = subprocess.run(
            [_PCT, "exec", vmid, "--", "df", "-B1",
             "--output=size,used,avail", ct_target],
            capture_output=True, text=True, timeout=timeout,
        )
    except (subprocess.TimeoutExpired, OSError, ValueError):
        return unknown
    if result.returncode != 0:
        return unknown

    # Header row plus at least one data row is required.
    rows = [row for row in result.stdout.strip().splitlines() if row.strip()]
    if len(rows) < 2:
        return unknown
    fields = rows[-1].split()
    if len(fields) < 3:
        return unknown
    try:
        size, used, avail = int(fields[0]), int(fields[1]), int(fields[2])
    except ValueError:
        return unknown
    return {
        "total_bytes": size,
        "used_bytes": used,
        "available_bytes": avail,
    }
def _capacity_for(source: str, classification: dict[str, Any],
pve_storages: dict[str, dict[str, Any]],
config_options: Optional[dict[str, Any]] = None,
@@ -606,14 +650,29 @@ def get_lxc_mount_points(vmid: str) -> dict[str, Any]:
]
# Same parallelisation as the configured-mp loop: stat'ing
# stale NFS exports serially can dominate the request and
# push it past the proxy timeout.
# push it past the proxy timeout. Capacity (`df`) is fetched
# in the SAME pool so the UI can render the usage bar for
# ad-hoc NFS/CIFS mounts too — null capacity was a regression
# spotted on CT 103 /mnt/Media. Skip df when stat already
# showed the mount as unreachable, otherwise the df subprocess
# blocks on the same broken export.
if ad_hoc_candidates:
with ThreadPoolExecutor(max_workers=max_workers) as pool:
healths = list(pool.map(
lambda rt: _stat_via_host(host_pid, rt["rt_target"]),
ad_hoc_candidates,
))
for rt, health in zip(ad_hoc_candidates, healths):
def _gather_adhoc(rt):
h = _stat_via_host(host_pid, rt["rt_target"])
if h.get("reachable"):
# NFS/CIFS mounts done inside the CT live in the
# container's own mount namespace and aren't
# visible to `df` from the host even via
# /proc/<pid>/root — use `pct exec df` instead.
cap = _df_via_pct_exec(vmid, rt["rt_target"])
else:
cap = {"total_bytes": None, "used_bytes": None,
"available_bytes": None}
return rt, h, cap
results = list(pool.map(_gather_adhoc, ad_hoc_candidates))
for rt, health, cap in results:
ad_hoc.append({
"mp_index": "",
"source": rt["rt_source"],
@@ -624,9 +683,9 @@ def get_lxc_mount_points(vmid: str) -> dict[str, Any]:
"origin_label": rt["rt_source"],
"config_options": {},
"config_flags": [],
"total_bytes": None,
"used_bytes": None,
"available_bytes": None,
"total_bytes": cap["total_bytes"],
"used_bytes": cap["used_bytes"],
"available_bytes": cap["available_bytes"],
"runtime_mounted": True,
"runtime_source": rt["rt_source"],
"runtime_fstype": rt["rt_fstype"],
+328 -1
View File
@@ -189,12 +189,169 @@ def _detect_oci_apps() -> list[dict]:
return out
# ── LXC containers (Phase 1: package-manager update detection) ──────
#
# Each running Debian/Ubuntu/Alpine CT becomes a registry entry of type
# "lxc". Detection is opt-in: gated on the `lxc_updates_available`
# notification being enabled somewhere, so the heavy `pct exec` work
# doesn't run on hosts where the user hasn't asked for this.
#
# Phase 2 hook: once helper-scripts metadata is integrated, entries can
# carry `_helper_script_app` so the checker swaps generic package
# counting for app-specific upstream-release tracking (Vaultwarden,
# Jellyfin, etc.). For now every LXC uses the generic apt/apk path.
# Absolute path of the Proxmox container CLI invoked for `pct list`
# and `pct exec` below.
_PCT_BIN = "/usr/sbin/pct"
# Timeout (seconds) for a package-listing command run inside a CT.
_LXC_EXEC_TIMEOUT_SEC = 10
# Timeout (seconds) for reading /etc/os-release inside a CT.
_LXC_OS_PROBE_TIMEOUT_SEC = 5
def _lxc_updates_notification_enabled() -> bool:
    """Tell whether the ``lxc_updates_available`` event is enabled.

    Asks the notification manager singleton; the result gates the
    costly per-CT detection and checking work, so no container is
    touched while the feature is off. Any failure (module missing,
    manager not ready, lookup error) counts as "disabled".
    """
    try:
        import notification_manager
        manager = notification_manager.notification_manager
        return bool(manager.is_event_enabled("lxc_updates_available"))
    except Exception:
        return False
def _list_pve_lxcs() -> list[dict]:
    """List the LXCs known to ``pct list`` as ``{vmid, status, name}`` dicts.

    The header row is dropped and rows whose first column is not a
    numeric VMID are ignored. Never raises for command problems: a
    timeout, a missing binary or a non-zero exit all yield an empty
    list so the detector caller can carry on.
    """
    try:
        listing = subprocess.run(
            [_PCT_BIN, "list"],
            capture_output=True, text=True, timeout=5,
        )
    except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
        return []
    if listing.returncode != 0:
        return []

    containers: list[dict] = []
    for row in listing.stdout.splitlines()[1:]:  # first row is the header
        # Columns are VMID / Status / Lock / Name. Lock is usually
        # blank, which collapses the row to three whitespace-separated
        # fields — either way the name is the last field present.
        cols = row.split(None, 3)
        if len(cols) < 2 or not cols[0].isdigit():
            continue
        containers.append({
            "vmid": cols[0],
            "status": cols[1],
            "name": cols[-1] if len(cols) >= 3 else "",
        })
    return containers
_SUPPORTED_OS_FAMILIES = ("debian", "ubuntu", "alpine")


def _os_family_from_os_release(text: str) -> Optional[str]:
    """Map the contents of an ``os-release`` file to a supported family."""
    fields: dict[str, str] = {}
    for raw in text.splitlines():
        line = raw.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        key, _, value = line.partition("=")
        value = value.strip()
        # Values may legally be wrapped in single or double quotes
        # (e.g. ID="ubuntu", ID_LIKE="ubuntu debian").
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
            value = value[1:-1]
        fields[key.strip().upper()] = value.strip().lower()

    # Exact match on ID first so "ubuntu-core" is not mistaken for
    # "ubuntu" the way a substring search would.
    distro = fields.get("ID", "")
    if distro in _SUPPORTED_OS_FAMILIES:
        return distro
    # Derivatives advertise their parents as space-separated tokens.
    like = fields.get("ID_LIKE", "").split()
    for family in _SUPPORTED_OS_FAMILIES:
        if family in like:
            return family
    return None


def _probe_lxc_os(vmid: str) -> Optional[str]:
    """Return the package-manager family of a running CT.

    Reads ``/etc/os-release`` inside the container via ``pct exec`` and
    returns ``debian`` / ``ubuntu`` / ``alpine``. The ``ID`` field is
    matched exactly; when it isn't a supported family, the ``ID_LIKE``
    tokens are consulted so derivatives resolve to their parent family.

    Returns None when the probe fails (timeout, missing binary,
    non-zero exit) or the distribution uses a package manager we don't
    yet speak — those CTs are skipped in detection so the framework
    doesn't keep retrying a checker we can't run.

    Future Phase 1.5: CentOS/Rocky/Alma (dnf check-update), Arch
    (checkupdates), openSUSE (zypper list-updates). Each needs a parser
    similar to apt/apk — skipped silently for now.
    """
    try:
        r = subprocess.run(
            [_PCT_BIN, "exec", vmid, "--", "cat", "/etc/os-release"],
            capture_output=True, text=True,
            timeout=_LXC_OS_PROBE_TIMEOUT_SEC,
        )
    except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
        return None
    if r.returncode != 0:
        return None
    return _os_family_from_os_release(r.stdout)
def _detect_lxc_containers() -> list[dict]:
    """Build registry entries for running CTs with a supported OS family.

    Returns an empty list when the ``lxc_updates_available``
    notification is disabled. Otherwise every CT reported as
    ``running`` whose family is in ``_SUPPORTED_OS_FAMILIES`` yields one
    entry of type ``lxc``.

    The OS family is reused from the existing registry entry
    (``_os_family``) when one is stored, so ``_probe_lxc_os`` only runs
    for CTs that have no cached family. A CT reinstalled with another
    OS therefore keeps the stale family until the registry is reset —
    an accepted trade-off against probing on every cycle.
    """
    if not _lxc_updates_notification_enabled():
        return []

    # Read-only look at the registry to recover cached families. No
    # lock is taken here — the framework holds the write lock when it
    # merges these results back in detect_and_register.
    try:
        registry_items = _read_registry().get("items", [])
    except Exception:
        registry_items = []
    cached_family = {
        item.get("id"): item.get("_os_family")
        for item in registry_items
        if isinstance(item, dict) and item.get("type") == "lxc"
    }

    entries: list[dict] = []
    for ct in _list_pve_lxcs():
        if ct["status"] != "running":
            continue
        vmid = ct["vmid"]
        entry_id = f"lxc:{vmid}"
        family = cached_family.get(entry_id) or _probe_lxc_os(vmid)
        if family not in _SUPPORTED_OS_FAMILIES:
            # No package-manager parser for this distribution yet, so
            # leave it out. The framework marks any existing entry as
            # removed_at once it stops appearing in detector output.
            continue
        entries.append({
            "id": entry_id,
            "type": "lxc",
            "name": ct.get("name") or f"CT-{vmid}",
            "current_version": None,   # a CT has no single version
            "menu_label": None,        # user upgrades inside the CT
            "menu_script": None,
            "_vmid": vmid,
            "_os_family": family,
            # Phase 2 hook: populate `_helper_script_app` here once we
            # learn how to read the community-scripts marker.
        })
    return entries
# Detectors registered here. Each is a zero-argument callable that
# returns either a single entry dict or a list (for sources that yield
# multiple items, like OCI apps or LXC containers). The framework
# normalises both shapes.
_DETECTORS: list[Callable[[], Any]] = [
    _detect_nvidia_xfree86,
    _detect_oci_apps,
    _detect_lxc_containers,
]
@@ -514,9 +671,173 @@ def _check_nvidia_xfree86(entry: dict) -> dict:
}
def _parse_apt_list_upgradable(text: str) -> list[dict]:
    """Turn ``apt list --upgradable`` output into structured rows.

    An upgradable line has the form::

        package/release version arch [upgradable from: oldversion]

    Each such line becomes ``{name, current, latest, security}``. Lines
    without both a ``/`` and the ``[upgradable`` marker are ignored,
    which also drops the ``Listing...`` header.

    ``security`` is inferred from the release/suite part (for example
    ``bookworm-security`` or ``jammy-security``). Derivatives that name
    their security suite differently report ``security=False`` even
    when the update is a security patch — accepted for Phase 1.
    """
    parsed: list[dict] = []
    for raw in text.splitlines():
        entry = raw.strip()
        if "[upgradable" not in entry or "/" not in entry:
            continue
        pkg_and_suite, _, remainder = entry.partition(" ")
        fields = remainder.split()
        if not fields:
            continue
        pkg, _, suite = pkg_and_suite.partition("/")
        # Everything after "from:" up to the closing bracket is the
        # currently installed version.
        _, marker, previous = entry.partition("from:")
        installed = previous.strip().rstrip("]").strip() if marker else ""
        suite_lc = suite.lower()
        parsed.append({
            "name": pkg,
            "current": installed,
            "latest": fields[0],
            "security": "-security" in suite_lc or "/security" in suite_lc,
        })
    return parsed
def _parse_apk_list_upgradable(text: str) -> list[dict]:
    """Turn ``apk list -u`` output into structured rows.

    An upgradable line has the form::

        busybox-1.36.1-r29 x86_64 {busybox} (GPL-2.0-only) [upgradable from: busybox-1.36.1-r28]

    apk fuses name and version into the leading token. The badge and
    the notification only need a count plus a representative sample, so
    this parser stays tolerant: the raw leading token is reported as
    both ``name`` and ``latest``, and the text after ``from:`` as
    ``current``. Every row carries ``security=False``, since no
    security flag is derived from this output.
    """
    rows: list[dict] = []
    for raw in text.splitlines():
        entry = raw.strip()
        if "[upgradable" not in entry:
            continue
        token = entry.split(" ", 1)[0]
        _, marker, previous = entry.partition("from:")
        installed = previous.strip().rstrip("]").strip() if marker else ""
        rows.append({
            "name": token,
            "current": installed,
            "latest": token,
            "security": False,
        })
    return rows
def _run_pct_pkg_listing(vmid: str, cmd: str) -> tuple[bool, str, str]:
    """Run a package-listing command inside ``vmid`` via ``pct exec``.

    ``cmd`` is executed by ``sh -c`` inside the container with
    ``LC_ALL=C`` exported first: the apt/apk parsers key on the English
    ``[upgradable from: ...]`` marker, so the listing must not be
    localised by whatever locale the CT's shell happens to carry.

    Returns ``(ok, stdout, error_message)``. On success ``error_message``
    is empty; on failure ``stdout`` is empty and ``error_message`` is a
    short description, capped at 200 characters. It is never blank.
    Centralises the timeout and stderr handling so apt/apk callers stay
    symmetric.
    """
    script = f"export LC_ALL=C; {cmd}"
    try:
        r = subprocess.run(
            [_PCT_BIN, "exec", vmid, "--", "sh", "-c", script],
            capture_output=True, text=True,
            timeout=_LXC_EXEC_TIMEOUT_SEC,
        )
    except subprocess.TimeoutExpired:
        # Name the tool in the message; tolerate an empty command so
        # the handler itself cannot raise.
        words = cmd.split()
        tool = words[0] if words else "package"
        return False, "", f"{tool} listing timed out"
    except (FileNotFoundError, OSError) as e:
        return False, "", str(e)
    if r.returncode != 0:
        # Callers usually redirect stderr to /dev/null, so fall back to
        # a generic message when nothing but whitespace is left.
        detail = (r.stderr or "").strip() or "package listing failed"
        return False, "", detail[:200]
    return True, r.stdout, ""
def _check_lxc_updates(entry: dict) -> dict:
    """Report the package updates pending inside one LXC.

    The cached ``_os_family`` of ``entry`` selects the listing command
    and its parser: apt for ``debian`` / ``ubuntu``, apk for
    ``alpine``. Only the CT's existing package metadata is read — no
    ``apt update`` / ``apk update`` is triggered from outside, so the
    user's own refresh cadence (unattended-upgrades, cron) stays in
    charge.

    The result always carries ``available``, ``latest``, ``last_check``
    and ``error``. On success it also carries ``_count``,
    ``_security_count`` and ``_packages`` (first 30 rows). ``latest``
    is a dedup fingerprint built from the count, the security count and
    the first five sorted package names, so an unchanged set of pending
    updates doesn't re-notify while a meaningfully different one does.
    """
    def _failure(message):
        # Shared shape for every "could not check" outcome.
        return {
            "available": False, "latest": None,
            "last_check": _now_iso(), "error": message,
        }

    vmid = entry.get("_vmid")
    family = (entry.get("_os_family") or "").lower()
    if not vmid:
        return _failure("no vmid in entry")

    apt_plan = ("apt list --upgradable 2>/dev/null", _parse_apt_list_upgradable)
    listing_plans = {
        "debian": apt_plan,
        "ubuntu": apt_plan,
        "alpine": ("apk list -u 2>/dev/null", _parse_apk_list_upgradable),
    }
    plan = listing_plans.get(family)
    if plan is None:
        return _failure(f"unsupported family: {family}")

    command, parse = plan
    ok, stdout, err = _run_pct_pkg_listing(vmid, command)
    if not ok:
        return _failure(err)

    packages = parse(stdout)
    count = len(packages)
    sec_count = sum(1 for pkg in packages if pkg.get("security"))
    if count > 0:
        top_names = ",".join(sorted(pkg["name"] for pkg in packages)[:5])
        fingerprint = f"{count}:{sec_count}:{top_names}"
    else:
        fingerprint = None
    return {
        "available": count > 0,
        "latest": fingerprint,
        "last_check": _now_iso(),
        "error": None,
        "_count": count,
        "_security_count": sec_count,
        "_packages": packages[:30],  # cap to keep the registry compact
    }
# Update checkers keyed by a string that matches the `type` field the
# detectors put on their entries (e.g. "lxc"). Each checker takes a
# registry entry dict and returns a result dict.
# NOTE(review): presumably looked up by entry type in
# check_for_updates — confirm against the caller.
_CHECKERS: dict[str, Callable[[dict], dict]] = {
    "oci_app": _check_oci_app,
    "nvidia_xfree86": _check_nvidia_xfree86,
    "lxc": _check_lxc_updates,
}
@@ -562,8 +883,14 @@ def check_for_updates(force: bool = False) -> list[dict]:
}
if result.get("current") and not it.get("current_version"):
it["current_version"] = result["current"]
# Per-checker extras carried through into the persisted
# `update_check` blob. Add new keys here when a future
# checker needs to surface fields beyond available/latest.
# `_count` + `_security_count` were missing originally, so
# the LXC checker's counts dropped on the floor and the
# frontend badge couldn't render.
for extra_key in ("_packages", "_upgrade_kind", "_kernel",
"_kernel_note"):
"_kernel_note", "_count", "_security_count"):
if extra_key in result:
it["update_check"][extra_key] = result[extra_key]
+166 -21
View File
@@ -327,14 +327,27 @@ def is_vzdump_active_on_host() -> bool:
try:
with open(_VZDUMP_ACTIVE_FILE, 'r') as f:
for line in f:
# UPID format: UPID:node:pid:pstart:starttime:type:id:user:
# tasks/active row layout (whitespace separated):
# "<UPID> 1" ← running
# "<UPID> 1 <endtime_hex> <STATUS>" ← finished
# PVE leaves finished rows lingering for hours
# sometimes — without the field-count check below the
# PID-recycling case fires a false positive (an
# unrelated process inherited the old vzdump's PID
# and `os.kill(pid, 0)` succeeds).
if ':vzdump:' not in line:
continue
parts = line.strip().split(':')
if len(parts) < 3:
fields = line.split()
if not fields:
continue
# >2 fields means endtime + status are written → terminated.
if len(fields) > 2:
continue
upid_parts = fields[0].split(':')
if len(upid_parts) < 3:
continue
try:
pid = int(parts[2], 16) # PID in UPID is hex
pid = int(upid_parts[2], 16) # PID in UPID is hex
os.kill(pid, 0)
found = True
break
@@ -1033,21 +1046,28 @@ class JournalWatcher:
else:
resolved = re.sub(r'\d+$', '', raw_device) if raw_device.startswith('sd') else raw_device
# ── Gate 1: SMART must confirm disk failure ──
# If the disk is healthy (PASSED) or we can't verify
# (UNKNOWN / unresolvable ATA port), do NOT notify.
# ── ALWAYS persist the observation, regardless of SMART ──
# The disk_observation_contract is explicit (memory note
# disk-observation-contract): every kernel-surfaced disk
# error must be recorded in disk_observations *even when
# SMART reports PASSED*. Silent errors on a "healthy" disk
# are exactly the early-warning signal the modal histogram
# exists to surface ("324 connection errors on this disk").
# Previously this line lived AFTER a `return` gate keyed on
# smart_health != 'FAILED', so the 3162 ata8 errors on
# .1.10 (PASSED SMART) all dropped on the floor instead of
# accumulating in the per-disk audit history.
self._record_disk_io_observation(resolved, msg)
# ── Gate 1: only NOTIFY when SMART reports FAILED ──
# Observation is already saved above. We avoid spamming a
# CRITICAL notification for transient ATA/SCSI noise on
# otherwise-healthy disks — the modal histogram surfaces
# those without paging the user at 3 AM.
smart_health = self._quick_smart_health(resolved)
if smart_health != 'FAILED':
return
# ── Persist observation (before the cooldown gate) ──
# The 24h cooldown below only suppresses RE-notification; the
# per-disk observations history must reflect every genuine
# detection. The DB UPSERT dedups same-signature events via
# occurrence_count, so calling this on every match is safe.
# Aligns with the parallel path in HealthMonitor._check_disks_optimized.
self._record_disk_io_observation(resolved, msg)
# ── Gate 2: 24-hour dedup per device ──
# Check both in-memory cache AND the DB (user dismiss clears DB cooldowns).
# If user dismissed the error, _clear_disk_io_cooldown() removed the DB
@@ -1814,12 +1834,31 @@ class TaskWatcher:
line = line.strip()
if not line:
continue
upid = line.split()[0] if line.split() else line
parts = line.split()
if not parts:
continue
upid = parts[0]
current_upids.add(upid)
if ':vzdump:' in upid:
if ':vzdump:' not in upid:
continue
# PVE writes each line in tasks/active as:
# "<UPID> 1" ← task still running
# "<UPID> 1 <endtime_hex> <STATUS>" ← task already finished
# PVE doesn't always prune finished rows from this
# file (observed on RimegraVE 19/05: 25 OK/error
# entries lingering for hours after job end). Just
# matching ':vzdump:' kept `_vzdump_running_since`
# permanently fresh, which then made
# `_is_vzdump_active()` return True forever and
# silenced every vm_start / vm_stop / vm_shutdown
# via the _BACKUP_NOISE filter. Only treat the row
# as a live vzdump when no end-time / status has
# been written yet (≤ 2 fields: UPID + version).
if len(parts) <= 2:
found_vzdump = True
# Keep _vzdump_running_since fresh as long as vzdump is in active
if found_vzdump:
self._vzdump_running_since = time.time()
@@ -2175,6 +2214,16 @@ class PollingCollector:
# has an update".
self._last_managed_check = 0
self._notified_managed_updates: dict[str, str] = {}
# LXC notifications are grouped — one event per polling cycle
# covering every running Debian/Ubuntu CT with pending apt
# updates. The fingerprint encodes the per-CT state so a stable
# batch doesn't re-notify while a meaningful change does.
self._notified_lxc_batch: str | None = None
# Track previous state of the LXC-updates notification toggle
# so a user enabling it post-startup bypasses the 24h gate
# ONCE — the next polling cycle runs a fresh detection without
# waiting up to a day. Cleared after the forced run completes.
self._lxc_was_enabled: bool = False
# Track notified ProxMenux versions to avoid duplicates
self._notified_proxmenux_version: str | None = None
self._notified_proxmenux_beta_version: str | None = None
@@ -3101,7 +3150,24 @@ class PollingCollector:
NVIDIA driver → ``nvidia_driver_update_available``, etc.).
"""
now = time.time()
if now - self._last_managed_check < self.UPDATE_CHECK_INTERVAL:
# Detect OFF→ON transition of the LXC update toggle. Without
# this, the first polling cycle after service start always sets
# the 24h gate — so a user who enables the toggle later (which
# is the normal flow, since the toggle defaults to OFF) would
# have to wait up to 24h or restart the service before the
# detector ran. A one-shot bypass on the transition fixes that
# without weakening the 24h cadence in steady state.
try:
import managed_installs as _mi
lxc_enabled_now = _mi._lxc_updates_notification_enabled()
except Exception:
lxc_enabled_now = False
lxc_just_enabled = lxc_enabled_now and not self._lxc_was_enabled
self._lxc_was_enabled = lxc_enabled_now
if (not lxc_just_enabled
and now - self._last_managed_check < self.UPDATE_CHECK_INTERVAL):
return
self._last_managed_check = now
@@ -3117,8 +3183,15 @@ class PollingCollector:
print(f"[PollingCollector] managed_installs update run failed: {e}")
return
# Split LXC updates out of the per-item event stream — they get
# one grouped notification per cycle instead of one per CT, to
# avoid spamming the user when 15 CTs have pending updates the
# same day. Non-LXC types keep their existing per-item flow.
lxc_updates = [u for u in updates if u.get('type') == 'lxc']
other_updates = [u for u in updates if u.get('type') != 'lxc']
seen_ids: set[str] = set()
for item in updates:
for item in other_updates:
item_id = item.get('id', '')
if not item_id:
continue
@@ -3143,6 +3216,17 @@ class PollingCollector:
entity_id=f'managed_{item_id}',
))
# LXC: emit one grouped event with all CTs that have pending
# updates. The batch fingerprint is recomputed every cycle and
# compared with the last notified one — if the set of CTs or
# their per-CT fingerprints changed, we notify again.
if lxc_updates:
self._emit_lxc_updates_batch(lxc_updates)
else:
# Empty batch — clear the dedup so a fresh batch later fires
# a new notification even with the same CTs/versions.
self._notified_lxc_batch = None
# Forget items that no longer have an update available. If
# the user installs the update and then a later release lands,
# the dedup state is already cleared so the next notification
@@ -3159,6 +3243,67 @@ class PollingCollector:
if stale_id not in active_with_update:
self._notified_managed_updates.pop(stale_id, None)
def _emit_lxc_updates_batch(self, items: list[dict]) -> None:
    """Queue one grouped ``lxc_updates_available`` event for a batch of CTs.

    Args:
        items: registry items of type ``lxc`` that currently report a
            pending update. Each item is read for ``id``, ``name``,
            ``_vmid`` and ``update_check`` (``_count``,
            ``_security_count``, ``latest``).

    The batch fingerprint is the ``|``-joined list of ``<id>=<latest>``
    pairs in id order. If it equals ``self._notified_lxc_batch`` nothing
    is queued; otherwise the fingerprint is remembered and one INFO event
    is put on ``self._queue``. Any change in the set of CTs, or in a
    CT's ``latest`` value, therefore produces a new notification.
    """
    # Local import keeps this block self-contained.
    import hashlib

    # Stable order so the fingerprint is deterministic. ``or ''`` guards
    # against an explicit ``None`` id, which would make sorting raise.
    items_sorted = sorted(items, key=lambda x: x.get('id') or '')

    ct_lines: list[str] = []
    per_ct_fps: list[str] = []
    total_packages = 0
    total_security = 0
    for idx, it in enumerate(items_sorted):
        update = it.get('update_check', {}) or {}
        item_id = it.get('id') or ''
        count = int(update.get('_count') or 0)
        sec_count = int(update.get('_security_count') or 0)
        total_packages += count
        total_security += sec_count
        vmid = it.get('_vmid') or item_id.removeprefix('lxc:') or '?'
        name = it.get('name') or f'CT {vmid}'
        # Each CT renders across two/three lines so the counts don't
        # compete with the CT label on the same row. A blank line goes
        # before every CT except the first, so entries are separated
        # without a trailing blank at the end.
        if idx > 0:
            ct_lines.append("")
        ct_lines.append(f"🏷️ CT {vmid} ({name}):")
        ct_lines.append(f" 📦 {count} update(s)")
        if sec_count:
            ct_lines.append(f" 🔒 {sec_count} security")
        per_ct_fps.append(f"{item_id}={update.get('latest', '')}")

    batch_fingerprint = '|'.join(per_ct_fps)
    if self._notified_lxc_batch == batch_fingerprint:
        return  # same batch as last time — silent
    self._notified_lxc_batch = batch_fingerprint

    # Built-in hash() of a str is salted per interpreter process, so it
    # yields a different number after every restart and cannot serve as
    # a stable cooldown key. Derive the key from a real digest instead,
    # keeping the previous "up to 10 decimal digits" shape.
    digest = hashlib.sha256(batch_fingerprint.encode('utf-8')).digest()
    batch_key = int.from_bytes(digest[:8], 'big') % 10**10

    data = {
        'hostname': self._hostname,
        'count': len(items_sorted),
        'total_packages': total_packages,
        'security_count': total_security,
        'ct_list': '\n'.join(ct_lines),
    }
    self._queue.put(NotificationEvent(
        'lxc_updates_available', 'INFO', data,
        source='polling',
        entity='node',
        # Distinct batches get distinct cooldown keys.
        entity_id=f'lxc_batch_{batch_key}',
    ))
def _build_managed_install_event(self, item: dict) -> tuple[str, dict]:
"""Translate a registry item into a (event_type, template_data)
pair. Per-type bodies live here so the registry stays
+168 -15
View File
@@ -973,7 +973,9 @@ class NotificationManager:
cleanup_interval = 3600 # Cleanup cooldowns every hour
flush_interval = 5 # Flush aggregation buckets every 5s
digest_check_interval = 60 # Re-evaluate digest schedule every minute
last_quiet_check = 0.0
quiet_check_interval = 60 # Re-evaluate per-channel quiet window every minute
while self._running:
try:
event = self._event_queue.get(timeout=2)
@@ -990,17 +992,36 @@ class NotificationManager:
if now_mono - last_digest_check > digest_check_interval:
self._maybe_flush_digests()
last_digest_check = now_mono
# Quiet Hours close → flush buffered sub-CRITICAL events
# as a single grouped summary. Has to run even when the
# queue is idle, otherwise users who don't generate any
# events post-window would never see their summary.
if now_mono - last_quiet_check > quiet_check_interval:
self._maybe_flush_quiet_hours()
last_quiet_check = now_mono
continue
try:
self._process_event(event)
except Exception as e:
print(f"[NotificationManager] Dispatch error: {e}")
# Also flush aggregation after each event
if time.monotonic() - last_flush > flush_interval:
now_mono = time.monotonic()
if now_mono - last_flush > flush_interval:
self._flush_aggregation()
last_flush = time.monotonic()
last_flush = now_mono
# Re-check digest schedule after each event too. The idle-only
# check above misses the daily flush window when the queue stays
# busy through the digest_time minute (rare but real: a burst of
# journal events arriving at the same minute as the target). The
# 23h guard inside _maybe_flush_digests keeps it idempotent.
if now_mono - last_digest_check > digest_check_interval:
self._maybe_flush_digests()
last_digest_check = now_mono
if now_mono - last_quiet_check > quiet_check_interval:
self._maybe_flush_quiet_hours()
last_quiet_check = now_mono
def _flush_aggregation(self):
"""Flush expired aggregation buckets and dispatch summaries."""
@@ -1171,20 +1192,20 @@ class NotificationManager:
# ── Per-channel quiet hours ──
# The user marks a window (e.g. 22:00 → 06:00) during which only
# CRITICAL events reach this channel. Anything below CRITICAL is
# dropped silently — not buffered, not retried — because the
# whole point is "don't wake me up at 3 AM unless the disk
# exploded". CRITICAL always wins. The window is configured
# per-channel; same channel can have different rules from
# another. See _in_quiet_hours() for boundary semantics.
# CRITICAL events reach this channel. Sub-CRITICAL events are
# **buffered** to `quiet_pending` and flushed as a SINGLE grouped
# summary when the window closes — so the user doesn't get
# paged at 3 AM but also doesn't lose 8h of activity overnight.
# CRITICAL always wins. The window is configured per-channel.
# See _in_quiet_hours() for boundary semantics.
# `_dispatch_to_channels` does NOT receive the NotificationEvent
# object — only the rendered primitives. Using `event.X` here
# raised `NameError: name 'event' is not defined` for every
# event passing through (silenced by the dispatch loop's broad
# except → no notifications EVER delivered after Quiet Hours +
# Daily Digest were merged). All community-reported "stopped
# receiving notifications after update" cases trace back here.
# raised `NameError` for every event passing through, silenced
# by the dispatch loop's broad except → no notifications EVER
# delivered after Quiet Hours + Daily Digest were merged.
if severity != 'CRITICAL' and self._in_quiet_hours(ch_name):
self._buffer_quiet_event(ch_name, event_type, event_group,
severity, title, body)
continue
# ── Per-channel daily digest ──
@@ -1537,6 +1558,126 @@ class NotificationManager:
)
return '\n'.join(lines).rstrip() + '\n'
# ─── Quiet Hours buffer + flush ────────────────────────────
# Reused infrastructure: `quiet_pending` table (created in
# health_persistence) has the same shape as `digest_pending`, so
# `_compose_digest_body` renders the summary unchanged. What
# differs is the lifecycle — quiet_pending flushes when each
# channel's window CLOSES, not at a fixed daily time. We track
# that transition via `self._was_in_quiet_hours[ch_name]`.
def _buffer_quiet_event(self, ch_name: str, event_type: str,
                        event_group: str, severity: str,
                        title: str, body: str) -> None:
    """Append one event to the ``quiet_pending`` table for ``ch_name``.

    Args:
        ch_name: channel the event was withheld from.
        event_type: event type identifier, stored as-is.
        event_group: event group identifier, stored as-is.
        severity: severity label, stored as-is.
        title: rendered notification title.
        body: rendered notification body.

    The row is stamped with the current Unix time in whole seconds.
    Best-effort: any failure is printed and swallowed so a buffering
    problem never propagates to the caller.
    """
    # Local import keeps this block self-contained.
    from contextlib import closing

    try:
        # closing() guarantees the handle is released even when a
        # statement raises; sqlite3's own context manager only scopes
        # the transaction and does not close the connection.
        with closing(sqlite3.connect(str(DB_PATH), timeout=10)) as conn:
            conn.execute('PRAGMA journal_mode=WAL')
            conn.execute('PRAGMA busy_timeout=5000')
            conn.execute(
                'INSERT INTO quiet_pending '
                '(channel, event_type, event_group, severity, ts, title, body) '
                'VALUES (?, ?, ?, ?, ?, ?, ?)',
                (ch_name, event_type, event_group, severity,
                 int(time.time()), title, body),
            )
            conn.commit()
    except Exception as e:
        print(f"[NotificationManager] quiet_pending write failed: {e}")
def _maybe_flush_quiet_hours(self) -> None:
    """Flush each channel's quiet-hours buffer when its window is over.

    Intended to be called periodically. For every entry in
    ``self._channels`` the current ``_in_quiet_hours`` result is compared
    with the value remembered in ``self._was_in_quiet_hours`` (an
    in-memory dict, created lazily here):

    * currently inside the window: nothing is sent.
    * previously inside, now outside: close transition, so
      ``_flush_quiet_for_channel`` is called.
    * no previous state (first call after start) and currently outside:
      ``_flush_quiet_for_channel`` is also called. This drains rows left
      over from a window that closed while the process was down or
      restarting; otherwise they would sit unseen until the next
      window's close. A first call that lands inside the window stays
      silent, so a restart mid-window never produces a premature
      summary.
    * previously outside, still outside: nothing to do.

    A failure while flushing one channel is printed and does not stop
    the remaining channels from being processed.
    """
    if not hasattr(self, '_was_in_quiet_hours'):
        self._was_in_quiet_hours = {}
    # Iterate a snapshot so the mapping may change while we work.
    for ch_name, channel in list(self._channels.items()):
        currently_in = self._in_quiet_hours(ch_name)
        previously_in = self._was_in_quiet_hours.get(ch_name)
        self._was_in_quiet_hours[ch_name] = currently_in
        # Still in the window → events keep accumulating.
        if currently_in:
            continue
        # Known to be outside on the previous check as well → steady
        # state, no transition.
        if previously_in is not None and not previously_in:
            continue
        # Either a close transition (was in, now out) or the seed run
        # outside the window (leftover drain).
        try:
            self._flush_quiet_for_channel(ch_name, channel)
        except Exception as e:
            print(f"[NotificationManager] quiet flush failed for "
                  f"{ch_name}: {e}")
def _flush_quiet_for_channel(self, ch_name: str, channel: Any) -> None:
    """Send one summary of the rows buffered for ``ch_name``, then drop them.

    Args:
        ch_name: channel name used as the ``quiet_pending.channel`` filter.
        channel: object whose ``send(title, body, severity=..., data=...)``
            delivers the summary.

    Steps: read every buffered row for the channel (oldest first),
    render them with ``self._compose_digest_body``, send a single INFO
    notification, and delete the rows only after the send went through.
    Database and transport errors are printed and end the flush early,
    leaving the rows in place for a later attempt. Errors raised by
    ``_hostname`` or ``_compose_digest_body`` propagate to the caller.
    """
    # Local import keeps this block self-contained.
    from contextlib import closing

    try:
        # closing() releases the handle even if the query raises.
        with closing(sqlite3.connect(str(DB_PATH), timeout=10)) as conn:
            conn.execute('PRAGMA journal_mode=WAL')
            rows = conn.execute(
                'SELECT id, event_type, event_group, ts, title, body '
                'FROM quiet_pending WHERE channel = ? ORDER BY ts ASC',
                (ch_name,),
            ).fetchall()
    except Exception as e:
        print(f"[NotificationManager] quiet read failed for {ch_name}: {e}")
        return
    if not rows:
        return

    host = _hostname(self._config)
    summary_title = (
        f"{host}: {len(rows)} events buffered during Quiet Hours"
    )
    summary_body = self._compose_digest_body(rows)
    try:
        result = channel.send(summary_title, summary_body, severity='INFO',
                              data={'_quiet_hours_summary': True, '_count': len(rows)})
    except Exception as e:
        print(f"[NotificationManager] quiet send failed for "
              f"{ch_name}: {e}")
        return
    # NOTE(review): assumes a channel may report failure through a
    # returned dict carrying 'success': False instead of raising —
    # confirm against the channel implementations. Any other return
    # value is treated as a successful send, as before.
    if isinstance(result, dict) and result.get('success') is False:
        print(f"[NotificationManager] quiet send failed for "
              f"{ch_name}: {result.get('error', 'unknown error')}")
        return

    # Only drop the rows after a successful send so a transient
    # transport failure (Telegram timeout, SMTP outage) doesn't
    # lose the user's overnight context.
    try:
        with closing(sqlite3.connect(str(DB_PATH), timeout=10)) as conn:
            conn.execute('PRAGMA journal_mode=WAL')
            # One bound parameter per statement: a single
            # "IN (?,?,...)" with one placeholder per row can exceed
            # SQLite's bound-parameter limit after a busy night, which
            # would make the cleanup fail and the rows be sent again.
            conn.executemany(
                'DELETE FROM quiet_pending WHERE id = ?',
                [(row[0],) for row in rows],
            )
            conn.commit()
    except Exception as e:
        print(f"[NotificationManager] quiet cleanup failed for "
              f"{ch_name}: {e}")
def _passes_cooldown(self, event: NotificationEvent) -> bool:
"""Check if the event passes cooldown rules WITHOUT stamping.
@@ -2315,6 +2456,18 @@ class NotificationManager:
ch_cfg: Dict[str, Any] = {
'enabled': self._config.get(f'{ch_type}.enabled', 'false') == 'true',
'rich_format': self._config.get(f'{ch_type}.rich_format', 'false') == 'true',
# Quiet Hours + Daily Digest live in the same per-channel
# namespace but weren't being projected back to the UI —
# the toggles round-tripped through POST but the GET only
# returned `enabled`/`rich_format` plus channel-specific
# config_keys, so after a reload the user saw the toggle
# off even though the DB had it on. Reported on .1.10
# along with the post-window delivery bug.
'quiet_enabled': self._config.get(f'{ch_type}.quiet_enabled', 'false') == 'true',
'quiet_start': self._config.get(f'{ch_type}.quiet_start', '22:00'),
'quiet_end': self._config.get(f'{ch_type}.quiet_end', '06:00'),
'digest_enabled': self._config.get(f'{ch_type}.digest_enabled', 'false') == 'true',
'digest_time': self._config.get(f'{ch_type}.digest_time', '09:00'),
}
for config_key in info['config_keys']:
full_key = f'{ch_type}.{config_key}'
+41 -10
View File
@@ -484,6 +484,23 @@ TEMPLATES = {
},
# ── VM / CT events ──
# Phase 1: apt-based update detection inside running Debian/Ubuntu
# LXCs. Grouped — one notification per cycle covers every CT with
# pending updates. Opt-in (default_enabled=False) because the check
# uses `pct exec` to inspect package state inside the user's CTs.
# Phase 2 (community-scripts metadata) will extend this without
# changing the event type.
'lxc_updates_available': {
'title': '{hostname}: {count} LXC(s) with package updates available',
'body': (
'📊 {count} LXC(s) with pending package updates '
'(📦 {total_packages} total, 🔒 {security_count} security):\n\n'
'{ct_list}'
),
'label': 'LXC updates available (experimental)',
'group': 'vm_ct',
'default_enabled': False,
},
'vm_start': {
'title': '{hostname}: VM {vmname} ({vmid}) started',
'body': 'Virtual machine {vmname} (ID: {vmid}) is now running.',
@@ -1109,8 +1126,8 @@ TEMPLATES = {
'title': '{hostname}: {count} ProxMenux optimization update(s) available',
'body': (
'{count} optimization update(s) detected on this host.\n\n'
'Tools:\n{tool_list}\n\n'
'How to apply:\n'
'🛠️ Tools:\n{tool_list}\n\n'
'💡 How to apply:\n'
' • ProxMenux Monitor → Settings → ProxMenux Optimizations\n'
' • Or run the post-install menu (option 2) → "Apply available updates"'
),
@@ -1129,12 +1146,12 @@ TEMPLATES = {
'secure_gateway_update_available': {
'title': '{hostname}: {app_name} update available — v{latest_version}',
'body': (
'{app_name} (managed by ProxMenux) has {package_count} package update(s) '
'{app_name} (managed by ProxMenux) has 📦 {package_count} package update(s) '
'pending in its container.\n'
'Current Tailscale: v{current_version} → Latest: v{latest_version}\n\n'
'Open ProxMenux Monitor > Settings > Secure Gateway and click '
'🔹 Current Tailscale: v{current_version} 🟢 Latest: v{latest_version}\n\n'
'💡 Open ProxMenux Monitor > Settings > Secure Gateway and click '
'"Update" to apply.\n\n'
'Packages:\n{package_list}'
'🗂️ Packages:\n{package_list}'
),
'label': 'Secure Gateway update available',
'group': 'updates',
@@ -1147,10 +1164,10 @@ TEMPLATES = {
'title': '{hostname}: NVIDIA driver update available — v{latest_version}',
'body': (
'A newer NVIDIA driver compatible with kernel {kernel} is available.\n'
'Currently installed: v{current_version}\n'
'Latest available: v{latest_version}\n\n'
'🔹 Currently installed: v{current_version}\n'
'🟢 Latest available: v{latest_version}\n\n'
'{upgrade_reason}\n\n'
'To reinstall:\n'
'💡 To reinstall:\n'
' • From the ProxMenux post-install menu: {menu_label}\n\n'
'Reinstalling rebuilds the DKMS module against the running kernel and '
'requires a reboot to load the new driver.'
@@ -1465,6 +1482,7 @@ CATEGORY_EMOJI = {
# Event-specific title icons (override category default when present)
EVENT_EMOJI = {
# VM / CT
'lxc_updates_available': '\U0001F4E6', # package - pending CT updates
'vm_start': '\u25B6\uFE0F', # play button
'vm_start_warning': '\u26A0\uFE0F', # warning sign - started with warnings
'vm_stop': '\u23F9\uFE0F', # stop button
@@ -1768,6 +1786,14 @@ Your job: translate alerts into {language} and enrich them with context when pro
═══ ABSOLUTE CONSTRAINTS (NO EXCEPTIONS) ═══
- NO HALLUCINATIONS: Do not invent causes, solutions, or facts not present in the provided data
- NO SPECULATION: If something is unclear, state what IS known, not what MIGHT be
- NO FILLER LINES: Every output line must derive from the input message, the journal context,
or the known-error database. NEVER add generic statements like "Event detected during normal
operation", "No further issues", or padding lines just to fill space. If a field has no evidence,
OMIT it — a shorter output is always better than invented content.
- 📝 Log lines: ONLY include when the journal context contains an actual relevant log line.
Convey its meaning faithfully, do not invent one. If no relevant log exists, OMIT the 📝 line.
- ⏱️ Duration/timing lines: ONLY for backup/migration durations explicitly present in the input.
NEVER use ⏱️ for vague "event detected at X" filler.
- NO CONVERSATIONAL TEXT: Never write "Here is...", "I've translated...", "Let me explain..."
- ONLY use information from: the message, journal context, and known error database (if provided)
@@ -1884,7 +1910,12 @@ Your goal is to maintain the original structure of the message while using emoji
ESPECIALLY when adding new context, formatting technical data, or writing tips.
RULES:
1. PRESERVE BASE STRUCTURE: Respect the original fields and layout provided in the input message.
1. PRESERVE BASE STRUCTURE AND INPUT EMOJIS: Respect the original fields and layout provided in
the input message. **CRITICAL: every emoji already present in the input (📊, 🏷️, 📦, 🔒, 🛠️,
💡, ⚠️, ✨, 🌐, 🔥, 💧, 📝, ⏱️, etc.) MUST appear in the output, in the same position relative
to its label.** Translating the surrounding words is fine; deleting or relocating the emoji is
not. You may add additional context-appropriate emojis from BODY EMOJIS below, but never strip
the ones the template already provides.
2. ENHANCE WITH ICONS: Place emojis at the START of a line to identify the data type.
3. NEW CONTEXT: When adding journal info, SMART data, or known errors, use appropriate icons to make it readable.
4. NO SPAM: Do not put emojis in the middle or end of sentences. Use 1-3 emojis at START of lines where they add clarity. Combine when meaningful (💾✅ backup ok).