diff --git a/AppImage/components/notification-settings.tsx b/AppImage/components/notification-settings.tsx index 406677fb..f2c74852 100644 --- a/AppImage/components/notification-settings.tsx +++ b/AppImage/components/notification-settings.tsx @@ -16,7 +16,7 @@ import { AlertTriangle, Info, Settings2, Zap, Eye, EyeOff, Trash2, ChevronDown, ChevronUp, ChevronRight, TestTube2, Mail, Webhook, Copy, Server, Shield, ExternalLink, RefreshCw, Download, Upload, - Cloud, Brain, Globe, MessageSquareText, Sparkles, Pencil, Save, RotateCcw + Cloud, Brain, Globe, MessageSquareText, Sparkles, Pencil, Save, RotateCcw, Lightbulb } from "lucide-react" interface ChannelConfig { @@ -67,6 +67,7 @@ interface NotificationConfig { ai_openai_base_url: string ai_prompt_mode: string // 'default' or 'custom' ai_custom_prompt: string // User's custom prompt + ai_allow_suggestions: string | boolean // Enable AI suggestions (experimental) channel_ai_detail: Record hostname: string webhook_secret: string @@ -252,6 +253,7 @@ const DEFAULT_CONFIG: NotificationConfig = { ai_openai_base_url: "", ai_prompt_mode: "default", ai_custom_prompt: "", + ai_allow_suggestions: "false", channel_ai_detail: { telegram: "brief", gotify: "brief", @@ -321,9 +323,10 @@ export function NotificationSettings() { openai: "", openrouter: "", }, - ai_prompt_mode: data.config.ai_prompt_mode || "default", - ai_custom_prompt: data.config.ai_custom_prompt || "", - } + ai_prompt_mode: data.config.ai_prompt_mode || "default", + ai_custom_prompt: data.config.ai_custom_prompt || "", + ai_allow_suggestions: data.config.ai_allow_suggestions || "false", + } // If ai_model exists but ai_models doesn't have it, save it if (configWithDefaults.ai_model && !configWithDefaults.ai_models[configWithDefaults.ai_provider]) { configWithDefaults.ai_models[configWithDefaults.ai_provider] = configWithDefaults.ai_model @@ -545,8 +548,9 @@ export function NotificationSettings() { ai_language: cfg.ai_language, ai_ollama_url: cfg.ai_ollama_url, ai_openai_base_url: cfg.ai_openai_base_url, - ai_prompt_mode: cfg.ai_prompt_mode || "default", - ai_custom_prompt: cfg.ai_custom_prompt || "", + ai_prompt_mode: cfg.ai_prompt_mode || "default", + ai_custom_prompt: cfg.ai_custom_prompt || "", + ai_allow_suggestions: cfg.ai_allow_suggestions || "false", hostname: cfg.hostname, webhook_secret: cfg.webhook_secret, webhook_allowed_ips: cfg.webhook_allowed_ips, @@ -1846,6 +1850,26 @@ export function NotificationSettings() {

+ + {/* Experimental: AI Suggestions toggle */} +
+
+
+ + + BETA +
+ updateConfig(p => ({ ...p, ai_allow_suggestions: v ? "true" : "false" }))} + disabled={!editMode} + /> +
+

+ When enabled, AI may add brief troubleshooting tips based on journal log context. + Tips are factual and based only on what the logs show. +

+
)} diff --git a/AppImage/scripts/flask_server.py b/AppImage/scripts/flask_server.py index 83ace29f..dfeca440 100644 --- a/AppImage/scripts/flask_server.py +++ b/AppImage/scripts/flask_server.py @@ -848,6 +848,91 @@ def get_current_latency(target='gateway'): return {'target': target, 'latency_avg': None, 'status': 'error'} +def _capture_health_journal_context(categories: list, reason: str = '') -> str: + """Capture journal context relevant to health issues. + + Maps health categories to specific journal keywords so the AI + receives relevant system logs for diagnosis. + + Args: + categories: List of health category keys (e.g., ['storage', 'network']) + reason: The reason string from health check (used to extract more keywords) + + Returns: + Filtered journal output as string + """ + import subprocess + import re + + # Map health categories to relevant journal keywords + CATEGORY_KEYWORDS = { + 'storage': ['mount', 'nfs', 'cifs', 'smb', 'zfs', 'lvm', 'disk', 'nvme', + 'sata', 'ata', 'I/O error', 'read error', 'write error', + 'filesystem', 'ext4', 'xfs', 'btrfs', 'pbs', 'datastore'], + 'disks': ['smartd', 'smart', 'ata', 'sata', 'nvme', 'disk', 'I/O error', + 'bad sector', 'reallocated', 'pending sector', 'uncorrectable'], + 'network': ['bond', 'bridge', 'vmbr', 'eth', 'network', 'link down', + 'carrier', 'no route', 'unreachable', 'timeout', 'connection'], + 'services': ['pveproxy', 'pvedaemon', 'pvestatd', 'corosync', 'ceph', + 'systemd', 'failed', 'service', 'unit', 'start', 'stop'], + 'vms': ['qemu', 'kvm', 'lxc', 'vzdump', 'qm', 'pct', 'guest agent', + 'qemu-ga', 'migration', 'snapshot'], + 'memory': ['oom', 'out of memory', 'killed process', 'swap', 'memory'], + 'cpu': ['thermal', 'temperature', 'throttl', 'mce', 'machine check'], + 'updates': ['apt', 'dpkg', 'upgrade', 'update', 'package'], + 'certificates': ['ssl', 'certificate', 'cert', 'expired', 'pve-ssl'], + 'logs': ['rsyslog', 'journal', 'log rotation'], + 'latency': ['ping', 'latency', 'timeout', 'unreachable', 'network'], + } + + # Collect keywords for all degraded categories + keywords = set() + for cat in categories: + cat_lower = cat.lower() + if cat_lower in CATEGORY_KEYWORDS: + keywords.update(CATEGORY_KEYWORDS[cat_lower]) + + # Extract additional keywords from reason (IPs, hostnames, storage names) + if reason: + # Find IP addresses + ips = re.findall(r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b', reason) + keywords.update(ips) + + # Find storage/service names (words in quotes or after colon) + quoted = re.findall(r"'([^']+)'|\"([^\"]+)\"", reason) + for match in quoted: + keywords.update(w for w in match if w) + + if not keywords: + return "" + + try: + # Build grep pattern + pattern = "|".join(re.escape(k) for k in keywords if k) + if not pattern: + return "" + + # Capture recent journal entries matching keywords + cmd = ( + f"journalctl --since='10 minutes ago' --no-pager -n 500 2>/dev/null | " + f"grep -iE '{pattern}' | tail -n 30" + ) + + result = subprocess.run( + cmd, + shell=True, + capture_output=True, + text=True, + timeout=5 + ) + + if result.returncode == 0 and result.stdout.strip(): + return result.stdout.strip() + return "" + except Exception: + return "" + + def _health_collector_loop(): """Background thread: run full health checks every 5 minutes. Keeps the health cache always fresh and records events/errors in the DB. @@ -942,6 +1027,7 @@ def _health_collector_loop(): if not skip_notification: degraded.append({ + 'cat_key': cat_key, # Original key for journal capture 'category': cat_name, 'status': cur_status, 'reason': reason, @@ -956,6 +1042,12 @@ def _health_collector_loop(): import socket as _sock hostname = _sock.gethostname() + # Capture journal context for AI enrichment + # Extract category keys and reasons for keyword matching + cat_keys = [d.get('cat_key', d.get('category', '').lower()) for d in degraded] + all_reasons = ' '.join(d.get('reason', '') for d in degraded) + journal_context = _capture_health_journal_context(cat_keys, all_reasons) + if len(degraded) == 1: d = degraded[0] title = f"{hostname}: Health {d['status']} - {d['category']}" @@ -977,7 +1069,11 @@ def _health_collector_loop(): severity=severity, title=title, message=body, - data={'hostname': hostname, 'count': str(len(degraded))}, + data={ + 'hostname': hostname, + 'count': str(len(degraded)), + '_journal_context': journal_context, # For AI enrichment + }, source='health_monitor', ) except Exception as e: diff --git a/AppImage/scripts/health_monitor.py b/AppImage/scripts/health_monitor.py index 5fa59dd9..3b398d92 100644 --- a/AppImage/scripts/health_monitor.py +++ b/AppImage/scripts/health_monitor.py @@ -175,7 +175,7 @@ class HealthMonitor: r'proxmenux-monitor.*failed at step exec', r'proxmenux-monitor\.appimage', - # ── PVE scheduler operational noise ── + # ─��� PVE scheduler operational noise ── # pvescheduler emits "could not update job state" every minute # when a scheduled job reference is stale. This is cosmetic, # not a system problem. @@ -2118,7 +2118,7 @@ class HealthMonitor: except Exception: pass - # ── Record disk observation (always, even if transient) ── + # ── Record disk observation (always, even if transient) ���─ # Signature must be stable across cycles: strip volatile # data (hex values, counts, timestamps) to dedup properly. # e.g. "ata8.00: exception Emask 0x1 SAct 0xc1000000" @@ -4580,10 +4580,18 @@ class HealthMonitor: Returns None if the module is not available. Respects storage exclusions: excluded storages are reported as INFO, not CRITICAL. + + During startup grace period (first 5 minutes after boot): + - Storage errors are reported as INFO instead of CRITICAL + - No persistent errors are recorded + This prevents false positives when NFS/PBS/remote storage is still mounting. """ if not PROXMOX_STORAGE_AVAILABLE: return None + # Check if we're in startup grace period + in_grace_period = _is_startup_health_grace() + try: # Reload configuration to ensure we have the latest storage definitions proxmox_storage_monitor.reload_configuration() @@ -4649,19 +4657,21 @@ class HealthMonitor: else: reason = f"Storage '{storage_name}' has status: {status_detail}." - # Record a persistent CRITICAL error for each unavailable storage - health_persistence.record_error( - error_key=error_key, - category='storage', - severity='CRITICAL', - reason=reason, - details={ - 'storage_name': storage_name, - 'storage_type': storage.get('type', 'unknown'), - 'status_detail': status_detail, - 'dismissable': False - } - ) + # During grace period, don't record persistent errors (storage may still be mounting) + # After grace period, record as CRITICAL + if not in_grace_period: + health_persistence.record_error( + error_key=error_key, + category='storage', + severity='CRITICAL', + reason=reason, + details={ + 'storage_name': storage_name, + 'storage_type': storage.get('type', 'unknown'), + 'status_detail': status_detail, + 'dismissable': False + } + ) # Add to details dict with dismissable false for frontend storage_details[storage_name] = { @@ -4672,13 +4682,22 @@ class HealthMonitor: } # Build checks from storage_details + # During grace period, report as INFO instead of CRITICAL checks = {} for st_name, st_info in storage_details.items(): - checks[st_name] = { - 'status': 'CRITICAL', - 'detail': st_info.get('reason', 'Unavailable'), - 'dismissable': False - } + if in_grace_period: + checks[st_name] = { + 'status': 'INFO', + 'detail': f"[Startup] {st_info.get('reason', 'Unavailable')} (checking...)", + 'dismissable': False, + 'grace_period': True + } + else: + checks[st_name] = { + 'status': 'CRITICAL', + 'detail': st_info.get('reason', 'Unavailable'), + 'dismissable': False + } # Add excluded unavailable storages as INFO (not as errors) for st in excluded_unavailable: @@ -4702,12 +4721,22 @@ class HealthMonitor: # Determine overall status based on non-excluded issues only if real_unavailable: - return { - 'status': 'CRITICAL', - 'reason': f'{len(real_unavailable)} Proxmox storage(s) unavailable', - 'details': storage_details, - 'checks': checks - } + # During grace period, return INFO instead of CRITICAL + if in_grace_period: + return { + 'status': 'INFO', + 'reason': f'{len(real_unavailable)} storage(s) not yet available (startup)', + 'details': storage_details, + 'checks': checks, + 'grace_period': True + } + else: + return { + 'status': 'CRITICAL', + 'reason': f'{len(real_unavailable)} Proxmox storage(s) unavailable', + 'details': storage_details, + 'checks': checks + } else: # Only excluded storages are unavailable - this is OK return { diff --git a/AppImage/scripts/health_persistence.py b/AppImage/scripts/health_persistence.py index 77c7a631..70a81170 100644 --- a/AppImage/scripts/health_persistence.py +++ b/AppImage/scripts/health_persistence.py @@ -1093,7 +1093,7 @@ class HealthPersistence: conn.commit() conn.close() - # ─── System Capabilities Cache ────────────────────��────────── + # ─── System Capabilities Cache ─────────────────────────────── def get_capability(self, cap_key: str) -> Optional[str]: """ diff --git a/AppImage/scripts/notification_events.py b/AppImage/scripts/notification_events.py index 642eb9e7..2b3272a8 100644 --- a/AppImage/scripts/notification_events.py +++ b/AppImage/scripts/notification_events.py @@ -79,7 +79,7 @@ class _SharedState: _shared_state = _SharedState() -# ─── Event Object ───────────────────────────────────────────────── +# ─── Event Object ──────────────��────────────────────────────────── class NotificationEvent: """Represents a detected event ready for notification dispatch. @@ -2538,7 +2538,7 @@ class PollingCollector: except Exception as e: print(f"[PollingCollector] AI model check failed: {e}") - # ── Persistence helpers ──────────────────────────────��───── + # ── Persistence helpers ──────────────────────────────────── def _load_last_notified(self): """Load per-error notification timestamps from DB on startup.""" diff --git a/AppImage/scripts/notification_manager.py b/AppImage/scripts/notification_manager.py index 1261bec3..b70e1dcf 100644 --- a/AppImage/scripts/notification_manager.py +++ b/AppImage/scripts/notification_manager.py @@ -763,8 +763,10 @@ class NotificationManager: ch_title, ch_body = title, body # ── Per-channel settings ── + # Email defaults to 'detailed' (technical report), others to 'standard' detail_level_key = f'{ch_name}.ai_detail_level' - detail_level = self._config.get(detail_level_key, 'standard') + default_detail = 'detailed' if ch_name == 'email' else 'standard' + detail_level = self._config.get(detail_level_key, default_detail) rich_key = f'{ch_name}.rich_format' use_rich_format = self._config.get(rich_key, 'false') == 'true' diff --git a/AppImage/scripts/notification_templates.py b/AppImage/scripts/notification_templates.py index 57ee75b8..86c2cef2 100644 --- a/AppImage/scripts/notification_templates.py +++ b/AppImage/scripts/notification_templates.py @@ -1382,241 +1382,146 @@ AI_DETAIL_TOKENS = { 'detailed': 3000, # Complete technical reports with all details } -# System prompt template - informative, no recommendations -AI_SYSTEM_PROMPT = """You are a system notification formatter for ProxMenux Monitor, a Proxmox VE monitoring tool. +# System prompt template - optimized hybrid version +AI_SYSTEM_PROMPT = """You are a notification FORMATTER for ProxMenux Monitor (Proxmox VE). +Your job: translate and reformat alerts into {language}. You are NOT an analyst — do not interpret or diagnose. -Your task is to translate and lightly reformat incoming server alert messages into {language}. +═══ WHAT TO TRANSLATE ═══ +Translate: labels, descriptions, status words, units (GB→Go in French, etc.) +DO NOT translate: hostnames, IPs, paths, VM/CT IDs, device names (/dev/sdX), technical identifiers -═══ CORE ROLE ═══ -You are a formatter, not an analyst. -Translate, clean, and present the message clearly. -Do NOT reinterpret the event, do NOT add meaning, and do NOT rebuild the message from scratch. - -═══ ABSOLUTE RULES ═══ -1. Translate BOTH title and body into {language}. - -2. Translate human-readable text only. - Do NOT translate: - - hostnames - - device paths (/dev/sdX, /dev/nvmeXnX) - - filesystem paths - - IDs, VMIDs, CTIDs, UUIDs - - timestamps, dates, archive names, PBS paths - - version numbers - - technical units (B, KB, MB, GB, TB, KiB, MiB, GiB, TiB, %, ms, s) - -3. Plain text only. - No markdown: no **bold**, no *italic*, no `code`, no headers (#), no markdown lists (- or *). - The bullet character "•" is allowed only where explicitly required. - -4. Tone: factual, concise, technical. - No greetings, no closings, no apologies, no conversational filler. - -5. Do NOT add recommendations, action items, remediation, or suggestions. - -6. Present ONLY the facts already present in the input. - Do NOT invent, assume, explain, soften, or escalate anything. - -7. Do NOT change severity or status meaning. - For example: - - "failed" must stay a failure - - "warning" must stay a warning - - "degraded" must stay degraded - -8. Preserve structure whenever possible. - Keep the same fields, lines, and data already present in the input. - Do NOT remove important lines such as storage, archive path, totals, durations, target node, reason, or summaries. - -9. Reordering must be minimal. - Only reorder lines if it clearly improves readability without changing meaning. - -10. PLAIN NARRATIVE LINES: - If a line is already a complete sentence, translate it as a sentence. - Do NOT prepend labels like "Message:", "Note:", or "Details:" unless they already exist in the input. - -11. Detail level to apply: {detail_level} - - brief → compact output, keep only essential lines, but never remove critical facts - - standard → preserve structure with moderate cleanup - - detailed → preserve all available technical details - -12. DEDUPLICATION: - Remove ONLY exact duplicates or obviously duplicated repeated lines. - Do NOT merge distinct facts just because they look similar. - Do NOT summarize multiple separate events into one. - -13. Keep the "hostname: " prefix in the title. - Translate only the descriptive part. - Example: "pve01: Updates available" → "pve01: Actualizaciones disponibles" - -14. EMPTY VALUES: - If a list field is empty, "none", "0", or equivalent, write the translated word for "none". - Never leave a declared field blank. - -15. UNKNOWN INPUT: - If the message format is unfamiliar, preserve it as closely as possible and translate faithfully. - Do NOT force it into another template. - -═══ PROXMOX CONTEXT ═══ -Silently replace raw Proxmox technical references with the clearer forms below. -Do NOT explain them. Just use the friendly equivalent directly. - -Service / process mappings: -- "pve-container@XXXX.service" → "Container CT XXXX" -- "qemu-server@XXXX.service" → "Virtual Machine VM XXXX" -- "pvesr-XXXX" → "storage replication job for XXXX" -- "vzdump" → "backup process" -- "pveproxy" → "Proxmox web proxy" -- "pvedaemon" → "Proxmox daemon" -- "pvestatd" → "Proxmox statistics service" -- "pvescheduler" → "Proxmox task scheduler" -- "pve-cluster" → "Proxmox cluster service" -- "corosync" → "cluster communication service" -- "ceph-osd@N" → "Ceph storage disk N" -- "ceph-mon" → "Ceph monitor service" - -Systemd-style patterns: -- "systemd[1]: pve-container@9000.service: Failed" - → "Container CT 9000 service failed" -- "systemd[1]: qemu-server@100.service: Failed with result 'exit-code'" - → "Virtual Machine VM 100 failed to start" -- "systemd[1]: Started pve-container@9000.service" - → "Container CT 9000 started" - -Kernel / storage patterns: -- "ata8.00: exception Emask ..." - → "ATA controller error on port 8" -- "blk_update_request: I/O error, dev sdX, sector NNNN" - → "I/O error on disk /dev/sdX at sector NNNN" -- "SCSI error: return code = 0x08000002" - → "SCSI communication error" - -Apply these mappings in titles, field values, and body text when the raw technical string appears. +═══ CORE RULES ═══ +1. Plain text only — NO markdown, no **bold**, no `code`, no bullet lists (use "• " for packages only) +2. Preserve severity: "failed" stays "failed", "warning" stays "warning" — never soften errors +3. Preserve structure: keep same fields and line order, only translate content +4. Detail level "{detail_level}": brief (2-3 lines) | standard (short paragraph) | detailed (full report) +5. DEDUPLICATION: merge duplicate facts from multiple sources into one clear statement +6. EMPTY LISTS: write translated "none" after label, never leave blank +7. Keep "hostname:" prefix in title — translate only the descriptive part +8. DO NOT add recommendations or suggestions ("you should...", "try...", "consider...") +{suggestions_addon}9. Present facts from message AND journal context — describe what happened, do NOT speculate +10. OUTPUT ONLY the final result — no "Original:", no before/after comparisons +11. Unknown input: preserve as closely as possible, translate what you can +═══ PROXMOX MAPPINGS (use directly, never explain) ═══ +pve-container@XXXX → "CT XXXX" | qemu-server@XXXX → "VM XXXX" | vzdump → "backup" +pveproxy/pvedaemon/pvestatd → "Proxmox service" | corosync → "cluster service" +"ata8.00: exception Emask..." → "ATA error on port 8" +"blk_update_request: I/O error, dev sdX" → "I/O error on /dev/sdX" {emoji_instructions} +═══ MESSAGE FORMATS ═══ -═══ MESSAGE-TYPE GUIDANCE ═══ +BACKUP: List each VM/CT with status/size/duration/storage. End with summary. + - Partial failure (some OK, some failed) = "Backup partially failed", not "failed" + - NEVER collapse multi-VM backup into one line — show each VM separately + - ALWAYS include storage path and summary line -BACKUP (backup_complete / backup_fail / backup_start): -- Preserve per-VM / per-CT detail if present. -- Preserve size, duration, storage/archive path, and final summary if present. -- If both successes and failures are present in the same backup job, use a title equivalent to "Backup partially failed". -- Do NOT collapse multi-guest backup results into a single generic sentence. +UPDATES: Counts on own lines. Packages use "• " under header. No redundant summary. -UPDATES (update_summary): -- Keep each count on its own line. -- Keep the important packages block if present. -- Use "• " for package items. -- Do NOT add a redundant summary line repeating totals already shown. +DISK/SMART: Device + specific error. Deduplicate repeated info. -PVE UPDATE (pve_update): -- Preserve current version, new version, and package list if present. -- Keep the announcement concise. +HEALTH: Category + severity + what changed. Duration if resolved. -DISK / SMART / STORAGE (disk_io_error / storage_unavailable): -- Preserve device, specific error, failing attribute, and counts if present. -- Do NOT repeat the same disk fact twice. +VM/CT LIFECYCLE: Confirm event with key facts (1-2 lines). -RESOURCES (cpu_high / ram_high / temp_high / load_high): -- Preserve current value, threshold, and context if present. - -SECURITY (auth_fail / ip_block): -- Keep source IP, user, service, jail, and failure count on separate clear lines if present. - -VM / CT LIFECYCLE (vm_*, ct_*, migration_*, replication_*): -- Keep name, ID, state, reason, and target node if present. -- Keep lifecycle messages compact unless detail_level is detailed. - -CLUSTER / HEALTH: -- Preserve node name, quorum, category, severity, duration, and reason if present. - -═══ OUTPUT FORMAT ═══ +═══ OUTPUT FORMAT (CRITICAL - parsers rely on exact structure) ═══ [TITLE] -translated title here +translated title here (NO [TITLE] text in actual title) [BODY] -translated body here +translated body here (NO [BODY] text in actual body) -CRITICAL OUTPUT RULES: -- Write [TITLE] on its own line -- Write the title on the next line -- Write [BODY] on its own line -- Write the body starting on the next line -- Do NOT replace these markers with "Title:" or "Body:" -- Do NOT include any extra text before or after the formatted result -- Do NOT add blank lines between [TITLE] and the title -- Do NOT add blank lines between [BODY] and the first body line""" +CRITICAL RULES: +- [TITLE] and [BODY] are PARSING MARKERS ONLY — they must NOT appear in your actual content +- Write [TITLE] on line 1, title text on line 2 (no blank line between) +- Write [BODY] on line 3, body text starting line 4 (no blank line between) +- Do NOT write "Title:", "Body:", "[TITLE]", "[BODY]" inside the translated text +- Do NOT include markers in emojis line: WRONG "🔽[TITLE] server shutdown" → RIGHT "🔽 server shutdown" +- Output ONLY the formatted result — no explanations, no "Original:", no commentary""" + +# Addon for experimental suggestions mode +AI_SUGGESTIONS_ADDON = """ When journal context shows a clear problem, you MAY add ONE brief tip at the end, + prefixed with "Tip:" (translated). Keep tips factual, based only on what logs show. +""" # Emoji instructions injected into AI_SYSTEM_PROMPT for rich channels (Telegram, Discord, Pushover) AI_EMOJI_INSTRUCTIONS = """ ═══ EMOJI RULES ═══ -Place ONE emoji at the START of every non-empty line (title and each body line). -Never skip a line. Never put the emoji at the end. -A blank line must be completely empty — no emoji, no spaces. - - TITLE emoji — one per event type: - ✅ success / resolved / complete / reconnected - ❌ failed / FAILED / error - 💥 crash / I/O error / hardware fault - 🆘 new critical health issue - 📦 backup started / updates available (update_summary) - 🆕 new PVE version available (pve_update) - 🔺 escalated / severity increased - 📋 health digest / persistent issues - 🚚 migration started - 🔌 network down / node disconnected - 🚨 auth failure / security alert - 🚷 IP banned / blocked - 🔑 permission change - 💢 split-brain - 💣 OOM kill - 🚀 VM or CT started - ⏹️ VM or CT stopped - 🔽 VM or CT shutdown - 🔄 restarted / reboot / proxmox updates - 🔥 high CPU / firewall issue - 💧 high memory - 🌡️ high temperature - ⚠️ warning / degraded / high load / system problem - 📉 low disk space - 🚫 storage unavailable - 🐢 high latency - 📸 snapshot created - ⏻ system shutdown - - BODY LINE emoji — one per line based on content: - 🏷️ VM name / CT name / ID line (first line of VM/CT lifecycle events) - ✔️ status ok / success / action confirmed - ❌ status error / failed - 💽 size (individual VM/CT backup) - 💾 total backup size (summary line only) - ⏱️ duration - 🗄️ storage location / PBS path - 📦 total updates count - 🔒 security updates / jail - 🔄 proxmox updates - ⚙️ kernel updates / service name - 🗂️ important packages header - 🌐 source IP - 👤 user - 📝 reason / details - 🌡️ temperature - 🔥 CPU usage - 💧 memory usage - 📊 summary line / statistics - 👥 quorum / cluster nodes - 💿 disk device - 📂 filesystem / mount point - 📌 category / package item (pve_update) - 🚦 severity - 🖥️ node name - 🎯 target node - 🔹 current version (pve_update) - 🟢 new version (pve_update) +Use 1-2 emojis at START of lines where they add clarity. Combine when meaningful (💾✅ backup ok). +Not every line needs emoji — use them to highlight, not as filler. Blank lines = completely empty. +TITLE: ✅success ❌failed 💥crash 🆘critical 📦updates 🆕pve-update 🚚migration ⏹️stop + 🔽shutdown ⚠️warning 💢split-brain 🔌disconnect 🚨auth-fail 🚷banned 📋digest + 🚀 = something STARTS (VM/CT start, backup start, server boot, task begin) + Combine: 💾🚀backup-start 🖥️🚀system-boot 🚀VM/CT-start - BLANK LINES: - Insert one blank line only between logical sections inside the body. - Do not add a blank line before the first body line or after the last one. - """ +BODY: 🏷️VM/CT name ✔️ok ❌error 💽size 💾total ⏱️duration 🗄️storage 📊summary + 📦updates 🔒security 🔄proxmox ⚙️kernel 🗂️packages 💿disk 📝reason + 🌐IP 👤user 🌡️temp 🔥CPU 💧RAM 🎯target 🔹current 🟢new 📌item + +BLANK LINES: Insert between logical sections (VM entries, before summary, before packages block). + +═══ EXAMPLES (follow these formats) ═══ + +BACKUP START: +[TITLE] +💾🚀 pve01: Backup started +[BODY] +Backup job starting on storage PBS. +🏷️ VMs: web01 (100), db (101) + +BACKUP COMPLETE: +[TITLE] +💾✅ pve01: Backup complete +[BODY] +Backup job finished on storage local-bak. + +🏷️ VM web01 (ID: 100) +✔️ Status: ok +💽 Size: 12.3 GiB +⏱️ Duration: 00:04:21 +🗄️ Storage: vm/100/2026-03-17T22:00:08Z + +📊 Total: 1 backup | 💾 12.3 GiB | ⏱️ 00:04:21 + +BACKUP PARTIAL FAIL: +[TITLE] +💾❌ pve01: Backup partially failed +[BODY] +Backup job finished with errors. + +🏷️ VM web01 (ID: 100) +✔️ Status: ok +💽 Size: 12.3 GiB + +🏷️ VM broken (ID: 102) +❌ Status: error + +📊 Total: 2 backups | ❌ 1 failed + +UPDATES: +[TITLE] +📦 amd: Updates available +[BODY] +📦 Total updates: 24 +🔒 Security updates: 6 +🔄 Proxmox updates: 0 + +🗂️ Important packages: +• none + +VM/CT START: +[TITLE] +🚀 pve01: VM arch-linux (100) started +[BODY] +🏷️ Virtual machine arch-linux (ID: 100) +✔️ Now running + +HEALTH DEGRADED: +[TITLE] +⚠️ amd: Health warning — Disk I/O +[BODY] +💿 Device: /dev/sda +⚠️ 1 sector unreadable (pending)""" # No emoji instructions for email/plain text channels @@ -1721,10 +1626,18 @@ class AIEnhancer: # Default prompt: use detail level and emoji settings max_tokens = AI_DETAIL_TOKENS.get(detail_level, 200) emoji_instructions = AI_EMOJI_INSTRUCTIONS if use_emojis else AI_NO_EMOJI_INSTRUCTIONS + + # Check if experimental suggestions mode is enabled + allow_suggestions = self.config.get('ai_allow_suggestions', 'false') + if isinstance(allow_suggestions, str): + allow_suggestions = allow_suggestions.lower() == 'true' + suggestions_addon = AI_SUGGESTIONS_ADDON if allow_suggestions else '' + system_prompt = AI_SYSTEM_PROMPT.format( language=language_name, detail_level=detail_level, - emoji_instructions=emoji_instructions + emoji_instructions=emoji_instructions, + suggestions_addon=suggestions_addon ) # Build user message diff --git a/AppImage/scripts/startup_grace.py b/AppImage/scripts/startup_grace.py index 4c573754..2c8f6014 100644 --- a/AppImage/scripts/startup_grace.py +++ b/AppImage/scripts/startup_grace.py @@ -120,7 +120,7 @@ class _StartupGraceState: with self._lock: return time.time() - self._startup_time - # ─── Shutdown Tracking ────────────────────────────────────────��────────── + # ─── Shutdown Tracking ─────────────────────────────────────────────────── def mark_shutdown(self): """