mirror of
https://github.com/MacRimi/ProxMenux.git
synced 2026-04-05 20:03:48 +00:00
Update notification service
This commit is contained in:
@@ -16,7 +16,7 @@ import {
|
||||
AlertTriangle, Info, Settings2, Zap, Eye, EyeOff,
|
||||
Trash2, ChevronDown, ChevronUp, ChevronRight, TestTube2, Mail, Webhook,
|
||||
Copy, Server, Shield, ExternalLink, RefreshCw, Download, Upload,
|
||||
Cloud, Brain, Globe, MessageSquareText, Sparkles, Pencil, Save, RotateCcw
|
||||
Cloud, Brain, Globe, MessageSquareText, Sparkles, Pencil, Save, RotateCcw, Lightbulb
|
||||
} from "lucide-react"
|
||||
|
||||
interface ChannelConfig {
|
||||
@@ -67,6 +67,7 @@ interface NotificationConfig {
|
||||
ai_openai_base_url: string
|
||||
ai_prompt_mode: string // 'default' or 'custom'
|
||||
ai_custom_prompt: string // User's custom prompt
|
||||
ai_allow_suggestions: string | boolean // Enable AI suggestions (experimental)
|
||||
channel_ai_detail: Record<string, string>
|
||||
hostname: string
|
||||
webhook_secret: string
|
||||
@@ -252,6 +253,7 @@ const DEFAULT_CONFIG: NotificationConfig = {
|
||||
ai_openai_base_url: "",
|
||||
ai_prompt_mode: "default",
|
||||
ai_custom_prompt: "",
|
||||
ai_allow_suggestions: "false",
|
||||
channel_ai_detail: {
|
||||
telegram: "brief",
|
||||
gotify: "brief",
|
||||
@@ -321,9 +323,10 @@ export function NotificationSettings() {
|
||||
openai: "",
|
||||
openrouter: "",
|
||||
},
|
||||
ai_prompt_mode: data.config.ai_prompt_mode || "default",
|
||||
ai_custom_prompt: data.config.ai_custom_prompt || "",
|
||||
}
|
||||
ai_prompt_mode: data.config.ai_prompt_mode || "default",
|
||||
ai_custom_prompt: data.config.ai_custom_prompt || "",
|
||||
ai_allow_suggestions: data.config.ai_allow_suggestions || "false",
|
||||
}
|
||||
// If ai_model exists but ai_models doesn't have it, save it
|
||||
if (configWithDefaults.ai_model && !configWithDefaults.ai_models[configWithDefaults.ai_provider]) {
|
||||
configWithDefaults.ai_models[configWithDefaults.ai_provider] = configWithDefaults.ai_model
|
||||
@@ -545,8 +548,9 @@ export function NotificationSettings() {
|
||||
ai_language: cfg.ai_language,
|
||||
ai_ollama_url: cfg.ai_ollama_url,
|
||||
ai_openai_base_url: cfg.ai_openai_base_url,
|
||||
ai_prompt_mode: cfg.ai_prompt_mode || "default",
|
||||
ai_custom_prompt: cfg.ai_custom_prompt || "",
|
||||
ai_prompt_mode: cfg.ai_prompt_mode || "default",
|
||||
ai_custom_prompt: cfg.ai_custom_prompt || "",
|
||||
ai_allow_suggestions: cfg.ai_allow_suggestions || "false",
|
||||
hostname: cfg.hostname,
|
||||
webhook_secret: cfg.webhook_secret,
|
||||
webhook_allowed_ips: cfg.webhook_allowed_ips,
|
||||
@@ -1846,6 +1850,26 @@ export function NotificationSettings() {
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Experimental: AI Suggestions toggle */}
|
||||
<div className="space-y-2 pt-3 border-t border-border/50">
|
||||
<div className="flex items-center justify-between">
|
||||
<div className="flex items-center gap-2">
|
||||
<Lightbulb className="h-4 w-4 text-yellow-400" />
|
||||
<Label className="text-xs sm:text-sm text-foreground/80">AI Suggestions</Label>
|
||||
<span className="text-[10px] px-1.5 py-0.5 rounded bg-yellow-500/20 text-yellow-400 font-medium">BETA</span>
|
||||
</div>
|
||||
<Switch
|
||||
checked={config.ai_allow_suggestions === "true" || config.ai_allow_suggestions === true}
|
||||
onCheckedChange={v => updateConfig(p => ({ ...p, ai_allow_suggestions: v ? "true" : "false" }))}
|
||||
disabled={!editMode}
|
||||
/>
|
||||
</div>
|
||||
<p className="text-xs text-muted-foreground leading-relaxed">
|
||||
When enabled, AI may add brief troubleshooting tips based on journal log context.
|
||||
Tips are factual and based only on what the logs show.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
|
||||
@@ -848,6 +848,91 @@ def get_current_latency(target='gateway'):
|
||||
return {'target': target, 'latency_avg': None, 'status': 'error'}
|
||||
|
||||
|
||||
def _capture_health_journal_context(categories: list, reason: str = '') -> str:
    """Capture recent journal lines relevant to a set of health categories.

    Each health category is mapped to a list of journal keywords. The last
    10 minutes of the journal (at most 500 entries) are read and only the
    lines containing at least one keyword (case-insensitive substring match)
    are kept, so the AI receives focused system logs for diagnosis.

    Args:
        categories: List of health category keys (e.g., ['storage', 'network']).
            Matching is case-insensitive; unknown keys are ignored.
        reason: The reason string from the health check. IPv4-looking tokens
            and any text enclosed in single or double quotes are added to
            the keyword set.

    Returns:
        Up to the last 30 matching journal lines joined with newlines, or an
        empty string when there are no keywords, nothing matches, or the
        journal cannot be read.
    """
    import subprocess
    import re

    # Map health categories to relevant journal keywords
    CATEGORY_KEYWORDS = {
        'storage': ['mount', 'nfs', 'cifs', 'smb', 'zfs', 'lvm', 'disk', 'nvme',
                    'sata', 'ata', 'I/O error', 'read error', 'write error',
                    'filesystem', 'ext4', 'xfs', 'btrfs', 'pbs', 'datastore'],
        'disks': ['smartd', 'smart', 'ata', 'sata', 'nvme', 'disk', 'I/O error',
                  'bad sector', 'reallocated', 'pending sector', 'uncorrectable'],
        'network': ['bond', 'bridge', 'vmbr', 'eth', 'network', 'link down',
                    'carrier', 'no route', 'unreachable', 'timeout', 'connection'],
        'services': ['pveproxy', 'pvedaemon', 'pvestatd', 'corosync', 'ceph',
                     'systemd', 'failed', 'service', 'unit', 'start', 'stop'],
        'vms': ['qemu', 'kvm', 'lxc', 'vzdump', 'qm', 'pct', 'guest agent',
                'qemu-ga', 'migration', 'snapshot'],
        'memory': ['oom', 'out of memory', 'killed process', 'swap', 'memory'],
        'cpu': ['thermal', 'temperature', 'throttl', 'mce', 'machine check'],
        'updates': ['apt', 'dpkg', 'upgrade', 'update', 'package'],
        'certificates': ['ssl', 'certificate', 'cert', 'expired', 'pve-ssl'],
        'logs': ['rsyslog', 'journal', 'log rotation'],
        'latency': ['ping', 'latency', 'timeout', 'unreachable', 'network'],
    }

    # Collect keywords for all degraded categories
    keywords = set()
    for cat in categories:
        keywords.update(CATEGORY_KEYWORDS.get(cat.lower(), ()))

    # Extract additional keywords from the reason text
    if reason:
        # IPv4-looking tokens
        keywords.update(re.findall(r'\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b', reason))

        # Anything wrapped in single or double quotes (storage/service names)
        for match in re.findall(r"'([^']+)'|\"([^\"]+)\"", reason):
            keywords.update(w for w in match if w)

    keywords.discard('')
    if not keywords:
        return ""

    # The keywords partly come from free-form reason text, so they are never
    # passed to a shell: journalctl is invoked with an argument list and the
    # filtering is done here with a Python regex (re.escape produces Python
    # regex syntax, which is not valid input for `grep -E`).
    matcher = re.compile(
        "|".join(re.escape(k) for k in sorted(keywords)),
        re.IGNORECASE,
    )

    try:
        result = subprocess.run(
            ['journalctl', '--since=10 minutes ago', '--no-pager', '-n', '500'],
            capture_output=True,
            text=True,
            errors='replace',  # journal may contain bytes that are not valid text
            timeout=5,
        )
    except (OSError, subprocess.SubprocessError):
        # Best effort: journalctl missing, not permitted, or timed out
        return ""

    matching = [line for line in result.stdout.splitlines() if matcher.search(line)]
    # Keep only the most recent matches to bound the size of the AI context
    return "\n".join(matching[-30:]).strip()
|
||||
|
||||
|
||||
def _health_collector_loop():
|
||||
"""Background thread: run full health checks every 5 minutes.
|
||||
Keeps the health cache always fresh and records events/errors in the DB.
|
||||
@@ -942,6 +1027,7 @@ def _health_collector_loop():
|
||||
|
||||
if not skip_notification:
|
||||
degraded.append({
|
||||
'cat_key': cat_key, # Original key for journal capture
|
||||
'category': cat_name,
|
||||
'status': cur_status,
|
||||
'reason': reason,
|
||||
@@ -956,6 +1042,12 @@ def _health_collector_loop():
|
||||
import socket as _sock
|
||||
hostname = _sock.gethostname()
|
||||
|
||||
# Capture journal context for AI enrichment
|
||||
# Extract category keys and reasons for keyword matching
|
||||
cat_keys = [d.get('cat_key', d.get('category', '').lower()) for d in degraded]
|
||||
all_reasons = ' '.join(d.get('reason', '') for d in degraded)
|
||||
journal_context = _capture_health_journal_context(cat_keys, all_reasons)
|
||||
|
||||
if len(degraded) == 1:
|
||||
d = degraded[0]
|
||||
title = f"{hostname}: Health {d['status']} - {d['category']}"
|
||||
@@ -977,7 +1069,11 @@ def _health_collector_loop():
|
||||
severity=severity,
|
||||
title=title,
|
||||
message=body,
|
||||
data={'hostname': hostname, 'count': str(len(degraded))},
|
||||
data={
|
||||
'hostname': hostname,
|
||||
'count': str(len(degraded)),
|
||||
'_journal_context': journal_context, # For AI enrichment
|
||||
},
|
||||
source='health_monitor',
|
||||
)
|
||||
except Exception as e:
|
||||
|
||||
@@ -175,7 +175,7 @@ class HealthMonitor:
|
||||
r'proxmenux-monitor.*failed at step exec',
|
||||
r'proxmenux-monitor\.appimage',
|
||||
|
||||
# ── PVE scheduler operational noise ──
|
||||
# ── PVE scheduler operational noise ──
|
||||
# pvescheduler emits "could not update job state" every minute
|
||||
# when a scheduled job reference is stale. This is cosmetic,
|
||||
# not a system problem.
|
||||
@@ -2118,7 +2118,7 @@ class HealthMonitor:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# ── Record disk observation (always, even if transient) ──
|
||||
# ── Record disk observation (always, even if transient) ──
|
||||
# Signature must be stable across cycles: strip volatile
|
||||
# data (hex values, counts, timestamps) to dedup properly.
|
||||
# e.g. "ata8.00: exception Emask 0x1 SAct 0xc1000000"
|
||||
@@ -4580,10 +4580,18 @@ class HealthMonitor:
|
||||
Returns None if the module is not available.
|
||||
|
||||
Respects storage exclusions: excluded storages are reported as INFO, not CRITICAL.
|
||||
|
||||
During startup grace period (first 5 minutes after boot):
|
||||
- Storage errors are reported as INFO instead of CRITICAL
|
||||
- No persistent errors are recorded
|
||||
This prevents false positives when NFS/PBS/remote storage is still mounting.
|
||||
"""
|
||||
if not PROXMOX_STORAGE_AVAILABLE:
|
||||
return None
|
||||
|
||||
# Check if we're in startup grace period
|
||||
in_grace_period = _is_startup_health_grace()
|
||||
|
||||
try:
|
||||
# Reload configuration to ensure we have the latest storage definitions
|
||||
proxmox_storage_monitor.reload_configuration()
|
||||
@@ -4649,19 +4657,21 @@ class HealthMonitor:
|
||||
else:
|
||||
reason = f"Storage '{storage_name}' has status: {status_detail}."
|
||||
|
||||
# Record a persistent CRITICAL error for each unavailable storage
|
||||
health_persistence.record_error(
|
||||
error_key=error_key,
|
||||
category='storage',
|
||||
severity='CRITICAL',
|
||||
reason=reason,
|
||||
details={
|
||||
'storage_name': storage_name,
|
||||
'storage_type': storage.get('type', 'unknown'),
|
||||
'status_detail': status_detail,
|
||||
'dismissable': False
|
||||
}
|
||||
)
|
||||
# During grace period, don't record persistent errors (storage may still be mounting)
|
||||
# After grace period, record as CRITICAL
|
||||
if not in_grace_period:
|
||||
health_persistence.record_error(
|
||||
error_key=error_key,
|
||||
category='storage',
|
||||
severity='CRITICAL',
|
||||
reason=reason,
|
||||
details={
|
||||
'storage_name': storage_name,
|
||||
'storage_type': storage.get('type', 'unknown'),
|
||||
'status_detail': status_detail,
|
||||
'dismissable': False
|
||||
}
|
||||
)
|
||||
|
||||
# Add to details dict with dismissable false for frontend
|
||||
storage_details[storage_name] = {
|
||||
@@ -4672,13 +4682,22 @@ class HealthMonitor:
|
||||
}
|
||||
|
||||
# Build checks from storage_details
|
||||
# During grace period, report as INFO instead of CRITICAL
|
||||
checks = {}
|
||||
for st_name, st_info in storage_details.items():
|
||||
checks[st_name] = {
|
||||
'status': 'CRITICAL',
|
||||
'detail': st_info.get('reason', 'Unavailable'),
|
||||
'dismissable': False
|
||||
}
|
||||
if in_grace_period:
|
||||
checks[st_name] = {
|
||||
'status': 'INFO',
|
||||
'detail': f"[Startup] {st_info.get('reason', 'Unavailable')} (checking...)",
|
||||
'dismissable': False,
|
||||
'grace_period': True
|
||||
}
|
||||
else:
|
||||
checks[st_name] = {
|
||||
'status': 'CRITICAL',
|
||||
'detail': st_info.get('reason', 'Unavailable'),
|
||||
'dismissable': False
|
||||
}
|
||||
|
||||
# Add excluded unavailable storages as INFO (not as errors)
|
||||
for st in excluded_unavailable:
|
||||
@@ -4702,12 +4721,22 @@ class HealthMonitor:
|
||||
|
||||
# Determine overall status based on non-excluded issues only
|
||||
if real_unavailable:
|
||||
return {
|
||||
'status': 'CRITICAL',
|
||||
'reason': f'{len(real_unavailable)} Proxmox storage(s) unavailable',
|
||||
'details': storage_details,
|
||||
'checks': checks
|
||||
}
|
||||
# During grace period, return INFO instead of CRITICAL
|
||||
if in_grace_period:
|
||||
return {
|
||||
'status': 'INFO',
|
||||
'reason': f'{len(real_unavailable)} storage(s) not yet available (startup)',
|
||||
'details': storage_details,
|
||||
'checks': checks,
|
||||
'grace_period': True
|
||||
}
|
||||
else:
|
||||
return {
|
||||
'status': 'CRITICAL',
|
||||
'reason': f'{len(real_unavailable)} Proxmox storage(s) unavailable',
|
||||
'details': storage_details,
|
||||
'checks': checks
|
||||
}
|
||||
else:
|
||||
# Only excluded storages are unavailable - this is OK
|
||||
return {
|
||||
|
||||
@@ -1093,7 +1093,7 @@ class HealthPersistence:
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
||||
# ─── System Capabilities Cache ───────────────────────────────
|
||||
# ─── System Capabilities Cache ───────────────────────────────
|
||||
|
||||
def get_capability(self, cap_key: str) -> Optional[str]:
|
||||
"""
|
||||
|
||||
@@ -79,7 +79,7 @@ class _SharedState:
|
||||
_shared_state = _SharedState()
|
||||
|
||||
|
||||
# ─── Event Object ─────────────────────────────────────────────────
|
||||
# ─── Event Object ─────────────────────────────────────────────────
|
||||
|
||||
class NotificationEvent:
|
||||
"""Represents a detected event ready for notification dispatch.
|
||||
@@ -2538,7 +2538,7 @@ class PollingCollector:
|
||||
except Exception as e:
|
||||
print(f"[PollingCollector] AI model check failed: {e}")
|
||||
|
||||
# ── Persistence helpers ────────────────────────────────────
|
||||
# ── Persistence helpers ────────────────────────────────────
|
||||
|
||||
def _load_last_notified(self):
|
||||
"""Load per-error notification timestamps from DB on startup."""
|
||||
|
||||
@@ -763,8 +763,10 @@ class NotificationManager:
|
||||
ch_title, ch_body = title, body
|
||||
|
||||
# ── Per-channel settings ──
|
||||
# Email defaults to 'detailed' (technical report), others to 'standard'
|
||||
detail_level_key = f'{ch_name}.ai_detail_level'
|
||||
detail_level = self._config.get(detail_level_key, 'standard')
|
||||
default_detail = 'detailed' if ch_name == 'email' else 'standard'
|
||||
detail_level = self._config.get(detail_level_key, default_detail)
|
||||
|
||||
rich_key = f'{ch_name}.rich_format'
|
||||
use_rich_format = self._config.get(rich_key, 'false') == 'true'
|
||||
|
||||
@@ -1382,241 +1382,146 @@ AI_DETAIL_TOKENS = {
|
||||
'detailed': 3000, # Complete technical reports with all details
|
||||
}
|
||||
|
||||
# System prompt template - informative, no recommendations
|
||||
AI_SYSTEM_PROMPT = """You are a system notification formatter for ProxMenux Monitor, a Proxmox VE monitoring tool.
|
||||
# System prompt template - optimized hybrid version
|
||||
AI_SYSTEM_PROMPT = """You are a notification FORMATTER for ProxMenux Monitor (Proxmox VE).
|
||||
Your job: translate and reformat alerts into {language}. You are NOT an analyst — do not interpret or diagnose.
|
||||
|
||||
Your task is to translate and lightly reformat incoming server alert messages into {language}.
|
||||
═══ WHAT TO TRANSLATE ═══
|
||||
Translate: labels, descriptions, status words, units (GB→Go in French, etc.)
|
||||
DO NOT translate: hostnames, IPs, paths, VM/CT IDs, device names (/dev/sdX), technical identifiers
|
||||
|
||||
═══ CORE ROLE ═══
|
||||
You are a formatter, not an analyst.
|
||||
Translate, clean, and present the message clearly.
|
||||
Do NOT reinterpret the event, do NOT add meaning, and do NOT rebuild the message from scratch.
|
||||
|
||||
═══ ABSOLUTE RULES ═══
|
||||
1. Translate BOTH title and body into {language}.
|
||||
|
||||
2. Translate human-readable text only.
|
||||
Do NOT translate:
|
||||
- hostnames
|
||||
- device paths (/dev/sdX, /dev/nvmeXnX)
|
||||
- filesystem paths
|
||||
- IDs, VMIDs, CTIDs, UUIDs
|
||||
- timestamps, dates, archive names, PBS paths
|
||||
- version numbers
|
||||
- technical units (B, KB, MB, GB, TB, KiB, MiB, GiB, TiB, %, ms, s)
|
||||
|
||||
3. Plain text only.
|
||||
No markdown: no **bold**, no *italic*, no `code`, no headers (#), no markdown lists (- or *).
|
||||
The bullet character "•" is allowed only where explicitly required.
|
||||
|
||||
4. Tone: factual, concise, technical.
|
||||
No greetings, no closings, no apologies, no conversational filler.
|
||||
|
||||
5. Do NOT add recommendations, action items, remediation, or suggestions.
|
||||
|
||||
6. Present ONLY the facts already present in the input.
|
||||
Do NOT invent, assume, explain, soften, or escalate anything.
|
||||
|
||||
7. Do NOT change severity or status meaning.
|
||||
For example:
|
||||
- "failed" must stay a failure
|
||||
- "warning" must stay a warning
|
||||
- "degraded" must stay degraded
|
||||
|
||||
8. Preserve structure whenever possible.
|
||||
Keep the same fields, lines, and data already present in the input.
|
||||
Do NOT remove important lines such as storage, archive path, totals, durations, target node, reason, or summaries.
|
||||
|
||||
9. Reordering must be minimal.
|
||||
Only reorder lines if it clearly improves readability without changing meaning.
|
||||
|
||||
10. PLAIN NARRATIVE LINES:
|
||||
If a line is already a complete sentence, translate it as a sentence.
|
||||
Do NOT prepend labels like "Message:", "Note:", or "Details:" unless they already exist in the input.
|
||||
|
||||
11. Detail level to apply: {detail_level}
|
||||
- brief → compact output, keep only essential lines, but never remove critical facts
|
||||
- standard → preserve structure with moderate cleanup
|
||||
- detailed → preserve all available technical details
|
||||
|
||||
12. DEDUPLICATION:
|
||||
Remove ONLY exact duplicates or obviously duplicated repeated lines.
|
||||
Do NOT merge distinct facts just because they look similar.
|
||||
Do NOT summarize multiple separate events into one.
|
||||
|
||||
13. Keep the "hostname: " prefix in the title.
|
||||
Translate only the descriptive part.
|
||||
Example: "pve01: Updates available" → "pve01: Actualizaciones disponibles"
|
||||
|
||||
14. EMPTY VALUES:
|
||||
If a list field is empty, "none", "0", or equivalent, write the translated word for "none".
|
||||
Never leave a declared field blank.
|
||||
|
||||
15. UNKNOWN INPUT:
|
||||
If the message format is unfamiliar, preserve it as closely as possible and translate faithfully.
|
||||
Do NOT force it into another template.
|
||||
|
||||
═══ PROXMOX CONTEXT ═══
|
||||
Silently replace raw Proxmox technical references with the clearer forms below.
|
||||
Do NOT explain them. Just use the friendly equivalent directly.
|
||||
|
||||
Service / process mappings:
|
||||
- "pve-container@XXXX.service" → "Container CT XXXX"
|
||||
- "qemu-server@XXXX.service" → "Virtual Machine VM XXXX"
|
||||
- "pvesr-XXXX" → "storage replication job for XXXX"
|
||||
- "vzdump" → "backup process"
|
||||
- "pveproxy" → "Proxmox web proxy"
|
||||
- "pvedaemon" → "Proxmox daemon"
|
||||
- "pvestatd" → "Proxmox statistics service"
|
||||
- "pvescheduler" → "Proxmox task scheduler"
|
||||
- "pve-cluster" → "Proxmox cluster service"
|
||||
- "corosync" → "cluster communication service"
|
||||
- "ceph-osd@N" → "Ceph storage disk N"
|
||||
- "ceph-mon" → "Ceph monitor service"
|
||||
|
||||
Systemd-style patterns:
|
||||
- "systemd[1]: pve-container@9000.service: Failed"
|
||||
→ "Container CT 9000 service failed"
|
||||
- "systemd[1]: qemu-server@100.service: Failed with result 'exit-code'"
|
||||
→ "Virtual Machine VM 100 failed to start"
|
||||
- "systemd[1]: Started pve-container@9000.service"
|
||||
→ "Container CT 9000 started"
|
||||
|
||||
Kernel / storage patterns:
|
||||
- "ata8.00: exception Emask ..."
|
||||
→ "ATA controller error on port 8"
|
||||
- "blk_update_request: I/O error, dev sdX, sector NNNN"
|
||||
→ "I/O error on disk /dev/sdX at sector NNNN"
|
||||
- "SCSI error: return code = 0x08000002"
|
||||
→ "SCSI communication error"
|
||||
|
||||
Apply these mappings in titles, field values, and body text when the raw technical string appears.
|
||||
═══ CORE RULES ═══
|
||||
1. Plain text only — NO markdown, no **bold**, no `code`, no bullet lists (use "• " for packages only)
|
||||
2. Preserve severity: "failed" stays "failed", "warning" stays "warning" — never soften errors
|
||||
3. Preserve structure: keep same fields and line order, only translate content
|
||||
4. Detail level "{detail_level}": brief (2-3 lines) | standard (short paragraph) | detailed (full report)
|
||||
5. DEDUPLICATION: merge duplicate facts from multiple sources into one clear statement
|
||||
6. EMPTY LISTS: write translated "none" after label, never leave blank
|
||||
7. Keep "hostname:" prefix in title — translate only the descriptive part
|
||||
8. DO NOT add recommendations or suggestions ("you should...", "try...", "consider...")
|
||||
{suggestions_addon}9. Present facts from message AND journal context — describe what happened, do NOT speculate
|
||||
10. OUTPUT ONLY the final result — no "Original:", no before/after comparisons
|
||||
11. Unknown input: preserve as closely as possible, translate what you can
|
||||
|
||||
═══ PROXMOX MAPPINGS (use directly, never explain) ═══
|
||||
pve-container@XXXX → "CT XXXX" | qemu-server@XXXX → "VM XXXX" | vzdump → "backup"
|
||||
pveproxy/pvedaemon/pvestatd → "Proxmox service" | corosync → "cluster service"
|
||||
"ata8.00: exception Emask..." → "ATA error on port 8"
|
||||
"blk_update_request: I/O error, dev sdX" → "I/O error on /dev/sdX"
|
||||
{emoji_instructions}
|
||||
═══ MESSAGE FORMATS ═══
|
||||
|
||||
═══ MESSAGE-TYPE GUIDANCE ═══
|
||||
BACKUP: List each VM/CT with status/size/duration/storage. End with summary.
|
||||
- Partial failure (some OK, some failed) = "Backup partially failed", not "failed"
|
||||
- NEVER collapse multi-VM backup into one line — show each VM separately
|
||||
- ALWAYS include storage path and summary line
|
||||
|
||||
BACKUP (backup_complete / backup_fail / backup_start):
|
||||
- Preserve per-VM / per-CT detail if present.
|
||||
- Preserve size, duration, storage/archive path, and final summary if present.
|
||||
- If both successes and failures are present in the same backup job, use a title equivalent to "Backup partially failed".
|
||||
- Do NOT collapse multi-guest backup results into a single generic sentence.
|
||||
UPDATES: Counts on own lines. Packages use "• " under header. No redundant summary.
|
||||
|
||||
UPDATES (update_summary):
|
||||
- Keep each count on its own line.
|
||||
- Keep the important packages block if present.
|
||||
- Use "• " for package items.
|
||||
- Do NOT add a redundant summary line repeating totals already shown.
|
||||
DISK/SMART: Device + specific error. Deduplicate repeated info.
|
||||
|
||||
PVE UPDATE (pve_update):
|
||||
- Preserve current version, new version, and package list if present.
|
||||
- Keep the announcement concise.
|
||||
HEALTH: Category + severity + what changed. Duration if resolved.
|
||||
|
||||
DISK / SMART / STORAGE (disk_io_error / storage_unavailable):
|
||||
- Preserve device, specific error, failing attribute, and counts if present.
|
||||
- Do NOT repeat the same disk fact twice.
|
||||
VM/CT LIFECYCLE: Confirm event with key facts (1-2 lines).
|
||||
|
||||
RESOURCES (cpu_high / ram_high / temp_high / load_high):
|
||||
- Preserve current value, threshold, and context if present.
|
||||
|
||||
SECURITY (auth_fail / ip_block):
|
||||
- Keep source IP, user, service, jail, and failure count on separate clear lines if present.
|
||||
|
||||
VM / CT LIFECYCLE (vm_*, ct_*, migration_*, replication_*):
|
||||
- Keep name, ID, state, reason, and target node if present.
|
||||
- Keep lifecycle messages compact unless detail_level is detailed.
|
||||
|
||||
CLUSTER / HEALTH:
|
||||
- Preserve node name, quorum, category, severity, duration, and reason if present.
|
||||
|
||||
═══ OUTPUT FORMAT ═══
|
||||
═══ OUTPUT FORMAT (CRITICAL - parsers rely on exact structure) ═══
|
||||
[TITLE]
|
||||
translated title here
|
||||
translated title here (NO [TITLE] text in actual title)
|
||||
[BODY]
|
||||
translated body here
|
||||
translated body here (NO [BODY] text in actual body)
|
||||
|
||||
CRITICAL OUTPUT RULES:
|
||||
- Write [TITLE] on its own line
|
||||
- Write the title on the next line
|
||||
- Write [BODY] on its own line
|
||||
- Write the body starting on the next line
|
||||
- Do NOT replace these markers with "Title:" or "Body:"
|
||||
- Do NOT include any extra text before or after the formatted result
|
||||
- Do NOT add blank lines between [TITLE] and the title
|
||||
- Do NOT add blank lines between [BODY] and the first body line"""
|
||||
CRITICAL RULES:
|
||||
- [TITLE] and [BODY] are PARSING MARKERS ONLY — they must NOT appear in your actual content
|
||||
- Write [TITLE] on line 1, title text on line 2 (no blank line between)
|
||||
- Write [BODY] on line 3, body text starting line 4 (no blank line between)
|
||||
- Do NOT write "Title:", "Body:", "[TITLE]", "[BODY]" inside the translated text
|
||||
- Do NOT include markers in emojis line: WRONG "🔽[TITLE] server shutdown" → RIGHT "🔽 server shutdown"
|
||||
- Output ONLY the formatted result — no explanations, no "Original:", no commentary"""
|
||||
|
||||
# Addon for experimental suggestions mode
|
||||
AI_SUGGESTIONS_ADDON = """ When journal context shows a clear problem, you MAY add ONE brief tip at the end,
|
||||
prefixed with "Tip:" (translated). Keep tips factual, based only on what logs show.
|
||||
"""
|
||||
|
||||
# Emoji instructions injected into AI_SYSTEM_PROMPT for rich channels (Telegram, Discord, Pushover)
|
||||
AI_EMOJI_INSTRUCTIONS = """
|
||||
═══ EMOJI RULES ═══
|
||||
Place ONE emoji at the START of every non-empty line (title and each body line).
|
||||
Never skip a line. Never put the emoji at the end.
|
||||
A blank line must be completely empty — no emoji, no spaces.
|
||||
|
||||
TITLE emoji — one per event type:
|
||||
✅ success / resolved / complete / reconnected
|
||||
❌ failed / FAILED / error
|
||||
💥 crash / I/O error / hardware fault
|
||||
🆘 new critical health issue
|
||||
📦 backup started / updates available (update_summary)
|
||||
🆕 new PVE version available (pve_update)
|
||||
🔺 escalated / severity increased
|
||||
📋 health digest / persistent issues
|
||||
🚚 migration started
|
||||
🔌 network down / node disconnected
|
||||
🚨 auth failure / security alert
|
||||
🚷 IP banned / blocked
|
||||
🔑 permission change
|
||||
💢 split-brain
|
||||
💣 OOM kill
|
||||
🚀 VM or CT started
|
||||
⏹️ VM or CT stopped
|
||||
🔽 VM or CT shutdown
|
||||
🔄 restarted / reboot / proxmox updates
|
||||
🔥 high CPU / firewall issue
|
||||
💧 high memory
|
||||
🌡️ high temperature
|
||||
⚠️ warning / degraded / high load / system problem
|
||||
📉 low disk space
|
||||
🚫 storage unavailable
|
||||
🐢 high latency
|
||||
📸 snapshot created
|
||||
⏻ system shutdown
|
||||
|
||||
BODY LINE emoji — one per line based on content:
|
||||
🏷️ VM name / CT name / ID line (first line of VM/CT lifecycle events)
|
||||
✔️ status ok / success / action confirmed
|
||||
❌ status error / failed
|
||||
💽 size (individual VM/CT backup)
|
||||
💾 total backup size (summary line only)
|
||||
⏱️ duration
|
||||
🗄️ storage location / PBS path
|
||||
📦 total updates count
|
||||
🔒 security updates / jail
|
||||
🔄 proxmox updates
|
||||
⚙️ kernel updates / service name
|
||||
🗂️ important packages header
|
||||
🌐 source IP
|
||||
👤 user
|
||||
📝 reason / details
|
||||
🌡️ temperature
|
||||
🔥 CPU usage
|
||||
💧 memory usage
|
||||
📊 summary line / statistics
|
||||
👥 quorum / cluster nodes
|
||||
💿 disk device
|
||||
📂 filesystem / mount point
|
||||
📌 category / package item (pve_update)
|
||||
🚦 severity
|
||||
🖥️ node name
|
||||
🎯 target node
|
||||
🔹 current version (pve_update)
|
||||
🟢 new version (pve_update)
|
||||
Use 1-2 emojis at START of lines where they add clarity. Combine when meaningful (💾✅ backup ok).
|
||||
Not every line needs emoji — use them to highlight, not as filler. Blank lines = completely empty.
|
||||
|
||||
TITLE: ✅success ❌failed 💥crash 🆘critical 📦updates 🆕pve-update 🚚migration ⏹️stop
|
||||
🔽shutdown ⚠️warning 💢split-brain 🔌disconnect 🚨auth-fail 🚷banned 📋digest
|
||||
🚀 = something STARTS (VM/CT start, backup start, server boot, task begin)
|
||||
Combine: 💾🚀backup-start 🖥️🚀system-boot 🚀VM/CT-start
|
||||
|
||||
BLANK LINES:
|
||||
Insert one blank line only between logical sections inside the body.
|
||||
Do not add a blank line before the first body line or after the last one.
|
||||
"""
|
||||
BODY: 🏷️VM/CT name ✔️ok ❌error 💽size 💾total ⏱️duration 🗄️storage 📊summary
|
||||
📦updates 🔒security 🔄proxmox ⚙️kernel 🗂️packages 💿disk 📝reason
|
||||
🌐IP 👤user 🌡️temp 🔥CPU 💧RAM 🎯target 🔹current 🟢new 📌item
|
||||
|
||||
BLANK LINES: Insert between logical sections (VM entries, before summary, before packages block).
|
||||
|
||||
═══ EXAMPLES (follow these formats) ═══
|
||||
|
||||
BACKUP START:
|
||||
[TITLE]
|
||||
💾🚀 pve01: Backup started
|
||||
[BODY]
|
||||
Backup job starting on storage PBS.
|
||||
🏷️ VMs: web01 (100), db (101)
|
||||
|
||||
BACKUP COMPLETE:
|
||||
[TITLE]
|
||||
💾✅ pve01: Backup complete
|
||||
[BODY]
|
||||
Backup job finished on storage local-bak.
|
||||
|
||||
🏷️ VM web01 (ID: 100)
|
||||
✔️ Status: ok
|
||||
💽 Size: 12.3 GiB
|
||||
⏱️ Duration: 00:04:21
|
||||
🗄️ Storage: vm/100/2026-03-17T22:00:08Z
|
||||
|
||||
📊 Total: 1 backup | 💾 12.3 GiB | ⏱️ 00:04:21
|
||||
|
||||
BACKUP PARTIAL FAIL:
|
||||
[TITLE]
|
||||
💾❌ pve01: Backup partially failed
|
||||
[BODY]
|
||||
Backup job finished with errors.
|
||||
|
||||
🏷️ VM web01 (ID: 100)
|
||||
✔️ Status: ok
|
||||
💽 Size: 12.3 GiB
|
||||
|
||||
🏷️ VM broken (ID: 102)
|
||||
❌ Status: error
|
||||
|
||||
📊 Total: 2 backups | ❌ 1 failed
|
||||
|
||||
UPDATES:
|
||||
[TITLE]
|
||||
📦 amd: Updates available
|
||||
[BODY]
|
||||
📦 Total updates: 24
|
||||
🔒 Security updates: 6
|
||||
🔄 Proxmox updates: 0
|
||||
|
||||
🗂️ Important packages:
|
||||
• none
|
||||
|
||||
VM/CT START:
|
||||
[TITLE]
|
||||
🚀 pve01: VM arch-linux (100) started
|
||||
[BODY]
|
||||
🏷️ Virtual machine arch-linux (ID: 100)
|
||||
✔️ Now running
|
||||
|
||||
HEALTH DEGRADED:
|
||||
[TITLE]
|
||||
⚠️ amd: Health warning — Disk I/O
|
||||
[BODY]
|
||||
💿 Device: /dev/sda
|
||||
⚠️ 1 sector unreadable (pending)"""
|
||||
|
||||
|
||||
# No emoji instructions for email/plain text channels
|
||||
@@ -1721,10 +1626,18 @@ class AIEnhancer:
|
||||
# Default prompt: use detail level and emoji settings
|
||||
max_tokens = AI_DETAIL_TOKENS.get(detail_level, 200)
|
||||
emoji_instructions = AI_EMOJI_INSTRUCTIONS if use_emojis else AI_NO_EMOJI_INSTRUCTIONS
|
||||
|
||||
# Check if experimental suggestions mode is enabled
|
||||
allow_suggestions = self.config.get('ai_allow_suggestions', 'false')
|
||||
if isinstance(allow_suggestions, str):
|
||||
allow_suggestions = allow_suggestions.lower() == 'true'
|
||||
suggestions_addon = AI_SUGGESTIONS_ADDON if allow_suggestions else ''
|
||||
|
||||
system_prompt = AI_SYSTEM_PROMPT.format(
|
||||
language=language_name,
|
||||
detail_level=detail_level,
|
||||
emoji_instructions=emoji_instructions
|
||||
emoji_instructions=emoji_instructions,
|
||||
suggestions_addon=suggestions_addon
|
||||
)
|
||||
|
||||
# Build user message
|
||||
|
||||
@@ -120,7 +120,7 @@ class _StartupGraceState:
|
||||
with self._lock:
|
||||
return time.time() - self._startup_time
|
||||
|
||||
# ─── Shutdown Tracking ───────────────────────────────────────────────────
|
||||
# ─── Shutdown Tracking ───────────────────────────────────────────────────
|
||||
|
||||
def mark_shutdown(self):
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user