mirror of
https://github.com/MacRimi/ProxMenux.git
synced 2026-06-02 13:34:41 +00:00
5ca3463bf6
Full rewrite of the docs site under app/[locale]/ with next-intl in localePrefix:"always" mode. Every page now exists at both /en/<path> and /es/<path>; the root / shows a meta-refresh + JS redirect to /<defaultLocale>/ so GitHub Pages serves something on the apex URL. Highlights: - 107 doc pages migrated to file-per-page JSON namespaces under messages/en/ and messages/es/. Spanish content is fully translated (no copy-of-English placeholders). - New documentation for the Active Suppressions section in the Settings tab and the per-event Dismiss dropdown in the Health Monitor modal. - New screenshots: dismiss-duration-dropdown.png and an updated health-suppression-settings.png. - Pagefind integrated for client-side search; index is built on every CI deploy (not committed). - RSS feeds: per-locale at /<locale>/rss.xml plus root /rss.xml for backward compat. - Removed the dead app/[locale]/guides/[slug]/ route — every guide now has its own static page and no markdown source remains. - Fixed orphan link /guides/nvidia -> /guides/nvidia-manual in docs/hardware/nvidia-host. - Removed obsolete components (footer2, calendar, drawer). Verified locally with `npm ci && npm run build`: 2804 files in out/, 231 pages indexed by pagefind, root redirect intact, both locale roots and the new Active Suppressions docs render OK.
269 lines
27 KiB
JSON
269 lines
27 KiB
JSON
{
|
||
"meta": {
|
||
"title": "ProxMenux Monitor — Dashboard: Storage tab | ProxMenux Documentation",
|
||
"description": "The Storage tab consolidates four views: Proxmox-managed storages with their state, ZFS pools, internal physical disks with SMART data, and external (USB) drives. Each disk drill-in exposes SMART attributes, wear & lifetime and the permanent observation history."
|
||
},
|
||
"header": {
|
||
"title": "Dashboard: Storage tab",
|
||
"description": "The host's storage state in one screen — Proxmox pools (NFS / CIFS / LVM / ZFS / dir), ZFS pool health, internal SATA / NVMe disks with SMART, and external USB drives. Click any disk to open a drill-in with the full SMART attribute table and the per-disk observation history.",
|
||
"section": "ProxMenux Monitor · Dashboard"
|
||
},
|
||
"intro": {
|
||
"title": "Backed by three sources",
|
||
"body": "Proxmox storages come from <code>pvesm status</code>; ZFS state from <code>zpool status</code>; physical disks from <code>lsblk</code> + <code>smartctl</code> (and <code>nvme</code> for NVMe-specific fields). The tab refreshes every ~60 seconds; the per-disk drill-in triggers a fresh SMART read on demand."
|
||
},
|
||
"thresholds": {
|
||
"title": "Status colours and thresholds applied here",
|
||
"intro": "Every bar, chip, and dot on this tab follows the same three-state classification — <green/> <strong>green</strong> below Warning, <amber/> <strong>amber</strong> from Warning to Critical, <red/> <strong>red</strong> at Critical and above. Recommended defaults shipped with ProxMenux:",
|
||
"items": [
|
||
"<strong>Capacity</strong> (host disks, PVE storages, ZFS pools, LXC mounts) — Warning 85 %, Critical 95 %.",
|
||
"<strong>Disk temperature</strong> — HDD 60/65 °C · SSD 70/75 °C · NVMe 80/85 °C · SAS 55/65 °C (warning / critical)."
|
||
],
|
||
"outro": "Every value is configurable per host — <link>Settings → Health Monitor Thresholds</link> is the single source of truth and explains how to tune them."
|
||
},
|
||
"topRow": {
|
||
"heading": "Top row: storage at-a-glance",
|
||
"intro": "Opening the Storage tab lands you on a four-card summary of the host's storage state — total capacity, what's used locally, what's used on remote storages, and the physical-disk inventory. Each card is a one-line answer to a common question; the cards below the row are where you drill into the detail.",
|
||
"imageAlt": "Storage tab — top row of four stat cards: Total Storage, Local Used, Remote Used, Physical Disks",
|
||
"imageCaption": "Top row of the Storage tab — total capacity and disk count, used bytes split into local vs remote storages, and a typed breakdown of physical disks with their health summary.",
|
||
"headerCard": "Card",
|
||
"headerWhat": "What it shows",
|
||
"totalLabel": "Total Storage",
|
||
"totalWhat": "Combined raw capacity across every physical disk. Footer line shows the count of physical disks discovered.",
|
||
"localLabel": "Local Used",
|
||
"localWhat": "Bytes used on local storages (LVM / LVM-thin / ZFS / dir on the host's own disks). Shows the used bytes prominently, with a footer line of <em>X.XX % of Y TB</em> so you see the fill-percentage at the same time.",
|
||
"remoteLabel": "Remote Used",
|
||
"remoteWhat": "Same shape as Local Used but for remote storages (NFS / CIFS / PBS / Ceph / iSCSI). Counted separately because remote outages don't affect local data and you typically size and monitor them differently.",
|
||
"disksLabel": "Physical Disks",
|
||
"disksIntro": "Two lines of breakdown for the inventory:",
|
||
"disksItems": [
|
||
"<strong>By type</strong> — counts of NVMe (purple), SSD (blue) and HDD (blue) discovered. Mixed-disk hosts get all three; an all-NVMe host shows only the NVMe count.",
|
||
"<strong>By health</strong> — counts of <em>normal</em> (green), <em>warning</em> (yellow) and <em>critical</em> (red) disks. The healthy state usually shows just \"X normal\"; warnings and critical only appear when something escalated."
|
||
]
|
||
},
|
||
"pveStorage": {
|
||
"heading": "Proxmox Storage card",
|
||
"intro": "One row per storage configured in <code>/etc/pve/storage.cfg</code>. Each row shows the type badge (<code>nfs</code> / <code>cifs</code> / <code>zfspool</code> / <code>lvm</code> / <code>lvmthin</code> / <code>dir</code> / <code>pbs</code>), the storage name, an active / error / not-monitored badge, the usage percentage and a coloured progress bar:",
|
||
"items": [
|
||
"<strong>< 75 %</strong> — blue progress bar, value in blue.",
|
||
"<strong>75 – 90 %</strong> — yellow progress bar, value in yellow (Health Monitor warns at this point).",
|
||
"<strong>> 90 %</strong> — red progress bar, value in red (Health Monitor escalates).",
|
||
"<strong>error</strong> — full row outlined in red, used when the storage is configured but unreachable (NFS server down, CIFS creds expired).",
|
||
"<strong>excluded</strong> — purple outline + the badge \"not monitored\". Storages explicitly excluded by the user from health checks (handy for manual / archive volumes that are intentionally offline)."
|
||
],
|
||
"calloutTitle": "Excluding a noisy storage",
|
||
"calloutBody": "From the storage row, the per-storage menu lets you mark it as <em>excluded from monitoring</em>. The flag is stored in the <code>excluded_storages</code> table and respected by both the dashboard view and the Health Monitor cycle — no notifications fire for excluded storages, and they don't bump the header pill."
|
||
},
|
||
"zfs": {
|
||
"heading": "ZFS Pools card",
|
||
"intro": "Renders only when ZFS is installed and at least one pool exists. One row per pool with a health badge, size / allocated / free, and an icon mirroring the health state:",
|
||
"items": [
|
||
"<strong>ONLINE</strong> — green. Everything healthy.",
|
||
"<strong>DEGRADED</strong> — yellow. Pool is serving data but at least one device is unavailable; replacement window starts.",
|
||
"<strong>FAULTED</strong> / <strong>UNAVAIL</strong> / <strong>SUSPENDED</strong> — red. Pool not serving data; immediate intervention required."
|
||
],
|
||
"outro": "Both ZFS state and the per-disk SMART status feed the <em>Disks & I/O</em> category of the <link>Health Monitor</link>."
|
||
},
|
||
"physical": {
|
||
"heading": "Physical Disks & SMART Status",
|
||
"intro": "Internal disks (SATA / NVMe). Each row condenses the most useful fields at a glance:",
|
||
"items": [
|
||
"<strong>Device path</strong> — <code>/dev/sda</code>, <code>/dev/nvme0n1</code>.",
|
||
"<strong>Type badge</strong> — SATA / NVMe (and the relevant icon).",
|
||
"<strong>System badge</strong> — orange tag that marks disks the host's OS is running from. The dashboard derives this from the mountpoints of <code>/</code> and <code>/boot</code>: any physical disk hosting them gets the <em>System</em> tag so you don't accidentally wipe or repurpose it. Disks without the tag are pure data drives.",
|
||
"<strong>Model</strong> — vendor + model string from <code>smartctl -i</code>.",
|
||
"<strong>Capacity</strong> — formatted human-readable.",
|
||
"<strong>Temperature</strong> — current °C, coloured by the disk-type threshold (NVMe runs warmer than SATA).",
|
||
"<strong>SMART status</strong> — passed / failed / unknown.",
|
||
"<strong>Observations badge</strong> — when the permanent <code>disk_observations</code> history has un-dismissed entries for this disk, a blue badge with the count appears (e.g. <em>3 obs.</em>). Click the disk to drill in and review them.",
|
||
"<strong>Health badge</strong> — Healthy / Warning / Critical, derived from the SMART check + recent observations."
|
||
],
|
||
"clickHint": "The whole row is clickable and opens the per-disk drill-in described below.",
|
||
"warningTitle": "Don't touch System-tagged disks lightly",
|
||
"warningBody": "Disks with the orange <strong>System</strong> badge host the running OS. The dashboard surfaces the tag as a guard rail — destructive actions launched from <link>ProxMenux → Disk Manager → Format / Wipe</link> explicitly refuse to act on them. If you really need to repurpose the boot disk, do it from a rescue environment, not from inside Proxmox."
|
||
},
|
||
"external": {
|
||
"heading": "External Storage (USB)",
|
||
"body": "A separate card for USB-attached drives, only renders when at least one is present. Same fields as internal disks plus an orange <strong>USB</strong> tag. USB drives often appear and disappear (cold backups, occasional offload jobs), so the Health Monitor is conservative about them — observations are retained, but I/O errors on a disconnected USB drive don't escalate."
|
||
},
|
||
"drillIn": {
|
||
"heading": "Disk drill-in modal",
|
||
"intro": "Clicking any disk row opens a four-tab modal: <strong>Overview</strong> · <strong>SMART</strong> · <strong>History</strong> · <strong>Schedule</strong>. The header always shows the device path, the model + capacity and the orange <em>System</em> badge if applicable.",
|
||
"overviewTitle": "Tab 1 — Overview",
|
||
"overviewImageAlt": "Disk drill-in modal — Overview tab with health status, Wear & Lifetime ring, and quick SMART attributes",
|
||
"overviewImageCaption": "Overview tab — identity, health badge, life-remaining ring with current wear and data written, plus a quick block of the most-watched SMART attributes.",
|
||
"overviewIntro": "The default landing tab — everything you need to answer \"is this disk OK?\" without running a test. Three blocks:",
|
||
"overviewItems": [
|
||
"<strong>Identity</strong> — model, serial, capacity, Health badge (Healthy / Warning / Critical).",
|
||
"<strong>Wear & Lifetime</strong> — large life-remaining ring (97 %, 50 %, …) with the source attribute spelled out (<em>Media Wearout Indicator</em>, <em>Percentage Used</em>, …), a wear bar (current consumption %), an <em>Est. Life</em> projection in years and the total Data Written. NVMe drives also show <em>Available Spare</em>.",
|
||
"<strong>SMART Attributes</strong> — six headline fields on a 2-column grid: Temperature, Power On Hours (with humanised duration like <em>3y 116d</em>), Rotation Rate (or <em>SSD</em>), Power Cycles, SMART Status, Reallocated Sectors, Pending Sectors, CRC Errors. The full attribute table lives in the SMART tab."
|
||
],
|
||
"smartTitle": "Tab 2 — SMART",
|
||
"smartImageAlt": "Disk drill-in modal — SMART tab with Run SMART Test buttons (Short / Extended), last-test result and the full SMART attribute table",
|
||
"smartImageCaption": "SMART tab — run a Short or Extended test, see the last-test outcome, scroll the full SMART attribute table, and generate the full PDF health report.",
|
||
"smartIntro": "Where the actions live. Three sections:",
|
||
"smartItems": [
|
||
"<strong>Run SMART Test</strong> — two buttons. <em>Short Test (~2 min)</em> runs synchronously and shows the result inline. <em>Extended Test (background)</em> can take hours on big drives, runs server-side and fires a notification when it completes.",
|
||
"<strong>Last Test</strong> — type, status badge (<em>passed</em> / <em>failed</em>) and timestamp of the most recent run.",
|
||
"<strong>SMART Attributes</strong> — the full attribute table (ID / name / value / worst / status with OK / warning / critical icons). For SATA / SAS, the classical numbered list. For NVMe, the structured fields from <code>nvme smart-log</code> (temperature, available spare, percentage used, data units written / read, host reads / writes, controller busy time, power cycles, unsafe shutdowns, media errors, error-log entries, warning / critical composite temperature time)."
|
||
],
|
||
"pdfTitle": "View Full SMART Report (PDF)",
|
||
"pdfIntro": "At the bottom of the SMART tab, the <strong>View Full SMART Report</strong> button generates a printable, archive-ready PDF — the same structured report you'd send to a vendor for an RMA.",
|
||
"pdfPreviewAlt": "First page of the generated SMART Health Report PDF — Executive Summary with the PASSED ring + Disk Information block",
|
||
"pdfPreviewCaption": "First page of the SMART Health Report — Executive Summary with the PASSED ring and the full Disk Information block. The full PDF below has the SSD wear ring, every SMART attribute and the test history.",
|
||
"pdfDownloadLabel": "Download sample SMART report (PDF)",
|
||
"pdfSectionsIntro": "The report has five top-level sections:",
|
||
"pdfSections": [
|
||
"<strong>Executive Summary</strong> — large PASSED / FAILED verdict, plain-language disk health assessment paragraph (\"your disk is healthy / showing signs of wear / failing\"), and four quick stats (report timestamp, last-test type, test result, attributes checked).",
|
||
"<strong>Disk Information</strong> — model, serial, capacity, type (HDD / SSD / NVMe), family, form factor, interface (SATA 3.3 · 6.0 Gb/s, …), TRIM support, current temperature with the optimal threshold, power-on time, power cycles, SMART status, plus the headline counters (pending sectors, CRC errors, reallocated sectors).",
|
||
"<strong>SSD Wear & Lifetime</strong> (SSD / NVMe only) — life-remaining ring, source attribute, current wear level, data written, power-on hours.",
|
||
"<strong>SMART Attributes (full)</strong> — every attribute the drive reports, with ID, name, value, worst, threshold, raw value and a status pill. The most user-relevant ones (Reallocated Sector Ct, Power On Hours, Reported Uncorrect, UDMA CRC Error Count, Media Wearout Indicator, …) include a one-line plain-language explanation under the row.",
|
||
"<strong>Last Self-Test Result + Full Self-Test History</strong> — the latest test (type, result, completion message, at which power-on-hours mark) plus a numbered table of every retained test.",
|
||
"<strong>Recommendations</strong> — action items based on the verdict: <em>Disk is Healthy / Schedule periodic tests / Backup strategy</em> for healthy drives, escalating language with replacement guidance when attributes are out of range."
|
||
],
|
||
"pdfOutro": "The PDF is produced server-side and downloaded with a stable filename pattern (<code>SMART-<short-id>.pdf</code>) so multiple snapshots over time can sit side-by-side in your archive. Useful when you're tracking degradation across months or sending evidence to vendor support.",
|
||
"historyTitle": "Tab 3 — History",
|
||
"historyImageAlt": "Disk drill-in modal — History tab listing past SMART tests with download and delete actions",
|
||
"historyImageCaption": "History tab — every retained SMART test for this disk. Per row: type, timestamp, \"X days ago\" tag, latest marker, download (raw <code>smartctl</code> output) and delete actions.",
|
||
"historyIntro": "The retained pool of SMART tests for this disk — both short and extended runs that completed. Each entry is the raw <code>smartctl</code> output captured at run time, plus the structured fields the Monitor parsed out for the dashboard. Per-row actions:",
|
||
"historyItems": [
|
||
"<strong>Download</strong> — saves the raw <code>smartctl -a</code> output as a text file. Identical to what the PDF report parses, useful when you need the exact line a vendor asks for.",
|
||
"<strong>Delete</strong> — removes the test from history. The retention limit set in the Schedule tab (<em>Last 5 / 10 / 20</em>) deletes oldest-first automatically; this action is the manual override."
|
||
],
|
||
"scheduleTitle": "Tab 4 — Schedule",
|
||
"scheduleImageAlt": "Disk drill-in modal — Schedule tab with the toggle for Automatic SMART Tests, the configured-schedules list and the Add Schedule button",
|
||
"scheduleImageCaption": "Schedule tab — pick test type, frequency and retention; the Monitor wires it into <code>cron</code> so tests run unattended.",
|
||
"scheduleIntro": "Cron-driven automatic SMART tests, no shell needed. The page has three areas:",
|
||
"scheduleItems": [
|
||
"<strong>Automatic SMART Tests toggle</strong> — global on/off switch for every schedule on this disk. Useful when you want to pause everything during maintenance without losing the schedule definitions.",
|
||
"<strong>Configured Schedules</strong> — one row per existing schedule with the test type badge (<em>short</em> / <em>long</em>), the cron expression in human form (<em>\"Day 1 of month at 03:00\"</em>, <em>\"Every Sunday at 02:00\"</em>), the disks it covers and the retention setting.",
|
||
"<strong>Add Schedule / Edit Schedule</strong> — form with: Test Type (<em>Short ~2 min</em> / <em>Long 1-4 h</em>), Frequency (<em>Daily / Weekly / Monthly</em>), Day of Month / Day of Week, Time, Keep Results (<em>Last 5 / 10 / 20</em>)."
|
||
],
|
||
"scheduleOutro": "The schedule is materialised as a cron entry on the host that calls back into the Monitor; results are saved to the same SMART history shown in Tab 3, and the retention setting auto-prunes the oldest test when a new one finishes.",
|
||
"tempTitle": "Temperature history modal",
|
||
"tempIntro": "Every disk that exposes a temperature sensor has its readings sampled continuously by the Monitor and persisted to a local time-series. The current value appears as one of the six headline SMART attributes in the Overview tab; clicking that block opens a dedicated temperature-history modal with the full picture.",
|
||
"tempImageAlt": "Disk temperature history modal — header with the disk path and model, a timeframe selector (1 Hour / 24 Hours / 7 Days / 30 Days), a row of four stat cards (Current / Min / Avg / Max), and a line chart of the temperature over the selected range coloured by the per-disk-type thresholds",
|
||
"tempImageCaption": "Temperature detail — opens from the Overview tab on any disk whose sensor returns a non-zero reading. The chart is coloured against the disk-type threshold (HDD / SSD / NVMe / SAS).",
|
||
"tempShowsTitle": "What the modal shows",
|
||
"tempShowsItems": [
|
||
"<strong>Timeframe selector</strong> with four ranges: <em>1 Hour</em>, <em>24 Hours</em> (default), <em>7 Days</em>, <em>30 Days</em>. Each one queries the same backend with a different downsampling so the chart stays readable at every horizon.",
|
||
"<strong>Four stat cards</strong> at the top of the modal: <em>Current</em>, <em>Min</em>, <em>Avg</em>, <em>Max</em> for the selected range. The <em>Current</em> card is coloured by the same status thresholds the Storage tab and the notifications use, so you can see at a glance whether the disk is in normal / warm / hot territory.",
|
||
"<strong>Line chart</strong> of the temperature over time, with the line and shaded area coloured by disk type:"
|
||
],
|
||
"tempDiskTypes": [
|
||
"HDD — typically cooler thresholds.",
|
||
"SSD — moderate thresholds.",
|
||
"NVMe — higher thresholds (NVMe runs hotter by design).",
|
||
"SAS — same defaults as HDD."
|
||
],
|
||
"tempConfigurable": "All four are configurable from <em>Settings → Health Monitor Thresholds</em>.",
|
||
"tempWhyTitle": "Why a history matters here",
|
||
"tempWhyItems": [
|
||
"<strong>Drift detection.</strong> Disks that progressively heat up over weeks (failing fan, dust build-up, neighbour disk dying and pushing hot air across) are invisible to a single \"current temperature\" readout. The 7-day and 30-day views surface the drift.",
|
||
"<strong>Spike correlation.</strong> When a backup window or a rebuild pushed the disk briefly over its threshold, the 1-hour and 24-hour ranges show whether it was a one-off or a recurring pattern.",
|
||
"<strong>Threshold tuning.</strong> Before raising or lowering a threshold in <em>Settings → Health Monitor Thresholds</em>, the 30-day chart shows the disk's actual operating range so the new value lines up with what the hardware really does rather than a guess."
|
||
],
|
||
"obsTitle": "Observation history (across tabs)",
|
||
"obsIntro": "Modern disks fail gradually. A disk can report SMART <strong>PASSED</strong> and still log occasional read errors in dmesg, drop SATA links, or expose pending sectors that come and go. The standard Proxmox UI shows you the current SMART verdict — it does not keep a history of those <em>signals</em>. ProxMenux does, and surfaces them right inside the disk modal.",
|
||
"obsImageAlt": "Disk Details modal Overview tab showing a healthy disk with SMART status Passed, 0 reallocated/pending/CRC errors, and an Observations section listing one recorded I/O Error event with the raw kernel message, a human translation of the ATA error code, first and last occurrence timestamps and an occurrence count",
|
||
"obsImageCaption": "A disk that <strong>SMART says is fine</strong> can still have an observation history. The card is the historical signal layer underneath the SMART verdict.",
|
||
"obsWhatTitle": "What an observation is",
|
||
"obsWhatIntro": "Anything ProxMenux catches in the kernel log, dmesg or SMART output that looks like a disk-level event — and that on its own would be too granular for a notification — is recorded as an <strong>observation</strong>. Each row shows:",
|
||
"obsWhatItems": [
|
||
"<strong>Type badge</strong> (I/O Error, SMART Error, Filesystem Error, ZFS Pool Error, Connection Error).",
|
||
"<strong>Raw kernel message</strong> as it appeared in dmesg — useful when copy-pasting into a search engine or a support ticket.",
|
||
"<strong>A human one-liner</strong> under the raw message for known ATA codes (<code>IDNF</code> → \"Sector address not found — possible bad sector or cable issue\", <code>UNC</code> → \"Uncorrectable read error — bad sector\", and the rest of the standard codes).",
|
||
"<strong>First and last occurrence timestamps</strong>, plus an <strong>occurrence count</strong> deduplicated by error signature."
|
||
],
|
||
"obsWhyTitle": "Why ProxMenux records and shows them",
|
||
"obsWhyItems": [
|
||
"<strong>Disk failure is rarely a single event.</strong> It usually starts with sporadic ATA bus errors, the odd UNC sector, or a couple of medium errors weeks before SMART flips to <em>FAILED</em>. Without persistence those early warnings disappear from dmesg on the next boot.",
|
||
"<strong>SMART can lie.</strong> A drive can show all attributes green and still be on the way out — the observation layer catches the symptoms SMART doesn't expose (especially ICRC, IDNF, link resets at lower SATA speeds).",
|
||
"<strong>It separates \"is happening now\" from \"happened recently\".</strong> The Health Monitor auto-resolves transient errors as soon as they stop firing, which is great for keeping the active alert list clean — but you still want to see, days later, that this disk had three I/O errors that night. The observation table is the answer.",
|
||
"<strong>It feeds the tiered notification model.</strong> The disk_io detector reads observation rate from this table to decide silent / WARNING / CRITICAL (the sliding 24h window introduced in 1.2.1.2). The history is what makes that classification possible."
|
||
],
|
||
"obsDedupTitle": "How dedup and re-notification work",
|
||
"obsDedupBody1": "Observations are deduplicated by their <strong>signature</strong> — a stable fingerprint of the error type, device and key fields of the kernel line. The same event repeating bumps the <code>occurrence_count</code> on the existing row rather than creating a new one. A <strong>different signature</strong> on the same disk creates a new observation and is treated as a new event for notification purposes.",
|
||
"obsDedupBody2": "Notifications follow an anti-cascade rule: the first occurrence of a given (disk, signature, severity) combination pages the operator, and ProxMenux then waits 24 hours before pinging again about the same combination — even if the count keeps climbing. Escalating severity (WARNING → CRITICAL) breaks the cooldown so the operator is told when things get worse, not just when they happen.",
|
||
"obsDismissTitle": "Dismissing vs resolving",
|
||
"obsDismissBody1": "Each row has a <strong>dismiss</strong> action. Dismissing an observation tells ProxMenux \"I've seen this, stop notifying me about it\". It does <strong>not</strong> freeze the occurrence counter — if the same fault keeps happening the count keeps climbing in the background, ready to alert again if it ever escalates to a different severity tier or signature. A dismissed observation stays visible on the card with a muted style, so a future operator can still see \"this disk had history here\".",
|
||
"obsDismissBody2": "Resolving on the active-error side (Health Monitor) is independent of observation dismiss — the observation persists past the active error's auto-resolve. That's the whole point: it survives, so a transient warning from last week is still visible on the disk card today. See <link>Health Monitor</link> for the active-error side of the same picture."
|
||
},
|
||
"dataCollected": {
|
||
"heading": "How the data is collected",
|
||
"headerSection": "Section of the tab",
|
||
"headerEndpoint": "Endpoint",
|
||
"headerSource": "Source",
|
||
"rows": [
|
||
{
|
||
"section": "Top summary cards",
|
||
"endpoint": "/api/storage/summary",
|
||
"source": "Aggregated from <code>lsblk</code>, <code>zpool list</code>, <code>vgs</code> / <code>lvs</code>."
|
||
},
|
||
{
|
||
"section": "Per-disk inventory",
|
||
"endpoint": "/api/storage",
|
||
"source": "<code>lsblk -O</code> + <code>smartctl -i</code> per device, with stable disk identity cache (cleared on hot-plug events)."
|
||
},
|
||
{
|
||
"section": "Proxmox storages",
|
||
"endpoint": "/api/proxmox-storage",
|
||
"source": "<code>pvesh get /nodes/<node>/storage</code> with the active/online state of each."
|
||
},
|
||
{
|
||
"section": "SMART current values",
|
||
"endpoint": "/api/storage/smart/<disk>",
|
||
"source": "<code>smartctl -A <dev></code> — refreshed on demand, not cached."
|
||
},
|
||
{
|
||
"section": "SMART self-test history",
|
||
"endpoint": "/api/storage/smart/<disk>/history",
|
||
"source": "Stored under <code>/var/lib/proxmenux-monitor/smart/<disk>/</code> as JSON snapshots."
|
||
},
|
||
{
|
||
"section": "Permanent observations",
|
||
"endpoint": "/api/storage/observations",
|
||
"source": "SQLite table fed by the Health Monitor every cycle (kept across auto-resolve)."
|
||
}
|
||
],
|
||
"outro": "Verifying the collection chain on the host:",
|
||
"codeComment1": "# Pull the current snapshot from a script",
|
||
"codeComment2": "# Cross-check what the dashboard sees against the raw OS view"
|
||
},
|
||
"whereNext": {
|
||
"heading": "Where to next",
|
||
"items": [
|
||
{
|
||
"label": "Health Monitor",
|
||
"href": "/docs/monitor/health-monitor",
|
||
"tail": " — the disks-and-I/O category and the suppression model."
|
||
},
|
||
{
|
||
"label": "API Reference",
|
||
"href": "/docs/monitor/api",
|
||
"tail": " — the storage and SMART endpoints."
|
||
},
|
||
{
|
||
"label": "Notifications",
|
||
"href": "/docs/monitor/notifications",
|
||
"tailRich": " — what <code>disk_io_error</code>, <code>storage_unavailable</code> and <code>smart_test_failed</code> trigger downstream."
|
||
},
|
||
{
|
||
"label": "Dashboard index",
|
||
"href": "/docs/monitor/dashboard",
|
||
"tail": " — the other tabs."
|
||
},
|
||
{
|
||
"label": "ProxMenux → Disk Manager",
|
||
"href": "/docs/disk-manager",
|
||
"tail": " — the actions side: format / wipe / SMART tests / import disks into VMs and CTs from the TUI."
|
||
},
|
||
{
|
||
"label": "ProxMenux → SMART Disk Health & Test",
|
||
"href": "/docs/disk-manager/smart-disk-test",
|
||
"tail": " — the CLI counterpart of this tab: schedule SMART tests, export the JSON the dashboard renders, and the deeper test-type / interpretation reference."
|
||
}
|
||
]
|
||
}
|
||
}
|