From 642bd8ecae9fce72e60e7f1a81940485f414ba1a Mon Sep 17 00:00:00 2001 From: MacRimi Date: Mon, 1 Jun 2026 23:52:11 +0200 Subject: [PATCH] health_persistence: stop leaking obs counts across NVMe device renames MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `get_disks_observation_counts` maps each serial's count to that serial's "most recent" device_name (so renames like ata8 -> sdh keep the badge attached). When several physical disks have passed through the same kernel name across reboots — common with NVMe, where the kernel probes in a different order depending on which slots are populated — disk_registry keeps a row per (device_name, serial) pair seen, and the "most recent" device_name for a serial can now be in use by an entirely different disk. Concrete case from the wild: serial 211716800490 was nvme0n1 during the previous boot and earned a real I/O observation. After removing four of five NVMes, the surviving disk (serial 243332800236) booted into nvme0n1. The badge layer mirrored 211716800490's count onto nvme0n1 — which is now a different physical disk — and showed "1 obs." on the wrong drive, while the modal (which scopes by the current (device_name, serial) registry row) found nothing and rendered an empty history. Only mirror a serial's count onto its device_name when that device_name is currently owned by the same serial, determined from the freshest disk_registry row. The serial-keyed entry stays unconditional so observations remain reachable when the disk is re-plugged under another device name. 
Co-Authored-By: Claude Opus 4.7 --- AppImage/scripts/health_persistence.py | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/AppImage/scripts/health_persistence.py b/AppImage/scripts/health_persistence.py index 64f18985..114fdf79 100644 --- a/AppImage/scripts/health_persistence.py +++ b/AppImage/scripts/health_persistence.py @@ -2592,12 +2592,34 @@ class HealthPersistence: if serial not in serial_to_device: serial_to_device[serial] = device_name + # Resolve which serial currently OWNS each device_name. The + # kernel reuses NVMe / SD device names across reboots + # (e.g. the disk that was nvme4n1 with 5 NVMes plugged in + # comes back as nvme0n1 once 4 are removed), and + # disk_registry keeps a row for every (device_name, serial) + # combination it has ever seen. Without this check we would + # mirror an observation's count onto its serial's + # "most-recent" device_name even when that name is now in + # use by a DIFFERENT serial — surfacing a "1 obs." badge on + # a disk that has no observations of its own and a clean + # modal, since the modal correctly scopes by current + # (device_name, serial) pair. + cursor.execute(''' + SELECT device_name, serial FROM disk_registry + WHERE device_name IS NOT NULL AND device_name != '' + ORDER BY last_seen DESC + ''') + current_owner = {} + for device_name, dev_serial in cursor.fetchall(): + if device_name not in current_owner: + current_owner[device_name] = dev_serial + # Build result result = {} for serial, cnt in serial_counts.items(): result[f'serial:{serial}'] = cnt device_name = serial_to_device.get(serial) - if device_name: + if device_name and current_owner.get(device_name) == serial: result[device_name] = max(result.get(device_name, 0), cnt) # For disks WITHOUT serial: group by device_name