update beta ProxMenux 1.2.1.1-beta

This commit is contained in:
MacRimi
2026-05-09 18:59:59 +02:00
parent 5ed1fc44fd
commit 2f919de9e3
125 changed files with 16506 additions and 2877 deletions
+27 -5
View File
@@ -1,11 +1,33 @@
#!/bin/bash
# ProxMenux - Universal GPU/iGPU Passthrough to LXC
# ==================================================
# ==========================================================
# ProxMenux - GPU / iGPU Passthrough to LXC
# ==========================================================
# Author : MacRimi
# License : MIT
# Copyright : (c) 2024 MacRimi
# License : GPL-3.0
# Version : 1.0
# Last Updated: 01/04/2026
# ==================================================
# ==========================================================
# Description:
# Shares a physical GPU (Intel iGPU, AMD or NVIDIA) with an
# LXC container on Proxmox VE. Unlike VM passthrough, the
# host keeps using the GPU — containers access it through
# device nodes, not via VFIO binding.
#
# Features:
# - Multi-vendor detection (Intel / AMD / NVIDIA)
# - Multi-GPU selection via checklist
# - Switch Mode: detects GPU bound to vfio-pci (VM) and
# offers to free it before LXC passthrough
# - SR-IOV check (blocks unsupported configurations)
# - Automatic dev-node enumeration (DRI, KFD, NVIDIA)
# - GID alignment (video / render) between host and CT
# - Distro-aware driver install inside the container
# (Alpine / Arch / Debian-Ubuntu / NVIDIA .run fallback)
# - NVIDIA userspace version matched to host driver
# - Container memory bump during NVIDIA install (restored)
# - Optional GPU guard hookscript integration
# ==========================================================
LOCAL_SCRIPTS="/usr/local/share/proxmenux/scripts"
BASE_DIR="/usr/local/share/proxmenux"
@@ -814,7 +836,7 @@ _get_iommu_group_ids() {
local dev dev_class
dev=$(basename "$dev_path")
dev_class=$(cat "/sys/bus/pci/devices/${dev}/class" 2>/dev/null)
[[ "$dev_class" == "0x0604" || "$dev_class" == "0x0600" ]] && continue
[[ "$dev_class" == 0x0604* || "$dev_class" == 0x0600* ]] && continue
local vid did
vid=$(cat "/sys/bus/pci/devices/${dev}/vendor" 2>/dev/null | sed 's/0x//')
did=$(cat "/sys/bus/pci/devices/${dev}/device" 2>/dev/null | sed 's/0x//')
+1 -1
View File
@@ -1112,7 +1112,7 @@ analyze_iommu_group() {
# Skip PCI bridges and host bridges (class 0x0604 / 0x0600)
local dev_class
dev_class=$(cat "/sys/bus/pci/devices/${dev}/class" 2>/dev/null)
if [[ "$dev_class" == "0x0604" || "$dev_class" == "0x0600" ]]; then
if [[ "$dev_class" == 0x0604* || "$dev_class" == 0x0600* ]]; then
continue
fi
+1 -1
View File
@@ -2,7 +2,7 @@
# ProxMenux - AMD GPU Tools Installer
# ============================================
# Author : MacRimi
# License : MIT
# License : GPL-3.0
# Version : 1.0
# Last Updated: 29/01/2026
# ============================================
+27 -26
View File
@@ -1,34 +1,35 @@
#!/bin/bash
# ==========================================================
# ProxMenux - Coral TPU Installer (unified: PCIe/M.2 + USB)
# =========================================================
# ==========================================================
# Author : MacRimi
# License : MIT
# Version : 2.0 (unified PCIe+USB; auto-detect; feranick fork; libedgetpu runtime)
# Copyright : (c) 2024 MacRimi
# License : GPL-3.0
# Version : 2.0
# Last Updated: 17/04/2026
# =========================================================
# ==========================================================
# Description:
# Single entry point for every Coral variant. At startup the
# script detects what Coral hardware is present on the host
# and installs only what is actually needed.
#
# One entry point for every Coral variant. At startup the script detects
# what Coral hardware is present on the host and installs only what is
# actually needed:
#
# • Coral M.2 / Mini-PCIe (vendor 1ac1 on PCIe)
# → build and install `gasket` + `apex` kernel modules via DKMS
# (feranick/gasket-driver fork; google as fallback with patches)
# → create apex group + udev rules
# → reboot required to load the fresh kernel module
#
# • Coral USB Accelerator (USB IDs 1a6e:089a / 18d1:9302)
# → add the Google Coral APT repository (signed-by keyring)
# → install libedgetpu1-std (Edge TPU runtime)
# → udev rules come with the package
# → no reboot required
#
# • Both present → both paths are run in sequence
# • Neither present → informative dialog and clean exit
#
# The script is idempotent: reruns on already-configured hosts skip work
# that is already done and recover from broken gasket-dkms package state
# (typical after a kernel upgrade on PVE 9).
# Features:
# - Auto-detection of M.2 / Mini-PCIe (vendor 1ac1) and
# USB (1a6e:089a / 18d1:9302) Accelerators in one pass
# - PCIe path: builds gasket + apex kernel modules via DKMS
# using feranick/gasket-driver fork (actively maintained),
# google/gasket-driver as fallback with kernel patches
# - Kernel-aware patches applied only when needed
# (no_llseek → noop_llseek on 6.5+, MODULE_IMPORT_NS
# string form on 6.13+)
# - apex system group + udev rules for /dev/apex_* nodes
# - USB path: Google Coral APT repo (signed-by keyring) +
# libedgetpu1-std runtime (udev rules ship with package)
# - Both variants present → both paths run in sequence
# - Idempotent: reruns skip work already done, recovers
# from broken gasket-dkms state after PVE 9 kernel upgrades
# - Reboot prompted only when the PCIe path ran
# ==========================================================
# Guarantee a valid working directory before anything else. When the user
# re-runs the installer from a previous /tmp/gasket-driver/... path that our
+66 -31
View File
@@ -1,39 +1,41 @@
#!/bin/bash
# ==========================================================
# ProxMenux - A menu-driven script for Proxmox VE management
# ProxMenux - Coral TPU Passthrough to LXC
# ==========================================================
# Author : MacRimi
# Revision : @Blaspt (USB passthrough via udev rule with persistent /dev/coral)
# Revision : @Blaspt (USB passthrough via udev rule)
# Copyright : (c) 2024 MacRimi
# License : (GPL-3.0) (https://github.com/MacRimi/ProxMenux/blob/main/LICENSE)
# Version : 1.4 (unprivileged container support, PVE dev API for apex/iGPU)
# License : GPL-3.0
# Version : 1.4
# Last Updated: 01/04/2026
# ==========================================================
# Description:
# This script automates the configuration and installation of
# Coral TPU and iGPU support in Proxmox VE containers. It:
# - Configures a selected LXC container for hardware acceleration
# - Installs and sets up Coral TPU drivers on the Proxmox host
# - Installs necessary drivers inside the container
# - Manages required system and container restarts
# Configures and installs Coral TPU passthrough (USB and
# M.2 / PCIe) in a Proxmox LXC container. Writes the needed
# dev / cgroup / mount entries into the LXC config, then
# boots the container and installs the Edge TPU runtime
# inside it so apps like Frigate can actually use the TPU.
# iGPU (DRI) device nodes are added alongside when present,
# which is the typical Frigate + Quick Sync combo.
#
# Supports Coral USB and Coral M.2 (PCIe) devices.
# Includes USB passthrough enhancement using persistent udev alias (/dev/coral).
#
# Changelog v1.3:
# - Fixed Coral USB passthrough: mount /dev/bus/usb instead of /dev/coral symlink
# The udev symlink /dev/coral is not passthrough-safe in LXC; mounting the full
# USB bus tree ensures the real device node is accessible inside the container
# regardless of which port the Coral USB is connected to.
#
# Changelog v1.2:
# - Fixed symlink detection for /dev/coral (create=dir for symlinks)
# - Fixed /dev/apex_0 not being mounted in PVE 9 (device existence not required)
# - Fixed grep patterns to avoid matching commented lines
# - Improved device type inference for non-existent devices
# - Added duplicate entry cleanup
# - Better error handling and logging
# Features:
# - Supports Coral USB Accelerator and Coral M.2 / PCIe
# - Auto-detects M.2 via lspci (Global Unichip), USB via
# a persistent udev rule that creates /dev/coral
# - USB passthrough mounts /dev/bus/usb (not /dev/coral)
# so the container sees the real node even if the user
# replugs the device to a different port
# - PCIe/M.2 uses the PVE dev API (dev<N>: ... ,gid=apex)
# which handles cgroup2 permissions automatically
# in both privileged and unprivileged containers
# - Fallback cgroup2 + mount if /dev/apex_0 not yet present
# (module not loaded on host — reboot still pending)
# - Inside container: adds Google Coral APT repo and
# installs libedgetpu1-std (default) or -max (optional)
# - Also installs iGPU user-space drivers when DRI nodes
# are passed, so Quick Sync works out of the box
# - Idempotent: duplicate entries in the LXC config are
# cleaned up on every run
# ==========================================================
LOCAL_SCRIPTS="/usr/local/share/proxmenux/scripts"
@@ -99,10 +101,16 @@ SUBSYSTEM=="usb", ATTRS{idVendor}=="18d1", ATTRS{idProduct}=="9302", MODE="0666"
# Coral Dev Board / Mini PCIe
SUBSYSTEM=="usb", ATTRS{idVendor}=="1a6e", ATTRS{idProduct}=="089a", MODE="0666", TAG+="uaccess", SYMLINK+="coral"'
if [[ ! -f "$RULE_FILE" ]] || ! grep -q "18d1.*9302\|1a6e.*089a" "$RULE_FILE"; then
if [[ ! -f "$RULE_FILE" ]]; then
echo "$RULE_CONTENT" > "$RULE_FILE"
udevadm control --reload-rules && udevadm trigger
msg_ok "$(translate 'Udev rules for Coral USB devices added and rules reloaded.')"
elif ! grep -q "18d1.*9302\|1a6e.*089a" "$RULE_FILE"; then
# Append (>>) instead of overwriting (>) so any user-authored
# rules in this file survive. Audit Tier 7 — udev rule file was overwritten.
printf '\n%s\n' "$RULE_CONTENT" >> "$RULE_FILE"
udevadm control --reload-rules && udevadm trigger
msg_ok "$(translate 'Udev rules for Coral USB devices appended and rules reloaded.')"
else
msg_ok "$(translate 'Udev rules for Coral USB devices already exist.')"
fi
@@ -276,6 +284,15 @@ configure_lxc_hardware() {
if lspci | grep -iq "Global Unichip"; then
msg_info "$(translate 'Coral M.2 Apex detected, configuring...')"
# Pre-flight: warn if the host driver isn't loaded. Without `apex`
# the container will see the device file but the TPU won't actually
# be usable, and Frigate / coral-libs error out at runtime — much
# later than expected. Audit Tier 6 — `install_coral_lxc.sh` does
# not verify that the apex driver is loaded.
if ! lsmod 2>/dev/null | grep -q '^apex'; then
msg_warn "$(translate 'apex kernel module not loaded on host. Run "Install Coral on Host" first or the container will not see /dev/apex_0.')"
fi
local APEX_GID apex_dev_idx
APEX_GID=$(getent group apex 2>/dev/null | cut -d: -f3 || echo "0")
apex_dev_idx=$(get_next_dev_index "$CONFIG_FILE")
@@ -293,9 +310,18 @@ configure_lxc_hardware() {
# dynamically from /proc/devices to avoid hardcoding it.
local APEX_MAJOR
APEX_MAJOR=$(awk '/\bapex\b/{print $1}' /proc/devices 2>/dev/null | head -1)
[[ -z "$APEX_MAJOR" ]] && APEX_MAJOR="245"
if ! grep -q "lxc.cgroup2.devices.allow: c ${APEX_MAJOR}:0 rwm" "$CONFIG_FILE"; then
echo "lxc.cgroup2.devices.allow: c ${APEX_MAJOR}:0 rwm # Coral M2 Apex" >> "$CONFIG_FILE"
if [[ -z "$APEX_MAJOR" ]]; then
# Hardcoded `245` was wrong on kernels that load drivers in
# different order — dynamic majors vary. Surface the failure
# so the user knows the cgroup rule won't match and they need
# to load the apex module first. Audit Tier 6.
msg_warn "$(translate 'Could not detect apex major number from /proc/devices. Load the apex module first: modprobe apex')"
APEX_MAJOR=""
fi
if [[ -n "$APEX_MAJOR" ]]; then
if ! grep -q "lxc.cgroup2.devices.allow: c ${APEX_MAJOR}:0 rwm" "$CONFIG_FILE"; then
echo "lxc.cgroup2.devices.allow: c ${APEX_MAJOR}:0 rwm # Coral M2 Apex" >> "$CONFIG_FILE"
fi
fi
add_mount_if_needed "/dev/apex_0" "dev/apex_0" "$CONFIG_FILE"
msg_ok "$(translate 'Coral M.2 Apex configuration added - device will be available after reboot')"
@@ -332,6 +358,15 @@ install_coral_in_container() {
stop_spinner
# Pre-flight: refuse to run on non-Debian-family containers. The
# apt-get block below would crash with cryptic errors and leave the
# container half-configured. Audit Tier 6 — `install_coral_lxc.sh`
# assumes Debian/Ubuntu without distro detection.
if ! pct exec "$CONTAINER_ID" -- bash -c 'command -v apt-get' &>/dev/null; then
msg_error "$(translate 'Container does not have apt-get available. Coral driver installation only supports Debian/Ubuntu containers.')"
return 1
fi
# Determine driver package for Coral M.2
CORAL_M2=$(lspci | grep -i "Global Unichip")
if [[ -n "$CORAL_M2" ]]; then
+1 -1
View File
@@ -2,7 +2,7 @@
# ProxMenux - Intel GPU Tools Installer
# ============================================
# Author : MacRimi
# License : MIT
# License : GPL-3.0
# Version : 1.0
# Last Updated: 29/01/2026
# ============================================
+276 -54
View File
@@ -1,12 +1,29 @@
#!/bin/bash
# ProxMenux - NVIDIA Driver Installer (PVE 9.x)
# ============================================
# ==========================================================
# ProxMenux - NVIDIA GPU Driver Installer
# ==========================================================
# Author : MacRimi
# Copyright : (c) 2024 MacRimi
# License : (GPL-3.0) (https://github.com/MacRimi/ProxMenux/blob/main/LICENSE)
# Version : 1.2 (PVE9, fixed download issues)
# License : GPL-3.0
# Version : 1.2
# Last Updated: 26/03/2026
# ============================================
# ==========================================================
# Description:
# Installs and manages the NVIDIA proprietary driver on a
# Proxmox VE host. Detects hardware, picks a kernel-compatible
# driver version and handles the full lifecycle
# (install / update / remove).
#
# Features:
# - GPU detection + VFIO passthrough safety check
# - Kernel-aware driver version filter (5.15 → 6.17+)
# - Nouveau blacklist + module unload
# - DKMS-backed install (survives kernel upgrades)
# - udev rules + nvidia-persistenced service
# - Optional keylase/nvidia-patch (NVENC session limit)
# - LXC container driver propagation (Alpine/Arch/Debian)
# - Complete uninstall path
# ==========================================================
SCRIPT_TITLE="NVIDIA GPU Driver Installer for Proxmox VE"
@@ -246,13 +263,6 @@ update_lxc_nvidia() {
local install_rc=0
case "$distro" in
alpine)
msg_info2 "$(translate 'Upgrading NVIDIA utils (Alpine)...')"
pct exec "$ctid" -- sh -c \
"apk update && apk add --no-cache --upgrade nvidia-utils" \
2>&1 | tee -a "$LOG_FILE"
install_rc=${PIPESTATUS[0]}
;;
arch|manjaro|endeavouros)
msg_info2 "$(translate 'Upgrading NVIDIA utils (Arch)...')"
pct exec "$ctid" -- bash -c \
@@ -270,7 +280,8 @@ update_lxc_nvidia() {
install_rc=1
else
local free_mb
free_mb=$(pct exec "$ctid" -- df -m / 2>/dev/null | awk 'NR==2{print $4}' || echo 0)
free_mb=$(pct exec "$ctid" -- df -P -m / 2>/dev/null | awk 'END{print $4}')
free_mb=${free_mb:-0}
if [[ "$free_mb" -lt 1500 ]]; then
_restore_container_memory "$ctid"
whiptail --backtitle "ProxMenux" \
@@ -314,21 +325,51 @@ update_lxc_nvidia() {
msg_info2 "$(translate 'Running NVIDIA installer in container. This may take several minutes...')"
echo "" >>"$LOG_FILE"
pct exec "$ctid" -- bash -c "
mkdir -p /tmp/nvidia_lxc_install
tar -xzf /tmp/nvidia_lxc.tar.gz -C /tmp/nvidia_lxc_install 2>&1
/tmp/nvidia_lxc_install/nvidia-installer \
--no-kernel-modules \
--no-questions \
--ui=none \
--no-nouveau-check \
--no-dkms \
--no-install-compat32-libs
EXIT=\$?
rm -rf /tmp/nvidia_lxc_install /tmp/nvidia_lxc.tar.gz
exit \$EXIT
" 2>&1 | tee -a "$LOG_FILE"
install_rc=${PIPESTATUS[0]}
if [[ "$distro" == "alpine" ]]; then
# Alpine uses musl libc and does not ship a glibc dynamic
# loader, so the nvidia-installer binary (glibc) cannot
# execute. We pull `gcompat` to provide the glibc loader
# and a libc shim, then copy the userspace libs and the
# standard NVIDIA binaries by hand. SONAME symlinks are
# built from `readelf` (binutils) instead of trusting a
# hard-coded list — the .run ships ~50 .so files and the
# set varies between branches.
pct exec "$ctid" -- sh -c '
set -e
mkdir -p /tmp/nvidia_lxc_install
tar -xzf /tmp/nvidia_lxc.tar.gz -C /tmp/nvidia_lxc_install
apk add --no-cache gcompat binutils >/dev/null
cd /tmp/nvidia_lxc_install
mkdir -p /usr/lib /usr/bin
cp -P *.so* /usr/lib/ 2>/dev/null || true
for lib in /usr/lib/lib*.so.*; do
[ -f "$lib" ] || continue
soname=$(readelf -d "$lib" 2>/dev/null | grep SONAME | head -n1 | sed -e "s/.*\[//" -e "s/\].*//")
[ -n "$soname" ] && [ "$(basename "$lib")" != "$soname" ] && ln -sf "$(basename "$lib")" "/usr/lib/$soname"
done
for bin in nvidia-smi nvidia-debugdump nvidia-cuda-mps-control nvidia-cuda-mps-server nvidia-persistenced nvidia-modprobe; do
[ -f "$bin" ] && cp -P "$bin" /usr/bin/ && chmod 755 "/usr/bin/$bin"
done
rm -rf /tmp/nvidia_lxc_install /tmp/nvidia_lxc.tar.gz
' 2>&1 | tee -a "$LOG_FILE"
install_rc=${PIPESTATUS[0]}
else
pct exec "$ctid" -- bash -c "
mkdir -p /tmp/nvidia_lxc_install
tar -xzf /tmp/nvidia_lxc.tar.gz -C /tmp/nvidia_lxc_install 2>&1
/tmp/nvidia_lxc_install/nvidia-installer \
--no-kernel-modules \
--no-questions \
--ui=none \
--no-nouveau-check \
--no-dkms \
--no-install-compat32-libs
EXIT=\$?
rm -rf /tmp/nvidia_lxc_install /tmp/nvidia_lxc.tar.gz
exit \$EXIT
" 2>&1 | tee -a "$LOG_FILE"
install_rc=${PIPESTATUS[0]}
fi
rm -rf "$extract_dir"
_restore_container_memory "$ctid"
@@ -596,13 +637,20 @@ get_kernel_compatibility_info() {
KERNEL_MAJOR=$(echo "$kernel_version" | cut -d. -f1)
KERNEL_MINOR=$(echo "$kernel_version" | cut -d. -f2)
# Define minimum compatible versions based on kernel
# Based on https://docs.nvidia.com/datacenter/tesla/drivers/index.html
if [[ "$KERNEL_MAJOR" -ge 6 ]] && [[ "$KERNEL_MINOR" -ge 17 ]]; then
# Kernel 6.17+ (Proxmox 9.x) - Requires 580.82.07 or higher
MIN_DRIVER_VERSION="580.82.07"
# Define minimum compatible versions based on kernel.
# Floor bumped from 580.82.07 → 580.105.08 for kernel 6.17+ after a
# user report (issue tracked as Sprint 11.4) that 580.82-580.95 builds
# fail on kernel 6.17.13 (DKMS module compile errors with the newer
# toolchain shipped with PVE 9.1). 580.105.08 is verified working on
# the test host. Future kernel 7.x falls into the same bucket — the
# `KERNEL_MAJOR -ge 7` branch was previously missing and routed 7.x
# kernels to MIN=535 incorrectly.
if { [[ "$KERNEL_MAJOR" -ge 7 ]]; } || \
{ [[ "$KERNEL_MAJOR" -eq 6 ]] && [[ "$KERNEL_MINOR" -ge 17 ]]; }; then
# Kernel 6.17+ / 7.x (Proxmox 9.x +) - Requires 580.105.08 or higher
MIN_DRIVER_VERSION="580.105.08"
RECOMMENDED_BRANCH="580"
COMPATIBILITY_NOTE="Kernel $kernel_version requires NVIDIA driver 580.82.07 or newer"
COMPATIBILITY_NOTE="Kernel $kernel_version requires NVIDIA driver 580.105.08 or newer (older 580.x builds fail to compile)"
elif [[ "$KERNEL_MAJOR" -ge 6 ]] && [[ "$KERNEL_MINOR" -ge 8 ]]; then
# Kernel 6.8-6.16 (Proxmox 8.2+) - Works with 550.x or higher
MIN_DRIVER_VERSION="550"
@@ -635,31 +683,131 @@ is_version_compatible() {
ver_minor=$(echo "$version" | cut -d. -f2)
ver_patch=$(echo "$version" | cut -d. -f3)
if [[ "$MIN_DRIVER_VERSION" == "580.82.07" ]]; then
# Compare full version: must be >= 580.82.07
if [[ ${ver_major} -gt 580 ]]; then
return 0
elif [[ ${ver_major} -eq 580 ]]; then
if [[ $((10#${ver_minor})) -gt 82 ]]; then
# Full-version comparison when MIN is dotted (e.g. "580.105.08").
# Splits MIN_DRIVER_VERSION into major / minor / patch and compares each
# component numerically against the candidate version. The previous code
# had a hardcoded branch only for "580.82.07" — bumping the floor required
# editing two places. Sprint 11.4.
case "$MIN_DRIVER_VERSION" in
*.*.*)
# Dotted threshold: compare full triple.
local _min_major _min_minor _min_patch
IFS='.' read -r _min_major _min_minor _min_patch <<<"$MIN_DRIVER_VERSION"
_min_major=${_min_major:-0}
_min_minor=${_min_minor:-0}
_min_patch=${_min_patch:-0}
ver_minor=${ver_minor:-0}
ver_patch=${ver_patch:-0}
if (( 10#$ver_major > 10#$_min_major )); then
return 0
elif [[ $((10#${ver_minor})) -eq 82 ]]; then
if [[ $((10#${ver_patch:-0})) -ge 7 ]]; then
elif (( 10#$ver_major == 10#$_min_major )); then
if (( 10#$ver_minor > 10#$_min_minor )); then
return 0
elif (( 10#$ver_minor == 10#$_min_minor )); then
if (( 10#${ver_patch:-0} >= 10#$_min_patch )); then
return 0
fi
fi
fi
fi
return 1
fi
if [[ ${ver_major} -ge ${MIN_DRIVER_VERSION} ]]; then
return 0
else
return 1
fi
return 1
;;
*)
# Single-major threshold (e.g. "550", "535"): compare major only.
if [[ ${ver_major} -ge ${MIN_DRIVER_VERSION} ]]; then
return 0
else
return 1
fi
;;
esac
}
# Tell whether the installed NVIDIA driver is recorded as patched.
#
# Looks up `.nvidia_driver.patched` in ProxMenux's components_status.json
# using jq.
#
# Returns: 0 when jq prints "true" for that key; 1 otherwise, including
#          when the status file is missing or jq is not installed.
is_current_nvidia_patched() {
    local state_json="/usr/local/share/proxmenux/components_status.json"
    local flag

    # Nothing to inspect without both the status file and jq.
    if [[ ! -f "$state_json" ]] || ! command -v jq >/dev/null 2>&1; then
        return 1
    fi

    flag=$(jq -r '.nvidia_driver.patched // false' "$state_json" 2>/dev/null)
    if [[ "$flag" == "true" ]]; then
        return 0
    fi
    return 1
}
# keylase/nvidia-patch support list: where patch.sh is fetched from, where
# the parsed version list is cached, and how long that cache stays valid.
KEYLASE_PATCH_URL="https://raw.githubusercontent.com/keylase/nvidia-patch/master/patch.sh"
KEYLASE_PATCH_CACHE="/var/cache/proxmenux/keylase_patch_versions.txt"
# 7 days, expressed in seconds.
KEYLASE_PATCH_TTL_SECONDS=$(( 7 * 24 * 60 * 60 ))
# Refresh the cached list of driver versions supported by keylase/nvidia-patch.
#
# The cache file is reused as-is while it is non-empty and younger than the
# TTL. Otherwise patch.sh is downloaded and every ["X.Y"] / ["X.Y.Z"] key
# found in it is written to the cache, one version per line, sorted and
# de-duplicated. An existing (stale) cache is left untouched on failure.
#
# Globals: KEYLASE_PATCH_CACHE, KEYLASE_PATCH_TTL_SECONDS,
#          KEYLASE_PATCH_URL (all read)
# Returns: 0 when a fresh cache is in place afterwards; 1 when the download,
#          the parsing or the final rename failed.
refresh_keylase_patch_cache() {
    local now ts age
    now=$(date +%s)
    if [[ -f "$KEYLASE_PATCH_CACHE" ]]; then
        ts=$(stat -c '%Y' "$KEYLASE_PATCH_CACHE" 2>/dev/null || echo 0)
        age=$(( now - ts ))
        if (( age < KEYLASE_PATCH_TTL_SECONDS )) && [[ -s "$KEYLASE_PATCH_CACHE" ]]; then
            return 0
        fi
    fi

    mkdir -p "$(dirname "$KEYLASE_PATCH_CACHE")" 2>/dev/null || return 1

    # Download completely before parsing. With curl at the head of a
    # pipeline its exit status is lost, so a truncated transfer would be
    # parsed and cached as if it were the full support list.
    local payload
    payload=$(curl -fsSL --max-time 15 "$KEYLASE_PATCH_URL" 2>/dev/null) || return 1

    # Temp file lives next to the cache so the final mv is a same-filesystem
    # rename and readers never observe a half-written list.
    local tmp
    tmp=$(mktemp "${KEYLASE_PATCH_CACHE}.XXXXXX" 2>/dev/null) || return 1

    if printf '%s\n' "$payload" \
        | grep -oE '\["[0-9]+\.[0-9]+(\.[0-9]+)?"\]' \
        | sed -E 's/\["([0-9.]+)"\]/\1/' \
        | sort -u > "$tmp" && [[ -s "$tmp" ]]; then
        # Only report success when the new list is actually in place.
        if mv -f "$tmp" "$KEYLASE_PATCH_CACHE"; then
            return 0
        fi
    fi

    rm -f "$tmp"
    return 1
}
# Check whether a driver version appears in the cached keylase support list.
#
# Arguments: $1 - driver version string (e.g. 550.40 or 535.54.03)
# Globals:   KEYLASE_PATCH_CACHE (read)
# Returns:   0 when the version matches a whole line of the cache exactly;
#            non-zero when it does not, when no version was given, or when
#            the cache file is missing or empty.
is_keylase_patch_supported() {
    local candidate="$1"

    if [[ -z "$candidate" ]]; then
        return 1
    fi
    if [[ ! -f "$KEYLASE_PATCH_CACHE" || ! -s "$KEYLASE_PATCH_CACHE" ]]; then
        return 1
    fi

    # -F: literal match (dots are not wildcards); -x: whole line only.
    grep -q -F -x "$candidate" "$KEYLASE_PATCH_CACHE"
}
# Keep only the versions that keylase/nvidia-patch supports.
#
# Arguments: $1 - newline-separated list of driver versions
# Outputs:   the supported versions on stdout, one per line, in input order
# Returns:   always 0
filter_keylase_supported() {
    local -a candidates=()
    local candidate

    mapfile -t candidates <<< "$1"
    for candidate in "${candidates[@]}"; do
        # Blank lines are ignored; everything else is checked against the cache.
        if [[ -n "$candidate" ]] && is_keylase_patch_supported "$candidate"; then
            printf '%s\n' "$candidate"
        fi
    done
    return 0
}
# Narrow a version list to a single driver branch (major version).
#
# The branch is the major number of the currently installed driver when
# is_version_compatible accepts that driver; otherwise it is the recommended
# branch. When neither yields a branch the list is passed through unchanged.
#
# Arguments: $1 - newline-separated list of driver versions
#            $2 - currently installed driver version (may be empty)
#            $3 - recommended branch for the running kernel (may be empty)
# Outputs:   the retained versions on stdout, one per line, in input order
# Returns:   always 0
filter_option_c_branch() {
    local versions_in="$1"
    local current="$2"
    local recommended_branch="$3"
    local branch="$recommended_branch"
    local installed_major entry

    if [[ "$current" =~ ^([0-9]+)\. ]]; then
        # Capture the major before calling out, in case the helper
        # overwrites BASH_REMATCH.
        installed_major="${BASH_REMATCH[1]}"
        if is_version_compatible "$current"; then
            branch="$installed_major"
        fi
    fi

    # No branch to filter on: emit the input untouched.
    if [[ -z "$branch" ]]; then
        printf '%s\n' "$versions_in"
        return 0
    fi

    while IFS= read -r entry; do
        [[ -n "$entry" ]] || continue
        if [[ "${entry%%.*}" == "$branch" ]]; then
            printf '%s\n' "$entry"
        fi
    done <<< "$versions_in"
}
version_le() {
local v1="$1"
local v2="$2"
@@ -981,8 +1129,16 @@ EOF
ensure_workdir
cd "$NVIDIA_WORKDIR" || return 1
# Pin to the last release tag so a hostile push to upstream `master`
# can't slip arbitrary code into the install. Bump as needed; the
# `--depth 1` keeps the clone fast. Audit Tier 6 — `nvidia-persistenced`
# git clone without version pinning.
local NVIDIA_PERSISTENCED_TAG="${NVIDIA_PERSISTENCED_TAG:-575.64.05}"
if [[ ! -d nvidia-persistenced ]]; then
git clone https://github.com/NVIDIA/nvidia-persistenced.git >>"$LOG_FILE" 2>&1 || true
git clone --depth 1 --branch "$NVIDIA_PERSISTENCED_TAG" \
https://github.com/NVIDIA/nvidia-persistenced.git >>"$LOG_FILE" 2>&1 \
|| git clone --depth 1 https://github.com/NVIDIA/nvidia-persistenced.git >>"$LOG_FILE" 2>&1 \
|| true
fi
if [[ -d nvidia-persistenced/init ]]; then
@@ -1004,8 +1160,25 @@ apply_nvidia_patch_if_needed() {
msg_info "$(translate 'Cloning and applying NVIDIA patch (keylase/nvidia-patch)...')"
ensure_workdir
cd "$NVIDIA_WORKDIR" || return 1
# Clone keylase/nvidia-patch at the ref named by NVIDIA_PATCH_REF (a branch
# or tag; it defaults to `master`, so set the env var to pin a known-good
# version as new driver versions land). patch.sh ships a list
# of supported drivers in the repo; if our running driver isn't covered
# the patch silently no-ops, so we surface a warning before running.
# Audit Tier 6 — `keylase/nvidia-patch` without pinning + without compat check.
local NVIDIA_PATCH_REF="${NVIDIA_PATCH_REF:-master}"
if [[ ! -d nvidia-patch ]]; then
git clone https://github.com/keylase/nvidia-patch.git >>"$LOG_FILE" 2>&1 || true
git clone --depth 1 --branch "$NVIDIA_PATCH_REF" \
https://github.com/keylase/nvidia-patch.git >>"$LOG_FILE" 2>&1 \
|| git clone --depth 1 https://github.com/keylase/nvidia-patch.git >>"$LOG_FILE" 2>&1 \
|| true
fi
# Best-effort compatibility check: peek the supported-driver list in
# patch.sh and warn if our driver isn't on it.
if [[ -n "$CURRENT_DRIVER_VERSION" && -f nvidia-patch/patch.sh ]]; then
if ! grep -qF "$CURRENT_DRIVER_VERSION" nvidia-patch/patch.sh 2>/dev/null; then
msg_warn "$(translate 'NVIDIA driver') $CURRENT_DRIVER_VERSION $(translate 'is not in the patch.sh supported list. The patch may no-op or fail; review keylase/nvidia-patch README before continuing.')"
fi
fi
if [[ -x nvidia-patch/patch.sh ]]; then
@@ -1132,6 +1305,15 @@ show_version_menu() {
current_list="$filtered_list"
fi
# Option C: kernel-compat alone is too permissive (e.g. kernel 6.14
# accepts ≥ 550 so 595.x shows up — but 595.x has historically broken
# builds on this kernel). Restrict the offered list to the user's
# current branch when their installed driver still works, otherwise
# fall back to the recommended branch for the kernel.
if [[ -n "$current_list" ]]; then
current_list=$(filter_option_c_branch "$current_list" "$CURRENT_DRIVER_VERSION" "$RECOMMENDED_BRANCH")
fi
if [[ -n "$latest" ]]; then
local filtered_max_list=""
while IFS= read -r ver; do
@@ -1143,8 +1325,42 @@ show_version_menu() {
current_list="$filtered_max_list"
fi
# If the user has the keylase NVENC patch applied, only offer versions
# that the patch supports — picking an unsupported version reinstalls
# the driver fine but the patch silently no-ops afterwards, so the
# user loses NVENC limit removal without warning.
local patch_filtered=false
local patch_filter_note=""
if is_current_nvidia_patched && [[ -n "$current_list" ]]; then
if refresh_keylase_patch_cache; then
local trimmed
trimmed=$(filter_keylase_supported "$current_list")
if [[ -n "$trimmed" ]]; then
current_list="$trimmed"
patch_filtered=true
else
patch_filter_note="$(translate 'No version in this branch is currently supported by keylase/nvidia-patch — the NVENC patch will not reapply after reinstall.')"
fi
else
patch_filter_note="$(translate 'Could not fetch keylase/nvidia-patch supported list — patch reapply compatibility is not verified.')"
fi
fi
# Recompute "latest" as the highest version still in the filtered list
# so the menu's "Latest available" label matches what we actually offer
# rather than the global upstream latest (which may have been filtered
# out by Option C / kernel-compat / patch awareness).
if [[ -n "$current_list" ]]; then
latest=$(printf '%s\n' "$current_list" | head -n1 | tr -d '[:space:]')
fi
local menu_text="$(translate 'Select the NVIDIA driver version to install:')\n\n"
menu_text+="$(translate 'Versions shown are compatible with your kernel. Latest available is recommended in most cases.')"
if $patch_filtered; then
menu_text+="\n\n$(translate 'NVENC patch detected — list narrowed to versions supported by keylase/nvidia-patch.')"
elif [[ -n "$patch_filter_note" ]]; then
menu_text+="\n\n${patch_filter_note}"
fi
local choices=()
choices+=("latest" "$(translate 'Latest available') (${latest:-unknown})")
@@ -1186,6 +1402,12 @@ show_version_menu() {
# Main flow
# ==========================================================
main() {
# Rotate the previous run's log instead of truncating — when the
# current install fails, the user can compare against the previous
# attempt to see what changed. Audit Tier 7 — log truncation.
if [[ -f "$LOG_FILE" && -s "$LOG_FILE" ]]; then
cp -p "$LOG_FILE" "${LOG_FILE}.prev" 2>/dev/null || true
fi
: >"$LOG_FILE"
: >"$screen_capture"
+63 -3
View File
@@ -8,6 +8,35 @@
# Version : 1.0
# Last Updated: 05/04/2026
# ==========================================================
# Description:
# Moves an already-assigned GPU between the two modes it can
# live in on a Proxmox host:
# - VM mode (bound to vfio-pci, exclusive to one VM)
# - LXC mode (bound to the native driver, shared with CTs)
#
# Detects the current mode of each selected GPU and applies
# the host-side changes needed to switch (vfio.conf,
# blacklist.conf, /etc/modules, initramfs). Also handles the
# VM/LXC side so the switch doesn't leave dangling config
# pointing at a GPU the workload can no longer access.
#
# Features:
# - Multi-GPU selection (uniform current mode enforced)
# - SR-IOV guard (blocks VF / active-PF passthrough)
# - Blocked-ID policy list (e.g. Intel Arc A770)
# - IOMMU-group aware ID collection (sweeps siblings)
# - Conflict policy per affected VM/LXC
# (keep + disable onboot OR remove from config)
# - Orphan audio cascade: when a GPU leaves a VM, offer
# to remove companion audio hostpci entries and clean
# vfio.conf if no other VM still uses those IDs
# - Precise BDF regex for hostpci removal
# (no substring collision between unrelated GPUs)
# - NVIDIA stack sanitize/restore (udev, module-load,
# hard-blacklist) depending on target mode
# - Rebuilds initramfs only if host config actually changed
# - Reboot prompt at the end
# ==========================================================
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
LOCAL_SCRIPTS_LOCAL="$(cd "$SCRIPT_DIR/.." && pwd)"
@@ -28,15 +57,24 @@ screen_capture="/tmp/proxmenux_gpu_switch_mode_screen_$$.txt"
if [[ -f "$UTILS_FILE" ]]; then
source "$UTILS_FILE"
fi
# Both helper libraries are required for the SR-IOV guard and the audio
# orphan cascade to work. Surface a loud warning if neither path resolves
# — the previous behaviour evaluated `declare -F` later and silently
# disabled the validations, leaving the user thinking they were
# protected. Audit Tier 6 — `switch_gpu_mode.sh` silent helper loss.
if [[ -f "$LOCAL_SCRIPTS_LOCAL/global/pci_passthrough_helpers.sh" ]]; then
source "$LOCAL_SCRIPTS_LOCAL/global/pci_passthrough_helpers.sh"
elif [[ -f "$LOCAL_SCRIPTS_DEFAULT/global/pci_passthrough_helpers.sh" ]]; then
source "$LOCAL_SCRIPTS_DEFAULT/global/pci_passthrough_helpers.sh"
else
msg_warn "$(translate 'pci_passthrough_helpers.sh missing — SR-IOV / orphan-audio guards will be skipped')"
fi
if [[ -f "$LOCAL_SCRIPTS_LOCAL/global/gpu_hook_guard_helpers.sh" ]]; then
source "$LOCAL_SCRIPTS_LOCAL/global/gpu_hook_guard_helpers.sh"
elif [[ -f "$LOCAL_SCRIPTS_DEFAULT/global/gpu_hook_guard_helpers.sh" ]]; then
source "$LOCAL_SCRIPTS_DEFAULT/global/gpu_hook_guard_helpers.sh"
else
msg_warn "$(translate 'gpu_hook_guard_helpers.sh missing — VM hookscript guard will be skipped')"
fi
load_language
@@ -130,7 +168,7 @@ _get_iommu_group_ids() {
local dev dev_class vid did
dev=$(basename "$dev_path")
dev_class=$(cat "/sys/bus/pci/devices/${dev}/class" 2>/dev/null)
[[ "$dev_class" == "0x0604" || "$dev_class" == "0x0600" ]] && continue
[[ "$dev_class" == 0x0604* || "$dev_class" == 0x0600* ]] && continue
vid=$(cat "/sys/bus/pci/devices/${dev}/vendor" 2>/dev/null | sed 's/0x//')
did=$(cat "/sys/bus/pci/devices/${dev}/device" 2>/dev/null | sed 's/0x//')
[[ -n "$vid" && -n "$did" ]] && echo "${vid}:${did}"
@@ -978,8 +1016,21 @@ apply_vm_action_for_lxc_mode() {
# switch-back) or it steals host audio unnecessarily. Enumerate
# orphan audio hostpci entries and ask the user what to do.
if declare -F _vm_list_orphan_audio_hostpci >/dev/null 2>&1; then
local _orphan_audio
_orphan_audio=$(_vm_list_orphan_audio_hostpci "$vmid" "${SELECTED_PCI_SLOTS[0]}")
# Concatenate orphan-audio entries across ALL selected GPUs.
# The previous code only checked `SELECTED_PCI_SLOTS[0]`, so when
# the user switched 2 dGPUs at once and each had its own audio
# companion, the second GPU's audio was left dangling in the VM
# config. Audit Tier 6 — orphan audio checked only for the first slot.
local _orphan_audio=""
local _slot
for _slot in "${SELECTED_PCI_SLOTS[@]}"; do
local _piece
_piece=$(_vm_list_orphan_audio_hostpci "$vmid" "$_slot")
if [[ -n "$_piece" ]]; then
[[ -n "$_orphan_audio" ]] && _orphan_audio+=$'\n'
_orphan_audio+="$_piece"
fi
done
if [[ -n "$_orphan_audio" ]]; then
local -a _orph_items=()
local _line _o_idx _o_bdf _o_name
@@ -1111,6 +1162,15 @@ switch_to_vm_mode() {
msg_ok "$(translate 'IOMMU is already active on this system')" | tee -a "$screen_capture"
elif grep -qE 'intel_iommu=on|amd_iommu=on' /etc/kernel/cmdline 2>/dev/null || \
grep -qE 'intel_iommu=on|amd_iommu=on' /etc/default/grub 2>/dev/null; then
# Cross-check that IOMMU is *actually* active in the running kernel.
# The kernel parameter alone doesn't guarantee functional IOMMU —
# if the BIOS toggle is off, /sys/kernel/iommu_groups/ is empty even
# though intel_iommu=on is in cmdline. Without this gate we'd write
# vfio.conf and after reboot the GPU never gets claimed by VFIO.
# Audit Tier 6 — optimistic IOMMU check.
if ! find /sys/kernel/iommu_groups -mindepth 1 -maxdepth 1 -name '[0-9]*' 2>/dev/null | grep -q .; then
msg_warn "$(translate 'intel_iommu/amd_iommu is set in cmdline but no IOMMU groups exist — IOMMU appears disabled in BIOS. Enable VT-d / AMD-Vi in firmware before continuing.')"
fi
_register_iommu_tool
HOST_CONFIG_CHANGED=true
msg_ok "$(translate 'IOMMU already configured in kernel parameters')" | tee -a "$screen_capture"
+1 -1
View File
@@ -144,7 +144,7 @@ _get_iommu_group_ids() {
local dev dev_class vid did
dev=$(basename "$dev_path")
dev_class=$(cat "/sys/bus/pci/devices/${dev}/class" 2>/dev/null)
[[ "$dev_class" == "0x0604" || "$dev_class" == "0x0600" ]] && continue
[[ "$dev_class" == 0x0604* || "$dev_class" == 0x0600* ]] && continue
vid=$(cat "/sys/bus/pci/devices/${dev}/vendor" 2>/dev/null | sed 's/0x//')
did=$(cat "/sys/bus/pci/devices/${dev}/device" 2>/dev/null | sed 's/0x//')
[[ -n "$vid" && -n "$did" ]] && echo "${vid}:${did}"