mirror of
https://github.com/MacRimi/ProxMenux.git
synced 2026-05-31 20:44:42 +00:00
update add_gpu_vm.sh
This commit is contained in:
@@ -355,3 +355,201 @@ function _pci_sriov_role() {
|
||||
fi
|
||||
echo "none"
|
||||
}
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
# Per-BDF VFIO binding via udev rules (multi-GPU safe, battle-tested)
# ──────────────────────────────────────────────────────────────────────
# Writes one udev rule per BDF setting `ATTR{driver_override}="vfio-pci"`.
# udev applies this rule at the PCI ADD event, BEFORE any driver (nvidia,
# amdgpu, i915) gets a chance to bind. When the kernel then tries to
# attach a driver, it sees driver_override and routes the device to
# vfio-pci instead. The native module (e.g. nvidia.ko) stays loaded for
# OTHER GPUs of the same vendor, so multi-GPU NVIDIA scenarios work.
#
# State file: /etc/proxmenux/vfio-bind.bdfs (one BDF per line, source of truth)
# Udev rules: /etc/udev/rules.d/10-proxmenux-vfio-bind.rules (regenerated
#             from the state file every time it changes)
#
# Why udev and not the initramfs hook (init-top) that we tried first:
# init-top runs before sysfs is fully populated with PCI devices, so the
# driver_override write loses the race against the native driver claiming
# the device. Udev rules with ATTR{driver_override}= are processed at the
# PCI subsystem ADD event, which is exactly when we need them.
# ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
# Source of truth: one PCI address (BDF) per line.
PROXMENUX_VFIO_BIND_STATE='/etc/proxmenux/vfio-bind.bdfs'
# Generated udev rule file, rebuilt from the state file above.
PROXMENUX_VFIO_BIND_UDEV_RULE='/etc/udev/rules.d/10-proxmenux-vfio-bind.rules'
# Path of the init-top hook installed by an earlier approach; it is only
# referenced so that hosts which still carry it can have it removed.
PROXMENUX_VFIO_BIND_LEGACY_HOOK='/etc/initramfs-tools/scripts/init-top/proxmenux-vfio-bind'
|
||||
|
||||
#######################################
# Regenerate the udev rule file from the BDF state file.
#
# Emits one `ATTR{driver_override}="vfio-pci"` rule per valid entry of the
# state file. When the state file is missing or empty the rule file is
# removed instead.
#
# Globals:   PROXMENUX_VFIO_BIND_STATE     (read)
#            PROXMENUX_VFIO_BIND_UDEV_RULE (written / removed)
# Arguments: none
# Returns:   0 on success, 1 if the rule directory or file cannot be written
#######################################
_proxmenux_vfio_bind_write_udev_rule() {
    local bdf full
    # domain:bus:device.function — the domain may be wider than 4 hex digits.
    local -r bdf_re='^[0-9a-f]{4,8}:[0-9a-f]{2}:[0-9a-f]{2}\.[0-7]$'

    # Drop the obsolete init-top hook (if still present) so a stale copy
    # cannot run alongside the udev rule. Best effort: its exit status must
    # not abort the regeneration.
    _proxmenux_vfio_bind_cleanup_legacy || true

    # Nothing to bind: remove the rule file and let udev forget it.
    if [[ ! -s "$PROXMENUX_VFIO_BIND_STATE" ]]; then
        rm -f -- "$PROXMENUX_VFIO_BIND_UDEV_RULE"
        udevadm control --reload-rules >/dev/null 2>&1 || true
        return 0
    fi

    mkdir -p "$(dirname "$PROXMENUX_VFIO_BIND_UDEV_RULE")" || return 1
    {
        echo "# ProxMenux: per-BDF VFIO driver override"
        echo "# Auto-generated from $PROXMENUX_VFIO_BIND_STATE"
        echo "# DO NOT EDIT MANUALLY — regenerated by add_gpu_vm.sh / switch_gpu_mode*.sh"
        # `|| [[ -n ... ]]` keeps a final line that lacks a trailing newline.
        while IFS= read -r bdf || [[ -n "$bdf" ]]; do
            # Tolerate stray blanks / CRs from manual edits.
            bdf="${bdf//[[:space:]]/}"
            [[ -z "$bdf" || "$bdf" == \#* ]] && continue
            # The KERNEL match is compared against the kernel device name,
            # which uses lower-case hex.
            full="${bdf,,}"
            # Only the short "XX:YY.Z" form lacks a domain; an entry that
            # already carries one (0000: or any other) is kept as is.
            [[ "$full" == *:*:* ]] || full="0000:${full}"
            # Never copy a malformed entry into the rules file.
            [[ "$full" =~ $bdf_re ]] || continue
            printf 'SUBSYSTEM=="pci", KERNEL=="%s", ATTR{driver_override}="vfio-pci"\n' "$full"
        done < "$PROXMENUX_VFIO_BIND_STATE"
    } > "$PROXMENUX_VFIO_BIND_UDEV_RULE" || return 1

    udevadm control --reload-rules >/dev/null 2>&1 || true
}
|
||||
|
||||
#######################################
# Remove the obsolete init-top hook left behind by a prior model.
#
# Called by the add/remove/write helpers so any host that ran the older
# version of this code self-heals.
#
# Globals:   PROXMENUX_VFIO_BIND_LEGACY_HOOK (read)
#            HOST_CONFIG_CHANGED (set to "true" only if the caller already
#            declared it and a hook was actually removed)
# Arguments: none
# Returns:   always 0 — "nothing to clean" and "flag not declared" are not
#            errors, so callers running under `set -e` are not aborted
#######################################
_proxmenux_vfio_bind_cleanup_legacy() {
    # -L also catches a dangling symlink, which -f alone would miss.
    if [[ -f "$PROXMENUX_VFIO_BIND_LEGACY_HOOK" || -L "$PROXMENUX_VFIO_BIND_LEGACY_HOOK" ]]; then
        rm -f -- "$PROXMENUX_VFIO_BIND_LEGACY_HOOK"
        if [[ -n "${HOST_CONFIG_CHANGED+x}" ]]; then
            HOST_CONFIG_CHANGED=true
        fi
    fi
    return 0
}
|
||||
|
||||
#######################################
# Add PCI addresses to the VFIO binder state and regenerate the udev rule.
#
# Globals:   PROXMENUX_VFIO_BIND_STATE (created if missing, appended to)
#            HOST_CONFIG_CHANGED (set to "true" only if the caller already
#            declared it and at least one entry was added)
# Arguments: any number of BDFs, short ("01:00.0") or with a PCI domain
#            ("0000:01:00.0"); empty arguments are ignored
# Returns:   0 on success (including "nothing new to add"),
#            1 if the state file cannot be created
#######################################
_proxmenux_vfio_bind_add_bdfs() {
    local changed=false bdf normalized

    mkdir -p "$(dirname "$PROXMENUX_VFIO_BIND_STATE")" || return 1
    touch "$PROXMENUX_VFIO_BIND_STATE" || return 1
    # Best effort: the cleanup status must not abort the add.
    _proxmenux_vfio_bind_cleanup_legacy || true

    for bdf in "$@"; do
        [[ -z "$bdf" ]] && continue
        # Store lower-case hex so the entry matches the kernel device name.
        normalized="${bdf,,}"
        # Only the short "XX:YY.Z" form lacks a domain; an address that
        # already carries one (0000: or any other) is kept as is.
        [[ "$normalized" == *:*:* ]] || normalized="0000:${normalized}"

        if ! grep -qixF -e "$normalized" -- "$PROXMENUX_VFIO_BIND_STATE" 2>/dev/null; then
            # If a manual edit left the last line unterminated, close it
            # first so the new entry does not get glued onto it.
            if [[ -s "$PROXMENUX_VFIO_BIND_STATE" && -n "$(tail -c 1 -- "$PROXMENUX_VFIO_BIND_STATE")" ]]; then
                echo >> "$PROXMENUX_VFIO_BIND_STATE"
            fi
            printf '%s\n' "$normalized" >> "$PROXMENUX_VFIO_BIND_STATE"
            changed=true
        fi
    done

    if [[ "$changed" == true ]]; then
        _proxmenux_vfio_bind_write_udev_rule
        if [[ -n "${HOST_CONFIG_CHANGED+x}" ]]; then
            HOST_CONFIG_CHANGED=true
        fi
    fi
    return 0
}
|
||||
|
||||
#######################################
# Remove PCI addresses from the VFIO binder state and regenerate the rule.
#
# Globals:   PROXMENUX_VFIO_BIND_STATE (rewritten; deleted once empty)
#            HOST_CONFIG_CHANGED (set to "true" only if the caller already
#            declared it and at least one entry was removed)
# Arguments: any number of BDFs, short ("01:00.0") or with a PCI domain
#            ("0000:01:00.0"); empty arguments are ignored
# Returns:   0 on success (including "nothing to remove"),
#            1 if the state file could not be rewritten
#######################################
_proxmenux_vfio_bind_remove_bdfs() {
    [[ -f "$PROXMENUX_VFIO_BIND_STATE" ]] || return 0
    # Best effort: the cleanup status must not abort the removal.
    _proxmenux_vfio_bind_cleanup_legacy || true

    local bdf normalized tmp rc=0
    local -a patterns=()

    for bdf in "$@"; do
        [[ -z "$bdf" ]] && continue
        normalized="${bdf,,}"
        # Only the short "XX:YY.Z" form lacks a domain; an address that
        # already carries one (0000: or any other) is kept as is.
        [[ "$normalized" == *:*:* ]] || normalized="0000:${normalized}"
        patterns+=(-e "$normalized")
    done
    (( ${#patterns[@]} > 0 )) || return 0

    # Fixed-string, whole-line, case-insensitive matching: the "." in a BDF
    # must not act as a regex wildcard, and entries stored in upper-case
    # hex must still be found.
    grep -qixF "${patterns[@]}" -- "$PROXMENUX_VFIO_BIND_STATE" || return 0

    # Build the replacement next to the state file so the final mv is a
    # same-filesystem rename.
    tmp=$(mktemp "${PROXMENUX_VFIO_BIND_STATE}.XXXXXX") || return 1
    # grep -v exits 1 when no line is left, which is a valid result here;
    # only a status above 1 is a real error.
    grep -vixF "${patterns[@]}" -- "$PROXMENUX_VFIO_BIND_STATE" > "$tmp" || rc=$?
    if (( rc > 1 )); then
        rm -f -- "$tmp"
        return 1
    fi
    # mktemp creates the file 0600; keep the state file's existing mode.
    chmod --reference="$PROXMENUX_VFIO_BIND_STATE" -- "$tmp" 2>/dev/null || true
    if ! mv -f -- "$tmp" "$PROXMENUX_VFIO_BIND_STATE"; then
        rm -f -- "$tmp"
        return 1
    fi

    _proxmenux_vfio_bind_write_udev_rule
    if [[ -n "${HOST_CONFIG_CHANGED+x}" ]]; then
        HOST_CONFIG_CHANGED=true
    fi
    # If empty, remove the state file too (keeps the host clean).
    if [[ ! -s "$PROXMENUX_VFIO_BIND_STATE" ]]; then
        rm -f -- "$PROXMENUX_VFIO_BIND_STATE"
    fi
    return 0
}
|
||||
|
||||
#######################################
# Remove every BDF from the binder state whose PCI vendor matches $1.
#
# The vendor of each listed device is read from its sysfs `vendor` file;
# entries whose vendor file cannot be read are left untouched.
#
# Globals:   PROXMENUX_VFIO_BIND_STATE (read)
# Arguments: $1 - vendor ID in hex, with or without a "0x" prefix, any case
#                 (e.g. "10de" for NVIDIA, "1002" for AMD, "8086" for Intel)
#            $2 - optional: directory holding the per-device sysfs entries
#                 (default: /sys/bus/pci/devices)
# Returns:   0 when there is nothing to do; otherwise the exit status of
#            _proxmenux_vfio_bind_remove_bdfs
#######################################
_proxmenux_vfio_bind_purge_vendor() {
    local target_vendor="${1,,}"
    local sysfs_devices="${2:-/sys/bus/pci/devices}"
    target_vendor="${target_vendor#0x}"
    [[ -n "$target_vendor" && -f "$PROXMENUX_VFIO_BIND_STATE" ]] || return 0

    local -a to_remove=()
    local bdf full vendor_hex vendor_file

    # `|| [[ -n ... ]]` keeps a final line that lacks a trailing newline.
    while IFS= read -r bdf || [[ -n "$bdf" ]]; do
        # Tolerate stray blanks / CRs from manual edits.
        bdf="${bdf//[[:space:]]/}"
        [[ -z "$bdf" || "$bdf" == \#* ]] && continue

        full="${bdf,,}"
        # Only the short "XX:YY.Z" form lacks a domain; an entry that
        # already carries one (0000: or any other) is kept as is.
        [[ "$full" == *:*:* ]] || full="0000:${full}"

        vendor_hex=""
        vendor_file="${sysfs_devices}/${full}/vendor"
        if [[ -r "$vendor_file" ]]; then
            IFS= read -r vendor_hex < "$vendor_file" || true
        fi
        # The file holds e.g. "0x10de"; compare as bare lower-case hex.
        vendor_hex="${vendor_hex,,}"
        vendor_hex="${vendor_hex#0x}"

        if [[ "$vendor_hex" == "$target_vendor" ]]; then
            to_remove+=("$full")
        fi
    done < "$PROXMENUX_VFIO_BIND_STATE"

    if (( ${#to_remove[@]} > 0 )); then
        _proxmenux_vfio_bind_remove_bdfs "${to_remove[@]}"
    fi
}
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
# Auto-migrate hosts that ran the previous (broken) global-blacklist
# model. Idempotent, safe if nothing matches. Removes the global kill-
# switches so the nvidia module can load again for the GPU(s) NOT being
# passed through.
#
# Steps, each applied only when its artifact exists:
#   - delete the exact `blacklist nvidia*` lines from modprobe's blacklist.conf
#   - delete nvidia-blacklist.conf
#   - rename 70-nvidia.rules.proxmenux-disabled back to 70-nvidia.rules
#   - rename nvidia-vfio.conf.proxmenux-disabled-vfio back to nvidia-vfio.conf
#
# Globals:   HOST_CONFIG_CHANGED (set to "true" only if the caller already
#            declared it and something was migrated)
# Arguments: $1 - optional root prefix prepended to every /etc path
#                 (default: empty, i.e. the live system)
# Outputs:   one confirmation message when something was migrated — through
#            msg_ok (translated via translate when available) if that
#            function is defined, otherwise on stdout
# Returns:   always 0
# ──────────────────────────────────────────────────────────────────────
_proxmenux_nvidia_migrate_legacy_blacklist() {
    local root="${1:-}"
    root="${root%/}"

    local changed=false
    local blacklist_file="${root}/etc/modprobe.d/blacklist.conf"
    local nvidia_blacklist="${root}/etc/modprobe.d/nvidia-blacklist.conf"
    local udev_disabled="${root}/etc/udev/rules.d/70-nvidia.rules.proxmenux-disabled"
    local udev_rules="${root}/etc/udev/rules.d/70-nvidia.rules"
    local modules_load_disabled="${root}/etc/modules-load.d/nvidia-vfio.conf.proxmenux-disabled-vfio"
    local modules_load_active="${root}/etc/modules-load.d/nvidia-vfio.conf"

    # One pattern shared by the detection (grep) and the deletion (sed) so
    # the two can never drift apart.
    local -r legacy_re='^blacklist (nvidia|nvidia_drm|nvidia_modeset|nvidia_uvm|nvidiafb)$'
    local -r done_msg='Migrated legacy ProxMenux NVIDIA blacklist state — module will reload after reboot'
    local shown=""

    # Every step only counts as a change when it actually succeeded.
    if [[ -f "$blacklist_file" ]] && grep -qE -e "$legacy_re" -- "$blacklist_file"; then
        if sed -i -E -e "/${legacy_re}/d" -- "$blacklist_file"; then
            changed=true
        fi
    fi

    if [[ -f "$nvidia_blacklist" ]]; then
        if rm -f -- "$nvidia_blacklist"; then
            changed=true
        fi
    fi

    if [[ -f "$udev_disabled" ]]; then
        if mv -- "$udev_disabled" "$udev_rules" >/dev/null 2>&1; then
            udevadm control --reload-rules >/dev/null 2>&1 || true
            changed=true
        fi
    fi

    if [[ -f "$modules_load_disabled" ]]; then
        if mv -- "$modules_load_disabled" "$modules_load_active" >/dev/null 2>&1; then
            changed=true
        fi
    fi

    if [[ "$changed" == true ]]; then
        if [[ -n "${HOST_CONFIG_CHANGED+x}" ]]; then
            HOST_CONFIG_CHANGED=true
        fi
        if declare -F msg_ok >/dev/null 2>&1; then
            # Use the translation when it is available and succeeds;
            # otherwise show the English text — never both.
            if declare -F translate >/dev/null 2>&1; then
                shown=$(translate "$done_msg") || shown=""
            fi
            msg_ok "${shown:-$done_msg}"
        else
            echo "[OK] $done_msg"
        fi
    fi
    return 0
}
|
||||
|
||||
Reference in New Issue
Block a user