add_controller_nvme_vm.sh

This commit is contained in:
MacRimi
2026-04-06 13:39:07 +02:00
parent 5be4bd2ae9
commit 10ce9dbcde
14 changed files with 2390 additions and 237 deletions

View File

@@ -0,0 +1,254 @@
#!/usr/bin/env bash
if [[ -n "${__PROXMENUX_GPU_HOOK_GUARD_HELPERS__}" ]]; then
return 0
fi
__PROXMENUX_GPU_HOOK_GUARD_HELPERS__=1
PROXMENUX_GPU_HOOK_STORAGE_REF="local:snippets/proxmenux-gpu-guard.sh"
PROXMENUX_GPU_HOOK_ABS_PATH="/var/lib/vz/snippets/proxmenux-gpu-guard.sh"
_gpu_guard_msg_warn() {
if declare -F msg_warn >/dev/null 2>&1; then
msg_warn "$1"
else
echo "[WARN] $1" >&2
fi
}
_gpu_guard_msg_ok() {
if declare -F msg_ok >/dev/null 2>&1; then
msg_ok "$1"
else
echo "[OK] $1"
fi
}
_gpu_guard_has_vm_gpu() {
local vmid="$1"
qm config "$vmid" 2>/dev/null | grep -qE '^hostpci[0-9]+:'
}
_gpu_guard_has_lxc_gpu() {
local ctid="$1"
local conf="/etc/pve/lxc/${ctid}.conf"
[[ -f "$conf" ]] || return 1
grep -qE 'dev[0-9]+:.*(/dev/dri|/dev/nvidia|/dev/kfd)|lxc\.mount\.entry:.*dev/dri' "$conf" 2>/dev/null
}
ensure_proxmenux_gpu_guard_hookscript() {
mkdir -p /var/lib/vz/snippets 2>/dev/null || true
cat >"$PROXMENUX_GPU_HOOK_ABS_PATH" <<'HOOKEOF'
#!/usr/bin/env bash
set -u
arg1="${1:-}"
arg2="${2:-}"
case "$arg1" in
pre-start|post-start|pre-stop|post-stop)
phase="$arg1"
guest_id="$arg2"
;;
*)
guest_id="$arg1"
phase="$arg2"
;;
esac
[[ "$phase" == "pre-start" ]] || exit 0
vm_conf="/etc/pve/qemu-server/${guest_id}.conf"
ct_conf="/etc/pve/lxc/${guest_id}.conf"
if [[ -f "$vm_conf" ]]; then
mapfile -t hostpci_lines < <(grep -E '^hostpci[0-9]+:' "$vm_conf" 2>/dev/null || true)
[[ ${#hostpci_lines[@]} -eq 0 ]] && exit 0
# Build slot list used by this VM and block if any running VM already uses same slot.
slot_keys=()
for line in "${hostpci_lines[@]}"; do
val="${line#*: }"
[[ "$val" == *"mapping="* ]] && continue
first_field="${val%%,*}"
IFS=';' read -r -a ids <<< "$first_field"
for id in "${ids[@]}"; do
id="${id#host=}"
id="${id// /}"
[[ -z "$id" ]] && continue
if [[ "$id" =~ ^[0-9a-fA-F]{2}:[0-9a-fA-F]{2}$ ]]; then
key="${id,,}"
else
[[ "$id" =~ ^0000: ]] || id="0000:${id}"
key="${id#0000:}"
key="${key%.*}"
key="${key,,}"
fi
dup=0
for existing in "${slot_keys[@]}"; do
[[ "$existing" == "$key" ]] && dup=1 && break
done
[[ "$dup" -eq 0 ]] && slot_keys+=("$key")
done
done
if [[ ${#slot_keys[@]} -gt 0 ]]; then
conflict_details=""
for other_conf in /etc/pve/qemu-server/*.conf; do
[[ -f "$other_conf" ]] || continue
other_vmid="$(basename "$other_conf" .conf)"
[[ "$other_vmid" == "$guest_id" ]] && continue
qm status "$other_vmid" 2>/dev/null | grep -q "status: running" || continue
for key in "${slot_keys[@]}"; do
if grep -qE "^hostpci[0-9]+:.*(0000:)?${key}(\\.[0-7])?([,[:space:]]|$)" "$other_conf" 2>/dev/null; then
other_name="$(awk '/^name:/ {print $2}' "$other_conf" 2>/dev/null)"
[[ -z "$other_name" ]] && other_name="VM-${other_vmid}"
conflict_details+=$'\n'"- ${key} in use by VM ${other_vmid} (${other_name})"
break
fi
done
done
if [[ -n "$conflict_details" ]]; then
echo "ProxMenux GPU Guard: VM ${guest_id} blocked at pre-start." >&2
echo "A hostpci device slot is already in use by another running VM." >&2
printf '%s\n' "$conflict_details" >&2
echo "Stop the source VM or remove/move the shared hostpci assignment." >&2
exit 1
fi
fi
failed=0
details=""
for line in "${hostpci_lines[@]}"; do
val="${line#*: }"
[[ "$val" == *"mapping="* ]] && continue
first_field="${val%%,*}"
IFS=';' read -r -a ids <<< "$first_field"
for id in "${ids[@]}"; do
id="${id#host=}"
id="${id// /}"
[[ -z "$id" ]] && continue
# Slot-only syntax (e.g. 01:00) is accepted by Proxmox.
if [[ "$id" =~ ^[0-9a-fA-F]{2}:[0-9a-fA-F]{2}$ ]]; then
slot_ok=false
for dev in /sys/bus/pci/devices/0000:${id}.*; do
[[ -e "$dev" ]] || continue
drv="$(basename "$(readlink "$dev/driver" 2>/dev/null)" 2>/dev/null)"
[[ "$drv" == "vfio-pci" ]] && slot_ok=true && break
done
if [[ "$slot_ok" != "true" ]]; then
failed=1
details+=$'\n'"- ${id}: not bound to vfio-pci"
fi
continue
fi
[[ "$id" =~ ^0000: ]] || id="0000:${id}"
dev_path="/sys/bus/pci/devices/${id}"
if [[ ! -d "$dev_path" ]]; then
failed=1
details+=$'\n'"- ${id}: PCI device not found"
continue
fi
drv="$(basename "$(readlink "$dev_path/driver" 2>/dev/null)" 2>/dev/null)"
if [[ "$drv" != "vfio-pci" ]]; then
failed=1
details+=$'\n'"- ${id}: driver=${drv:-none}"
fi
done
done
if [[ "$failed" -eq 1 ]]; then
echo "ProxMenux GPU Guard: VM ${guest_id} blocked at pre-start." >&2
echo "GPU passthrough device is not ready for VM mode (vfio-pci required)." >&2
printf '%s\n' "$details" >&2
echo "Switch mode to GPU -> VM from ProxMenux: GPUs and Coral-TPU Menu." >&2
exit 1
fi
exit 0
fi
if [[ -f "$ct_conf" ]]; then
mapfile -t gpu_dev_paths < <(
{
grep -E '^dev[0-9]+:' "$ct_conf" 2>/dev/null | sed -E 's/^dev[0-9]+:[[:space:]]*([^,[:space:]]+).*/\1/'
grep -E '^lxc\.mount\.entry:' "$ct_conf" 2>/dev/null | sed -E 's/^lxc\.mount\.entry:[[:space:]]*([^[:space:]]+).*/\1/'
} | grep -E '^/dev/(dri|nvidia|kfd)' | sort -u
)
[[ ${#gpu_dev_paths[@]} -eq 0 ]] && exit 0
missing=""
for dev in "${gpu_dev_paths[@]}"; do
[[ -e "$dev" ]] || missing+=$'\n'"- ${dev} unavailable"
done
if [[ -n "$missing" ]]; then
echo "ProxMenux GPU Guard: LXC ${guest_id} blocked at pre-start." >&2
echo "Configured GPU devices are unavailable in host device nodes." >&2
printf '%s\n' "$missing" >&2
echo "Switch mode to GPU -> LXC from ProxMenux: GPUs and Coral-TPU Menu." >&2
exit 1
fi
exit 0
fi
exit 0
HOOKEOF
chmod 755 "$PROXMENUX_GPU_HOOK_ABS_PATH" 2>/dev/null || true
}
attach_proxmenux_gpu_guard_to_vm() {
local vmid="$1"
_gpu_guard_has_vm_gpu "$vmid" || return 0
local current
current=$(qm config "$vmid" 2>/dev/null | awk '/^hookscript:/ {print $2}')
if [[ "$current" == "$PROXMENUX_GPU_HOOK_STORAGE_REF" ]]; then
return 0
fi
if qm set "$vmid" --hookscript "$PROXMENUX_GPU_HOOK_STORAGE_REF" >/dev/null 2>&1; then
_gpu_guard_msg_ok "GPU guard hook attached to VM ${vmid}"
else
_gpu_guard_msg_warn "Could not attach GPU guard hook to VM ${vmid}. Ensure 'local' storage supports snippets."
fi
}
attach_proxmenux_gpu_guard_to_lxc() {
local ctid="$1"
_gpu_guard_has_lxc_gpu "$ctid" || return 0
local current
current=$(pct config "$ctid" 2>/dev/null | awk '/^hookscript:/ {print $2}')
if [[ "$current" == "$PROXMENUX_GPU_HOOK_STORAGE_REF" ]]; then
return 0
fi
if pct set "$ctid" -hookscript "$PROXMENUX_GPU_HOOK_STORAGE_REF" >/dev/null 2>&1; then
_gpu_guard_msg_ok "GPU guard hook attached to LXC ${ctid}"
else
_gpu_guard_msg_warn "Could not attach GPU guard hook to LXC ${ctid}. Ensure 'local' storage supports snippets."
fi
}
sync_proxmenux_gpu_guard_hooks() {
ensure_proxmenux_gpu_guard_hookscript
local vmid ctid
for conf in /etc/pve/qemu-server/*.conf; do
[[ -f "$conf" ]] || continue
vmid=$(basename "$conf" .conf)
_gpu_guard_has_vm_gpu "$vmid" && attach_proxmenux_gpu_guard_to_vm "$vmid"
done
for conf in /etc/pve/lxc/*.conf; do
[[ -f "$conf" ]] || continue
ctid=$(basename "$conf" .conf)
_gpu_guard_has_lxc_gpu "$ctid" && attach_proxmenux_gpu_guard_to_lxc "$ctid"
done
}

View File

@@ -136,3 +136,131 @@ function _vm_is_q35() {
machine_line=$(qm config "$vmid" 2>/dev/null | awk -F': ' '/^machine:/ {print $2}')
[[ "$machine_line" == *q35* ]]
}
function _shorten_text() {
local text="$1"
local max_len="${2:-42}"
[[ -z "$text" ]] && { echo ""; return; }
if (( ${#text} > max_len )); then
echo "${text:0:$((max_len-3))}..."
else
echo "$text"
fi
}
function _pci_slot_base() {
local pci_full="$1"
local slot
slot="${pci_full#0000:}"
slot="${slot%.*}"
echo "$slot"
}
function _vm_status_is_running() {
local vmid="$1"
qm status "$vmid" 2>/dev/null | grep -q "status: running"
}
function _vm_onboot_is_enabled() {
local vmid="$1"
qm config "$vmid" 2>/dev/null | grep -qE '^onboot:\s*1'
}
function _vm_name_by_id() {
local vmid="$1"
local conf="/etc/pve/qemu-server/${vmid}.conf"
local vm_name
vm_name=$(awk '/^name:/ {print $2}' "$conf" 2>/dev/null)
[[ -z "$vm_name" ]] && vm_name="VM-${vmid}"
echo "$vm_name"
}
function _vm_has_pci_slot() {
local vmid="$1"
local slot_base="$2"
local conf="/etc/pve/qemu-server/${vmid}.conf"
[[ -f "$conf" ]] || return 1
grep -qE "^hostpci[0-9]+:.*(0000:)?${slot_base}(\\.[0-7])?([,[:space:]]|$)" "$conf"
}
function _pci_assigned_vm_ids() {
local pci_full="$1"
local exclude_vmid="${2:-}"
local slot_base conf vmid
slot_base=$(_pci_slot_base "$pci_full")
for conf in /etc/pve/qemu-server/*.conf; do
[[ -f "$conf" ]] || continue
vmid=$(basename "$conf" .conf)
[[ -n "$exclude_vmid" && "$vmid" == "$exclude_vmid" ]] && continue
if grep -qE "^hostpci[0-9]+:.*(0000:)?${slot_base}(\\.[0-7])?([,[:space:]]|$)" "$conf"; then
echo "$vmid"
fi
done
}
function _remove_pci_slot_from_vm_config() {
local vmid="$1"
local slot_base="$2"
local conf="/etc/pve/qemu-server/${vmid}.conf"
[[ -f "$conf" ]] || return 1
local tmpf
tmpf=$(mktemp)
awk -v slot="$slot_base" '
$0 ~ "^hostpci[0-9]+:.*(0000:)?" slot "(\\.[0-7])?([,[:space:]]|$)" {next}
{print}
' "$conf" > "$tmpf" && cat "$tmpf" > "$conf"
rm -f "$tmpf"
}
function _pci_assigned_vm_summary() {
local pci_full="$1"
local slot_base conf vmid vm_name running onboot
local -a refs=()
local running_count=0 onboot_count=0
slot_base="${pci_full#0000:}"
slot_base="${slot_base%.*}"
for conf in /etc/pve/qemu-server/*.conf; do
[[ -f "$conf" ]] || continue
if ! grep -qE "^hostpci[0-9]+:.*(0000:)?${slot_base}(\\.[0-7])?([,[:space:]]|$)" "$conf"; then
continue
fi
vmid=$(basename "$conf" .conf)
vm_name=$(awk '/^name:/ {print $2}' "$conf" 2>/dev/null)
[[ -z "$vm_name" ]] && vm_name="VM-${vmid}"
if qm status "$vmid" 2>/dev/null | grep -q "status: running"; then
running="running"
running_count=$((running_count + 1))
else
running="stopped"
fi
if grep -qE "^onboot:\s*1" "$conf" 2>/dev/null; then
onboot="1"
onboot_count=$((onboot_count + 1))
else
onboot="0"
fi
refs+=("${vmid}[${running},onboot=${onboot}]")
done
[[ ${#refs[@]} -eq 0 ]] && return 1
local joined summary
joined=$(IFS=', '; echo "${refs[*]}")
summary="$(translate "Assigned to VM(s)"): ${joined}"
if [[ "$running_count" -gt 0 ]]; then
summary+=" ($(translate "running"): ${running_count})"
fi
if [[ "$onboot_count" -gt 0 ]]; then
summary+=", onboot=1: ${onboot_count}"
fi
echo "$summary"
return 0
}