mirror of
https://github.com/MacRimi/ProxMenux.git
synced 2026-04-05 20:03:48 +00:00
495 lines
16 KiB
Bash
495 lines
16 KiB
Bash
#!/bin/bash
# ProxMenux - NVIDIA Driver Updater (Host + LXC)
# ================================================
# Author      : MacRimi
# License     : MIT
# Version     : 1.0
# Last Updated: 01/04/2026
# ================================================
#
# Updates the NVIDIA driver on the host and the matching userspace
# libraries inside every LXC container that has NVIDIA devices passed
# through. All command output is appended to $LOG_FILE.

# ProxMenux installation paths.
LOCAL_SCRIPTS="/usr/local/share/proxmenux/scripts"
BASE_DIR="/usr/local/share/proxmenux"
UTILS_FILE="$BASE_DIR/utils.sh"

# Log file shared by every step of the update.
LOG_FILE="/tmp/nvidia_update.log"

# Where installers are downloaded from and where they are cached locally.
NVIDIA_BASE_URL="https://download.nvidia.com/XFree86/Linux-x86_64"
NVIDIA_WORKDIR="/opt/nvidia"

# The helpers used throughout this script (translate, msg_*, load_language,
# initialize_cache, ...) are not defined in this file; presumably they come
# from utils.sh - verify against that file.
if [[ -f "$UTILS_FILE" ]]; then
    source "$UTILS_FILE"
fi

load_language
initialize_cache

# ============================================================
# Host NVIDIA state detection
# ============================================================
# Determine whether an NVIDIA driver is loaded on the host.
#
# Globals written:
#   HOST_NVIDIA_VERSION - driver version reported by nvidia-smi ("" if none)
#   HOST_NVIDIA_READY   - "true" when the module is loaded and a version
#                         could be read, otherwise "false"
#
# When no usable driver is found, an explanatory dialog is shown and the
# script exits with status 0.
detect_host_nvidia() {
    HOST_NVIDIA_VERSION=""
    HOST_NVIDIA_READY=false

    # Both the kernel module and the nvidia-smi tool must be present.
    if lsmod | grep -q "^nvidia " && command -v nvidia-smi >/dev/null 2>&1; then
        # One line per GPU; the first one is enough for the driver version.
        HOST_NVIDIA_VERSION=$(nvidia-smi --query-gpu=driver_version \
            --format=csv,noheader 2>/dev/null | head -n1 | tr -d '[:space:]')
        if [[ -n "$HOST_NVIDIA_VERSION" ]]; then
            HOST_NVIDIA_READY=true
        fi
    fi

    if [[ "$HOST_NVIDIA_READY" != true ]]; then
        dialog --backtitle "ProxMenux" \
            --title "$(translate 'NVIDIA Not Found')" \
            --msgbox "\n$(translate 'No NVIDIA driver is currently loaded on this host.')\n\n$(translate 'Please install NVIDIA drivers first using the option:')\n\n $(translate 'Install NVIDIA Drivers on Host')\n\n$(translate 'from this same GPU and TPU menu.')" \
            13 72
        exit 0
    fi
}

# ============================================================
# LXC containers with NVIDIA passthrough
# ============================================================
# Collect the IDs of all LXC containers whose active configuration passes
# an NVIDIA device through via a "devN:" entry.
#
# Arguments:
#   $1 - (optional) directory holding the <ctid>.conf files;
#        defaults to /etc/pve/lxc
# Globals written:
#   NVIDIA_CONTAINERS - array of matching container IDs (file basenames)
#
# Only the part of each file before the first "[section]" header is
# inspected, and the "devN:" key must start the line, so commented-out
# entries and entries that only exist in a later section do not count.
find_nvidia_containers() {
    local conf_dir="${1:-/etc/pve/lxc}"
    local conf

    NVIDIA_CONTAINERS=()
    for conf in "$conf_dir"/*.conf; do
        # An unmatched glob stays literal; skip anything that is not a file.
        [[ -f "$conf" ]] || continue
        if awk '
            /^\[/ { exit }
            tolower($0) ~ /^[ \t]*dev[0-9]+:.*nvidia/ { found = 1; exit }
            END { exit !found }
        ' "$conf"; then
            NVIDIA_CONTAINERS+=("$(basename "$conf" .conf)")
        fi
    done
}

# Print the NVIDIA userspace driver version installed in a container.
#
# Arguments:
#   $1 - container ID
#   $2 - (optional) path of the container root filesystem on the host;
#        defaults to /var/lib/lxc/<ctid>/rootfs
# Outputs:
#   The version string, or the translated text 'not installed'.
get_lxc_nvidia_version() {
    local ctid="$1"
    local rootfs="${2:-/var/lib/lxc/${ctid}/rootfs}"
    local version=""
    local version_re='^[0-9]+(\.[0-9]+)+$'

    # Prefer nvidia-smi when the container is running (works with .run-installed drivers)
    if pct status "$ctid" 2>/dev/null | grep -q "running"; then
        version=$(pct exec "$ctid" -- nvidia-smi \
            --query-gpu=driver_version --format=csv,noheader 2>/dev/null \
            | head -n1 | tr -d '[:space:]' || true)
        # Anything that is not a dotted numeric version (for example an
        # error message printed on stdout) is discarded so that the dpkg
        # fallback below still gets a chance.
        [[ "$version" =~ $version_re ]] || version=""
    fi

    # Fallback: dpkg status for apt-installed libcuda1 (dir-type storage, no start needed)
    if [[ -z "$version" && -f "${rootfs}/var/lib/dpkg/status" ]]; then
        # Walk the libcuda1 stanza itself instead of a fixed number of lines
        # after "Package:", since the Version field can sit anywhere in it.
        # The Debian revision ("-1...") is stripped.
        version=$(awk '
            /^Package: / { in_pkg = ($0 == "Package: libcuda1") }
            in_pkg && /^Version: / { print $2; exit }
        ' "${rootfs}/var/lib/dpkg/status" | cut -d- -f1)
    fi

    echo "${version:-$(translate 'not installed')}"
}

# ============================================================
# Version list from NVIDIA servers
# ============================================================
# Print every driver version offered under $NVIDIA_BASE_URL, newest first,
# one per line.
#
# The directory index is scraped for href values that start with a digit;
# both single- and double-quoted attributes are accepted.
#
# Returns:
#   1 (after printing an empty line) when the index could not be fetched.
#   curl runs with -f, so an HTTP error page counts as "not fetched".
list_available_versions() {
    local html
    html=$(curl -fsL --connect-timeout 15 "${NVIDIA_BASE_URL}/" 2>/dev/null) || true

    if [[ -z "$html" ]]; then
        echo ""
        return 1
    fi

    printf '%s\n' "$html" \
        | grep -oE "href=['\"]?[^'\" >]+" \
        | sed -E -e "s|^href=['\"]?||" -e 's|/$||' \
        | grep -E '^[0-9]' \
        | sort -Vr \
        | uniq
}

# Print the version named in $NVIDIA_BASE_URL/latest.txt.
#
# The version is taken from the first field of the first non-empty line,
# so extra lines in the response cannot be glued onto it.
# Prints nothing when the file cannot be fetched.
get_latest_version() {
    local latest_line
    latest_line=$(curl -fsSL --connect-timeout 15 "${NVIDIA_BASE_URL}/latest.txt" 2>/dev/null) || true
    printf '%s\n' "$latest_line" | awk 'NF { print $1; exit }' | tr -d '[:space:]'
}

# ============================================================
# Version selection menu
# ============================================================
# Ask the user which driver version to install.
#
# Globals read:
#   HOST_NVIDIA_VERSION - shown in the menu text
# Globals written:
#   TARGET_VERSION - the chosen version with all whitespace removed
#
# Exits 1 when neither the latest version nor the version list could be
# retrieved; exits 0 when the user cancels or picks the blank separator.
select_target_version() {
    local latest versions_list ver menu_text
    local choices=()

    msg_info "$(translate 'Fetching available NVIDIA versions...')"
    latest=$(get_latest_version 2>/dev/null)
    versions_list=$(list_available_versions 2>/dev/null)
    msg_ok "$(translate 'Version list retrieved.')"

    if [[ -z "$latest" && -z "$versions_list" ]]; then
        dialog --backtitle "ProxMenux" \
            --title "$(translate 'Error')" \
            --msgbox "\n$(translate 'Could not retrieve versions from NVIDIA. Please check your internet connection.')" \
            8 72
        exit 1
    fi

    # Each source fills in for the other when only one of them answered.
    if [[ -z "$latest" ]]; then
        latest=$(head -n1 <<< "$versions_list")
    fi
    if [[ -z "$versions_list" ]]; then
        versions_list="$latest"
    fi
    latest=${latest//[[:space:]]/}

    choices+=("latest" "$(translate 'Latest available') (${latest:-?})")
    # Blank row used as a visual separator; choosing it yields an empty tag.
    choices+=("" "")

    while IFS= read -r ver; do
        ver=${ver//[[:space:]]/}
        [[ -z "$ver" ]] && continue
        choices+=("$ver" "$ver")
    done <<< "$versions_list"

    menu_text="\n$(translate 'Current host version:') ${HOST_NVIDIA_VERSION}\n"
    menu_text+="$(translate 'Select the target version to install on host and all affected LXCs:')"

    # dialog draws on the terminal and reports the chosen tag on stderr,
    # which is what the command substitution captures.
    TARGET_VERSION=$(dialog --backtitle "ProxMenux" \
        --title "$(translate 'NVIDIA Driver Version')" \
        --menu "$menu_text" 26 80 16 \
        "${choices[@]}" \
        2>&1 >/dev/tty) || exit 0

    [[ -z "$TARGET_VERSION" ]] && exit 0

    if [[ "$TARGET_VERSION" == "latest" ]]; then
        TARGET_VERSION="$latest"
    fi
    TARGET_VERSION=${TARGET_VERSION//[[:space:]]/}
}

# ============================================================
# Update NVIDIA userspace libs inside a single LXC
# ============================================================
# Helper for update_lxc_nvidia: undo its side effects before an early return.
#   $1 - container ID
#   $2 - "true" when the container was already running before the update
#   $@ - (remaining) host paths to delete
_lxc_nvidia_abort() {
    local ctid="$1"
    local was_running="$2"
    shift 2

    if [[ $# -gt 0 ]]; then
        rm -rf -- "$@"
    fi
    # Only stop containers that this script started itself.
    if [[ "$was_running" == "false" ]]; then
        pct stop "$ctid" >>"$LOG_FILE" 2>&1 || true
    fi
}

# Install the NVIDIA userspace libraries of a given version in one container.
#
# The cached .run installer is extracted on the host, packed into a tarball,
# pushed into the container and executed there without kernel modules.
# A stopped container is started for the update and stopped again afterwards.
#
# Arguments:
#   $1 - container ID
#   $2 - driver version; the installer must already exist as
#        $NVIDIA_WORKDIR/NVIDIA-Linux-x86_64-<version>.run
# Returns:
#   0 on success, 1 when the container was skipped or the update failed.
update_lxc_nvidia() {
    local ctid="$1"
    local version="$2"
    local was_running=false
    local old_version run_file extract_dir archive rc ready

    # Capture old version before update
    old_version=$(get_lxc_nvidia_version "$ctid")

    if pct status "$ctid" 2>/dev/null | grep -q "running"; then
        was_running=true
    else
        msg_info "$(translate 'Starting container') ${ctid}..."
        pct start "$ctid" >>"$LOG_FILE" 2>&1 || true
        # Poll for up to 15 x 2 s until commands can be executed inside.
        ready=false
        for _ in {1..15}; do
            sleep 2
            if pct exec "$ctid" -- true >/dev/null 2>&1; then
                ready=true
                break
            fi
        done
        if [[ "$ready" != true ]]; then
            msg_warn "$(translate 'Container') ${ctid} $(translate 'did not start. Skipping.')"
            return 1
        fi
        msg_ok "$(translate 'Container') ${ctid} $(translate 'started.')"
    fi

    msg_info "$(translate 'Updating NVIDIA libs in container') ${ctid}..."

    run_file="${NVIDIA_WORKDIR}/NVIDIA-Linux-x86_64-${version}.run"
    if [[ ! -f "$run_file" ]]; then
        msg_warn "$(translate 'Installer not found:') ${run_file} — $(translate 'skipping container') ${ctid}"
        _lxc_nvidia_abort "$ctid" "$was_running"
        return 1
    fi

    # Extract .run on the host to avoid decompression failures inside the container
    extract_dir="${NVIDIA_WORKDIR}/extracted_${version}"

    msg_info "$(translate 'Extracting NVIDIA installer on host...')"
    rm -rf "$extract_dir"
    if ! sh "$run_file" --extract-only --target "$extract_dir" >>"$LOG_FILE" 2>&1; then
        msg_warn "$(translate 'Extraction failed. Check log:') ${LOG_FILE}"
        _lxc_nvidia_abort "$ctid" "$was_running" "$extract_dir"
        return 1
    fi
    msg_ok "$(translate 'Extracted.')"

    msg_info "$(translate 'Packing and copying installer to container') ${ctid}..."
    # Unpredictable name in /tmp; fall back to the fixed name if mktemp fails.
    archive=$(mktemp "/tmp/nvidia_lxc_${version}_XXXXXX.tar.gz" 2>/dev/null) \
        || archive="/tmp/nvidia_lxc_${version}.tar.gz"
    if ! tar -czf "$archive" -C "$extract_dir" . >>"$LOG_FILE" 2>&1; then
        msg_warn "$(translate 'Packing failed. Check log:') ${LOG_FILE}"
        _lxc_nvidia_abort "$ctid" "$was_running" "$extract_dir" "$archive"
        return 1
    fi
    if ! pct push "$ctid" "$archive" /tmp/nvidia_lxc.tar.gz >>"$LOG_FILE" 2>&1; then
        msg_warn "$(translate 'pct push failed. Check log:') ${LOG_FILE}"
        _lxc_nvidia_abort "$ctid" "$was_running" "$extract_dir" "$archive"
        return 1
    fi
    rm -f "$archive"
    msg_ok "$(translate 'Installer copied to container.')"

    msg_info2 "$(translate 'Starting NVIDIA installer in container') ${ctid}. $(translate 'This may take several minutes...')"
    echo "" >>"$LOG_FILE"
    # The inner script is single-quoted: nothing is expanded on the host.
    # It starts from a clean directory and always removes what was pushed.
    pct exec "$ctid" -- bash -c '
        rm -rf /tmp/nvidia_lxc_install
        mkdir -p /tmp/nvidia_lxc_install
        if tar -xzf /tmp/nvidia_lxc.tar.gz -C /tmp/nvidia_lxc_install 2>&1; then
            /tmp/nvidia_lxc_install/nvidia-installer \
                --no-kernel-modules \
                --no-questions \
                --ui=none \
                --no-nouveau-check \
                --no-dkms
            rc=$?
        else
            rc=1
        fi
        rm -rf /tmp/nvidia_lxc_install /tmp/nvidia_lxc.tar.gz
        exit "$rc"
    ' 2>&1 | tee -a "$LOG_FILE"
    # Status of "pct exec", not of tee; must be read right after the pipeline.
    rc=${PIPESTATUS[0]}

    rm -rf "$extract_dir"

    if [[ $rc -ne 0 ]]; then
        msg_warn "$(translate 'NVIDIA installer returned error') ${rc}. $(translate 'Check log:') ${LOG_FILE}"
        _lxc_nvidia_abort "$ctid" "$was_running"
        return 1
    fi

    msg_ok "$(translate 'Container') ${ctid}: ${old_version} → ${version}"
    msg_info2 "$(translate 'NVIDIA driver verification in container') ${ctid}:"
    pct exec "$ctid" -- nvidia-smi 2>/dev/null || true

    if [[ "$was_running" == "false" ]]; then
        msg_info "$(translate 'Stopping container') ${ctid}..."
        pct stop "$ctid" >>"$LOG_FILE" 2>&1 || true
        msg_ok "$(translate 'Container stopped.')"
    fi
    return 0
}

# ============================================================
# Host NVIDIA update
# ============================================================
# Stop and disable the NVIDIA helper services (nvidia-persistenced and
# nvidia-powerd). A service is stopped only if it is active and disabled
# only if it is enabled; failures are ignored. Always returns 0.
_stop_nvidia_services() {
    local svc
    for svc in nvidia-persistenced.service nvidia-powerd.service; do
        if systemctl is-active --quiet "$svc" 2>/dev/null; then
            systemctl stop "$svc" >/dev/null 2>&1 || true
        fi
        if systemctl is-enabled --quiet "$svc" 2>/dev/null; then
            systemctl disable "$svc" >/dev/null 2>&1 || true
        fi
    done
    return 0
}

_unload_nvidia_modules() {
|
|
for mod in nvidia_uvm nvidia_drm nvidia_modeset nvidia; do
|
|
modprobe -r "$mod" >/dev/null 2>&1 || true
|
|
done
|
|
# Second pass for stubborn modules
|
|
for mod in nvidia_uvm nvidia_drm nvidia_modeset nvidia; do
|
|
modprobe -r --force "$mod" >/dev/null 2>&1 || true
|
|
done
|
|
}
|
|
|
|
# Remove the currently installed NVIDIA driver from the host.
#
# Steps: stop services, unload modules, run nvidia-uninstall when present,
# remove every NVIDIA DKMS entry, purge the nvidia-*/libnvidia-*/cuda-*
# packages and delete leftover udev/modprobe configuration.
# Every step is best effort; output goes to $LOG_FILE.
_purge_nvidia_host() {
    local dkms_entries dkms_module dkms_version

    msg_info "$(translate 'Uninstalling current NVIDIA driver from host...')"

    _stop_nvidia_services
    _unload_nvidia_modules

    if command -v nvidia-uninstall >/dev/null 2>&1; then
        nvidia-uninstall --silent >>"$LOG_FILE" 2>&1 || true
    fi

    # Remove DKMS entries.
    # "dkms status" lines come in two shapes:
    #   nvidia, 535.54.03, 6.2.16-3-pve, x86_64: installed
    #   nvidia/550.54.14, 6.5.11-8-pve, x86_64: installed
    # Both are reduced to unique "module version" pairs.
    dkms_entries=$(dkms status 2>/dev/null | awk -F'[,:]' '
        $1 ~ /nvidia/ {
            module = $1
            version = $2
            gsub(/[ \t]/, "", module)
            gsub(/[ \t]/, "", version)
            slash = index(module, "/")
            if (slash > 0) {
                version = substr(module, slash + 1)
                module = substr(module, 1, slash - 1)
            }
            if (module != "" && version != "") print module, version
        }' | sort -u || true)
    while read -r dkms_module dkms_version; do
        [[ -z "$dkms_module" || -z "$dkms_version" ]] && continue
        dkms remove -m "$dkms_module" -v "$dkms_version" --all >/dev/null 2>&1 || true
    done <<< "$dkms_entries"

    # Patterns are quoted so that apt, not the shell, expands them.
    apt-get -y purge 'nvidia-*' 'libnvidia-*' 'cuda-*' >>"$LOG_FILE" 2>&1 || true
    apt-get -y autoremove --purge >>"$LOG_FILE" 2>&1 || true

    rm -f /etc/udev/rules.d/70-nvidia.rules
    rm -f /etc/modprobe.d/nvidia*.conf /usr/lib/modprobe.d/nvidia*.conf

    msg_ok "$(translate 'Current NVIDIA driver removed from host.')"
}

# Helper for _download_installer: succeed when $1 looks like a complete
# NVIDIA .run installer, i.e. a regular file larger than 40 MB that `file`
# reports as executable. If `file` is not installed, the check falls back
# to looking for a "#!" shebang.
_nvidia_installer_is_valid() {
    local path="$1"
    local -r min_size=40000000
    local size

    [[ -f "$path" ]] || return 1
    size=$(stat -c%s "$path" 2>/dev/null || echo "0")
    [[ "$size" -gt "$min_size" ]] || return 1

    if command -v file >/dev/null 2>&1; then
        file "$path" 2>/dev/null | grep -q "executable"
    else
        [[ "$(head -c 2 "$path" 2>/dev/null)" == "#!" ]]
    fi
}

# Make sure the installer for a driver version is present in $NVIDIA_WORKDIR.
#
# Arguments:
#   $1 - driver version
# Outputs:
#   stdout - ONLY the path of the installer, so callers can capture it with
#            $(...); all status messages are redirected to stderr
# Exits:
#   1 when no candidate URL yields a valid installer. When called inside a
#   command substitution this ends only the subshell, so callers must check
#   the status.
_download_installer() {
    local version="$1"
    local run_file="${NVIDIA_WORKDIR}/NVIDIA-Linux-x86_64-${version}.run"
    local url
    local ok=false
    local -a urls

    mkdir -p "$NVIDIA_WORKDIR"

    # Reuse cached file if valid
    if _nvidia_installer_is_valid "$run_file"; then
        msg_ok "$(translate 'Installer already cached.')" >&2
        echo "$run_file"
        return 0
    fi
    rm -f "$run_file"

    msg_info "$(translate 'Downloading NVIDIA driver') ${version}..." >&2

    # The regular package is tried first, then the variant without the
    # 32-bit compatibility libraries.
    urls=(
        "${NVIDIA_BASE_URL}/${version}/NVIDIA-Linux-x86_64-${version}.run"
        "${NVIDIA_BASE_URL}/${version}/NVIDIA-Linux-x86_64-${version}-no-compat32.run"
    )

    for url in "${urls[@]}"; do
        if curl -fL --connect-timeout 30 --max-time 600 "$url" -o "$run_file" >>"$LOG_FILE" 2>&1 \
            && _nvidia_installer_is_valid "$run_file"; then
            ok=true
            break
        fi
        # Never leave a partial or invalid download in the cache.
        rm -f "$run_file"
    done

    if [[ "$ok" != true ]]; then
        msg_error "$(translate 'Download failed. Check /tmp/nvidia_update.log')" >&2
        exit 1
    fi

    chmod +x "$run_file"
    msg_ok "$(translate 'Download complete.')" >&2
    echo "$run_file"
}

# Run an NVIDIA .run installer on the host, unattended and with DKMS.
#
# Arguments:
#   $1 - path of the installer
# Exits:
#   1 when the installer file is missing or the installer returns non-zero.
# Installer output is appended to $LOG_FILE. The scratch directory passed
# via --tmpdir is removed afterwards in either case.
_run_installer() {
    local installer="$1"
    local tmp_dir="${NVIDIA_WORKDIR}/tmp_extract"
    local rc

    # Fail with a clear log entry instead of handing a bogus path to sh.
    if [[ ! -f "$installer" ]]; then
        echo "Installer not found: ${installer}" >>"$LOG_FILE"
        msg_error "$(translate 'NVIDIA installer failed. Check /tmp/nvidia_update.log')"
        exit 1
    fi

    mkdir -p "$tmp_dir"

    msg_info "$(translate 'Installing NVIDIA driver on host. This may take several minutes...')"

    sh "$installer" \
        --tmpdir="$tmp_dir" \
        --no-questions \
        --ui=none \
        --disable-nouveau \
        --no-nouveau-check \
        --dkms \
        >>"$LOG_FILE" 2>&1
    rc=$?

    rm -rf "$tmp_dir"

    if [[ $rc -ne 0 ]]; then
        msg_error "$(translate 'NVIDIA installer failed. Check /tmp/nvidia_update.log')"
        exit 1
    fi

    msg_ok "$(translate 'NVIDIA driver installed on host.')"
}

# Replace the host NVIDIA driver with the given version.
#
# The installer is obtained and checked BEFORE the current driver is
# removed, so a failed download can never leave the host without a driver.
#
# Arguments:
#   $1 - driver version to install
# Exits:
#   1 when the installer cannot be obtained (nothing has been changed on
#   the host at that point).
update_host_nvidia() {
    local version="$1"
    local installer

    # _download_installer may "exit 1"; inside $(...) that only ends the
    # subshell, so the status has to be checked here.
    if ! installer=$(_download_installer "$version"); then
        msg_error "$(translate 'Download failed. Check /tmp/nvidia_update.log')"
        exit 1
    fi
    # The path is the last line printed; drop any status text before it.
    installer=${installer##*$'\n'}
    if [[ ! -f "$installer" ]]; then
        msg_error "$(translate 'Download failed. Check /tmp/nvidia_update.log')"
        exit 1
    fi

    _purge_nvidia_host

    _run_installer "$installer"

    msg_info "$(translate 'Updating initramfs...')"
    update-initramfs -u -k all >>"$LOG_FILE" 2>&1 || true
    msg_ok "$(translate 'initramfs updated.')"
}

# ============================================================
# Overview dialog (current state)
# ============================================================
# Show the host driver version and, for every container with NVIDIA
# passthrough, its hostname and installed NVIDIA version, then ask the
# user whether to continue.
#
# Globals read:
#   HOST_NVIDIA_VERSION
# Globals written:
#   NVIDIA_CONTAINERS (through find_nvidia_containers)
#
# Exits 0 when the user declines or the dialog cannot be displayed.
show_current_state_dialog() {
    local info ctid lxc_ver ct_name

    find_nvidia_containers

    info="\n$(translate 'Host NVIDIA driver:') ${HOST_NVIDIA_VERSION}\n\n"

    if [[ ${#NVIDIA_CONTAINERS[@]} -eq 0 ]]; then
        info+="$(translate 'No LXC containers with NVIDIA passthrough found.')\n"
    else
        info+="$(translate 'LXC containers with NVIDIA passthrough:')\n\n"
        for ctid in "${NVIDIA_CONTAINERS[@]}"; do
            lxc_ver=$(get_lxc_nvidia_version "$ctid")
            ct_name=$(pct config "$ctid" 2>/dev/null | awk '/^hostname:/ {print $2}')
            # The hostname is shown in parentheses only when one is configured.
            info+=" CT ${ctid} ${ct_name:+(${ct_name})} — libcuda1: ${lxc_ver}\n"
        done
    fi

    info+="\n$(translate 'After selecting a version, LXC containers will be updated first, then the host.')"
    info+="\n$(translate 'A reboot is required after the host update.')"

    dialog --backtitle "ProxMenux" \
        --title "$(translate 'NVIDIA Update — Current State')" \
        --yesno "$info" 20 80 \
        >/dev/tty 2>&1 || exit 0
}

# ============================================================
# Restart prompt
# ============================================================
# Ask whether to reboot now. On "yes" the host is rebooted; otherwise the
# user is reminded to reboot manually and the function waits for Enter
# before returning.
restart_prompt() {
    if whiptail --title "$(translate 'NVIDIA Update')" --yesno \
        "$(translate 'The host driver update requires a reboot to take effect. Reboot now?')" 10 70; then
        msg_warn "$(translate 'Restarting the server...')"
        reboot
    else
        msg_success "$(translate 'Update complete. Please reboot the server manually.')"
        msg_success "$(translate 'Completed. Press Enter to return to menu...')"
        read -r
    fi
}

# ============================================================
# Main
# ============================================================
# Entry point: gather the current state, let the user pick a target
# version, then update every NVIDIA-enabled container followed by the host.
#
# Globals read (set by the phase-1 helpers):
#   HOST_NVIDIA_VERSION, NVIDIA_CONTAINERS, TARGET_VERSION
#
# Exits 1, before any container or the host is touched, when the installer
# cannot be downloaded.
main() {
    local run_file ctid

    # Start every run with an empty log.
    : >"$LOG_FILE"

    # ---- Phase 1: dialogs ----
    detect_host_nvidia
    show_current_state_dialog
    select_target_version

    # Same version confirmation
    if [[ "$TARGET_VERSION" == "$HOST_NVIDIA_VERSION" ]]; then
        if ! dialog --backtitle "ProxMenux" \
            --title "$(translate 'Same Version')" \
            --yesno "\n$(translate 'Version') ${TARGET_VERSION} $(translate 'is already installed on the host.')\n\n$(translate 'Reinstall and force-update all LXC containers anyway?')" \
            10 70 >/dev/tty 2>&1; then
            exit 0
        fi
    fi

    # ---- Phase 2: processing ----
    show_proxmenux_logo
    msg_title "$(translate 'NVIDIA Driver Update')"

    # Download installer once — reused by both LXC containers and host.
    # The helper's "exit 1" only ends the $(...) subshell, so its status
    # must be checked here; otherwise the host driver would be purged
    # without a replacement to install.
    if ! run_file=$(_download_installer "$TARGET_VERSION"); then
        msg_error "$(translate 'Download failed. Check /tmp/nvidia_update.log')"
        exit 1
    fi

    # Update LXC containers first (no reboot needed for userspace libs).
    # A failure in one container does not stop the others.
    if [[ ${#NVIDIA_CONTAINERS[@]} -gt 0 ]]; then
        msg_info2 "$(translate 'Updating LXC containers...')"
        for ctid in "${NVIDIA_CONTAINERS[@]}"; do
            update_lxc_nvidia "$ctid" "$TARGET_VERSION"
        done
    fi

    # Update host kernel module + drivers (reuses the already-downloaded installer)
    update_host_nvidia "$TARGET_VERSION"

    restart_prompt
}

# Run the updater, forwarding any command-line arguments.
main "$@"