Merge branch 'main' into refactor/turnkey-modernize

This commit is contained in:
Slaviša Arežina
2026-03-24 15:32:59 +01:00
committed by GitHub
15 changed files with 552 additions and 85 deletions
+28 -9
View File
@@ -348,10 +348,10 @@ explain_exit_code() {
json_escape() {
# Escape a string for safe JSON embedding using awk (handles any input size).
# Pipeline: strip ANSI → remove control chars → escape \ " TAB → join lines with \n
printf '%s' "$1" \
| sed 's/\x1b\[[0-9;]*[a-zA-Z]//g' \
| tr -d '\000-\010\013\014\016-\037\177\r' \
| awk '
printf '%s' "$1" |
sed 's/\x1b\[[0-9;]*[a-zA-Z]//g' |
tr -d '\000-\010\013\014\016-\037\177\r' |
awk '
BEGIN { ORS = "" }
{
gsub(/\\/, "\\\\") # backslash → \\
@@ -627,7 +627,7 @@ post_to_api() {
[[ "${DEV_MODE:-}" == "true" ]] && echo "[DEBUG] post_to_api() DIAGNOSTICS=$DIAGNOSTICS RANDOM_UUID=$RANDOM_UUID NSAPP=$NSAPP" >&2
# Set type for later status updates (respect pre-set value, e.g. "turnkey")
# Set type for later status updates (preserve if already set, e.g. turnkey)
TELEMETRY_TYPE="${TELEMETRY_TYPE:-lxc}"
local pve_version=""
@@ -692,6 +692,7 @@ EOF
# Send initial "installing" record with retry.
# This record MUST exist for all subsequent updates to succeed.
local http_code="" attempt
local _post_success=false
for attempt in 1 2 3; do
if [[ "${DEV_MODE:-}" == "true" ]]; then
http_code=$(curl -sS -w "%{http_code}" -m "${TELEMETRY_TIMEOUT}" -X POST "${TELEMETRY_URL}" \
@@ -703,11 +704,19 @@ EOF
-H "Content-Type: application/json" \
-d "$JSON_PAYLOAD" -o /dev/null 2>/dev/null) || http_code="000"
fi
[[ "$http_code" =~ ^2[0-9]{2}$ ]] && break
if [[ "$http_code" =~ ^2[0-9]{2}$ ]]; then
_post_success=true
break
fi
[[ "$attempt" -lt 3 ]] && sleep 1
done
POST_TO_API_DONE=true
# Only mark done if at least one attempt succeeded.
# If all 3 failed, POST_TO_API_DONE stays false so post_update_to_api
# and on_exit() know the initial record was never created.
# The server has fallback logic to create a new record on status updates,
# so subsequent calls can still succeed even without the initial record.
POST_TO_API_DONE=${_post_success}
}
# ------------------------------------------------------------------------------
@@ -798,15 +807,19 @@ EOF
# Send initial "installing" record with retry (must succeed for updates to work)
local http_code="" attempt
local _post_success=false
for attempt in 1 2 3; do
http_code=$(curl -sS -w "%{http_code}" -m "${TELEMETRY_TIMEOUT}" -X POST "${TELEMETRY_URL}" \
-H "Content-Type: application/json" \
-d "$JSON_PAYLOAD" -o /dev/null 2>/dev/null) || http_code="000"
[[ "$http_code" =~ ^2[0-9]{2}$ ]] && break
if [[ "$http_code" =~ ^2[0-9]{2}$ ]]; then
_post_success=true
break
fi
[[ "$attempt" -lt 3 ]] && sleep 1
done
POST_TO_API_DONE=true
POST_TO_API_DONE=${_post_success}
}
# ------------------------------------------------------------------------------
@@ -1083,6 +1096,12 @@ EOF
# - Used to group errors in dashboard
# ------------------------------------------------------------------------------
categorize_error() {
# Allow build.func to override category based on log analysis (exit code 1 subclassification)
if [[ -n "${ERROR_CATEGORY_OVERRIDE:-}" ]]; then
echo "$ERROR_CATEGORY_OVERRIDE"
return
fi
local code="$1"
case "$code" in
# Network errors (curl/wget)
+69 -36
View File
@@ -221,6 +221,14 @@ update_motd_ip() {
local current_hostname="$(hostname)"
local current_ip="$(hostname -I | awk '{print $1}')"
# Escape sed special chars in replacement strings (& \ |)
current_os="${current_os//\\/\\\\}"
current_os="${current_os//&/\\&}"
current_hostname="${current_hostname//\\/\\\\}"
current_hostname="${current_hostname//&/\\&}"
current_ip="${current_ip//\\/\\\\}"
current_ip="${current_ip//&/\\&}"
# Update only if values actually changed
if ! grep -q "OS:.*$current_os" "$PROFILE_FILE" 2>/dev/null; then
sed -i "s|OS:.*|OS: \${GN}$current_os\${CL}\\\"|" "$PROFILE_FILE"
@@ -4076,8 +4084,8 @@ EOF
if [ "$var_os" == "alpine" ]; then
sleep 3
pct exec "$CTID" -- /bin/sh -c 'cat <<EOF >/etc/apk/repositories
http://dl-cdn.alpinelinux.org/alpine/latest-stable/main
http://dl-cdn.alpinelinux.org/alpine/latest-stable/community
https://dl-cdn.alpinelinux.org/alpine/latest-stable/main
https://dl-cdn.alpinelinux.org/alpine/latest-stable/community
EOF'
pct exec "$CTID" -- ash -c "apk add bash newt curl openssh nano mc ncurses jq" >>"$BUILD_LOG" 2>&1 || {
msg_error "Failed to install base packages in Alpine container"
@@ -4086,7 +4094,9 @@ EOF'
else
sleep 3
LANG=${LANG:-en_US.UTF-8}
pct exec "$CTID" -- bash -c "sed -i \"/$LANG/ s/^# //\" /etc/locale.gen"
local LANG_ESC="${LANG//./\\.}"
LANG_ESC="${LANG_ESC//|/\\|}"
pct exec "$CTID" -- bash -c "sed -i \"/$LANG_ESC/ s/^# //\" /etc/locale.gen"
pct exec "$CTID" -- bash -c "locale_line=\$(grep -v '^#' /etc/locale.gen | grep -E '^[a-zA-Z]' | awk '{print \$1}' | head -n 1) && \
echo LANG=\$locale_line >/etc/default/locale && \
locale-gen >/dev/null && \
@@ -4216,6 +4226,53 @@ EOF'
fi
fi
# Defense-in-depth: Ensure error handling stays disabled during recovery.
# Some functions (e.g. silent/$STD) unconditionally re-enable set -Eeuo pipefail
# and trap 'error_handler' ERR. If any code path above called such a function,
# the grep/sed pipelines below would trigger error_handler on non-match (exit 1).
set +Eeuo pipefail
trap - ERR
# --- Exit code 1 subclassification: analyze logs BEFORE telemetry call ---
# Exit code 1 is generic ("General error"). Analyze logs to determine the
# real error category so telemetry gets a useful classification instead of "shell".
local is_oom=false
local is_network_issue=false
local is_apt_issue=false
local is_cmd_not_found=false
local is_disk_full=false
if [[ $install_exit_code -eq 1 && -f "$combined_log" ]]; then
if grep -qiE 'E: Unable to|E: Package|E: Failed to fetch|dpkg.*error|broken packages|unmet dependencies|dpkg --configure -a' "$combined_log"; then
is_apt_issue=true
fi
if grep -qiE 'Cannot allocate memory|Out of memory|oom-killer|Killed process|JavaScript heap' "$combined_log"; then
is_oom=true
fi
if grep -qiE 'Could not resolve|DNS|Connection refused|Network is unreachable|No route to host|Temporary failure resolving|Failed to fetch' "$combined_log"; then
is_network_issue=true
fi
if grep -qiE ': command not found|No such file or directory.*/s?bin/' "$combined_log"; then
is_cmd_not_found=true
fi
if grep -qiE 'ENOSPC|no space left on device|Disk quota exceeded|errno -28' "$combined_log"; then
is_disk_full=true
fi
fi
# Set override for categorize_error() so telemetry gets the real category
if [[ "$is_apt_issue" == true ]]; then
export ERROR_CATEGORY_OVERRIDE="dependency"
elif [[ "$is_oom" == true ]]; then
export ERROR_CATEGORY_OVERRIDE="resource"
elif [[ "$is_network_issue" == true ]]; then
export ERROR_CATEGORY_OVERRIDE="network"
elif [[ "$is_disk_full" == true ]]; then
export ERROR_CATEGORY_OVERRIDE="storage"
elif [[ "$is_cmd_not_found" == true ]]; then
export ERROR_CATEGORY_OVERRIDE="dependency"
fi
# Report failure to telemetry API (now with log available on host)
# NOTE: Do NOT use msg_info/spinner here — the background spinner process
# causes SIGTSTP in non-interactive shells (bash -c "$(curl ...)"), which
@@ -4224,13 +4281,6 @@ EOF'
post_update_to_api "failed" "$install_exit_code"
$STD echo -e "${TAB}${CM:-} Failure reported"
# Defense-in-depth: Ensure error handling stays disabled during recovery.
# Some functions (e.g. silent/$STD) unconditionally re-enable set -Eeuo pipefail
# and trap 'error_handler' ERR. If any code path above called such a function,
# the grep/sed pipelines below would trigger error_handler on non-match (exit 1).
set +Eeuo pipefail
trap - ERR
# Show combined log location
if [[ -n "$CTID" && -n "${SESSION_ID:-}" ]]; then
msg_custom "📋" "${YW}" "Installation log: ${combined_log}"
@@ -4259,12 +4309,9 @@ EOF'
# Prompt user for cleanup with 60s timeout
echo ""
# Detect error type for smart recovery options
local is_oom=false
local is_network_issue=false
local is_apt_issue=false
local is_cmd_not_found=false
local is_disk_full=false
# Extend error detection for non-exit-1 codes (exit 1 was already analyzed above)
# The is_* flags were set above for exit code 1 log analysis; here we add
# exit-code-specific detections for other codes.
local error_explanation=""
if declare -f explain_exit_code >/dev/null 2>&1; then
error_explanation="$(explain_exit_code "$install_exit_code")"
@@ -4314,26 +4361,6 @@ EOF'
;;
esac
# Exit 1 subclassification: analyze logs to identify actual root cause
# Many exit 1 errors are actually APT, OOM, network, or command-not-found issues
if [[ $install_exit_code -eq 1 && -f "$combined_log" ]]; then
if grep -qiE 'E: Unable to|E: Package|E: Failed to fetch|dpkg.*error|broken packages|unmet dependencies|dpkg --configure -a' "$combined_log"; then
is_apt_issue=true
fi
if grep -qiE 'Cannot allocate memory|Out of memory|oom-killer|Killed process|JavaScript heap' "$combined_log"; then
is_oom=true
fi
if grep -qiE 'Could not resolve|DNS|Connection refused|Network is unreachable|No route to host|Temporary failure resolving|Failed to fetch' "$combined_log"; then
is_network_issue=true
fi
if grep -qiE ': command not found|No such file or directory.*/s?bin/' "$combined_log"; then
is_cmd_not_found=true
fi
if grep -qiE 'ENOSPC|no space left on device|Disk quota exceeded|errno -28' "$combined_log"; then
is_disk_full=true
fi
fi
# Show error explanation if available
if [[ -n "$error_explanation" ]]; then
echo -e "${TAB}${RD}Error: ${error_explanation}${CL}"
@@ -4535,6 +4562,7 @@ EOF'
if [[ $apt_retry_code -eq 0 ]]; then
msg_ok "Installation completed successfully after APT repair!"
INSTALL_COMPLETE=true
post_update_to_api "done" "0" "force"
return 0
else
@@ -4759,6 +4787,10 @@ fix_gpu_gids() {
pct stop "$CTID" >/dev/null 2>&1
sleep 1
# Validate GIDs are numeric before sed
[[ "$render_gid" =~ ^[0-9]+$ ]] || render_gid="104"
[[ "$video_gid" =~ ^[0-9]+$ ]] || video_gid="44"
# Update dev entries with correct GIDs
sed -i.bak -E "s|(dev[0-9]+: /dev/dri/renderD[0-9]+),gid=[0-9]+|\1,gid=${render_gid}|g" "$LXC_CONFIG"
sed -i -E "s|(dev[0-9]+: /dev/dri/card[0-9]+),gid=[0-9]+|\1,gid=${video_gid}|g" "$LXC_CONFIG"
@@ -5705,6 +5737,7 @@ EOF
systemctl start ping-instances.service
fi
INSTALL_COMPLETE=true
post_update_to_api "done" "none"
}
+17 -8
View File
@@ -507,14 +507,23 @@ _stop_container_if_installing() {
on_exit() {
local exit_code=$?
# Report orphaned "installing" records to telemetry API
# Catches ALL exit paths: errors, signals, AND clean exits where
# post_to_api was called but post_update_to_api was never called
if [[ "${POST_TO_API_DONE:-}" == "true" && "${POST_UPDATE_DONE:-}" != "true" ]]; then
if [[ $exit_code -ne 0 ]]; then
_send_abort_telemetry "$exit_code"
elif declare -f post_update_to_api >/dev/null 2>&1; then
post_update_to_api "done" "0" 2>/dev/null || true
# Report orphaned telemetry records
# Two scenarios handled:
# 1. POST_TO_API_DONE=true but POST_UPDATE_DONE=false: Record was created but
# never got a final status update → send abort/done now.
# 2. POST_TO_API_DONE=false but DIAGNOSTICS=yes: Initial post failed (server
# unreachable/timeout), but the server has fallback create-on-update logic,
# so a status update can still create the record. Worth one last try.
if [[ "${POST_UPDATE_DONE:-}" != "true" ]]; then
if [[ "${POST_TO_API_DONE:-}" == "true" || "${DIAGNOSTICS:-no}" == "yes" ]]; then
if [[ $exit_code -ne 0 ]]; then
_send_abort_telemetry "$exit_code"
elif [[ "${INSTALL_COMPLETE:-}" == "true" ]] && declare -f post_update_to_api >/dev/null 2>&1; then
# Only report success if the install was explicitly marked complete.
# Without this guard, early bailouts (e.g. user cancelled) with exit 0
# would be falsely reported as successful installations.
post_update_to_api "done" "0" 2>/dev/null || true
fi
fi
fi
+3 -3
View File
@@ -309,14 +309,14 @@ customize() {
if [[ "$PASSWORD" == "" ]]; then
msg_info "Customizing Container"
GETTY_OVERRIDE="/etc/systemd/system/container-getty@1.service.d/override.conf"
mkdir -p $(dirname $GETTY_OVERRIDE)
cat <<EOF >$GETTY_OVERRIDE
mkdir -p "$(dirname "$GETTY_OVERRIDE")"
cat <<EOF >"$GETTY_OVERRIDE"
[Service]
ExecStart=
ExecStart=-/sbin/agetty --autologin root --noclear --keep-baud tty%I 115200,38400,9600 \$TERM
EOF
systemctl daemon-reload
systemctl restart $(basename $(dirname $GETTY_OVERRIDE) | sed 's/\.d//')
systemctl restart "$(basename "$(dirname "$GETTY_OVERRIDE")" | sed 's/\.d//')"
msg_ok "Customized Container"
fi
echo "bash -c \"\$(curl -fsSL https://raw.githubusercontent.com/community-scripts/ProxmoxVE/main/ct/${app}.sh)\"" >/usr/bin/update
+5 -4
View File
@@ -242,7 +242,7 @@ download_gpg_key() {
# Process based on mode
if [[ "$mode" == "dearmor" ]]; then
if gpg --dearmor --yes -o "$output" <"$temp_key" 2>/dev/null; then
if gpg --dearmor --yes -o "$output" <"$temp_key" 2>/dev/null && [[ -s "$output" ]]; then
rm -f "$temp_key"
debug_log "GPG key installed (dearmored): $output"
return 0
@@ -5192,7 +5192,7 @@ _setup_gpu_permissions() {
for nvidia_dev in /dev/nvidia*; do
[[ -e "$nvidia_dev" ]] && {
chgrp video "$nvidia_dev" 2>/dev/null || true
chmod 666 "$nvidia_dev" 2>/dev/null || true
chmod 660 "$nvidia_dev" 2>/dev/null || true
}
done
if [[ -d /dev/nvidia-caps ]]; then
@@ -5200,7 +5200,7 @@ _setup_gpu_permissions() {
for caps_dev in /dev/nvidia-caps/*; do
[[ -e "$caps_dev" ]] && {
chgrp video "$caps_dev" 2>/dev/null || true
chmod 666 "$caps_dev" 2>/dev/null || true
chmod 660 "$caps_dev" 2>/dev/null || true
}
done
fi
@@ -5217,7 +5217,8 @@ _setup_gpu_permissions() {
# /dev/kfd permissions (AMD ROCm)
if [[ -e /dev/kfd ]]; then
chmod 666 /dev/kfd 2>/dev/null || true
chgrp render /dev/kfd 2>/dev/null || true
chmod 660 /dev/kfd 2>/dev/null || true
msg_info "AMD ROCm compute device configured"
fi