mirror of
https://github.com/community-scripts/ProxmoxVE.git
synced 2026-04-23 10:50:41 +00:00
Classify exit-1 errors & guard telemetry
Analyze logs for generic exit code 1 and export an ERROR_CATEGORY_OVERRIDE so telemetry receives a more accurate error category (apt, oom, network, storage, dependency). Preserve any existing TELEMETRY_TYPE when posting updates. Add defense-in-depth by disabling strict error traps before running grep/sed log analysis to avoid spurious error_handler invocations. Mark successful installs with INSTALL_COMPLETE and update the error handler to only report a successful "done" telemetry state when INSTALL_COMPLETE is explicitly set, preventing false-positive success reports from early zero-exit exits.
This commit is contained in:
+58
-36
@@ -222,9 +222,12 @@ update_motd_ip() {
|
||||
local current_ip="$(hostname -I | awk '{print $1}')"
|
||||
|
||||
# Escape sed special chars in replacement strings (& \ |)
|
||||
current_os="${current_os//\\/\\\\}"; current_os="${current_os//&/\\&}"
|
||||
current_hostname="${current_hostname//\\/\\\\}"; current_hostname="${current_hostname//&/\\&}"
|
||||
current_ip="${current_ip//\\/\\\\}"; current_ip="${current_ip//&/\\&}"
|
||||
current_os="${current_os//\\/\\\\}"
|
||||
current_os="${current_os//&/\\&}"
|
||||
current_hostname="${current_hostname//\\/\\\\}"
|
||||
current_hostname="${current_hostname//&/\\&}"
|
||||
current_ip="${current_ip//\\/\\\\}"
|
||||
current_ip="${current_ip//&/\\&}"
|
||||
|
||||
# Update only if values actually changed
|
||||
if ! grep -q "OS:.*$current_os" "$PROFILE_FILE" 2>/dev/null; then
|
||||
@@ -4223,6 +4226,53 @@ EOF'
|
||||
fi
|
||||
fi
|
||||
|
||||
# Defense-in-depth: Ensure error handling stays disabled during recovery.
|
||||
# Some functions (e.g. silent/$STD) unconditionally re-enable set -Eeuo pipefail
|
||||
# and trap 'error_handler' ERR. If any code path above called such a function,
|
||||
# the grep/sed pipelines below would trigger error_handler on non-match (exit 1).
|
||||
set +Eeuo pipefail
|
||||
trap - ERR
|
||||
|
||||
# --- Exit code 1 subclassification: analyze logs BEFORE telemetry call ---
|
||||
# Exit code 1 is generic ("General error"). Analyze logs to determine the
|
||||
# real error category so telemetry gets a useful classification instead of "shell".
|
||||
local is_oom=false
|
||||
local is_network_issue=false
|
||||
local is_apt_issue=false
|
||||
local is_cmd_not_found=false
|
||||
local is_disk_full=false
|
||||
|
||||
if [[ $install_exit_code -eq 1 && -f "$combined_log" ]]; then
|
||||
if grep -qiE 'E: Unable to|E: Package|E: Failed to fetch|dpkg.*error|broken packages|unmet dependencies|dpkg --configure -a' "$combined_log"; then
|
||||
is_apt_issue=true
|
||||
fi
|
||||
if grep -qiE 'Cannot allocate memory|Out of memory|oom-killer|Killed process|JavaScript heap' "$combined_log"; then
|
||||
is_oom=true
|
||||
fi
|
||||
if grep -qiE 'Could not resolve|DNS|Connection refused|Network is unreachable|No route to host|Temporary failure resolving|Failed to fetch' "$combined_log"; then
|
||||
is_network_issue=true
|
||||
fi
|
||||
if grep -qiE ': command not found|No such file or directory.*/s?bin/' "$combined_log"; then
|
||||
is_cmd_not_found=true
|
||||
fi
|
||||
if grep -qiE 'ENOSPC|no space left on device|Disk quota exceeded|errno -28' "$combined_log"; then
|
||||
is_disk_full=true
|
||||
fi
|
||||
fi
|
||||
|
||||
# Set override for categorize_error() so telemetry gets the real category
|
||||
if [[ "$is_apt_issue" == true ]]; then
|
||||
export ERROR_CATEGORY_OVERRIDE="dependency"
|
||||
elif [[ "$is_oom" == true ]]; then
|
||||
export ERROR_CATEGORY_OVERRIDE="resource"
|
||||
elif [[ "$is_network_issue" == true ]]; then
|
||||
export ERROR_CATEGORY_OVERRIDE="network"
|
||||
elif [[ "$is_disk_full" == true ]]; then
|
||||
export ERROR_CATEGORY_OVERRIDE="storage"
|
||||
elif [[ "$is_cmd_not_found" == true ]]; then
|
||||
export ERROR_CATEGORY_OVERRIDE="dependency"
|
||||
fi
|
||||
|
||||
# Report failure to telemetry API (now with log available on host)
|
||||
# NOTE: Do NOT use msg_info/spinner here — the background spinner process
|
||||
# causes SIGTSTP in non-interactive shells (bash -c "$(curl ...)"), which
|
||||
@@ -4231,13 +4281,6 @@ EOF'
|
||||
post_update_to_api "failed" "$install_exit_code"
|
||||
$STD echo -e "${TAB}${CM:-✔} Failure reported"
|
||||
|
||||
# Defense-in-depth: Ensure error handling stays disabled during recovery.
|
||||
# Some functions (e.g. silent/$STD) unconditionally re-enable set -Eeuo pipefail
|
||||
# and trap 'error_handler' ERR. If any code path above called such a function,
|
||||
# the grep/sed pipelines below would trigger error_handler on non-match (exit 1).
|
||||
set +Eeuo pipefail
|
||||
trap - ERR
|
||||
|
||||
# Show combined log location
|
||||
if [[ -n "$CTID" && -n "${SESSION_ID:-}" ]]; then
|
||||
msg_custom "📋" "${YW}" "Installation log: ${combined_log}"
|
||||
@@ -4266,12 +4309,9 @@ EOF'
|
||||
# Prompt user for cleanup with 60s timeout
|
||||
echo ""
|
||||
|
||||
# Detect error type for smart recovery options
|
||||
local is_oom=false
|
||||
local is_network_issue=false
|
||||
local is_apt_issue=false
|
||||
local is_cmd_not_found=false
|
||||
local is_disk_full=false
|
||||
# Extend error detection for non-exit-1 codes (exit 1 was already analyzed above)
|
||||
# The is_* flags were set above for exit code 1 log analysis; here we add
|
||||
# exit-code-specific detections for other codes.
|
||||
local error_explanation=""
|
||||
if declare -f explain_exit_code >/dev/null 2>&1; then
|
||||
error_explanation="$(explain_exit_code "$install_exit_code")"
|
||||
@@ -4321,26 +4361,6 @@ EOF'
|
||||
;;
|
||||
esac
|
||||
|
||||
# Exit 1 subclassification: analyze logs to identify actual root cause
|
||||
# Many exit 1 errors are actually APT, OOM, network, or command-not-found issues
|
||||
if [[ $install_exit_code -eq 1 && -f "$combined_log" ]]; then
|
||||
if grep -qiE 'E: Unable to|E: Package|E: Failed to fetch|dpkg.*error|broken packages|unmet dependencies|dpkg --configure -a' "$combined_log"; then
|
||||
is_apt_issue=true
|
||||
fi
|
||||
if grep -qiE 'Cannot allocate memory|Out of memory|oom-killer|Killed process|JavaScript heap' "$combined_log"; then
|
||||
is_oom=true
|
||||
fi
|
||||
if grep -qiE 'Could not resolve|DNS|Connection refused|Network is unreachable|No route to host|Temporary failure resolving|Failed to fetch' "$combined_log"; then
|
||||
is_network_issue=true
|
||||
fi
|
||||
if grep -qiE ': command not found|No such file or directory.*/s?bin/' "$combined_log"; then
|
||||
is_cmd_not_found=true
|
||||
fi
|
||||
if grep -qiE 'ENOSPC|no space left on device|Disk quota exceeded|errno -28' "$combined_log"; then
|
||||
is_disk_full=true
|
||||
fi
|
||||
fi
|
||||
|
||||
# Show error explanation if available
|
||||
if [[ -n "$error_explanation" ]]; then
|
||||
echo -e "${TAB}${RD}Error: ${error_explanation}${CL}"
|
||||
@@ -4542,6 +4562,7 @@ EOF'
|
||||
|
||||
if [[ $apt_retry_code -eq 0 ]]; then
|
||||
msg_ok "Installation completed successfully after APT repair!"
|
||||
INSTALL_COMPLETE=true
|
||||
post_update_to_api "done" "0" "force"
|
||||
return 0
|
||||
else
|
||||
@@ -5716,6 +5737,7 @@ EOF
|
||||
systemctl start ping-instances.service
|
||||
fi
|
||||
|
||||
INSTALL_COMPLETE=true
|
||||
post_update_to_api "done" "none"
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user