Classify exit-1 errors & guard telemetry

Analyze logs for generic exit code 1 and export an ERROR_CATEGORY_OVERRIDE so telemetry receives a more accurate error category (apt, oom, network, storage, dependency). Preserve any existing TELEMETRY_TYPE when posting updates. Add defense-in-depth by disabling strict error traps before running grep/sed log analysis to avoid spurious error_handler invocations. Mark successful installs with INSTALL_COMPLETE and update the error handler to only report a successful "done" telemetry state when INSTALL_COMPLETE is explicitly set, preventing false-positive success reports from early zero-exit exits.
This commit is contained in:
CanbiZ (MickLesk)
2026-03-24 09:57:43 +01:00
parent 9aa0390553
commit 86c658909a
3 changed files with 104 additions and 54 deletions

View File

@@ -348,10 +348,10 @@ explain_exit_code() {
json_escape() {
# Escape a string for safe JSON embedding using awk (handles any input size).
# Pipeline: strip ANSI → remove control chars → escape \ " TAB → join lines with \n
printf '%s' "$1" \
| sed 's/\x1b\[[0-9;]*[a-zA-Z]//g' \
| tr -d '\000-\010\013\014\016-\037\177\r' \
| awk '
printf '%s' "$1" |
sed 's/\x1b\[[0-9;]*[a-zA-Z]//g' |
tr -d '\000-\010\013\014\016-\037\177\r' |
awk '
BEGIN { ORS = "" }
{
gsub(/\\/, "\\\\") # backslash → \\
@@ -627,8 +627,8 @@ post_to_api() {
[[ "${DEV_MODE:-}" == "true" ]] && echo "[DEBUG] post_to_api() DIAGNOSTICS=$DIAGNOSTICS RANDOM_UUID=$RANDOM_UUID NSAPP=$NSAPP" >&2
# Set type for later status updates
TELEMETRY_TYPE="lxc"
# Set type for later status updates (preserve if already set, e.g. turnkey)
TELEMETRY_TYPE="${TELEMETRY_TYPE:-lxc}"
local pve_version=""
if command -v pveversion &>/dev/null; then
@@ -692,6 +692,7 @@ EOF
# Send initial "installing" record with retry.
# This record MUST exist for all subsequent updates to succeed.
local http_code="" attempt
local _post_success=false
for attempt in 1 2 3; do
if [[ "${DEV_MODE:-}" == "true" ]]; then
http_code=$(curl -sS -w "%{http_code}" -m "${TELEMETRY_TIMEOUT}" -X POST "${TELEMETRY_URL}" \
@@ -703,11 +704,19 @@ EOF
-H "Content-Type: application/json" \
-d "$JSON_PAYLOAD" -o /dev/null 2>/dev/null) || http_code="000"
fi
[[ "$http_code" =~ ^2[0-9]{2}$ ]] && break
if [[ "$http_code" =~ ^2[0-9]{2}$ ]]; then
_post_success=true
break
fi
[[ "$attempt" -lt 3 ]] && sleep 1
done
POST_TO_API_DONE=true
# Only mark done if at least one attempt succeeded.
# If all 3 failed, POST_TO_API_DONE stays false so post_update_to_api
# and on_exit() know the initial record was never created.
# The server has fallback logic to create a new record on status updates,
# so subsequent calls can still succeed even without the initial record.
POST_TO_API_DONE=${_post_success}
}
# ------------------------------------------------------------------------------
@@ -798,15 +807,19 @@ EOF
# Send initial "installing" record with retry (must succeed for updates to work)
local http_code="" attempt
local _post_success=false
for attempt in 1 2 3; do
http_code=$(curl -sS -w "%{http_code}" -m "${TELEMETRY_TIMEOUT}" -X POST "${TELEMETRY_URL}" \
-H "Content-Type: application/json" \
-d "$JSON_PAYLOAD" -o /dev/null 2>/dev/null) || http_code="000"
[[ "$http_code" =~ ^2[0-9]{2}$ ]] && break
if [[ "$http_code" =~ ^2[0-9]{2}$ ]]; then
_post_success=true
break
fi
[[ "$attempt" -lt 3 ]] && sleep 1
done
POST_TO_API_DONE=true
POST_TO_API_DONE=${_post_success}
}
# ------------------------------------------------------------------------------
@@ -1083,6 +1096,12 @@ EOF
# - Used to group errors in dashboard
# ------------------------------------------------------------------------------
categorize_error() {
# Allow build.func to override category based on log analysis (exit code 1 subclassification)
if [[ -n "${ERROR_CATEGORY_OVERRIDE:-}" ]]; then
echo "$ERROR_CATEGORY_OVERRIDE"
return
fi
local code="$1"
case "$code" in
# Network errors (curl/wget)