Revert "core: add retry logic for template lock in LXC container creation (#1…" (#11011)

This reverts commit d71f24bddb.
This commit is contained in:
CanbiZ (MickLesk)
2026-01-20 23:35:39 +01:00
committed by GitHub
parent 657a9629be
commit 7699f4f6ad
+20 -58
View File
@@ -4743,88 +4743,50 @@ create_lxc_container() {
-rootfs $CONTAINER_STORAGE:${PCT_DISK_SIZE:-8}" -rootfs $CONTAINER_STORAGE:${PCT_DISK_SIZE:-8}"
fi fi
# Lock by template file (avoid concurrent template downloads/validation) # Lock by template file (avoid concurrent downloads/creates)
lockfile="/tmp/template.${TEMPLATE}.lock" lockfile="/tmp/template.${TEMPLATE}.lock"
# Cleanup stale lock files (older than 1 hour - likely from crashed processes)
if [[ -f "$lockfile" ]]; then
local lock_age=$(($(date +%s) - $(stat -c %Y "$lockfile" 2>/dev/null || echo 0)))
if [[ $lock_age -gt 3600 ]]; then
msg_warn "Removing stale template lock file (age: ${lock_age}s)"
rm -f "$lockfile"
fi
fi
exec 9>"$lockfile" || { exec 9>"$lockfile" || {
msg_error "Failed to create lock file '$lockfile'." msg_error "Failed to create lock file '$lockfile'."
exit 200 exit 200
} }
flock -w 60 9 || {
# Retry logic for template lock (another container creation may be running) msg_error "Timeout while waiting for template lock."
local lock_attempts=0 exit 211
local max_lock_attempts=10 }
local lock_wait_time=30
while ! flock -w "$lock_wait_time" 9; do
lock_attempts=$((lock_attempts + 1))
if [[ $lock_attempts -ge $max_lock_attempts ]]; then
msg_error "Timeout while waiting for template lock after ${max_lock_attempts} attempts."
msg_custom "💡" "${YW}" "Another container creation may be stuck. Check running processes or remove: $lockfile"
exit 211
fi
msg_custom "⏳" "${YW}" "Another container is being created with this template. Waiting... (attempt ${lock_attempts}/${max_lock_attempts})"
done
LOGFILE="/tmp/pct_create_${CTID}_$(date +%Y%m%d_%H%M%S)_${SESSION_ID}.log" LOGFILE="/tmp/pct_create_${CTID}_$(date +%Y%m%d_%H%M%S)_${SESSION_ID}.log"
# Validate template before pct create (while holding lock)
if [[ ! -s "$TEMPLATE_PATH" || "$(stat -c%s "$TEMPLATE_PATH" 2>/dev/null || echo 0)" -lt 1000000 ]]; then
msg_info "Template file missing or too small downloading"
rm -f "$TEMPLATE_PATH"
pveam download "$TEMPLATE_STORAGE" "$TEMPLATE" >/dev/null 2>&1
msg_ok "Template downloaded"
elif ! tar -tf "$TEMPLATE_PATH" &>/dev/null; then
if [[ -n "$ONLINE_TEMPLATE" ]]; then
msg_info "Template appears corrupted re-downloading"
rm -f "$TEMPLATE_PATH"
pveam download "$TEMPLATE_STORAGE" "$TEMPLATE" >/dev/null 2>&1
msg_ok "Template re-downloaded"
else
msg_warn "Template appears corrupted, but no online version exists. Skipping re-download."
fi
fi
# Release lock after template validation - pct create has its own internal locking
exec 9>&-
msg_debug "pct create command: pct create $CTID ${TEMPLATE_STORAGE}:vztmpl/${TEMPLATE} $PCT_OPTIONS" msg_debug "pct create command: pct create $CTID ${TEMPLATE_STORAGE}:vztmpl/${TEMPLATE} $PCT_OPTIONS"
msg_debug "Logfile: $LOGFILE" msg_debug "Logfile: $LOGFILE"
# First attempt (PCT_OPTIONS is a multi-line string, use it directly) # First attempt (PCT_OPTIONS is a multi-line string, use it directly)
if ! pct create "$CTID" "${TEMPLATE_STORAGE}:vztmpl/${TEMPLATE}" $PCT_OPTIONS >"$LOGFILE" 2>&1; then if ! pct create "$CTID" "${TEMPLATE_STORAGE}:vztmpl/${TEMPLATE}" $PCT_OPTIONS >"$LOGFILE" 2>&1; then
msg_debug "Container creation failed on ${TEMPLATE_STORAGE}. Checking error..." msg_debug "Container creation failed on ${TEMPLATE_STORAGE}. Validating template..."
# Check if template issue - retry with fresh download # Validate template file
if grep -qiE 'unable to open|corrupt|invalid' "$LOGFILE"; then if [[ ! -s "$TEMPLATE_PATH" || "$(stat -c%s "$TEMPLATE_PATH")" -lt 1000000 ]]; then
msg_info "Template may be corrupted re-downloading" msg_warn "Template file too small or missing re-downloading."
rm -f "$TEMPLATE_PATH" rm -f "$TEMPLATE_PATH"
pveam download "$TEMPLATE_STORAGE" "$TEMPLATE" >/dev/null 2>&1 pveam download "$TEMPLATE_STORAGE" "$TEMPLATE"
msg_ok "Template re-downloaded" elif ! tar -tf "$TEMPLATE_PATH" &>/dev/null; then
if [[ -n "$ONLINE_TEMPLATE" ]]; then
msg_warn "Template appears corrupted re-downloading."
rm -f "$TEMPLATE_PATH"
pveam download "$TEMPLATE_STORAGE" "$TEMPLATE"
else
msg_warn "Template appears corrupted, but no online version exists. Skipping re-download."
fi
fi fi
# Retry after repair # Retry after repair
if ! pct create "$CTID" "${TEMPLATE_STORAGE}:vztmpl/${TEMPLATE}" $PCT_OPTIONS >>"$LOGFILE" 2>&1; then if ! pct create "$CTID" "${TEMPLATE_STORAGE}:vztmpl/${TEMPLATE}" $PCT_OPTIONS >>"$LOGFILE" 2>&1; then
# Fallback to local storage if not already on local # Fallback to local storage if not already on local
if [[ "$TEMPLATE_STORAGE" != "local" ]]; then if [[ "$TEMPLATE_STORAGE" != "local" ]]; then
msg_info "Retrying container creation with fallback to local storage" msg_info "Retrying container creation with fallback to local storage..."
LOCAL_TEMPLATE_PATH="/var/lib/vz/template/cache/$TEMPLATE" LOCAL_TEMPLATE_PATH="/var/lib/vz/template/cache/$TEMPLATE"
if [[ ! -f "$LOCAL_TEMPLATE_PATH" ]]; then if [[ ! -f "$LOCAL_TEMPLATE_PATH" ]]; then
msg_ok "Trying local storage fallback" msg_info "Downloading template to local..."
msg_info "Downloading template to local"
pveam download local "$TEMPLATE" >/dev/null 2>&1 pveam download local "$TEMPLATE" >/dev/null 2>&1
msg_ok "Template downloaded to local"
else
msg_ok "Trying local storage fallback"
fi fi
if ! pct create "$CTID" "local:vztmpl/${TEMPLATE}" $PCT_OPTIONS >>"$LOGFILE" 2>&1; then if ! pct create "$CTID" "local:vztmpl/${TEMPLATE}" $PCT_OPTIONS >>"$LOGFILE" 2>&1; then
# Local fallback also failed - check for LXC stack version issue # Local fallback also failed - check for LXC stack version issue