mirror of
https://github.com/community-scripts/ProxmoxVE.git
synced 2026-05-13 12:15:03 +00:00
tools.func: add setup_nltk as new function (#14314)
This commit is contained in:
committed by
GitHub
parent
24fbf24c6d
commit
02eaf288bf
+74
-4
@@ -2095,10 +2095,10 @@ get_latest_gh_tag() {
|
||||
local count
|
||||
count=$(jq 'length' "$temp_file" 2>/dev/null || echo 0)
|
||||
if [[ "$count" -gt 0 ]]; then
|
||||
tag=$(jq -r '.[].ref' "$temp_file" \
|
||||
| sed 's|^refs/tags/||' \
|
||||
| sort -V \
|
||||
| tail -n1)
|
||||
tag=$(jq -r '.[].ref' "$temp_file" |
|
||||
sed 's|^refs/tags/||' |
|
||||
sort -V |
|
||||
tail -n1)
|
||||
fi
|
||||
else
|
||||
# No prefix: just take the first (newest) tag from /tags
|
||||
@@ -9439,3 +9439,73 @@ function fetch_and_deploy_gl_release() {
|
||||
msg_ok "Deployed: $app ($version)"
|
||||
rm -rf "$tmpdir"
|
||||
}
|
||||
|
||||
# ------------------------------------------------------------------------------
# Download NLTK data packages directly from GitHub, bypassing Python.
# Avoids CPU-instruction failures (SIGILL) on older hardware lacking AVX.
#
# Each package ID is looked up in the NLTK index.xml. The entry's "subdir",
# "url" and "unzip" attributes decide where the archive goes and whether it is
# extracted (unzip="1") or stored as <id>.zip inside the sub-directory.
# A failing package is reported and skipped; remaining packages are still
# processed.
#
# Usage:
#   setup_nltk "averaged_perceptron_tagger_eng" "/nltk_data"
#   setup_nltk "snowball_data stopwords punkt_tab" "/usr/share/nltk_data"
#
# Parameters:
#   $1 - Space-separated list of NLTK package IDs
#   $2 - Target directory (default: /usr/share/nltk_data)
#
# Returns: 0 on success, non-zero if any package failed
# ------------------------------------------------------------------------------
function setup_nltk() {
  local packages="${1:?setup_nltk requires at least one package name}"
  local target_dir="${2:-/usr/share/nltk_data}"
  local NLTK_INDEX_URL="https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/index.xml"
  local index_xml rc=0

  ensure_dependencies unzip

  index_xml=$(curl_with_retry "$NLTK_INDEX_URL" "-") || {
    msg_error "Failed to fetch NLTK package index"
    return 1
  }

  local pkg pkg_line subdir pkg_url do_unzip tmp_zip dest_dir
  # Word-splitting of $packages is intentional: it is a space-separated list.
  # shellcheck disable=SC2086
  for pkg in $packages; do
    msg_info "Downloading NLTK: $pkg"

    # Fixed-string match so the package ID is never interpreted as a regex.
    # "|| true" keeps a no-match from aborting callers that run with
    # errexit/pipefail before the error branch below can report it.
    pkg_line=$(grep -F -m1 -- "id=\"${pkg}\"" <<<"$index_xml" || true)
    if [[ -z "$pkg_line" ]]; then
      msg_error "NLTK package not found in index: $pkg"
      rc=1
      continue
    fi

    # A missing attribute yields an empty string instead of a failing
    # assignment; only the first occurrence on the line is used.
    subdir=$(grep -oP 'subdir="\K[^"]+' <<<"$pkg_line" | head -n1 || true)
    pkg_url=$(grep -oP 'url="\K[^"]+' <<<"$pkg_line" | head -n1 || true)
    do_unzip=$(grep -oP 'unzip="\K[^"]+' <<<"$pkg_line" | head -n1 || true)

    if [[ -z "$subdir" || -z "$pkg_url" ]]; then
      msg_error "Could not parse NLTK index entry for: $pkg"
      rc=1
      continue
    fi

    dest_dir="${target_dir}/${subdir}"
    if ! mkdir -p "$dest_dir"; then
      msg_error "Failed to create NLTK directory: $dest_dir"
      rc=1
      continue
    fi

    if ! tmp_zip=$(mktemp --suffix=.zip); then
      msg_error "Failed to create temporary file for NLTK package: $pkg"
      rc=1
      continue
    fi

    if ! CURL_TIMEOUT=120 curl_with_retry "$pkg_url" "$tmp_zip"; then
      msg_error "Failed to download NLTK package: $pkg"
      rm -f "$tmp_zip"
      rc=1
      continue
    fi

    if [[ "$do_unzip" == "1" ]]; then
      # $STD is deliberately unquoted: it is an optional command prefix.
      if ! $STD unzip -q -o "$tmp_zip" -d "${dest_dir}/"; then
        msg_error "Failed to extract NLTK package: $pkg"
        rm -f "$tmp_zip"
        rc=1
        continue
      fi
      rm -f "$tmp_zip"
    else
      if ! mv "$tmp_zip" "${dest_dir}/${pkg}.zip"; then
        msg_error "Failed to store NLTK package: $pkg"
        rm -f "$tmp_zip"
        rc=1
        continue
      fi
      # mktemp creates the file owner-only; make the data readable for
      # services that do not run as the installing user.
      chmod 644 "${dest_dir}/${pkg}.zip"
    fi

    msg_ok "Downloaded NLTK: $pkg"
  done

  return $rc
}
|
||||
|
||||
Reference in New Issue
Block a user