mirror of
https://github.com/mudler/LocalAI.git
synced 2024-06-07 19:40:48 +00:00
bc272d1e4b
Signed-off-by: mudler <mudler@localai.io>
98 lines
3.3 KiB
Bash
98 lines
3.3 KiB
Bash
#!/bin/bash
|
|
set -euxo pipefail
|
|
# This scripts needs yq and huggingface_hub to be installed
|
|
# to install hugingface_hub run pip install huggingface_hub
|
|
|
|
# Path to the input YAML file
|
|
input_yaml=$1
|
|
|
|
# Function to download file and check checksum using Python
|
|
function check_and_update_checksum() {
|
|
model_name="$1"
|
|
file_name="$2"
|
|
uri="$3"
|
|
old_checksum="$4"
|
|
idx="$5"
|
|
|
|
# Download the file and calculate new checksum using Python
|
|
new_checksum=$(python3 -c "
|
|
import hashlib
|
|
from huggingface_hub import hf_hub_download
|
|
import requests
|
|
import sys
|
|
import os
|
|
|
|
uri = '$uri'
|
|
file_name = '$file_name'
|
|
|
|
# Function to parse the URI and determine download method
|
|
# Function to parse the URI and determine download method
|
|
def parse_uri(uri):
|
|
if uri.startswith('huggingface://'):
|
|
# Remove the protocol and extract repo id and filename
|
|
repo_id = uri.split('://')[1]
|
|
return 'huggingface', repo_id.rsplit('/', 1)[0]
|
|
elif 'huggingface.co' in uri:
|
|
# For full URLs to Hugging Face, extract repo and filename before '/resolve/'
|
|
parts = uri.split('/resolve/')
|
|
if len(parts) > 1:
|
|
repo_path = parts[0].split('https://huggingface.co/')[-1]
|
|
repo_id, file_part = repo_path.rsplit('/', 1)
|
|
return 'huggingface', (repo_id, file_part)
|
|
return 'direct', uri
|
|
|
|
|
|
def calculate_sha256(file_path):
|
|
sha256_hash = hashlib.sha256()
|
|
with open(file_path, 'rb') as f:
|
|
for byte_block in iter(lambda: f.read(4096), b''):
|
|
sha256_hash.update(byte_block)
|
|
return sha256_hash.hexdigest()
|
|
|
|
download_type, repo_id_or_url = parse_uri(uri)
|
|
|
|
# Decide download method based on URI type
|
|
if download_type == 'huggingface':
|
|
file_path = hf_hub_download(repo_id=repo_id_or_url, filename=file_name, use_auth_token=False)
|
|
else:
|
|
# Direct download for non-Hugging Face URLs
|
|
response = requests.get(repo_id_or_url)
|
|
if response.status_code == 200:
|
|
with open(file_name, 'wb') as f:
|
|
f.write(response.content)
|
|
file_path = file_name
|
|
else:
|
|
print(f'Error downloading file: {response.status_code}', file=sys.stderr)
|
|
sys.exit(1)
|
|
|
|
print(calculate_sha256(file_path))
|
|
# Clean up the downloaded file
|
|
os.remove(file_path)
|
|
")
|
|
|
|
# Compare and update the YAML file if checksums do not match
|
|
if [[ "$old_checksum" != "$new_checksum" ]]; then
|
|
echo "Checksum mismatch for $file_name. Updating..."
|
|
yq eval -i "del(.[$idx].files[] | select(.filename == \"$file_name\").sha256)" "$input_yaml"
|
|
yq eval -i "(.[$idx].files[] | select(.filename == \"$file_name\")).sha256 = \"$new_checksum\"" "$input_yaml"
|
|
else
|
|
echo "Checksum match for $file_name. No update needed."
|
|
fi
|
|
}
|
|
|
|
# Read the YAML and process each file
|
|
len=$(yq eval '. | length' "$input_yaml")
|
|
for ((i=0; i<$len; i++))
|
|
do
|
|
name=$(yq eval ".[$i].name" "$input_yaml")
|
|
files_len=$(yq eval ".[$i].files | length" "$input_yaml")
|
|
for ((j=0; j<$files_len; j++))
|
|
do
|
|
filename=$(yq eval ".[$i].files[$j].filename" "$input_yaml")
|
|
uri=$(yq eval ".[$i].files[$j].uri" "$input_yaml")
|
|
checksum=$(yq eval ".[$i].files[$j].sha256" "$input_yaml")
|
|
echo "Checking model $name, file $filename. URI = $uri, Checksum = $checksum"
|
|
check_and_update_checksum "$name" "$filename" "$uri" "$checksum" "$i"
|
|
done
|
|
done
|