ci: add checksum checker pipeline (#2274)

Signed-off-by: mudler <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto 2024-05-09 00:31:27 +02:00 committed by GitHub
parent d651f390cd
commit bc272d1e4b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 126 additions and 0 deletions

97
.github/checksum_checker.sh vendored Normal file
View File

@ -0,0 +1,97 @@
#!/bin/bash
set -euxo pipefail
# This scripts needs yq and huggingface_hub to be installed
# to install hugingface_hub run pip install huggingface_hub
# Path to the input YAML file
input_yaml=$1
# Function to download file and check checksum using Python
function check_and_update_checksum() {
model_name="$1"
file_name="$2"
uri="$3"
old_checksum="$4"
idx="$5"
# Download the file and calculate new checksum using Python
new_checksum=$(python3 -c "
import hashlib
from huggingface_hub import hf_hub_download
import requests
import sys
import os
uri = '$uri'
file_name = '$file_name'
# Function to parse the URI and determine download method
# Function to parse the URI and determine download method
def parse_uri(uri):
if uri.startswith('huggingface://'):
# Remove the protocol and extract repo id and filename
repo_id = uri.split('://')[1]
return 'huggingface', repo_id.rsplit('/', 1)[0]
elif 'huggingface.co' in uri:
# For full URLs to Hugging Face, extract repo and filename before '/resolve/'
parts = uri.split('/resolve/')
if len(parts) > 1:
repo_path = parts[0].split('https://huggingface.co/')[-1]
repo_id, file_part = repo_path.rsplit('/', 1)
return 'huggingface', (repo_id, file_part)
return 'direct', uri
def calculate_sha256(file_path):
sha256_hash = hashlib.sha256()
with open(file_path, 'rb') as f:
for byte_block in iter(lambda: f.read(4096), b''):
sha256_hash.update(byte_block)
return sha256_hash.hexdigest()
download_type, repo_id_or_url = parse_uri(uri)
# Decide download method based on URI type
if download_type == 'huggingface':
file_path = hf_hub_download(repo_id=repo_id_or_url, filename=file_name, use_auth_token=False)
else:
# Direct download for non-Hugging Face URLs
response = requests.get(repo_id_or_url)
if response.status_code == 200:
with open(file_name, 'wb') as f:
f.write(response.content)
file_path = file_name
else:
print(f'Error downloading file: {response.status_code}', file=sys.stderr)
sys.exit(1)
print(calculate_sha256(file_path))
# Clean up the downloaded file
os.remove(file_path)
")
# Compare and update the YAML file if checksums do not match
if [[ "$old_checksum" != "$new_checksum" ]]; then
echo "Checksum mismatch for $file_name. Updating..."
yq eval -i "del(.[$idx].files[] | select(.filename == \"$file_name\").sha256)" "$input_yaml"
yq eval -i "(.[$idx].files[] | select(.filename == \"$file_name\")).sha256 = \"$new_checksum\"" "$input_yaml"
else
echo "Checksum match for $file_name. No update needed."
fi
}
# Read the YAML and process each file
len=$(yq eval '. | length' "$input_yaml")
for ((i=0; i<$len; i++))
do
name=$(yq eval ".[$i].name" "$input_yaml")
files_len=$(yq eval ".[$i].files | length" "$input_yaml")
for ((j=0; j<$files_len; j++))
do
filename=$(yq eval ".[$i].files[$j].filename" "$input_yaml")
uri=$(yq eval ".[$i].files[$j].uri" "$input_yaml")
checksum=$(yq eval ".[$i].files[$j].sha256" "$input_yaml")
echo "Checking model $name, file $filename. URI = $uri, Checksum = $checksum"
check_and_update_checksum "$name" "$filename" "$uri" "$checksum" "$i"
done
done

29
.github/workflows/checksum_checker.yaml vendored Normal file
View File

@ -0,0 +1,29 @@
name: Check if checksums are up-to-date
on:
schedule:
- cron: 0 20 * * *
workflow_dispatch:
jobs:
checksum_check:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Install dependencies
run: |
pip install huggingface_hub
# yq
VERSION=v4.43.1 BINARY=yq_linux_amd64 wget https://github.com/mikefarah/yq/releases/download/${VERSION}/${BINARY}.tar.gz -O - |\
tar xz && sudo mv ${BINARY} /usr/bin/yq
- name: Checksum checker 🔧
run: |
bash .github/checksum_checker.sh gallery/index.yaml
- name: Create Pull Request
uses: peter-evans/create-pull-request@v6
with:
token: ${{ secrets.UPDATE_BOT_TOKEN }}
push-to-fork: ci-forks/LocalAI
commit-message: ':arrow_up: Checksum updates in gallery/index.yaml'
title: 'models(gallery): :arrow_up: update checksum'
branch: "update/checksum"
body: Updating checksums in gallery/index.yaml
signoff: true