From abc9360dc62863af1c484f914cf2b0948169fb02 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Thu, 21 Mar 2024 22:09:04 +0100 Subject: [PATCH] feat(aio): entrypoint, update workflows (#1872) --- .github/workflows/image.yml | 5 ++ .github/workflows/image_build.yml | 68 ++++++++++++++++++++- Dockerfile.aio | 7 +-- Makefile | 5 +- aio/cpu/README.md | 5 ++ aio/cpu/embeddings.yaml | 13 ++-- aio/entrypoint.sh | 98 +++++++++++++++++++++++++++++++ aio/gpu-8g/embeddings.yaml | 4 +- aio/gpu-8g/image-gen.yaml | 2 +- 9 files changed, 191 insertions(+), 16 deletions(-) create mode 100644 aio/cpu/README.md create mode 100755 aio/entrypoint.sh diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml index 5ba0f1bf..8e2bbbdd 100644 --- a/.github/workflows/image.yml +++ b/.github/workflows/image.yml @@ -26,6 +26,7 @@ jobs: platforms: ${{ matrix.platforms }} runs-on: ${{ matrix.runs-on }} base-image: ${{ matrix.base-image }} + aio: ${{ matrix.aio }} makeflags: "-j3" secrets: dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }} @@ -86,6 +87,7 @@ jobs: image-type: 'extras' runs-on: 'arc-runner-set' base-image: "ubuntu:22.04" + aio: "-aio-gpu-nvidia-cuda-11" - build-type: 'cublas' cuda-major-version: "12" cuda-minor-version: "1" @@ -96,6 +98,7 @@ jobs: image-type: 'extras' runs-on: 'arc-runner-set' base-image: "ubuntu:22.04" + aio: "-aio-gpu-nvidia-cuda-12" - build-type: '' #platforms: 'linux/amd64,linux/arm64' platforms: 'linux/amd64' @@ -199,6 +202,7 @@ jobs: cuda-minor-version: ${{ matrix.cuda-minor-version }} platforms: ${{ matrix.platforms }} runs-on: ${{ matrix.runs-on }} + aio: ${{ matrix.aio }} base-image: ${{ matrix.base-image }} makeflags: "-j3" secrets: @@ -217,6 +221,7 @@ jobs: image-type: 'core' base-image: "ubuntu:22.04" runs-on: 'ubuntu-latest' + aio: "-aio-cpu" - build-type: 'cublas' cuda-major-version: "11" cuda-minor-version: "7" diff --git a/.github/workflows/image_build.yml b/.github/workflows/image_build.yml index a978f1bf..22f72131 100644 --- 
a/.github/workflows/image_build.yml +++ b/.github/workflows/image_build.yml @@ -51,6 +51,11 @@ on: required: false default: '' type: string + aio: + description: 'AIO Image Name' + required: false + default: '' + type: string secrets: dockerUsername: required: true @@ -129,7 +134,30 @@ jobs: flavor: | latest=${{ inputs.tag-latest }} suffix=${{ inputs.tag-suffix }} - + - name: Docker meta AIO (quay.io) + if: inputs.aio != '' + id: meta_aio + uses: docker/metadata-action@v5 + with: + images: | + quay.io/go-skynet/local-ai + tags: | + type=ref,event=branch + type=semver,pattern={{raw}} + flavor: | + suffix=${{ inputs.aio }} + - name: Docker meta AIO (dockerhub) + if: inputs.aio != '' + id: meta_aio_dockerhub + uses: docker/metadata-action@v5 + with: + images: | + localai/localai + tags: | + type=ref,event=branch + type=semver,pattern={{raw}} + flavor: | + suffix=${{ inputs.aio }} - name: Set up QEMU uses: docker/setup-qemu-action@master with: @@ -172,6 +200,44 @@ jobs: push: ${{ github.event_name != 'pull_request' }} tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} + - + name: Inspect image + if: github.event_name != 'pull_request' + run: | + docker pull localai/localai:${{ steps.meta.outputs.version }} + docker image inspect localai/localai:${{ steps.meta.outputs.version }} + docker pull quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }} + docker image inspect quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }} + - name: Build and push AIO image + if: inputs.aio != '' + uses: docker/build-push-action@v5 + with: + builder: ${{ steps.buildx.outputs.name }} + build-args: | + BASE_IMAGE=quay.io/go-skynet/local-ai:${{ steps.meta.outputs.version }} + context: . 
+ file: ./Dockerfile.aio + platforms: ${{ inputs.platforms }} + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.meta_aio.outputs.tags }} + labels: ${{ steps.meta_aio.outputs.labels }} + - name: Build and push AIO image (dockerhub) + if: inputs.aio != '' + uses: docker/build-push-action@v5 + with: + builder: ${{ steps.buildx.outputs.name }} + build-args: | + BASE_IMAGE=localai/localai:${{ steps.meta.outputs.version }} + context: . + file: ./Dockerfile.aio + platforms: ${{ inputs.platforms }} + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.meta_aio_dockerhub.outputs.tags }} + labels: ${{ steps.meta_aio_dockerhub.outputs.labels }} - name: job summary run: | echo "Built image: ${{ steps.meta.outputs.labels }}" >> $GITHUB_STEP_SUMMARY + - name: job summary(AIO) + if: inputs.aio != '' + run: | + echo "Built image: ${{ steps.meta_aio.outputs.labels }}" >> $GITHUB_STEP_SUMMARY \ No newline at end of file diff --git a/Dockerfile.aio b/Dockerfile.aio index 4097e6d5..81063bb4 100644 --- a/Dockerfile.aio +++ b/Dockerfile.aio @@ -1,9 +1,8 @@ ARG BASE_IMAGE=ubuntu:22.04 FROM ${BASE_IMAGE} -ARG SIZE=cpu -ENV MODELS="/aio-models/embeddings.yaml,/aio-models/text-to-speech.yaml,/aio-models/image-gen.yaml,/aio-models/text-to-text.yaml,/aio-models/speech-to-text.yaml,/aio-models/vision.yaml" -COPY aio/${SIZE} /aio-models +RUN apt-get update && apt-get install -y pciutils && apt-get clean -ENTRYPOINT [ "/build/entrypoint.sh" ] \ No newline at end of file +COPY aio/ /aio +ENTRYPOINT [ "/aio/entrypoint.sh" ] \ No newline at end of file diff --git a/Makefile b/Makefile index c03091d0..96347307 100644 --- a/Makefile +++ b/Makefile @@ -536,7 +536,6 @@ grpcs: prepare $(GRPC_BACKENDS) DOCKER_IMAGE?=local-ai DOCKER_AIO_IMAGE?=local-ai-aio -DOCKER_AIO_SIZE?=cpu IMAGE_TYPE?=core BASE_IMAGE?=ubuntu:22.04 @@ -549,11 +548,9 @@ docker: -t $(DOCKER_IMAGE) . 
docker-aio: - @echo "Building AIO image with size $(DOCKER_AIO_SIZE)" - @echo "Building AIO image with base image $(BASE_IMAGE)" + @echo "Building AIO image with base $(BASE_IMAGE) as $(DOCKER_AIO_IMAGE)" docker build \ --build-arg BASE_IMAGE=$(BASE_IMAGE) \ - --build-arg SIZE=$(DOCKER_AIO_SIZE) \ -t $(DOCKER_AIO_IMAGE) -f Dockerfile.aio . docker-aio-all: diff --git a/aio/cpu/README.md b/aio/cpu/README.md new file mode 100644 index 00000000..8b0b1086 --- /dev/null +++ b/aio/cpu/README.md @@ -0,0 +1,5 @@ +## AIO CPU size + +Use this image with CPU-only. + +Please keep using only C++ backends so the base image is as small as possible (without CUDA, cuDNN, python, etc). \ No newline at end of file diff --git a/aio/cpu/embeddings.yaml b/aio/cpu/embeddings.yaml index 512d63a4..bdee079c 100644 --- a/aio/cpu/embeddings.yaml +++ b/aio/cpu/embeddings.yaml @@ -1,13 +1,18 @@ -name: all-minilm-l6-v2 -backend: sentencetransformers +backend: bert-embeddings embeddings: true +f16: true + +gpu_layers: 90 +mmap: true +name: text-embedding-ada-002 + parameters: - model: all-MiniLM-L6-v2 + model: huggingface://mudler/all-MiniLM-L6-v2/ggml-model-q4_0.bin usage: | You can test this model with curl like this: curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{ "input": "Your text string goes here", - "model": "all-minilm-l6-v2" + "model": "text-embedding-ada-002" }' \ No newline at end of file diff --git a/aio/entrypoint.sh b/aio/entrypoint.sh new file mode 100755 index 00000000..8c15a5e4 --- /dev/null +++ b/aio/entrypoint.sh @@ -0,0 +1,98 @@ +#!/bin/bash + +echo "===> LocalAI All-in-One (AIO) container starting..." 
+ +GPU_ACCELERATION=false +GPU_VENDOR="" + +function detect_gpu() { + case "$(uname -s)" in + Linux) + if lspci | grep -E 'VGA|3D' | grep -iq nvidia; then + echo "NVIDIA GPU detected" + # nvidia-smi should be installed in the container + if nvidia-smi; then + GPU_ACCELERATION=true + GPU_VENDOR=nvidia + else + echo "NVIDIA GPU detected, but nvidia-smi is not installed. GPU acceleration will not be available." + fi + elif lspci | grep -E 'VGA|3D' | grep -iq amd; then + echo "AMD GPU detected" + # Check if ROCm is installed + if [ -d /opt/rocm ]; then + GPU_ACCELERATION=true + GPU_VENDOR=amd + else + echo "AMD GPU detected, but ROCm is not installed. GPU acceleration will not be available." + fi + elif lspci | grep -E 'VGA|3D' | grep -iq intel; then + echo "Intel GPU detected" + if [ -d /opt/intel ]; then + GPU_ACCELERATION=true + else + echo "Intel GPU detected, but Intel GPU drivers are not installed. GPU acceleration will not be available." + fi + fi + ;; + Darwin) + if system_profiler SPDisplaysDataType | grep -iq 'Metal'; then + echo "Apple Metal supported GPU detected" + GPU_ACCELERATION=true + GPU_VENDOR=apple + fi + ;; + esac +} + +function detect_gpu_size() { + if [ "$GPU_ACCELERATION" = true ]; then + GPU_SIZE=gpu-8g + fi + + # Attempting to find GPU memory size for NVIDIA GPUs + if echo "$GPU_VENDOR" | grep -iq nvidia; then + echo "NVIDIA GPU detected. Attempting to find memory size..." + nvidia_sm=($(nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits)) + if [ ! -z "$nvidia_sm" ]; then + echo "Total GPU Memory: ${nvidia_sm[0]} MiB" + else + echo "Unable to determine NVIDIA GPU memory size." + fi + # if bigger than 8GB, use 16GB + #if [ "$nvidia_sm" -gt 8192 ]; then + # GPU_SIZE=gpu-16g + #fi + else + echo "Non-NVIDIA GPU detected. GPU memory size detection for non-NVIDIA GPUs is not supported in this script." 
+ fi + + # default to cpu if GPU_SIZE is not set + if [ -z "$GPU_SIZE" ]; then + GPU_SIZE=cpu + fi +} + +function check_vars() { + if [ -z "$MODELS" ]; then + echo "MODELS environment variable is not set. Please set it to a comma-separated list of model YAML files to load." + exit 1 + fi + + if [ -z "$SIZE" ]; then + echo "SIZE environment variable is not set. Please set it to one of the following: cpu, gpu-8g, gpu-16g, apple" + exit 1 + fi +} + +detect_gpu +detect_gpu_size + +SIZE=${SIZE:-$GPU_SIZE} # default to cpu +MODELS=${MODELS:-/aio/${SIZE}/embeddings.yaml,/aio/${SIZE}/text-to-speech.yaml,/aio/${SIZE}/image-gen.yaml,/aio/${SIZE}/text-to-text.yaml,/aio/${SIZE}/speech-to-text.yaml,/aio/${SIZE}/vision.yaml} + +check_vars + +echo "Starting LocalAI with the following models: $MODELS" + +/build/entrypoint.sh "$@" \ No newline at end of file diff --git a/aio/gpu-8g/embeddings.yaml b/aio/gpu-8g/embeddings.yaml index 512d63a4..98b519d5 100644 --- a/aio/gpu-8g/embeddings.yaml +++ b/aio/gpu-8g/embeddings.yaml @@ -1,4 +1,4 @@ -name: all-minilm-l6-v2 +name: text-embedding-ada-002 backend: sentencetransformers embeddings: true parameters: @@ -9,5 +9,5 @@ usage: | curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{ "input": "Your text string goes here", - "model": "all-minilm-l6-v2" + "model": "text-embedding-ada-002" }' \ No newline at end of file diff --git a/aio/gpu-8g/image-gen.yaml b/aio/gpu-8g/image-gen.yaml index 3857cd6b..74cefc1d 100644 --- a/aio/gpu-8g/image-gen.yaml +++ b/aio/gpu-8g/image-gen.yaml @@ -4,7 +4,7 @@ parameters: backend: diffusers step: 25 f16: true -cuda: true + diffusers: pipeline_type: StableDiffusionPipeline cuda: true