From 432513c3ba7c7e2491fed64dec4c4f0f545984ba Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Thu, 19 Oct 2023 13:50:40 +0200
Subject: [PATCH] ci: add GPU tests (#1095)

* ci: test GPU

Signed-off-by: Ettore Di Giacinto

* ci: show logs

Signed-off-by: Ettore Di Giacinto

* Debug

* debug

Signed-off-by: Ettore Di Giacinto

* split extra/core images

Signed-off-by: Ettore Di Giacinto

* split extra/core images

Signed-off-by: Ettore Di Giacinto

* consider runner host dir

Signed-off-by: Ettore Di Giacinto

---------

Signed-off-by: Ettore Di Giacinto
---
 .github/workflows/test-gpu.yml | 60 +++++++++++++++++++++++++
 Dockerfile                     | 80 +++++++++++++++++++++-------------
 Makefile                       | 27 ++++++++++++
 tests/e2e-fixtures/gpu.yaml    | 17 ++++++++
 tests/e2e/e2e_suite_test.go    | 18 ++++++++
 tests/e2e/e2e_test.go          | 70 +++++++++++++++++++++++++++++
 6 files changed, 242 insertions(+), 30 deletions(-)
 create mode 100644 .github/workflows/test-gpu.yml
 create mode 100644 tests/e2e-fixtures/gpu.yaml
 create mode 100644 tests/e2e/e2e_suite_test.go
 create mode 100644 tests/e2e/e2e_test.go

diff --git a/.github/workflows/test-gpu.yml b/.github/workflows/test-gpu.yml
new file mode 100644
index 00000000..1dcbc1b9
--- /dev/null
+++ b/.github/workflows/test-gpu.yml
@@ -0,0 +1,60 @@
+---
+name: 'GPU tests'
+
+on:
+  pull_request:
+  push:
+    branches:
+      - master
+    tags:
+      - '*'
+
+concurrency:
+  group: ci-gpu-tests-${{ github.head_ref || github.ref }}-${{ github.repository }}
+  cancel-in-progress: true
+
+jobs:
+  ubuntu-latest:
+    runs-on: self-hosted
+    strategy:
+      matrix:
+        go-version: ['1.21.x']
+    steps:
+      - name: Clone
+        uses: actions/checkout@v3
+        with:
+          submodules: true
+      - name: Setup Go ${{ matrix.go-version }}
+        uses: actions/setup-go@v4
+        with:
+          go-version: ${{ matrix.go-version }}
+      # You can test your matrix by printing the current Go version
+      - name: Display Go version
+        run: go version
+      - name: Dependencies
+        run: |
+          sudo apt-get update
+          sudo DEBIAN_FRONTEND=noninteractive apt-get install -y make wget
+      - name: Build
+        run: |
+          if [ ! -e /run/systemd/system ]; then
+            sudo mkdir /run/systemd/system
+          fi
+          make \
+            TEST_DIR="/host/tests/${{ github.head_ref || github.ref }}" \
+            BUILD_TYPE=cublas \
+            prepare-e2e run-e2e-image test-e2e
+      - name: Release space from worker ♻
+        if: always()
+        run: |
+          sudo rm -rf build || true
+          sudo rm -rf bin || true
+          sudo rm -rf dist || true
+          sudo docker logs $(sudo docker ps -q --filter ancestor=localai-tests) > logs.txt || true  # best effort: no container may exist if the build failed
+          sudo cat logs.txt || true
+          sudo rm -rf logs.txt
+          make clean || true
+          make \
+            TEST_DIR="/host/tests/${{ github.head_ref || github.ref }}" \
+            teardown-e2e || true
+          docker system prune -f -a --volumes || true
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
index 431307a5..f90950d9 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,6 +1,9 @@
 ARG GO_VERSION=1.21-bullseye
+ARG IMAGE_TYPE=extras
+# extras or core
 
-FROM golang:$GO_VERSION as requirements
+
+FROM golang:$GO_VERSION as requirements-core
 
 ARG BUILD_TYPE
 ARG CUDA_MAJOR_VERSION=11
@@ -35,24 +38,6 @@ RUN if [ "${BUILD_TYPE}" = "cublas" ]; then \
     ; fi
 ENV PATH /usr/local/cuda/bin:${PATH}
 
-# Extras requirements
-COPY extra/requirements.txt /build/extra/requirements.txt
-ENV PATH="/root/.cargo/bin:${PATH}"
-RUN pip install --upgrade pip
-RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
-RUN if [ "${TARGETARCH}" = "amd64" ]; then \
-        pip install git+https://github.com/suno-ai/bark.git diffusers invisible_watermark transformers accelerate safetensors;\
-    fi
-RUN if [ "${BUILD_TYPE}" = "cublas" ] && [ "${TARGETARCH}" = "amd64" ]; then \
-        pip install torch vllm && pip install auto-gptq https://github.com/jllllll/exllama/releases/download/0.0.10/exllama-0.0.10+cu${CUDA_MAJOR_VERSION}${CUDA_MINOR_VERSION}-cp39-cp39-linux_x86_64.whl;\
-    fi
-RUN pip install -r /build/extra/requirements.txt && rm -rf /build/extra/requirements.txt
-
-# Vall-e-X
-RUN git clone https://github.com/Plachtaa/VALL-E-X.git /usr/lib/vall-e-x && cd /usr/lib/vall-e-x && pip install -r requirements.txt
-
-WORKDIR /build
-
 # OpenBLAS requirements
 RUN apt-get install -y libopenblas-dev
 
@@ -61,6 +46,8 @@ RUN apt-get install -y libopencv-dev && \
     ln -s /usr/include/opencv4/opencv2 /usr/include/opencv2
 
 
+WORKDIR /build
+
 # piper requirements
 # Use pre-compiled Piper phonemization library (includes onnxruntime)
 #RUN if echo "${GO_TAGS}" | grep -q "tts"; then \
@@ -80,17 +67,40 @@ RUN curl -L "https://github.com/gabime/spdlog/archive/refs/tags/v${SPDLOG_VERSIO
     tar -C "lib/Linux-$(uname -m)/piper_phonemize" -xzvf - && ls -liah /build/lib/Linux-$(uname -m)/piper_phonemize/ && \
     cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/lib/. /usr/lib/ && \
     ln -s /usr/lib/libpiper_phonemize.so /usr/lib/libpiper_phonemize.so.1 && \
-    cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/include/. /usr/include/
+    cp -rfv /build/lib/Linux-$(uname -m)/piper_phonemize/include/. /usr/include/ && \
+    rm spdlog-${SPDLOG_VERSION} -rf && \
+    rm /build/lib/Linux-$(uname -m)/piper_phonemize -rf
+
+# Extras requirements
+FROM requirements-core as requirements-extras
+
+COPY extra/requirements.txt /build/extra/requirements.txt
+ENV PATH="/root/.cargo/bin:${PATH}"
+RUN pip install --upgrade pip
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
+RUN if [ "${TARGETARCH}" = "amd64" ]; then \
+        pip install git+https://github.com/suno-ai/bark.git diffusers invisible_watermark transformers accelerate safetensors;\
+    fi
+RUN if [ "${BUILD_TYPE}" = "cublas" ] && [ "${TARGETARCH}" = "amd64" ]; then \
+        pip install torch vllm && pip install auto-gptq https://github.com/jllllll/exllama/releases/download/0.0.10/exllama-0.0.10+cu${CUDA_MAJOR_VERSION}${CUDA_MINOR_VERSION}-cp39-cp39-linux_x86_64.whl;\
+    fi
+RUN pip install -r /build/extra/requirements.txt && rm -rf /build/extra/requirements.txt
+
+# Vall-e-X
+RUN git clone https://github.com/Plachtaa/VALL-E-X.git /usr/lib/vall-e-x && cd /usr/lib/vall-e-x && pip install -r requirements.txt
+
 #        \
 #    ; fi
 
 ###################################
 ###################################
 
-FROM requirements as builder
+FROM requirements-${IMAGE_TYPE} as builder
 
 ARG GO_TAGS="stablediffusion tts"
-
+ARG GRPC_BACKENDS
+ARG BUILD_GRPC=true
+ENV GRPC_BACKENDS=${GRPC_BACKENDS}
 ENV GO_TAGS=${GO_TAGS}
 ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
 ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
@@ -108,10 +118,12 @@ COPY .git .
 # stablediffusion does not tolerate a newer version of abseil, build it first
 RUN GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
 
-RUN git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
+RUN if [ "${BUILD_GRPC}" = "true" ]; then \
+    git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
     cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
        -DgRPC_BUILD_TESTS=OFF \
-       ../.. && make -j12 install && rm -rf grpc
+       ../.. && make -j12 install && rm -rf grpc \
+    ; fi
 
 # Rebuild with defaults backends
 RUN ESPEAK_DATA=/build/lib/Linux-$(uname -m)/piper_phonemize/lib/espeak-ng-data make build
@@ -119,7 +131,7 @@ RUN ESPEAK_DATA=/build/lib/Linux-$(uname -m)/piper_phonemize/lib/espeak-ng-data
 ###################################
 ###################################
 
-FROM requirements
+FROM requirements-${IMAGE_TYPE}
 
 ARG FFMPEG
 ARG BUILD_TYPE
@@ -129,6 +141,11 @@ ENV BUILD_TYPE=${BUILD_TYPE}
 ENV REBUILD=false
 ENV HEALTHCHECK_ENDPOINT=http://localhost:8080/readyz
 
+ARG CUDA_MAJOR_VERSION=11
+ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
+ENV NVIDIA_REQUIRE_CUDA="cuda>=${CUDA_MAJOR_VERSION}.0"
+ENV NVIDIA_VISIBLE_DEVICES=all
+
 # Add FFmpeg
 RUN if [ "${FFMPEG}" = "true" ]; then \
     apt-get install -y ffmpeg \
@@ -146,16 +163,19 @@ RUN make prepare-sources
 # Copy the binary
 COPY --from=builder /build/local-ai ./
 
-# do not let piper rebuild (requires an older version of absl)
-COPY --from=builder /build/backend-assets/grpc/piper ./backend-assets/grpc/piper
+# do not let stablediffusion rebuild (requires an older version of absl)
+COPY --from=builder /build/backend-assets/grpc/stablediffusion ./backend-assets/grpc/stablediffusion
 
 # Copy VALLE-X as it's not a real "lib"
-RUN cp -rfv /usr/lib/vall-e-x/* ./
+RUN if [ -d /usr/lib/vall-e-x ]; then \
+        cp -rfv /usr/lib/vall-e-x/* ./ ; \
+    fi
 
-# To resolve exllama import error
-RUN if [ "${BUILD_TYPE}" = "cublas" ] && [ "${TARGETARCH:-$(go env GOARCH)}" = "amd64" ]; then \
+# we also copy exllama libs over to resolve exllama import error
+RUN if [ -d /usr/local/lib/python3.9/dist-packages/exllama ]; then \
         cp -rfv /usr/local/lib/python3.9/dist-packages/exllama extra/grpc/exllama/;\
     fi
+
 # Define the health check command
 HEALTHCHECK --interval=1m --timeout=10m --retries=10 \
   CMD curl -f $HEALTHCHECK_ENDPOINT || exit 1
diff --git a/Makefile b/Makefile
index 692417bd..89fdb42d 100644
--- a/Makefile
+++ b/Makefile
@@ -47,6 +47,10 @@ CUDA_LIBPATH?=/usr/local/cuda/lib64/
 GO_TAGS?=
 BUILD_ID?=git
 
+TEST_DIR=/tmp/test
+
+RANDOM := $(shell bash -c 'echo $$RANDOM')
+
 VERSION?=$(shell git describe --always --tags || echo "dev" )
 # go tool nm ./local-ai | grep Commit
 LD_FLAGS?=
@@ -64,6 +68,9 @@ WHITE := $(shell tput -Txterm setaf 7)
 CYAN := $(shell tput -Txterm setaf 6)
 RESET := $(shell tput -Txterm sgr0)
 
+# Default Docker bridge IP
+E2E_BRIDGE_IP?=172.17.0.1
+
 ifndef UNAME_S
 UNAME_S := $(shell uname -s)
 endif
@@ -329,6 +336,26 @@ test: prepare test-models/testmodel grpcs
 	$(MAKE) test-tts
 	$(MAKE) test-stablediffusion
 
+prepare-e2e:
+	mkdir -p $(TEST_DIR)
+	cp -rfv $(abspath ./tests/e2e-fixtures)/gpu.yaml $(TEST_DIR)/gpu.yaml
+	test -e $(TEST_DIR)/ggllm-test-model.bin || (wget -q https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q2_K.gguf -O $(TEST_DIR)/ggllm-test-model.bin.part && mv $(TEST_DIR)/ggllm-test-model.bin.part $(TEST_DIR)/ggllm-test-model.bin)
+	docker build --build-arg BUILD_GRPC=true --build-arg GRPC_BACKENDS="$(GRPC_BACKENDS)" --build-arg IMAGE_TYPE=core --build-arg BUILD_TYPE=$(BUILD_TYPE) --build-arg CUDA_MAJOR_VERSION=11 --build-arg CUDA_MINOR_VERSION=7 --build-arg FFMPEG=true -t localai-tests .
+
+run-e2e-image:
+	ls -liah $(abspath ./tests/e2e-fixtures)
+	docker run -p 5390:8080 -e MODELS_PATH=/models -e THREADS=1 -e DEBUG=true -d --rm -v $(TEST_DIR):/models --gpus all --name e2e-tests-$(RANDOM) localai-tests
+
+test-e2e:
+	@echo 'Running e2e tests'
+	BUILD_TYPE=$(BUILD_TYPE) \
+	LOCALAI_API=http://$(E2E_BRIDGE_IP):5390/v1 \
+	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --flake-attempts 5 -v -r ./tests/e2e
+
+teardown-e2e:
+	docker stop $$(docker ps -q --filter ancestor=localai-tests) || true
+	rm -rf $(TEST_DIR) || true
+
 test-gpt4all: prepare-test
 	TEST_DIR=$(abspath ./)/test-dir/ FIXTURES=$(abspath ./)/tests/fixtures CONFIG_FILE=$(abspath ./)/test-models/config.yaml MODELS_PATH=$(abspath ./)/test-models \
 	$(GOCMD) run github.com/onsi/ginkgo/v2/ginkgo --label-filter="gpt4all" --flake-attempts 5 -v -r ./api ./pkg
diff --git a/tests/e2e-fixtures/gpu.yaml b/tests/e2e-fixtures/gpu.yaml
new file mode 100644
index 00000000..78d6d4ed
--- /dev/null
+++ b/tests/e2e-fixtures/gpu.yaml
@@ -0,0 +1,17 @@
+context_size: 2048
+mirostat: 2
+mirostat_tau: 5.0
+mirostat_eta: 0.1
+f16: true
+threads: 1
+gpu_layers: 90
+name: gpt-4
+mmap: true
+parameters:
+  model: ggllm-test-model.bin
+  rope_freq_base: 10000
+  max_tokens: 20
+  rope_freq_scale: 1
+  temperature: 0.2
+  top_k: 40
+  top_p: 0.95
diff --git a/tests/e2e/e2e_suite_test.go b/tests/e2e/e2e_suite_test.go
new file mode 100644
index 00000000..f6ab238d
--- /dev/null
+++ b/tests/e2e/e2e_suite_test.go
@@ -0,0 +1,18 @@
+package e2e_test
+
+import (
+	"os"
+	"testing"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+// localAIURL is the base URL of the LocalAI API under test, read from LOCALAI_API.
+var localAIURL = os.Getenv("LOCALAI_API")
+
+// TestLocalAI hooks the Ginkgo e2e suite into `go test`.
+func TestLocalAI(t *testing.T) {
+	RegisterFailHandler(Fail)
+	RunSpecs(t, "LocalAI E2E test suite")
+}
diff --git a/tests/e2e/e2e_test.go b/tests/e2e/e2e_test.go
new file mode 100644
index 00000000..7b506e60
--- /dev/null
+++ b/tests/e2e/e2e_test.go
@@ -0,0 +1,70 @@
+package e2e_test
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"os/exec"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+
+	openaigo "github.com/otiai10/openaigo"
+	"github.com/sashabaranov/go-openai"
+)
+
+var _ = Describe("E2E test", func() {
+	var client *openai.Client
+	var client2 *openaigo.Client
+
+	Context("API with ephemeral models", func() {
+		BeforeEach(func() {
+			defaultConfig := openai.DefaultConfig("")
+			defaultConfig.BaseURL = localAIURL
+
+			client2 = openaigo.NewClient("")
+			client2.BaseURL = defaultConfig.BaseURL
+
+			// Wait for the API to be ready, polling once per second instead of Gomega's 10ms default
+			client = openai.NewClientWithConfig(defaultConfig)
+			Eventually(func() error {
+				_, err := client.ListModels(context.TODO())
+				return err
+			}, "2m", "1s").ShouldNot(HaveOccurred())
+		})
+
+		// Check that the GPU was used
+		AfterEach(func() {
+			cmd := exec.Command("/bin/bash", "-xce", "docker logs $(docker ps -q --filter ancestor=localai-tests)")
+			out, err := cmd.CombinedOutput()
+			Expect(err).ToNot(HaveOccurred(), string(out))
+			// The container logs show which backend actually loaded the model
+			if os.Getenv("BUILD_TYPE") == "cublas" {
+				// Accept any device count so multi-GPU runners pass as well
+				Expect(string(out)).To(MatchRegexp(`found \d+ CUDA devices`), string(out))
+				Expect(string(out)).To(ContainSubstring("using CUDA for GPU acceleration"), string(out))
+			} else {
+				fmt.Println("Skipping GPU check")
+				Expect(string(out)).To(ContainSubstring("[llama-cpp] Loads OK"), string(out))
+				Expect(string(out)).To(ContainSubstring("llama_model_loader"), string(out))
+			}
+		})
+
+		Context("Generates text", func() {
+			It("generates a chat completion", func() {
+				model := "gpt-4"
+				resp, err := client.CreateChatCompletion(context.TODO(),
+					openai.ChatCompletionRequest{
+						Model: model, Messages: []openai.ChatCompletionMessage{
+							{
+								Role:    "user",
+								Content: "How much is 2+2?",
+							},
+						}})
+				Expect(err).ToNot(HaveOccurred())
+				Expect(resp.Choices).To(HaveLen(1), fmt.Sprint(resp))
+				Expect(resp.Choices[0].Message.Content).To(Or(ContainSubstring("4"), ContainSubstring("four")), fmt.Sprint(resp.Choices[0].Message.Content))
+			})
+		})
+	})
+})