diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 00000000..2a3a8916
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1 @@
+models/*.bin
\ No newline at end of file
diff --git a/.env b/.env
new file mode 100644
index 00000000..8cfa7262
--- /dev/null
+++ b/.env
@@ -0,0 +1 @@
+THREADS=14
\ No newline at end of file
diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml
index b9f1ee9f..bf97850f 100644
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -12,68 +12,42 @@ jobs:
   docker:
     runs-on: ubuntu-latest
     steps:
-      - name: Release space from worker
-        run: |
-          echo "Listing top largest packages"
-          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
-          head -n 30 <<< "${pkgs}"
-          echo
-          df -h
-          echo
-          sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
-          sudo apt-get remove --auto-remove android-sdk-platform-tools || true
-          sudo apt-get purge --auto-remove android-sdk-platform-tools || true
-          sudo rm -rf /usr/local/lib/android
-          sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
-          sudo rm -rf /usr/share/dotnet
-          sudo apt-get remove -y '^mono-.*' || true
-          sudo apt-get remove -y '^ghc-.*' || true
-          sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
-          sudo apt-get remove -y 'php.*' || true
-          sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
-          sudo apt-get remove -y '^google-.*' || true
-          sudo apt-get remove -y azure-cli || true
-          sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
-          sudo apt-get remove -y '^gfortran-.*' || true
-          sudo apt-get autoremove -y
-          sudo apt-get clean
-          echo
-          echo "Listing top largest packages"
-          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
-          head -n 30 <<< "${pkgs}"
-          echo
-          sudo rm -rfv build || true
-          df -h
       - name: Checkout
         uses: actions/checkout@v3
+
       - name: Prepare
         id: prep
         run: |
           DOCKER_IMAGE=quay.io/go-skynet/llama-cli
-          VERSION=latest
+          VERSION=master
           SHORTREF=${GITHUB_SHA::8}
+
           # If this is git tag, use the tag name as a docker tag
           if [[ $GITHUB_REF == refs/tags/* ]]; then
             VERSION=${GITHUB_REF#refs/tags/}
           fi
           TAGS="${DOCKER_IMAGE}:${VERSION},${DOCKER_IMAGE}:${SHORTREF}"
+
           # If the VERSION looks like a version number, assume that
           # this is the most recent version of the image and also
           # tag it 'latest'.
           if [[ $VERSION =~ ^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then
             TAGS="$TAGS,${DOCKER_IMAGE}:latest"
           fi
+
           # Set output parameters.
           echo ::set-output name=tags::${TAGS}
           echo ::set-output name=docker_image::${DOCKER_IMAGE}
-          echo ::set-output name=image::${DOCKER_IMAGE}:${VERSION}
+
       - name: Set up QEMU
         uses: docker/setup-qemu-action@master
         with:
          platforms: all
+
       - name: Set up Docker Buildx
         id: buildx
         uses: docker/setup-buildx-action@master
+
       - name: Login to DockerHub
         if: github.event_name != 'pull_request'
         uses: docker/login-action@v2
@@ -81,9 +55,23 @@ jobs:
           registry: quay.io
           username: ${{ secrets.QUAY_USERNAME }}
           password: ${{ secrets.QUAY_PASSWORD }}
-      - uses: earthly/actions/setup-earthly@v1
+      - name: Build and push
+        if: github.event_name != 'pull_request'
+        uses: docker/build-push-action@v4
+        with:
+          builder: ${{ steps.buildx.outputs.name }}
+          context: .
+          file: ./Dockerfile
+          platforms: linux/amd64,linux/arm64,linux/arm
+          push: true
+          tags: ${{ steps.prep.outputs.tags }}
       - name: Build
-        run: |
-          earthly config "global.conversion_parallelism" "1"
-          earthly config "global.buildkit_max_parallelism" "1"
-          earthly --push +image-all --IMAGE=${{ steps.prep.outputs.image }}
+        if: github.event_name == 'pull_request'
+        uses: docker/build-push-action@v4
+        with:
+          builder: ${{ steps.buildx.outputs.name }}
+          context: .
+          file: ./Dockerfile
+          platforms: linux/amd64
+          push: false
+          tags: ${{ steps.prep.outputs.tags }}
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 1666f7b5..69c6aeda 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
-llama-cli
\ No newline at end of file
+llama-cli
+models/*.bin
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 00000000..3b284a99
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,19 @@
+ARG GO_VERSION=1.20
+ARG DEBIAN_VERSION=11
+
+FROM golang:$GO_VERSION as builder
+
+WORKDIR /build
+RUN git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp
+RUN cd go-llama.cpp && make libbinding.a
+COPY go.mod ./
+COPY go.sum ./
+RUN go mod download
+RUN apt-get update
+COPY . .
+RUN go mod edit -replace github.com/go-skynet/go-llama.cpp=/build/go-llama.cpp
+RUN C_INCLUDE_PATH=/build/go-llama.cpp LIBRARY_PATH=/build/go-llama.cpp go build -o llama-cli ./
+
+FROM debian:$DEBIAN_VERSION
+COPY --from=builder /build/llama-cli /usr/bin/llama-cli
+ENTRYPOINT [ "/usr/bin/llama-cli" ]
\ No newline at end of file
diff --git a/Earthfile b/Earthfile
index 1a5a43da..6625c3ef 100644
--- a/Earthfile
+++ b/Earthfile
@@ -1,32 +1,5 @@
 VERSION 0.7
 
-go-deps:
-    ARG GO_VERSION=1.20
-    FROM golang:$GO_VERSION
-    WORKDIR /build
-    COPY go.mod ./
-    COPY go.sum ./
-    RUN go mod download
-    RUN apt-get update
-    SAVE ARTIFACT go.mod AS LOCAL go.mod
-    SAVE ARTIFACT go.sum AS LOCAL go.sum
-
 build:
-    FROM +go-deps
-    WORKDIR /build
-    RUN git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp
-    RUN cd go-llama.cpp && make libbinding.a
-    COPY . .
-    RUN go mod edit -replace github.com/go-skynet/go-llama.cpp=/build/go-llama.cpp
-    RUN C_INCLUDE_PATH=$GOPATH/src/github.com/go-skynet/go-llama.cpp LIBRARY_PATH=$GOPATH/src/github.com/go-skynet/go-llama.cpp go build -o llama-cli ./
-    SAVE ARTIFACT llama-cli AS LOCAL llama-cli
-
-image:
-    FROM +go-deps
-    ARG IMAGE=alpaca-cli-nomodel
-    COPY +build/llama-cli /llama-cli
-    ENTRYPOINT [ "/llama-cli" ]
-    SAVE IMAGE --push $IMAGE
-
-image-all:
-    BUILD --platform=linux/amd64 --platform=linux/arm64 +image
+    FROM DOCKERFILE -f Dockerfile .
+    SAVE ARTIFACT /usr/bin/llama-cli AS LOCAL llama-cli
diff --git a/README.md b/README.md
index c7b0f18e..354aac7e 100644
--- a/README.md
+++ b/README.md
@@ -7,6 +7,47 @@ It is compatible with the models supported by `llama.cpp`. You might need to con
 
 `llama-cli` doesn't shell-out, it uses https://github.com/go-skynet/go-llama.cpp, which is a golang binding of [llama.cpp](https://github.com/ggerganov/llama.cpp).
 
+## Usage
+
+You can use `docker-compose`:
+
+```bash
+
+git clone https://github.com/go-skynet/llama-cli
+cd llama-cli
+
+# copy your models to models/
+cp your-model.bin models/
+
+# (optional) Edit the .env file to set the number of concurrent threads used for inference
+# echo "THREADS=14" > .env
+
+# start with docker-compose
+docker compose up -d --build
+
+# Now the API is accessible at localhost:8080
+curl http://localhost:8080/v1/models
+# {"object":"list","data":[{"id":"your-model.bin","object":"model"}]}
+curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
+     "model": "your-model.bin",
+     "prompt": "A long time ago in a galaxy far, far away",
+     "temperature": 0.7
+   }'
+
+
+```
+
+Note: You can use a default template for every model in your model path by creating a corresponding file with the `.tmpl` suffix next to your model. For instance, if the model is called `foo.bin`, you can create a sibling file, `foo.bin.tmpl`, which will be used as the default prompt. For instance, this can be used with alpaca:
+
+```
+Below is an instruction that describes a task. Write a response that appropriately completes the request.
+
+### Instruction:
+{{.Input}}
+
+### Response:
+```
+
 ## Container images
 
 `llama-cli` comes by default as a container image. You can check out all the available images with corresponding tags [here](https://quay.io/repository/go-skynet/llama-cli?tab=tags&tag=latest)
@@ -158,16 +199,6 @@ Below is an instruction that describes a task. Write a response that appropriate
 
 ### Response:
 ```
-Note: You can use a use a default template for every model in your model path, by creating a corresponding file with the `.tmpl` suffix. For instance, if the model is called `foo.bin`, you can create a sibiling file, `foo.bin.tmpl` which will be used as a default prompt, for instance:
-
-```
-Below is an instruction that describes a task. Write a response that appropriately completes the request.
-
-### Instruction:
-{{.Input}}
-
-### Response:
-```
 
 ## Using other models
 
@@ -229,9 +260,8 @@ In order to build the `llama-cli` container image locally you can use `docker`:
 
 ```
-# build the image as "alpaca-image"
-docker run --privileged -v /var/run/docker.sock:/var/run/docker.sock --rm -t -v "$(pwd)":/workspace -v earthly-tmp:/tmp/earthly:rw earthly/earthly:v0.7.2 +image --IMAGE=alpaca-image
-# run the image
-docker run alpaca-image --instruction "What's an alpaca?"
+# build and run the image
+docker build -t llama-cli .
+docker run llama-cli --instruction "What's an alpaca?"
 ```
 
 Or build the binary with:
diff --git a/docker-compose.yaml b/docker-compose.yaml
new file mode 100644
index 00000000..7a1b29e6
--- /dev/null
+++ b/docker-compose.yaml
@@ -0,0 +1,15 @@
+version: '3.6'
+
+services:
+  api:
+    image: quay.io/go-skynet/llama-cli:latest
+    build: .
+    volumes:
+      - ./models:/models
+    ports:
+      - 8080:8080
+    environment:
+      - MODELS_PATH=/models
+      - CONTEXT_SIZE=700
+      - THREADS=$THREADS
+    command: api
\ No newline at end of file
diff --git a/models/.keep b/models/.keep
new file mode 100644
index 00000000..e69de29b
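Two short sketches to illustrate the moving parts of this change. First, the tag-derivation logic from the workflow's Prepare step, reproduced as a standalone script so it can be tested outside CI. The `GITHUB_SHA` and `GITHUB_REF` values below are placeholders, not real Actions output:

```bash
#!/bin/bash
# Standalone reproduction of the Prepare step's tag logic.
# Placeholder values stand in for what GitHub Actions provides.
GITHUB_SHA=1234abcd5678ef90
GITHUB_REF=refs/tags/0.5.0

DOCKER_IMAGE=quay.io/go-skynet/llama-cli
VERSION=master
SHORTREF=${GITHUB_SHA::8}

# If this is a git tag, use the tag name as a docker tag.
if [[ $GITHUB_REF == refs/tags/* ]]; then
  VERSION=${GITHUB_REF#refs/tags/}
fi
TAGS="${DOCKER_IMAGE}:${VERSION},${DOCKER_IMAGE}:${SHORTREF}"

# A semver-looking tag is assumed to be the newest release and also gets 'latest'.
if [[ $VERSION =~ ^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then
  TAGS="$TAGS,${DOCKER_IMAGE}:latest"
fi

echo "$TAGS"
# quay.io/go-skynet/llama-cli:0.5.0,quay.io/go-skynet/llama-cli:1234abcd,quay.io/go-skynet/llama-cli:latest
```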
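Second, the `docker-compose.yaml` service above expressed as a plain `docker run` invocation, for readers who don't want compose. This is a rough equivalent, not part of the change itself; the flags simply mirror the compose file and may need adjusting for your setup:

```bash
# Build the image locally (compose's `build: .` does the same).
docker build -t llama-cli .

# Mirror the compose service: mount ./models, expose port 8080,
# and pass the same environment variables the compose file sets.
docker run --rm -it \
  -v "$PWD/models:/models" \
  -p 8080:8080 \
  -e MODELS_PATH=/models \
  -e CONTEXT_SIZE=700 \
  -e THREADS=14 \
  llama-cli api
```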