diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 00000000..2a3a8916
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1 @@
+models/*.bin
\ No newline at end of file
diff --git a/.env b/.env
new file mode 100644
index 00000000..8cfa7262
--- /dev/null
+++ b/.env
@@ -0,0 +1 @@
+THREADS=14
\ No newline at end of file
diff --git a/.github/workflows/image.yml b/.github/workflows/image.yml
index b9f1ee9f..bf97850f 100644
--- a/.github/workflows/image.yml
+++ b/.github/workflows/image.yml
@@ -12,68 +12,42 @@ jobs:
   docker:
     runs-on: ubuntu-latest
     steps:
-      - name: Release space from worker
-        run: |
-          echo "Listing top largest packages"
-          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
-          head -n 30 <<< "${pkgs}"
-          echo
-          df -h
-          echo
-          sudo apt-get remove -y '^llvm-.*|^libllvm.*' || true
-          sudo apt-get remove --auto-remove android-sdk-platform-tools || true
-          sudo apt-get purge --auto-remove android-sdk-platform-tools || true
-          sudo rm -rf /usr/local/lib/android
-          sudo apt-get remove -y '^dotnet-.*|^aspnetcore-.*' || true
-          sudo rm -rf /usr/share/dotnet
-          sudo apt-get remove -y '^mono-.*' || true
-          sudo apt-get remove -y '^ghc-.*' || true
-          sudo apt-get remove -y '.*jdk.*|.*jre.*' || true
-          sudo apt-get remove -y 'php.*' || true
-          sudo apt-get remove -y hhvm powershell firefox monodoc-manual msbuild || true
-          sudo apt-get remove -y '^google-.*' || true
-          sudo apt-get remove -y azure-cli || true
-          sudo apt-get remove -y '^mongo.*-.*|^postgresql-.*|^mysql-.*|^mssql-.*' || true
-          sudo apt-get remove -y '^gfortran-.*' || true
-          sudo apt-get autoremove -y
-          sudo apt-get clean
-          echo
-          echo "Listing top largest packages"
-          pkgs=$(dpkg-query -Wf '${Installed-Size}\t${Package}\t${Status}\n' | awk '$NF == "installed"{print $1 "\t" $2}' | sort -nr)
-          head -n 30 <<< "${pkgs}"
-          echo
-          sudo rm -rfv build || true
-          df -h
       - name: Checkout
         uses: actions/checkout@v3
+
       - name: Prepare
         id: prep
         run: |
           DOCKER_IMAGE=quay.io/go-skynet/llama-cli
-          VERSION=latest
+          VERSION=master
           SHORTREF=${GITHUB_SHA::8}
+
           # If this is git tag, use the tag name as a docker tag
           if [[ $GITHUB_REF == refs/tags/* ]]; then
             VERSION=${GITHUB_REF#refs/tags/}
           fi
           TAGS="${DOCKER_IMAGE}:${VERSION},${DOCKER_IMAGE}:${SHORTREF}"
+
           # If the VERSION looks like a version number, assume that
           # this is the most recent version of the image and also
           # tag it 'latest'.
           if [[ $VERSION =~ ^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then
             TAGS="$TAGS,${DOCKER_IMAGE}:latest"
           fi
+
           # Set output parameters.
           echo ::set-output name=tags::${TAGS}
           echo ::set-output name=docker_image::${DOCKER_IMAGE}
-          echo ::set-output name=image::${DOCKER_IMAGE}:${VERSION}
+
       - name: Set up QEMU
         uses: docker/setup-qemu-action@master
         with:
          platforms: all
+
       - name: Set up Docker Buildx
         id: buildx
         uses: docker/setup-buildx-action@master
+
       - name: Login to DockerHub
         if: github.event_name != 'pull_request'
         uses: docker/login-action@v2
@@ -81,9 +55,23 @@ jobs:
           registry: quay.io
           username: ${{ secrets.QUAY_USERNAME }}
           password: ${{ secrets.QUAY_PASSWORD }}
-      - uses: earthly/actions/setup-earthly@v1
+      - name: Build and push
+        if: github.event_name != 'pull_request'
+        uses: docker/build-push-action@v4
+        with:
+          builder: ${{ steps.buildx.outputs.name }}
+          context: .
+          file: ./Dockerfile
+          platforms: linux/amd64,linux/arm64,linux/arm
+          push: true
+          tags: ${{ steps.prep.outputs.tags }}
       - name: Build
-        run: |
-          earthly config "global.conversion_parallelism" "1"
-          earthly config "global.buildkit_max_parallelism" "1"
-          earthly --push +image-all --IMAGE=${{ steps.prep.outputs.image }}
+        if: github.event_name == 'pull_request'
+        uses: docker/build-push-action@v4
+        with:
+          builder: ${{ steps.buildx.outputs.name }}
+          context: .
+          file: ./Dockerfile
+          platforms: linux/amd64
+          push: false
+          tags: ${{ steps.prep.outputs.tags }}
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 1666f7b5..69c6aeda 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
-llama-cli
\ No newline at end of file
+llama-cli
+models/*.bin
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 00000000..3b284a99
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,19 @@
+ARG GO_VERSION=1.20
+ARG DEBIAN_VERSION=11
+
+FROM golang:$GO_VERSION as builder
+
+WORKDIR /build
+RUN git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp
+RUN cd go-llama.cpp && make libbinding.a
+COPY go.mod ./
+COPY go.sum ./
+RUN go mod download
+RUN apt-get update
+COPY . .
+RUN go mod edit -replace github.com/go-skynet/go-llama.cpp=/build/go-llama.cpp
+RUN C_INCLUDE_PATH=/build/go-llama.cpp LIBRARY_PATH=/build/go-llama.cpp go build -o llama-cli ./
+
+FROM debian:$DEBIAN_VERSION
+COPY --from=builder /build/llama-cli /usr/bin/llama-cli
+ENTRYPOINT [ "/usr/bin/llama-cli" ]
\ No newline at end of file
diff --git a/Earthfile b/Earthfile
index 1a5a43da..6625c3ef 100644
--- a/Earthfile
+++ b/Earthfile
@@ -1,32 +1,5 @@
 VERSION 0.7
 
-go-deps:
-    ARG GO_VERSION=1.20
-    FROM golang:$GO_VERSION
-    WORKDIR /build
-    COPY go.mod ./
-    COPY go.sum ./
-    RUN go mod download
-    RUN apt-get update
-    SAVE ARTIFACT go.mod AS LOCAL go.mod
-    SAVE ARTIFACT go.sum AS LOCAL go.sum
-
 build:
-    FROM +go-deps
-    WORKDIR /build
-    RUN git clone --recurse-submodules https://github.com/go-skynet/go-llama.cpp
-    RUN cd go-llama.cpp && make libbinding.a
-    COPY . .
-    RUN go mod edit -replace github.com/go-skynet/go-llama.cpp=/build/go-llama.cpp
-    RUN C_INCLUDE_PATH=$GOPATH/src/github.com/go-skynet/go-llama.cpp LIBRARY_PATH=$GOPATH/src/github.com/go-skynet/go-llama.cpp go build -o llama-cli ./
-    SAVE ARTIFACT llama-cli AS LOCAL llama-cli
-
-image:
-    FROM +go-deps
-    ARG IMAGE=alpaca-cli-nomodel
-    COPY +build/llama-cli /llama-cli
-    ENTRYPOINT [ "/llama-cli" ]
-    SAVE IMAGE --push $IMAGE
-
-image-all:
-    BUILD --platform=linux/amd64 --platform=linux/arm64 +image
+    FROM DOCKERFILE -f Dockerfile .
+    SAVE ARTIFACT /usr/bin/llama-cli AS LOCAL llama-cli
diff --git a/README.md b/README.md
index c7b0f18e..354aac7e 100644
--- a/README.md
+++ b/README.md
@@ -7,6 +7,47 @@ It is compatible with the models supported by `llama.cpp`. You might need to con
 
 `llama-cli` doesn't shell-out, it uses https://github.com/go-skynet/go-llama.cpp, which is a golang binding of [llama.cpp](https://github.com/ggerganov/llama.cpp).
 
+## Usage
+
+You can use `docker-compose`:
+
+```bash
+
+git clone https://github.com/go-skynet/llama-cli
+cd llama-cli
+
+# copy your models to models/
+cp your-model.bin models/
+
+# (optional) Edit the .env file to set the number of concurrent threads used for inference
+# echo "THREADS=14" > .env
+
+# start with docker-compose
+docker compose up -d --build
+
+# Now the API is accessible at localhost:8080
+curl http://localhost:8080/v1/models
+# {"object":"list","data":[{"id":"your-model.bin","object":"model"}]}
+curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
+     "model": "your-model.bin",
+     "prompt": "A long time ago in a galaxy far, far away",
+     "temperature": 0.7
+   }'
+
+
+```
+
+Note: You can use a default template for every model in your model path by creating a corresponding file with the `.tmpl` suffix next to your model. For instance, if the model is called `foo.bin`, you can create a sibling file, `foo.bin.tmpl`, which will be used as the default prompt. For instance, this can be used with alpaca:
+
+```
+Below is an instruction that describes a task. Write a response that appropriately completes the request.
+
+### Instruction:
+{{.Input}}
+
+### Response:
+```
+
 ## Container images
 
 `llama-cli` comes by default as a container image. You can check out all the available images with corresponding tags [here](https://quay.io/repository/go-skynet/llama-cli?tab=tags&tag=latest)
@@ -158,16 +199,6 @@ Below is an instruction that describes a task. Write a response that appropriate
 
 ### Response:
 ```
-Note: You can use a use a default template for every model in your model path, by creating a corresponding file with the `.tmpl` suffix. For instance, if the model is called `foo.bin`, you can create a sibiling file, `foo.bin.tmpl` which will be used as a default prompt, for instance:
-
-```
-Below is an instruction that describes a task. Write a response that appropriately completes the request.
-
-### Instruction:
-{{.Input}}
-
-### Response:
-```
 
 ## Using other models
 
@@ -229,9 +260,8 @@ In order to build the `llama-cli` container image locally you can use `docker`:
 
 ```
-# build the image as "alpaca-image"
-docker run --privileged -v /var/run/docker.sock:/var/run/docker.sock --rm -t -v "$(pwd)":/workspace -v earthly-tmp:/tmp/earthly:rw earthly/earthly:v0.7.2 +image --IMAGE=alpaca-image
-# run the image
-docker run alpaca-image --instruction "What's an alpaca?"
+# build and run the image
+docker build -t llama-cli .
+docker run llama-cli --instruction "What's an alpaca?"
 ```
 
 Or build the binary with:
diff --git a/docker-compose.yaml b/docker-compose.yaml
new file mode 100644
index 00000000..7a1b29e6
--- /dev/null
+++ b/docker-compose.yaml
@@ -0,0 +1,15 @@
+version: '3.6'
+
+services:
+  api:
+    image: quay.io/go-skynet/llama-cli:latest
+    build: .
+    volumes:
+      - ./models:/models
+    ports:
+      - 8080:8080
+    environment:
+      - MODELS_PATH=/models
+      - CONTEXT_SIZE=700
+      - THREADS=$THREADS
+    command: api
\ No newline at end of file
diff --git a/models/.keep b/models/.keep
new file mode 100644
index 00000000..e69de29b
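Two short sketches to illustrate the moving parts of this change. First, the tag-derivation logic from the workflow's Prepare step, reproduced as a standalone script so it can be tested outside CI. The `GITHUB_SHA` and `GITHUB_REF` values below are placeholders, not real Actions output:

```bash
#!/bin/bash
# Standalone reproduction of the Prepare step's tag logic.
# Placeholder values stand in for what GitHub Actions provides.
GITHUB_SHA=1234abcd5678ef90
GITHUB_REF=refs/tags/0.5.0

DOCKER_IMAGE=quay.io/go-skynet/llama-cli
VERSION=master
SHORTREF=${GITHUB_SHA::8}

# If this is a git tag, use the tag name as a docker tag.
if [[ $GITHUB_REF == refs/tags/* ]]; then
  VERSION=${GITHUB_REF#refs/tags/}
fi
TAGS="${DOCKER_IMAGE}:${VERSION},${DOCKER_IMAGE}:${SHORTREF}"

# A semver-looking tag is assumed to be the newest release and also gets 'latest'.
if [[ $VERSION =~ ^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then
  TAGS="$TAGS,${DOCKER_IMAGE}:latest"
fi

echo "$TAGS"
# quay.io/go-skynet/llama-cli:0.5.0,quay.io/go-skynet/llama-cli:1234abcd,quay.io/go-skynet/llama-cli:latest
```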
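Second, the `docker-compose.yaml` service above expressed as a plain `docker run` invocation, for readers who don't want compose. This is a rough equivalent, not part of the change itself; the flags simply mirror the compose file and may need adjusting for your setup:

```bash
# Build the image locally (compose's `build: .` does the same).
docker build -t llama-cli .

# Mirror the compose service: mount ./models, expose port 8080,
# and pass the same environment variables the compose file sets.
docker run --rm -it \
  -v "$PWD/models:/models" \
  -p 8080:8080 \
  -e MODELS_PATH=/models \
  -e CONTEXT_SIZE=700 \
  -e THREADS=14 \
  llama-cli api
```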