diff --git a/.dockerignore b/.dockerignore
index 2a3a8916..604f0f2c 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1 +1 @@
-models/*.bin
\ No newline at end of file
+models
diff --git a/.env b/.env
index 8cfa7262..bd6d6c47 100644
--- a/.env
+++ b/.env
@@ -1 +1,3 @@
-THREADS=14
\ No newline at end of file
+THREADS=14
+CONTEXT_SIZE=700
+MODELS_PATH=/models
diff --git a/.gitignore b/.gitignore
index 69c6aeda..536866d7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,9 @@
+# go-llama build artifacts
+go-llama
+
+# llama-cli build binary
 llama-cli
-models/*.bin
\ No newline at end of file
+
+# Ignore models
+models/*.bin
+models/ggml-*
\ No newline at end of file
diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 00000000..8ae5ad9d
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,16 @@
+{
+    "version": "0.2.0",
+    "configurations": [
+
+        {
+            "name": "Launch Go",
+            "type": "go",
+            "request": "launch",
+            "mode": "debug",
+            "program": "${workspaceFolder}/main.go",
+            "args": [
+                "api"
+            ]
+        }
+    ]
+}
diff --git a/Makefile b/Makefile
new file mode 100644
index 00000000..ce3827ae
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,52 @@
+GOCMD=go
+GOTEST=$(GOCMD) test
+GOVET=$(GOCMD) vet
+BINARY_NAME=llama-cli
+GOLLAMA_VERSION?=llama.cpp-8b67998
+
+GREEN  := $(shell tput -Txterm setaf 2)
+YELLOW := $(shell tput -Txterm setaf 3)
+WHITE  := $(shell tput -Txterm setaf 7)
+CYAN   := $(shell tput -Txterm setaf 6)
+RESET  := $(shell tput -Txterm sgr0)
+
+.PHONY: all test build vendor
+
+all: help
+
+## Build:
+
+build: prepare ## Build the project
+	$(GOCMD) build -o $(BINARY_NAME) ./
+
+go-llama:
+	git clone -b $(GOLLAMA_VERSION) --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama
+
+prepare: go-llama
+	$(MAKE) -C go-llama libbinding.a
+	$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
+
+clean: ## Remove build related file
+	$(MAKE) -C go-llama clean
+	rm -fr ./go-llama
+	rm -f $(BINARY_NAME)
+
+## Run:
+run: prepare
+	C_INCLUDE_PATH=$(shell pwd)/go-llama.cpp LIBRARY_PATH=$(shell pwd)/go-llama.cpp $(GOCMD) run ./ api
+
+## Test:
+test: ## Run the tests of the project
+	$(GOTEST) -v -race ./... $(OUTPUT_OPTIONS)
+
+## Help:
+help: ## Show this help.
+	@echo ''
+	@echo 'Usage:'
+	@echo '  ${YELLOW}make${RESET} ${GREEN}<target>${RESET}'
+	@echo ''
+	@echo 'Targets:'
+	@awk 'BEGIN {FS = ":.*?## "} { \
+		if (/^[a-zA-Z_-]+:.*?##.*$$/) {printf "    ${YELLOW}%-20s${GREEN}%s${RESET}\n", $$1, $$2} \
+		else if (/^## .*$$/) {printf "  ${CYAN}%s${RESET}\n", substr($$1,4)} \
+		}' $(MAKEFILE_LIST)
\ No newline at end of file
diff --git a/README.md b/README.md
index cb68f78f..07150ed3 100644
--- a/README.md
+++ b/README.md
@@ -19,8 +19,8 @@ cd llama-cli
 # copy your models to models/
 cp your-model.bin models/
 
-# (optional) Edit the .env file to set the number of concurrent threads used for inference
-# echo "THREADS=14" > .env
+# (optional) Edit the .env file to set things like context size and threads
+# vim .env
 
 # start with docker-compose
 docker compose up -d --build
diff --git a/docker-compose.yaml b/docker-compose.yaml
index 7a1b29e6..45531bfa 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -1,15 +1,28 @@
 version: '3.6'
 
 services:
+
+  # chatgpt:
+  #   image: ghcr.io/mckaywrigley/chatbot-ui:main
+  #   # platform: linux/amd64
+  #   ports:
+  #     - 3000:3000
+  #   environment:
+  #     - 'OPENAI_API_KEY=sk-000000000000000'
+  #     - 'OPENAI_API_HOST=http://api:8080'
+
   api:
     image: quay.io/go-skynet/llama-cli:latest
-    build: .
-    volumes:
-      - ./models:/models
+    build:
+      context: .
+      dockerfile: Dockerfile
     ports:
       - 8080:8080
     environment:
-      - MODELS_PATH=/models
-      - CONTEXT_SIZE=700
+      - MODELS_PATH=$MODELS_PATH
+      - CONTEXT_SIZE=$CONTEXT_SIZE
       - THREADS=$THREADS
-    command: api
\ No newline at end of file
+    volumes:
+      - ./models:/models:cached
+    command: api
+    
\ No newline at end of file
diff --git a/go.mod b/go.mod
index 56cf3975..275642d8 100644
--- a/go.mod
+++ b/go.mod
@@ -3,7 +3,7 @@ module github.com/go-skynet/llama-cli
 go 1.19
 
 require (
-	github.com/go-skynet/go-llama.cpp v0.0.0-20230415155049-9260bfd28bc4
+	github.com/go-skynet/go-llama.cpp v0.0.0-20230415213228-bac222030640
 	github.com/gofiber/fiber/v2 v2.42.0
 	github.com/urfave/cli/v2 v2.25.0
 )
diff --git a/go.sum b/go.sum
index b9d811ee..9c8a22b1 100644
--- a/go.sum
+++ b/go.sum
@@ -3,8 +3,8 @@ github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHG
 github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w=
 github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
 github.com/go-logr/logr v1.2.3 h1:2DntVwHkVopvECVRSlL5PSo9eG+cAkDCuckLubN+rq0=
-github.com/go-skynet/go-llama.cpp v0.0.0-20230415155049-9260bfd28bc4 h1:u/y9MlPHOeIj636IQmrf9ptMjjdgCVIcsfb7lMFh39M=
-github.com/go-skynet/go-llama.cpp v0.0.0-20230415155049-9260bfd28bc4/go.mod h1:35AKIEMY+YTKCBJIa/8GZcNGJ2J+nQk1hQiWo/OnEWw=
+github.com/go-skynet/go-llama.cpp v0.0.0-20230415213228-bac222030640 h1:8SSVbQ3yvq7JnfLCLF4USV0PkQnnduUkaNCv/hHDa3E=
+github.com/go-skynet/go-llama.cpp v0.0.0-20230415213228-bac222030640/go.mod h1:35AKIEMY+YTKCBJIa/8GZcNGJ2J+nQk1hQiWo/OnEWw=
 github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
 github.com/gofiber/fiber/v2 v2.42.0 h1:Fnp7ybWvS+sjNQsFvkhf4G8OhXswvB6Vee8hM/LyS+8=
 github.com/gofiber/fiber/v2 v2.42.0/go.mod h1:3+SGNjqMh5VQH5Vz2Wdi43zTIV16ktlFd3x3R6O1Zlc=
diff --git a/main.go b/main.go
index 8e1ee02a..6fc7321a 100644
--- a/main.go
+++ b/main.go
@@ -136,7 +136,7 @@ echo "An Alpaca (Vicugna pacos) is a domesticated species of South American came
 		},
 		&cli.StringFlag{
 			Name:    "default-model",
-			EnvVars: []string{"default-model"},
+			EnvVars: []string{"DEFAULT_MODEL"},
 		},
 		&cli.StringFlag{
 			Name:    "address",
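
For context on the `main.go` hunk: urfave/cli (the flag library declared in `go.mod`) matches `EnvVars` entries against environment variable names exactly, so the lowercase `default-model` entry never matched the conventionally uppercase `DEFAULT_MODEL` set in the container environment. Below is a minimal, self-contained sketch of the corrected binding; it is a hypothetical standalone demo, not code from this repository:

```go
// envvar-demo: hypothetical example, not part of llama-cli.
package main

import (
	"fmt"
	"log"
	"os"

	"github.com/urfave/cli/v2"
)

func main() {
	app := &cli.App{
		Name: "envvar-demo",
		Flags: []cli.Flag{
			&cli.StringFlag{
				Name: "default-model",
				// Must spell the environment variable exactly; the old
				// entry "default-model" only matched a variable literally
				// named "default-model", so DEFAULT_MODEL was ignored.
				EnvVars: []string{"DEFAULT_MODEL"},
			},
		},
		Action: func(ctx *cli.Context) error {
			// Resolves from --default-model first, then DEFAULT_MODEL.
			fmt.Println("default-model =", ctx.String("default-model"))
			return nil
		},
	}
	if err := app.Run(os.Args); err != nil {
		log.Fatal(err)
	}
}
```

With the fix, running `DEFAULT_MODEL=ggml-model ./envvar-demo` prints the model name; before it, the environment variable was silently dropped and only the `--default-model` flag worked.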