diff --git a/.dockerignore b/.dockerignore
index 2a3a8916..604f0f2c 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1 +1 @@
-models/*.bin
\ No newline at end of file
+models
diff --git a/.env b/.env
index 8cfa7262..bd6d6c47 100644
--- a/.env
+++ b/.env
@@ -1 +1,3 @@
-THREADS=14
\ No newline at end of file
+THREADS=14
+CONTEXT_SIZE=700
+MODELS_PATH=/models
diff --git a/.gitignore b/.gitignore
index 69c6aeda..536866d7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,9 @@
+# go-llama build artifacts
+go-llama
+
+# llama-cli build binary
 llama-cli
-models/*.bin
\ No newline at end of file
+
+# Ignore models
+models/*.bin
+models/ggml-*
\ No newline at end of file
diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 00000000..8ae5ad9d
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,16 @@
+{
+    "version": "0.2.0",
+    "configurations": [
+
+        {
+            "name": "Launch Go",
+            "type": "go",
+            "request": "launch",
+            "mode": "debug",
+            "program": "${workspaceFolder}/main.go",
+            "args": [
+                "api"
+            ]
+        }
+    ]
+}
diff --git a/Makefile b/Makefile
new file mode 100644
index 00000000..ce3827ae
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,52 @@
+GOCMD=go
+GOTEST=$(GOCMD) test
+GOVET=$(GOCMD) vet
+BINARY_NAME=llama-cli
+GOLLAMA_VERSION?=llama.cpp-8b67998
+
+GREEN  := $(shell tput -Txterm setaf 2)
+YELLOW := $(shell tput -Txterm setaf 3)
+WHITE  := $(shell tput -Txterm setaf 7)
+CYAN   := $(shell tput -Txterm setaf 6)
+RESET  := $(shell tput -Txterm sgr0)
+
+.PHONY: all test build vendor
+
+all: help
+
+## Build:
+
+build: prepare ## Build the project
+	$(GOCMD) build -o $(BINARY_NAME) ./
+
+go-llama:
+	git clone -b $(GOLLAMA_VERSION) --recurse-submodules https://github.com/go-skynet/go-llama.cpp go-llama
+
+prepare: go-llama
+	$(MAKE) -C go-llama libbinding.a
+	$(GOCMD) mod edit -replace github.com/go-skynet/go-llama.cpp=$(shell pwd)/go-llama
+
+clean: ## Remove build related file
+	$(MAKE) -C go-llama clean
+	rm -fr ./go-llama
+	rm -f $(BINARY_NAME)
+
+## Run:
+run: prepare
+	C_INCLUDE_PATH=$(shell pwd)/go-llama.cpp LIBRARY_PATH=$(shell pwd)/go-llama.cpp $(GOCMD) run ./ api
+
+## Test:
+test: ## Run the tests of the project
+	$(GOTEST) -v -race ./... $(OUTPUT_OPTIONS)
+
+## Help:
+help: ## Show this help.
+	@echo ''
+	@echo 'Usage:'
+	@echo '  ${YELLOW}make${RESET} ${GREEN}<target>${RESET}'
+	@echo ''
+	@echo 'Targets:'
+	@awk 'BEGIN {FS = ":.*?## "} { \
+		if (/^[a-zA-Z_-]+:.*?##.*$$/) {printf "    ${YELLOW}%-20s${GREEN}%s${RESET}\n", $$1, $$2} \
+		else if (/^## .*$$/) {printf "  ${CYAN}%s${RESET}\n", substr($$1,4)} \
+		}' $(MAKEFILE_LIST)
\ No newline at end of file
diff --git a/README.md b/README.md
index cb68f78f..07150ed3 100644
--- a/README.md
+++ b/README.md
@@ -19,8 +19,8 @@ cd llama-cli
 # copy your models to models/
 cp your-model.bin models/
 
-# (optional) Edit the .env file to set the number of concurrent threads used for inference
-# echo "THREADS=14" > .env
+# (optional) Edit the .env file to set things like context size and threads
+# vim .env
 
 # start with docker-compose
 docker compose up -d --build
diff --git a/docker-compose.yaml b/docker-compose.yaml
index 7a1b29e6..45531bfa 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -1,15 +1,28 @@
 version: '3.6'
 
 services:
+
+  # chatgpt:
+  #   image: ghcr.io/mckaywrigley/chatbot-ui:main
+  #   # platform: linux/amd64
+  #   ports:
+  #     - 3000:3000
+  #   environment:
+  #     - 'OPENAI_API_KEY=sk-000000000000000'
+  #     - 'OPENAI_API_HOST=http://api:8080'
+
   api:
     image: quay.io/go-skynet/llama-cli:latest
-    build: .
-    volumes:
-      - ./models:/models
+    build:
+      context: .
+      dockerfile: Dockerfile
     ports:
       - 8080:8080
     environment:
-      - MODELS_PATH=/models
-      - CONTEXT_SIZE=700
+      - MODELS_PATH=$MODELS_PATH
+      - CONTEXT_SIZE=$CONTEXT_SIZE
       - THREADS=$THREADS
-    command: api
\ No newline at end of file
+    volumes:
+      - ./models:/models:cached
+    command: api
+    
\ No newline at end of file
diff --git a/go.mod b/go.mod
index 56cf3975..275642d8 100644
--- a/go.mod
+++ b/go.mod
@@ -3,7 +3,7 @@ module github.com/go-skynet/llama-cli
 go 1.19
 
 require (
-	github.com/go-skynet/go-llama.cpp v0.0.0-20230415155049-9260bfd28bc4
+	github.com/go-skynet/go-llama.cpp v0.0.0-20230415213228-bac222030640
 	github.com/gofiber/fiber/v2 v2.42.0
 	github.com/urfave/cli/v2 v2.25.0
 )
diff --git a/go.sum b/go.sum
index b9d811ee..9c8a22b1 100644
--- a/go.sum
+++ b/go.sum
@@ -3,8 +3,8 @@ github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHG
 github.com/cpuguy83/go-md2man/v2 v2.0.2 h1:p1EgwI/C7NhT0JmVkwCD2ZBK8j4aeHQX2pMHHBfMQ6w=
 github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
 github.com/go-logr/logr v1.2.3 h1:2DntVwHkVopvECVRSlL5PSo9eG+cAkDCuckLubN+rq0=
-github.com/go-skynet/go-llama.cpp v0.0.0-20230415155049-9260bfd28bc4 h1:u/y9MlPHOeIj636IQmrf9ptMjjdgCVIcsfb7lMFh39M=
-github.com/go-skynet/go-llama.cpp v0.0.0-20230415155049-9260bfd28bc4/go.mod h1:35AKIEMY+YTKCBJIa/8GZcNGJ2J+nQk1hQiWo/OnEWw=
+github.com/go-skynet/go-llama.cpp v0.0.0-20230415213228-bac222030640 h1:8SSVbQ3yvq7JnfLCLF4USV0PkQnnduUkaNCv/hHDa3E=
+github.com/go-skynet/go-llama.cpp v0.0.0-20230415213228-bac222030640/go.mod h1:35AKIEMY+YTKCBJIa/8GZcNGJ2J+nQk1hQiWo/OnEWw=
 github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI=
 github.com/gofiber/fiber/v2 v2.42.0 h1:Fnp7ybWvS+sjNQsFvkhf4G8OhXswvB6Vee8hM/LyS+8=
 github.com/gofiber/fiber/v2 v2.42.0/go.mod h1:3+SGNjqMh5VQH5Vz2Wdi43zTIV16ktlFd3x3R6O1Zlc=
diff --git a/main.go b/main.go
index 8e1ee02a..6fc7321a 100644
--- a/main.go
+++ b/main.go
@@ -136,7 +136,7 @@ echo "An Alpaca (Vicugna pacos) is a domesticated species of South American came
 		},
 		&cli.StringFlag{
 			Name:    "default-model",
-			EnvVars: []string{"default-model"},
+			EnvVars: []string{"DEFAULT_MODEL"},
 		},
 		&cli.StringFlag{
 			Name:    "address",
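
For context on the `main.go` hunk: urfave/cli (the flag library declared in `go.mod`) matches `EnvVars` entries against environment variable names exactly, so the lowercase `default-model` entry never matched the conventionally uppercase `DEFAULT_MODEL` set in the container environment. Below is a minimal, self-contained sketch of the corrected binding; it is a hypothetical standalone demo, not code from this repository:

```go
// envvar-demo: hypothetical example, not part of llama-cli.
package main

import (
	"fmt"
	"log"
	"os"

	"github.com/urfave/cli/v2"
)

func main() {
	app := &cli.App{
		Name: "envvar-demo",
		Flags: []cli.Flag{
			&cli.StringFlag{
				Name: "default-model",
				// Must spell the environment variable exactly; the old
				// entry "default-model" only matched a variable literally
				// named "default-model", so DEFAULT_MODEL was ignored.
				EnvVars: []string{"DEFAULT_MODEL"},
			},
		},
		Action: func(ctx *cli.Context) error {
			// Resolves from --default-model first, then DEFAULT_MODEL.
			fmt.Println("default-model =", ctx.String("default-model"))
			return nil
		},
	}
	if err := app.Run(os.Args); err != nil {
		log.Fatal(err)
	}
}
```

With the fix, running `DEFAULT_MODEL=ggml-model ./envvar-demo` prints the model name; before it, the environment variable was silently dropped and only the `--default-model` flag worked.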