From 3bec467a91071133f8f74e7ce04d997733ed51b9 Mon Sep 17 00:00:00 2001 From: Ettore Di Giacinto Date: Fri, 22 Mar 2024 21:12:48 +0100 Subject: [PATCH] feat(models): add phi-2-chat, llava-1.6, bakllava, cerbero (#1879) --- embedded/models/bakllava.yaml | 40 ++++++++++++++++++++++++++ embedded/models/cerbero.yaml | 24 ++++++++++++++++ embedded/models/llava-1.5.yaml | 33 +++++++++++++++++++++ embedded/models/llava-1.6-mistral.yaml | 33 +++++++++++++++++++++ embedded/models/llava-1.6-vicuna.yaml | 37 ++++++++++++++++++++++++ embedded/models/phi-2-chat.yaml | 25 ++++++++++++++++ 6 files changed, 192 insertions(+) create mode 100644 embedded/models/bakllava.yaml create mode 100644 embedded/models/cerbero.yaml create mode 100644 embedded/models/llava-1.5.yaml create mode 100644 embedded/models/llava-1.6-mistral.yaml create mode 100644 embedded/models/llava-1.6-vicuna.yaml create mode 100644 embedded/models/phi-2-chat.yaml diff --git a/embedded/models/bakllava.yaml b/embedded/models/bakllava.yaml new file mode 100644 index 00000000..52fd9466 --- /dev/null +++ b/embedded/models/bakllava.yaml @@ -0,0 +1,40 @@ +backend: llama-cpp +context_size: 4096 +f16: true + +gpu_layers: 90 +mmap: true +name: bakllava + +roles: + user: "USER:" + assistant: "ASSISTANT:" + system: "SYSTEM:" + +mmproj: bakllava-mmproj.gguf +parameters: + model: bakllava.gguf + temperature: 0.2 + top_k: 40 + top_p: 0.95 + seed: -1 +mirostat: 2 +mirostat_eta: 1.0 +mirostat_tau: 1.0 + +template: + chat: | + A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
+ {{.Input}} + ASSISTANT: + +download_files: +- filename: bakllava.gguf + uri: huggingface://mys/ggml_bakllava-1/ggml-model-q4_k.gguf +- filename: bakllava-mmproj.gguf + uri: huggingface://mys/ggml_bakllava-1/mmproj-model-f16.gguf + +usage: | + curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ + "model": "bakllava", + "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}]}], "temperature": 0.9}' diff --git a/embedded/models/cerbero.yaml b/embedded/models/cerbero.yaml new file mode 100644 index 00000000..8ace4e35 --- /dev/null +++ b/embedded/models/cerbero.yaml @@ -0,0 +1,24 @@ +backend: llama +context_size: 8192 +f16: false +gpu_layers: 90 +name: cerbero +mmap: false +parameters: + model: huggingface://galatolo/cerbero-7b-gguf/ggml-model-Q8_0.gguf + top_k: 80 + temperature: 0.2 + top_p: 0.7 +template: + completion: "{{.Input}}" + chat: "Questa è una conversazione tra un umano ed un assistente AI.\n{{.Input}}\n[|Assistente|] " +roles: + user: "[|Umano|] " + system: "[|Umano|] " + assistant: "[|Assistente|] " + +stopwords: +- "[|Umano|]" + +trimsuffix: +- "\n" \ No newline at end of file diff --git a/embedded/models/llava-1.5.yaml b/embedded/models/llava-1.5.yaml new file mode 100644 index 00000000..3db48524 --- /dev/null +++ b/embedded/models/llava-1.5.yaml @@ -0,0 +1,33 @@ +backend: llama-cpp +context_size: 4096 +f16: true + +gpu_layers: 90 +mmap: true +name: llava-1.5 + +roles: + user: "USER:" + assistant: "ASSISTANT:" + system: "SYSTEM:" + +mmproj: llava-v1.5-7b-mmproj-Q8_0.gguf +parameters: + model: llava-v1.5-7b-Q4_K.gguf + +template: + chat: | + A chat between a curious human and an artificial intelligence assistant.
The assistant gives helpful, detailed, and polite answers to the human's questions. + {{.Input}} + ASSISTANT: + +download_files: +- filename: llava-v1.5-7b-Q4_K.gguf + uri: huggingface://jartine/llava-v1.5-7B-GGUF/llava-v1.5-7b-Q4_K.gguf +- filename: llava-v1.5-7b-mmproj-Q8_0.gguf + uri: huggingface://jartine/llava-v1.5-7B-GGUF/llava-v1.5-7b-mmproj-Q8_0.gguf + +usage: | + curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ + "model": "llava-1.5", + "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}]}], "temperature": 0.9}' diff --git a/embedded/models/llava-1.6-mistral.yaml b/embedded/models/llava-1.6-mistral.yaml new file mode 100644 index 00000000..602ceb62 --- /dev/null +++ b/embedded/models/llava-1.6-mistral.yaml @@ -0,0 +1,33 @@ +backend: llama-cpp +context_size: 4096 +f16: true + +gpu_layers: 90 +mmap: true +name: llava-1.6-mistral + +roles: + user: "USER:" + assistant: "ASSISTANT:" + system: "SYSTEM:" + +mmproj: llava-v1.6-7b-mmproj-f16.gguf +parameters: + model: llava-v1.6-mistral-7b.gguf + +template: + chat: | + A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
+ {{.Input}} + ASSISTANT: + +download_files: +- filename: llava-v1.6-mistral-7b.gguf + uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q6_K.gguf +- filename: llava-v1.6-7b-mmproj-f16.gguf + uri: huggingface://cjpais/llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf + +usage: | + curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ + "model": "llava-1.6-mistral", + "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}]}], "temperature": 0.9}' diff --git a/embedded/models/llava-1.6-vicuna.yaml b/embedded/models/llava-1.6-vicuna.yaml new file mode 100644 index 00000000..cea33e7f --- /dev/null +++ b/embedded/models/llava-1.6-vicuna.yaml @@ -0,0 +1,37 @@ +backend: llama-cpp +context_size: 4096 +f16: true + +gpu_layers: 90 +mmap: true +name: llava-1.6-vicuna + +roles: + user: "USER:" + assistant: "ASSISTANT:" + system: "SYSTEM:" + +mmproj: mmproj-vicuna7b-f16.gguf +parameters: + model: vicuna-7b-q5_k.gguf + temperature: 0.2 + top_k: 40 + top_p: 0.95 + seed: -1 + +template: + chat: | + A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions.
+ {{.Input}} + ASSISTANT: + +download_files: +- filename: vicuna-7b-q5_k.gguf + uri: https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/vicuna-7b-q5_k.gguf +- filename: mmproj-vicuna7b-f16.gguf + uri: https://huggingface.co/cmp-nct/llava-1.6-gguf/resolve/main/mmproj-vicuna7b-f16.gguf + +usage: | + curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ + "model": "llava-1.6-vicuna", + "messages": [{"role": "user", "content": [{"type":"text", "text": "What is in the image?"}, {"type": "image_url", "image_url": {"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" }}]}], "temperature": 0.9}' diff --git a/embedded/models/phi-2-chat.yaml b/embedded/models/phi-2-chat.yaml new file mode 100644 index 00000000..4a3ca7aa --- /dev/null +++ b/embedded/models/phi-2-chat.yaml @@ -0,0 +1,25 @@ +name: phi-2-chat +mmap: true +parameters: + model: huggingface://l3utterfly/phi-2-layla-v1-chatml-gguf/phi-2-layla-v1-chatml-Q8_0.gguf + +template: + chat_message: | + <|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}} + {{if .Content}}{{.Content}}{{end}} + <|im_end|> + chat: | + {{.Input}} + <|im_start|>assistant + completion: | + {{.Input}} +context_size: 4096 +f16: true +stopwords: +- <|im_end|> +- +usage: | + curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ + "model": "phi-2-chat", + "messages": [{"role": "user", "content": "How are you doing?"}], "temperature": 0.1 + }'