# LocalAI/embedded/models/transformers-tinyllama.yaml

# Model identity. `name` is the value clients send in the "model" field of a
# request (see the usage example in this file).
name: "tinyllama-chat"
# Backend that serves this model.
backend: "transformers"
# NOTE(review): presumably the Hugging Face Auto* class the backend uses to
# load the checkpoint -- confirm against the backend implementation.
type: "AutoModelForCausalLM"

# Model reference and default generation settings.
parameters:
  # NOTE(review): looks like a Hugging Face Hub repository id -- confirm how
  # the backend resolves it.
  model: TinyLlama/TinyLlama-1.1B-Chat-v1.0

  # Sampling defaults. The usage example in this file sends its own
  # "temperature", so these presumably apply only when a request omits them.
  temperature: 0.2
  top_p: 0.95
  top_k: 40
  # NOTE(review): -1 presumably means "pick a random seed" -- confirm.
  seed: -1

  # NOTE(review): 4096 appears to exceed TinyLlama's 2048-token context
  # window -- confirm whether this limit is intentional.
  max_tokens: 4096

# Prompt templates (Go text/template syntax).
# TinyLlama-1.1B-Chat-v1.0 was fine-tuned on the Zephyr chat format: a
# "<|role|>" header line, the message text, then the "</s>" end-of-sequence
# token. It does not use the ChatML markers "<|im_start|>" / "<|im_end|>",
# so the templates and the stop word below follow the Zephyr layout.
template:
  # Formats one message from .RoleName and .Content. Only the system, user
  # and assistant roles yield a role name; any other role renders an empty
  # header ("<||>").
  chat_message: |
    <|{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}|>
    {{if .Content}}{{.Content}}{{end}}</s>
  # Emits .Input, then opens the assistant turn so the model generates
  # the reply.
  chat: |
    {{.Input}}
    <|assistant|>

  # Plain completion: .Input is passed through unchanged.
  completion: |
    {{.Input}}

# Stop generation at the end-of-turn token used by the templates above.
stopwords:
- '</s>'

# Example request: posts a single user message to the local
# /v1/chat/completions endpoint, addressing this model by its `name`.
# The request sets its own "temperature" (0.7).
usage: |
  curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
    "model": "tinyllama-chat",
    "messages": [{"role": "user", "content": "Say this is a test!"}],
    "temperature": 0.7
  }'