# LocalAI/embedded/models/transformers-tinyllama.yaml

# Chat model definition: TinyLlama 1.1B Chat v1.0 loaded through the
# `transformers` backend as an AutoModelForCausalLM.
name: tinyllama-chat
backend: transformers
type: AutoModelForCausalLM

# Default generation parameters. The example request under `usage` passes
# its own `temperature` (0.7).
parameters:
  model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
  temperature: 0.2
  top_k: 40
  seed: -1
  top_p: 0.95
  # NOTE(review): the model's published config lists a 2048-token context;
  # confirm that 4096 is intended here.
  max_tokens: 4096

# Prompt templates (Go template syntax).
# TinyLlama-1.1B-Chat-v1.0 is documented with the Zephyr-style chat format:
# a `<|role|>` header line, the message text, then `</s>` closing the turn.
# It does not use the ChatML `<|im_start|>` / `<|im_end|>` markers.
template:
  # Rendered once per message; unknown role names yield an empty role tag.
  chat_message: |
    <|{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}|>
    {{if .Content}}{{.Content}}{{end}}</s>
  # Wraps the rendered conversation and opens the assistant turn.
  chat: |
    {{.Input}}
    <|assistant|>
  # Plain completion: the input is passed through unchanged.
  completion: |
    {{.Input}}

# Stop generating at the end-of-turn marker used by the chat template.
stopwords:
- </s>

usage: |
  curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
    "model": "tinyllama-chat",
    "messages": [{"role": "user", "content": "Say this is a test!"}],
    "temperature": 0.7
  }'
transformers: correctly load automodels (#1643)
* backends(transformers): use AutoModel with LLM types
* examples: animagine-xl
* Add codellama examples
2024-01-25 23:13:21 +00:00
			`name: tinyllama-chat`
			`backend: transformers`
			`type: AutoModelForCausalLM`

			`parameters:`
			`model: TinyLlama/TinyLlama-1.1B-Chat-v1.0`
			`temperature: 0.2`
			`top_k: 40`
			`seed: -1`
			`top_p: 0.95`
			`max_tokens: 4096`

			`template:`
			`chat_message: \|`
			`<\|im_start\|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}`
			`{{if .Content}}{{.Content}}{{end}}<\|im_end\|>`
			`chat: \|`
			`{{.Input}}`
			`<\|im_start\|>assistant`

			`completion: \|`
			`{{.Input}}`

			`stopwords:`
			`- <\|im_end\|>`

			`usage: \|`
			`curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{`
			`"model": "tinyllama-chat",`
			`"messages": [{"role": "user", "content": "Say this is a test!"}],`
			`"temperature": 0.7`
			`}'`