Repository: https://github.com/mudler/LocalAI.git

commit 4413defca5 (parent f359e1c6c4)

    feat: add starcoder (#236)
Makefile:

The GOGPT2_VERSION bump presumably pulls in the go-gpt2.cpp revision that provides the new Starcoder binding.

@@ -6,7 +6,7 @@ BINARY_NAME=local-ai
 GOLLAMA_VERSION?=c03e8adbc45c866e0f6d876af1887d6b01d57eb4
 GPT4ALL_REPO?=https://github.com/go-skynet/gpt4all
 GPT4ALL_VERSION?=3657f9417e17edf378c27d0a9274a1bf41caa914
-GOGPT2_VERSION?=abf038a7d8efa4eefdc7c891f05ad33d4e59e49d
+GOGPT2_VERSION?=6a10572
 RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
 RWKV_VERSION?=07166da10cb2a9e8854395a4f210464dcea76e47
 WHISPER_CPP_VERSION?=bf2449dfae35a46b2cd92ab22661ce81a48d4993
README.md:

@@ -92,6 +92,28 @@ It should also be compatible with StableLM and GPTNeoX ggml models (untested).
 
 Depending on the model you are attempting to run, you might need more RAM or CPU resources. Also check out [here](https://github.com/ggerganov/llama.cpp#memorydisk-requirements) for `ggml` based backends. `rwkv` is less expensive on resources.
 
+
+### Feature support matrix
+
+<details>
+
+| Backend | Compatible models | Completion/Chat endpoint | Audio transcription | Embeddings support | Token stream support | Github | Bindings |
+|-----------------|-----------------------|--------------------------|---------------------|-----------------------------------|----------------------|--------------------------------------------|-------------------------------------------|
+| llama | Vicuna, Alpaca, LLaMa | yes | no | yes (doesn't seem to be accurate) | yes | https://github.com/ggerganov/llama.cpp | https://github.com/go-skynet/go-llama.cpp |
+| gpt4all-llama | Vicuna, Alpaca, LLaMa | yes | no | no | yes | https://github.com/nomic-ai/gpt4all | https://github.com/go-skynet/gpt4all |
+| gpt4all-mpt | MPT | yes | no | no | yes | https://github.com/nomic-ai/gpt4all | https://github.com/go-skynet/gpt4all |
+| gpt4all-j | GPT4ALL-J | yes | no | no | yes | https://github.com/nomic-ai/gpt4all | https://github.com/go-skynet/gpt4all |
+| gpt2 | GPT/NeoX, Cerebras | yes | no | no | no | https://github.com/ggerganov/ggml | https://github.com/go-skynet/go-gpt2.cpp |
+| dolly | Dolly | yes | no | no | no | https://github.com/ggerganov/ggml | https://github.com/go-skynet/go-gpt2.cpp |
+| redpajama | RedPajama | yes | no | no | no | https://github.com/ggerganov/ggml | https://github.com/go-skynet/go-gpt2.cpp |
+| stableLM | StableLM GPT/NeoX | yes | no | no | no | https://github.com/ggerganov/ggml | https://github.com/go-skynet/go-gpt2.cpp |
+| starcoder | Starcoder | yes | no | no | no | https://github.com/ggerganov/ggml | https://github.com/go-skynet/go-gpt2.cpp |
+| bloomz | Bloom | yes | no | no | no | https://github.com/NouamaneTazi/bloomz.cpp | https://github.com/go-skynet/bloomz.cpp |
+| rwkv | RWKV | yes | no | no | yes | https://github.com/saharNooby/rwkv.cpp | https://github.com/donomii/go-rwkv.cpp |
+| bert-embeddings | bert | no | no | yes | no | https://github.com/skeskinen/bert.cpp | https://github.com/go-skynet/go-bert.cpp |
+| whisper | whisper | no | yes | no | no | https://github.com/ggerganov/whisper.cpp | https://github.com/ggerganov/whisper.cpp |
+
+</details>
+
 ## Usage
 
 > `LocalAI` comes by default as a container image. You can check out all the available images with corresponding tags [here](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest).
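With the matrix in place, the new starcoder backend is reachable through the same OpenAI-compatible completion endpoint as the other backends. A minimal sketch using the go-openai client that the test suite below also uses, assuming a LocalAI instance on localhost:8080 and a hypothetical starcoder.bin model file:

    package main

    import (
        "context"
        "fmt"

        openai "github.com/sashabaranov/go-openai"
    )

    func main() {
        // Point the go-openai client at a local instance. The address and the
        // model file name below are illustrative assumptions, not part of
        // this commit.
        config := openai.DefaultConfig("")
        config.BaseURL = "http://localhost:8080/v1"
        client := openai.NewClientWithConfig(config)

        resp, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{
            Model:  "starcoder.bin", // hypothetical ggml model file in the models directory
            Prompt: "def fibonacci(n):",
        })
        if err != nil {
            panic(err)
        }
        fmt.Println(resp.Choices[0].Text)
    }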
API tests:

@@ -79,7 +79,7 @@ var _ = Describe("API test", func() {
 	It("returns errors", func() {
 		_, err := client.CreateCompletion(context.TODO(), openai.CompletionRequest{Model: "foomodel", Prompt: "abcdedfghikl"})
 		Expect(err).To(HaveOccurred())
-		Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error: 9 errors occurred:"))
+		Expect(err.Error()).To(ContainSubstring("error, status code: 500, message: could not load model - all backends returned error: 10 errors occurred:"))
 	})
 
 })
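The expected count rises from 9 to 10 errors because, when a model cannot be matched to a configured backend, the loader greedily tries every registered backend and aggregates one error per failed attempt; starcoder adds a tenth candidate. A sketch of that aggregation pattern, with hypothetical names rather than the actual loader code:

    package model

    import "fmt"

    // tryBackends sketches the greedy loading behind the "N errors occurred"
    // message; it illustrates the pattern and is not LocalAI's actual code.
    // Each registered backend is attempted in turn and every failure is kept,
    // so registering a tenth backend yields a tenth error when all fail.
    func tryBackends(backends []string, load func(string) error) error {
        var errs []error
        for _, b := range backends {
            if err := load(b); err != nil {
                errs = append(errs, fmt.Errorf("%s: %w", b, err))
                continue
            }
            return nil // the first backend that loads the model wins
        }
        return fmt.Errorf("all backends returned error: %d errors occurred: %v", len(errs), errs)
    }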
Model inference:

@@ -199,6 +199,30 @@ func ModelInference(s string, loader *model.ModelLoader, c Config, tokenCallback
 
 			return response, nil
 		}
+	case *gpt2.Starcoder:
+		fn = func() (string, error) {
+			// Generate the prediction using the language model
+			predictOptions := []gpt2.PredictOption{
+				gpt2.SetTemperature(c.Temperature),
+				gpt2.SetTopP(c.TopP),
+				gpt2.SetTopK(c.TopK),
+				gpt2.SetTokens(c.Maxtokens),
+				gpt2.SetThreads(c.Threads),
+			}
+
+			if c.Batch != 0 {
+				predictOptions = append(predictOptions, gpt2.SetBatch(c.Batch))
+			}
+
+			if c.Seed != 0 {
+				predictOptions = append(predictOptions, gpt2.SetSeed(c.Seed))
+			}
+
+			return model.Predict(
+				s,
+				predictOptions...,
+			)
+		}
 	case *gpt2.RedPajama:
 		fn = func() (string, error) {
 			// Generate the prediction using the language model
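The new case follows the pattern of the other go-gpt2.cpp backends: build a slice of functional prediction options from the request config, then call Predict on the loaded model. Outside the server, the binding can be driven the same way; a minimal sketch, assuming a local ggml starcoder file and arbitrary sampling values:

    package main

    import (
        "fmt"

        gpt2 "github.com/go-skynet/go-gpt2.cpp"
    )

    func main() {
        // Load a ggml starcoder model; the file name and the option values
        // below are illustrative assumptions.
        model, err := gpt2.NewStarcoder("starcoder.bin")
        if err != nil {
            panic(err)
        }

        // The same functional options the new backend case wires up from its config.
        out, err := model.Predict("def quicksort(arr):",
            gpt2.SetTemperature(0.2),
            gpt2.SetTopP(0.9),
            gpt2.SetTopK(40),
            gpt2.SetTokens(128),
            gpt2.SetThreads(4),
        )
        if err != nil {
            panic(err)
        }
        fmt.Println(out)
    }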
Model loader:

@@ -20,6 +20,7 @@ const tokenizerSuffix = ".tokenizer.json"
 const (
 	LlamaBackend     = "llama"
 	BloomzBackend    = "bloomz"
+	StarcoderBackend = "starcoder"
 	StableLMBackend  = "stablelm"
 	DollyBackend     = "dolly"
 	RedPajamaBackend = "redpajama"
@@ -45,6 +46,11 @@ var backends []string = []string{
 	DollyBackend,
 	RedPajamaBackend,
 	BertEmbeddingsBackend,
+	StarcoderBackend,
+}
+
+var starCoder = func(modelFile string) (interface{}, error) {
+	return gpt2.NewStarcoder(modelFile)
 }
 
 var redPajama = func(modelFile string) (interface{}, error) {
@@ -110,6 +116,8 @@ func (ml *ModelLoader) BackendLoader(backendString string, modelFile string, lla
 		return ml.LoadModel(modelFile, redPajama)
 	case Gpt2Backend:
 		return ml.LoadModel(modelFile, gpt2LM)
+	case StarcoderBackend:
+		return ml.LoadModel(modelFile, starCoder)
 	case Gpt4AllLlamaBackend:
 		return ml.LoadModel(modelFile, gpt4allLM(gpt4all.SetThreads(int(threads)), gpt4all.SetModelType(gpt4all.LLaMAType)))
 	case Gpt4AllMptBackend:
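Each backend thus contributes a constructor of type func(modelFile string) (interface{}, error) keyed by a backend-name constant, and BackendLoader dispatches on that name. A distilled sketch of the same registration idea, using a hypothetical map-based registry rather than the switch the code actually uses:

    package model

    import (
        "fmt"

        gpt2 "github.com/go-skynet/go-gpt2.cpp"
    )

    // constructor mirrors the shape of the starCoder and redPajama helpers above.
    type constructor func(modelFile string) (interface{}, error)

    // A map-based registry standing in for the switch in BackendLoader.
    var constructors = map[string]constructor{
        "starcoder": func(modelFile string) (interface{}, error) {
            return gpt2.NewStarcoder(modelFile)
        },
    }

    func loadBackend(backend, modelFile string) (interface{}, error) {
        c, ok := constructors[backend]
        if !ok {
            return nil, fmt.Errorf("unknown backend %q", backend)
        }
        return c(modelFile)
    }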