2023-04-11 22:02:39 +00:00
package model
2023-04-07 09:30:59 +00:00
import (
2023-07-14 23:19:43 +00:00
"context"
2023-04-07 09:30:59 +00:00
"fmt"
"os"
"path/filepath"
2023-04-10 10:02:40 +00:00
"strings"
2023-04-07 09:30:59 +00:00
"sync"
2024-04-19 02:40:18 +00:00
"github.com/go-skynet/LocalAI/pkg/templates"
2024-04-18 20:43:12 +00:00
"github.com/go-skynet/LocalAI/pkg/functions"
2023-07-14 23:19:43 +00:00
"github.com/go-skynet/LocalAI/pkg/grpc"
2024-04-19 02:40:18 +00:00
"github.com/go-skynet/LocalAI/pkg/utils"
2023-07-14 23:19:43 +00:00
process "github.com/mudler/go-processmanager"
2023-05-10 23:12:58 +00:00
"github.com/rs/zerolog/log"
2023-04-07 09:30:59 +00:00
)
2023-07-22 15:31:39 +00:00
// Rather than pass an interface{} to the prompt template:
// These are the definitions of all possible variables LocalAI will currently populate for use in a prompt template file
// Please note: Not all of these are populated on every endpoint - your template should either be tested for each endpoint you map it to, or tolerant of zero values.
type PromptTemplateData struct {
2023-08-02 22:19:55 +00:00
SystemPrompt string
SuppressSystemPrompt bool // used by chat specifically to indicate that SystemPrompt above should be _ignored_
Input string
Instruction string
2024-04-18 20:43:12 +00:00
Functions [ ] functions . Function
2023-08-02 22:19:55 +00:00
MessageIndex int
2023-07-22 15:31:39 +00:00
}
// ChatMessageTemplateData is the set of variables handed to a chat-message
// template (see EvaluateTemplateForChatMessage).
//
// TODO: Ask mudler about FunctionCall stuff being useful at the message level?
type ChatMessageTemplateData struct {
	SystemPrompt string
	Role         string
	RoleName     string
	FunctionName string
	Content      string
	MessageIndex int
	Function     bool
	FunctionCall interface{}
	LastMessage  bool
}
// new idea: what if we declare a struct of these here, and use a loop to check?
// TODO: Split ModelLoader and TemplateLoader? Just to keep things more organized. Left together to share a mutex until I look into that. Would split if we separate directories for .bin/.yaml and .tmpl
2023-04-07 09:30:59 +00:00
type ModelLoader struct {
2023-04-27 04:18:18 +00:00
ModelPath string
2023-04-20 17:33:36 +00:00
mu sync . Mutex
2023-05-10 13:20:21 +00:00
// TODO: this needs generics
2024-01-23 07:56:36 +00:00
grpcClients map [ string ] grpc . Backend
2023-11-16 07:20:05 +00:00
models map [ string ] ModelAddress
2023-07-22 15:31:39 +00:00
grpcProcesses map [ string ] * process . Process
2024-04-19 02:40:18 +00:00
templates * templates . TemplateCache
2023-11-26 17:36:23 +00:00
wd * WatchDog
2023-04-07 09:30:59 +00:00
}
2023-11-16 07:20:05 +00:00
type ModelAddress string
2024-01-23 07:56:36 +00:00
func ( m ModelAddress ) GRPC ( parallel bool , wd * WatchDog ) grpc . Backend {
2023-11-26 17:36:23 +00:00
enableWD := false
if wd != nil {
enableWD = true
}
return grpc . NewClient ( string ( m ) , parallel , wd , enableWD )
2023-11-16 07:20:05 +00:00
}
2023-04-07 09:30:59 +00:00
func NewModelLoader ( modelPath string ) * ModelLoader {
2023-07-22 15:31:39 +00:00
nml := & ModelLoader {
ModelPath : modelPath ,
2024-01-23 07:56:36 +00:00
grpcClients : make ( map [ string ] grpc . Backend ) ,
2023-11-16 07:20:05 +00:00
models : make ( map [ string ] ModelAddress ) ,
2024-04-19 02:40:18 +00:00
templates : templates . NewTemplateCache ( modelPath ) ,
2023-07-22 15:31:39 +00:00
grpcProcesses : make ( map [ string ] * process . Process ) ,
2023-04-20 22:06:55 +00:00
}
2023-11-26 17:36:23 +00:00
2023-07-22 15:31:39 +00:00
return nml
2023-04-07 09:30:59 +00:00
}
2023-11-26 17:36:23 +00:00
// SetWatchDog stores wd on the loader; it is later handed to
// ModelAddress.GRPC when CheckIsLoaded has to build a client.
// The assignment is performed without taking the loader mutex.
func (ml *ModelLoader) SetWatchDog(wd *WatchDog) {
	ml.wd = wd
}
2023-04-20 16:33:02 +00:00
func ( ml * ModelLoader ) ExistsInModelPath ( s string ) bool {
2024-04-19 02:40:18 +00:00
return utils . ExistsInPath ( ml . ModelPath , s )
2023-04-20 16:33:02 +00:00
}
2023-04-10 10:02:40 +00:00
func ( ml * ModelLoader ) ListModels ( ) ( [ ] string , error ) {
2023-07-22 15:31:39 +00:00
files , err := os . ReadDir ( ml . ModelPath )
2023-04-10 10:02:40 +00:00
if err != nil {
return [ ] string { } , err
}
models := [ ] string { }
for _ , file := range files {
2023-07-31 17:14:32 +00:00
// Skip templates, YAML, .keep, .json, and .DS_Store files - TODO: as this list grows, is there a more efficient method?
if strings . HasSuffix ( file . Name ( ) , ".tmpl" ) || strings . HasSuffix ( file . Name ( ) , ".keep" ) || strings . HasSuffix ( file . Name ( ) , ".yaml" ) || strings . HasSuffix ( file . Name ( ) , ".yml" ) || strings . HasSuffix ( file . Name ( ) , ".json" ) || strings . HasSuffix ( file . Name ( ) , ".DS_Store" ) {
2023-04-20 16:33:02 +00:00
continue
2023-04-10 10:02:40 +00:00
}
2023-04-20 16:33:02 +00:00
models = append ( models , file . Name ( ) )
2023-04-10 10:02:40 +00:00
}
return models , nil
}
2023-11-16 07:20:05 +00:00
func ( ml * ModelLoader ) LoadModel ( modelName string , loader func ( string , string ) ( ModelAddress , error ) ) ( ModelAddress , error ) {
2023-05-10 23:12:58 +00:00
ml . mu . Lock ( )
defer ml . mu . Unlock ( )
// Check if we already have a loaded model
2023-11-16 07:20:05 +00:00
if model := ml . CheckIsLoaded ( modelName ) ; model != "" {
2023-07-14 23:19:43 +00:00
return model , nil
2023-04-19 15:10:29 +00:00
}
2023-04-20 16:33:02 +00:00
2023-04-19 15:10:29 +00:00
// Load the model and keep it in memory for later use
2023-04-27 04:18:18 +00:00
modelFile := filepath . Join ( ml . ModelPath , modelName )
2023-04-20 16:33:02 +00:00
log . Debug ( ) . Msgf ( "Loading model in memory from file: %s" , modelFile )
2023-08-07 20:39:10 +00:00
model , err := loader ( modelName , modelFile )
2023-04-19 15:10:29 +00:00
if err != nil {
2023-11-16 07:20:05 +00:00
return "" , err
2023-04-19 15:10:29 +00:00
}
2023-07-22 15:31:39 +00:00
// TODO: Add a helper method to iterate all prompt templates associated with a config if and only if it's YAML?
// Minor perf loss here until this is fixed, but we initialize on first request
// // If there is a prompt template, load it
// if err := ml.loadTemplateIfExists(modelName); err != nil {
// return nil, err
// }
2023-04-08 08:46:51 +00:00
2023-04-20 16:33:02 +00:00
ml . models [ modelName ] = model
2023-05-11 14:34:16 +00:00
return model , nil
2023-05-05 09:20:06 +00:00
}
2023-07-14 23:19:43 +00:00
2023-08-23 16:38:37 +00:00
func ( ml * ModelLoader ) ShutdownModel ( modelName string ) error {
ml . mu . Lock ( )
defer ml . mu . Unlock ( )
2023-11-26 17:36:23 +00:00
2024-03-23 15:19:57 +00:00
return ml . stopModel ( modelName )
2023-11-26 17:36:23 +00:00
}
2024-03-23 15:19:57 +00:00
func ( ml * ModelLoader ) stopModel ( modelName string ) error {
2023-11-26 17:36:23 +00:00
defer ml . deleteProcess ( modelName )
2023-08-23 16:38:37 +00:00
if _ , ok := ml . models [ modelName ] ; ! ok {
return fmt . Errorf ( "model %s not found" , modelName )
}
2023-11-26 17:36:23 +00:00
return nil
//return ml.deleteProcess(modelName)
2023-08-23 16:38:37 +00:00
}
2023-11-16 07:20:05 +00:00
func ( ml * ModelLoader ) CheckIsLoaded ( s string ) ModelAddress {
2024-01-23 07:56:36 +00:00
var client grpc . Backend
2023-07-14 23:19:43 +00:00
if m , ok := ml . models [ s ] ; ok {
log . Debug ( ) . Msgf ( "Model already loaded in memory: %s" , s )
2023-11-16 21:20:16 +00:00
if c , ok := ml . grpcClients [ s ] ; ok {
client = c
} else {
2023-11-26 17:36:23 +00:00
client = m . GRPC ( false , ml . wd )
2023-11-16 21:20:16 +00:00
}
2024-01-07 23:37:02 +00:00
alive , err := client . HealthCheck ( context . Background ( ) )
if ! alive {
log . Warn ( ) . Msgf ( "GRPC Model not responding: %s" , err . Error ( ) )
log . Warn ( ) . Msgf ( "Deleting the process in order to recreate it" )
2023-07-14 23:19:43 +00:00
if ! ml . grpcProcesses [ s ] . IsAlive ( ) {
2023-07-22 15:31:39 +00:00
log . Debug ( ) . Msgf ( "GRPC Process is not responding: %s" , s )
2023-07-14 23:19:43 +00:00
// stop and delete the process, this forces to re-load the model and re-create again the service
2023-08-18 23:49:33 +00:00
ml . deleteProcess ( s )
2023-11-16 07:20:05 +00:00
return ""
2023-07-14 23:19:43 +00:00
}
}
return m
}
2023-11-16 07:20:05 +00:00
return ""
2023-07-14 23:19:43 +00:00
}
2023-07-22 15:31:39 +00:00
2024-04-19 02:40:18 +00:00
// Template kinds understood by the loader's template cache. The values are
// consecutive, starting at zero, in declaration order.
const (
	// ChatPromptTemplate is the first kind (value 0).
	ChatPromptTemplate templates.TemplateType = iota
	// ChatMessageTemplate is rejected by EvaluateTemplateForPrompt and used by
	// EvaluateTemplateForChatMessage.
	ChatMessageTemplate
	// CompletionPromptTemplate follows ChatMessageTemplate.
	CompletionPromptTemplate
	// EditPromptTemplate follows CompletionPromptTemplate.
	EditPromptTemplate
	// FunctionsPromptTemplate is the last kind declared here.
	FunctionsPromptTemplate
)
func ( ml * ModelLoader ) EvaluateTemplateForPrompt ( templateType templates . TemplateType , templateName string , in PromptTemplateData ) ( string , error ) {
2023-07-22 15:31:39 +00:00
// TODO: should this check be improved?
if templateType == ChatMessageTemplate {
return "" , fmt . Errorf ( "invalid templateType: ChatMessage" )
}
2024-04-19 02:40:18 +00:00
return ml . templates . EvaluateTemplate ( templateType , templateName , in )
2023-07-22 15:31:39 +00:00
}
func ( ml * ModelLoader ) EvaluateTemplateForChatMessage ( templateName string , messageData ChatMessageTemplateData ) ( string , error ) {
2024-04-19 02:40:18 +00:00
return ml . templates . EvaluateTemplate ( ChatMessageTemplate , templateName , messageData )
2023-07-22 15:31:39 +00:00
}