2024-02-21 01:21:19 +00:00
package config
2023-07-14 23:19:43 +00:00
import (
2024-04-17 21:33:49 +00:00
"errors"
"fmt"
"io/fs"
"os"
"path/filepath"
"sort"
"strings"
"sync"
2024-03-01 15:19:53 +00:00
"github.com/go-skynet/LocalAI/core/schema"
2024-04-17 21:33:49 +00:00
"github.com/go-skynet/LocalAI/pkg/downloader"
2024-04-18 20:43:12 +00:00
"github.com/go-skynet/LocalAI/pkg/functions"
2024-04-17 21:33:49 +00:00
"github.com/go-skynet/LocalAI/pkg/utils"
"github.com/rs/zerolog/log"
"gopkg.in/yaml.v3"
"github.com/charmbracelet/glamour"
2023-07-14 23:19:43 +00:00
)
2024-04-03 20:25:47 +00:00
// RAND_SEED is the seed value SetDefaults assigns when a configuration does
// not specify one.
// NOTE(review): presumably -1 asks the backend to pick a random seed — confirm
// against the backend implementations.
const RAND_SEED = -1
2024-03-01 15:19:53 +00:00
type BackendConfig struct {
schema . PredictionOptions ` yaml:"parameters" `
Name string ` yaml:"name" `
2023-08-09 06:38:51 +00:00
2024-03-13 09:05:30 +00:00
F16 * bool ` yaml:"f16" `
Threads * int ` yaml:"threads" `
Debug * bool ` yaml:"debug" `
2023-08-09 06:38:51 +00:00
Roles map [ string ] string ` yaml:"roles" `
Embeddings bool ` yaml:"embeddings" `
Backend string ` yaml:"backend" `
TemplateConfig TemplateConfig ` yaml:"template" `
PromptStrings , InputStrings [ ] string ` yaml:"-" `
InputToken [ ] [ ] int ` yaml:"-" `
functionCallString , functionCallNameString string ` yaml:"-" `
2023-07-14 23:19:43 +00:00
2024-04-18 20:43:12 +00:00
FunctionsConfig functions . FunctionsConfig ` yaml:"function" `
2023-07-22 15:31:39 +00:00
2023-08-19 14:15:22 +00:00
FeatureFlag FeatureFlag ` yaml:"feature_flags" ` // Feature Flag registry. We move fast, and features may break on a per model/backend basis. Registry for (usually temporary) flags that indicate aborting something early.
2023-08-09 06:38:51 +00:00
// LLM configs (GPT4ALL, Llama.cpp, ...)
LLMConfig ` yaml:",inline" `
// AutoGPTQ specifics
AutoGPTQ AutoGPTQ ` yaml:"autogptq" `
2023-08-02 22:51:08 +00:00
2023-08-09 06:38:51 +00:00
// Diffusers
Diffusers Diffusers ` yaml:"diffusers" `
2023-12-13 18:20:22 +00:00
Step int ` yaml:"step" `
2023-08-15 23:11:32 +00:00
// GRPC Options
GRPC GRPC ` yaml:"grpc" `
2023-09-04 17:25:23 +00:00
// Vall-e-x
VallE VallE ` yaml:"vall-e" `
2023-12-08 14:45:04 +00:00
// CUDA
// Explicitly enable CUDA or not (some backends might need it)
CUDA bool ` yaml:"cuda" `
2024-01-01 13:39:13 +00:00
DownloadFiles [ ] File ` yaml:"download_files" `
2024-01-07 23:37:02 +00:00
Description string ` yaml:"description" `
Usage string ` yaml:"usage" `
2024-01-01 13:39:13 +00:00
}
// File describes one file listed under a model's "download_files" entry:
// the name it is stored under, its expected SHA256 and the URI it is fetched
// from.
type File struct {
	Filename string ` yaml:"filename" json:"filename" `
	SHA256   string ` yaml:"sha256" json:"sha256" `
	URI      string ` yaml:"uri" json:"uri" `
}
// VallE groups the options read from the "vall-e" section of a model
// configuration.
type VallE struct {
	AudioPath string ` yaml:"audio_path" `
}
2023-08-19 14:15:22 +00:00
// FeatureFlag maps a flag name to an optional boolean; a missing or nil entry
// counts as "not enabled".
type FeatureFlag map[string]*bool

// Enabled reports whether flag s is present, non-nil and set to true.
func (ff FeatureFlag) Enabled(s string) bool {
	// A missing key yields a nil pointer, so one nil check covers both
	// "absent" and "explicitly null".
	if flag := ff[s]; flag != nil {
		return *flag
	}
	return false
}
2023-08-15 23:11:32 +00:00
// GRPC groups the options read from the "grpc" section of a model
// configuration.
// NOTE(review): the unit of AttemptsSleepTime is not visible here — confirm
// against the code that consumes it.
type GRPC struct {
	Attempts          int ` yaml:"attempts" `
	AttemptsSleepTime int ` yaml:"attempts_sleep_time" `
}
// Diffusers groups the options read from the "diffusers" section of a model
// configuration.
type Diffusers struct {
	CUDA             bool    ` yaml:"cuda" `
	PipelineType     string  ` yaml:"pipeline_type" `
	SchedulerType    string  ` yaml:"scheduler_type" `
	EnableParameters string  ` yaml:"enable_parameters" ` // A list of comma separated parameters to specify
	CFGScale         float32 ` yaml:"cfg_scale" `         // Classifier-Free Guidance Scale
	IMG2IMG          bool    ` yaml:"img2img" `           // Image to Image Diffuser
	ClipSkip         int     ` yaml:"clip_skip" `         // Skip every N frames
	ClipModel        string  ` yaml:"clip_model" `        // Clip model to use
	ClipSubFolder    string  ` yaml:"clip_subfolder" `    // Subfolder to use for clip model
	ControlNet       string  ` yaml:"control_net" `
}
// LLMConfig groups the LLM-specific options of a model configuration. It is
// embedded inline in BackendConfig, so its keys sit at the top level of the
// YAML document. Pointer-typed fields are nil when the key is absent, which
// is how SetDefaults recognises options it still has to fill in.
type LLMConfig struct {
	SystemPrompt    string  ` yaml:"system_prompt" `
	TensorSplit     string  ` yaml:"tensor_split" `
	MainGPU         string  ` yaml:"main_gpu" `
	RMSNormEps      float32 ` yaml:"rms_norm_eps" `
	NGQA            int32   ` yaml:"ngqa" `
	PromptCachePath string  ` yaml:"prompt_cache_path" `
	PromptCacheAll  bool    ` yaml:"prompt_cache_all" `
	PromptCacheRO   bool    ` yaml:"prompt_cache_ro" `

	MirostatETA *float64 ` yaml:"mirostat_eta" `
	MirostatTAU *float64 ` yaml:"mirostat_tau" `
	Mirostat    *int     ` yaml:"mirostat" `
	NGPULayers  *int     ` yaml:"gpu_layers" `
	MMap        *bool    ` yaml:"mmap" `
	MMlock      *bool    ` yaml:"mmlock" `
	LowVRAM     *bool    ` yaml:"low_vram" `

	Grammar    string   ` yaml:"grammar" `
	StopWords  []string ` yaml:"stopwords" `
	Cutstrings []string ` yaml:"cutstrings" `
	TrimSpace  []string ` yaml:"trimspace" `
	TrimSuffix []string ` yaml:"trimsuffix" `

	ContextSize *int ` yaml:"context_size" `

	NUMA                 bool    ` yaml:"numa" `
	LoraAdapter          string  ` yaml:"lora_adapter" `
	LoraBase             string  ` yaml:"lora_base" `
	LoraScale            float32 ` yaml:"lora_scale" `
	NoMulMatQ            bool    ` yaml:"no_mulmatq" `
	DraftModel           string  ` yaml:"draft_model" `
	NDraft               int32   ` yaml:"n_draft" `
	Quantization         string  ` yaml:"quantization" `
	GPUMemoryUtilization float32 ` yaml:"gpu_memory_utilization" ` // vLLM
	TrustRemoteCode      bool    ` yaml:"trust_remote_code" `      // vLLM
	EnforceEager         bool    ` yaml:"enforce_eager" `          // vLLM
	SwapSpace            int     ` yaml:"swap_space" `             // vLLM
	MaxModelLen          int     ` yaml:"max_model_len" `          // vLLM
	TensorParallelSize   int     ` yaml:"tensor_parallel_size" `   // vLLM
	MMProj               string  ` yaml:"mmproj" `

	RopeScaling string ` yaml:"rope_scaling" `
	ModelType   string ` yaml:"type" `

	YarnExtFactor  float32 ` yaml:"yarn_ext_factor" `
	YarnAttnFactor float32 ` yaml:"yarn_attn_factor" `
	YarnBetaFast   float32 ` yaml:"yarn_beta_fast" `
	YarnBetaSlow   float32 ` yaml:"yarn_beta_slow" `
}
2023-08-07 20:39:10 +00:00
2023-08-09 06:38:51 +00:00
// AutoGPTQ groups the options read from the "autogptq" section of a model
// configuration.
type AutoGPTQ struct {
	ModelBaseName    string ` yaml:"model_base_name" `
	Device           string ` yaml:"device" `
	Triton           bool   ` yaml:"triton" `
	UseFastTokenizer bool   ` yaml:"use_fast_tokenizer" `
}
// TemplateConfig groups the options read from the "template" section of a
// model configuration.
// NOTE(review): the string fields presumably name or contain prompt templates
// per request kind — confirm against the templating code.
type TemplateConfig struct {
	Chat                 string ` yaml:"chat" `
	ChatMessage          string ` yaml:"chat_message" `
	Completion           string ` yaml:"completion" `
	Edit                 string ` yaml:"edit" `
	Functions            string ` yaml:"function" `
	UseTokenizerTemplate bool   ` yaml:"use_tokenizer_template" `
}
2024-03-01 15:19:53 +00:00
func ( c * BackendConfig ) SetFunctionCallString ( s string ) {
2023-07-14 23:19:43 +00:00
c . functionCallString = s
}
2024-03-01 15:19:53 +00:00
func ( c * BackendConfig ) SetFunctionCallNameString ( s string ) {
2023-07-14 23:19:43 +00:00
c . functionCallNameString = s
}
2024-03-01 15:19:53 +00:00
func ( c * BackendConfig ) ShouldUseFunctions ( ) bool {
2023-07-14 23:19:43 +00:00
return ( ( c . functionCallString != "none" || c . functionCallString == "" ) || c . ShouldCallSpecificFunction ( ) )
}
2024-03-01 15:19:53 +00:00
func ( c * BackendConfig ) ShouldCallSpecificFunction ( ) bool {
2023-07-14 23:19:43 +00:00
return len ( c . functionCallNameString ) > 0
}
2024-03-01 15:19:53 +00:00
func ( c * BackendConfig ) FunctionToCall ( ) string {
2024-04-01 17:39:54 +00:00
if c . functionCallNameString != "" &&
c . functionCallNameString != "none" && c . functionCallNameString != "auto" {
return c . functionCallNameString
}
return c . functionCallString
2023-07-14 23:19:43 +00:00
}
2024-03-22 19:55:11 +00:00
func ( cfg * BackendConfig ) SetDefaults ( opts ... ConfigLoaderOption ) {
2024-04-17 21:33:49 +00:00
lo := & LoadOptions { }
2024-03-22 19:55:11 +00:00
lo . Apply ( opts ... )
ctx := lo . ctxSize
threads := lo . threads
f16 := lo . f16
debug := lo . debug
2024-04-06 20:56:45 +00:00
// https://github.com/ggerganov/llama.cpp/blob/75cd4c77292034ecec587ecb401366f57338f7c0/common/sampling.h#L22
defaultTopP := 0.95
defaultTopK := 40
2024-03-13 09:05:30 +00:00
defaultTemp := 0.9
defaultMirostat := 2
defaultMirostatTAU := 5.0
defaultMirostatETA := 0.1
2024-04-06 20:56:45 +00:00
defaultTypicalP := 1.0
defaultTFZ := 1.0
2024-04-21 14:34:00 +00:00
defaultZero := 0
2024-03-13 09:05:30 +00:00
// Try to offload all GPU layers (if GPU is found)
2024-04-20 18:20:10 +00:00
defaultHigh := 99999999
2024-03-13 09:05:30 +00:00
trueV := true
falseV := false
if cfg . Seed == nil {
// random number generator seed
2024-04-03 20:25:47 +00:00
defaultSeed := RAND_SEED
2024-03-13 09:05:30 +00:00
cfg . Seed = & defaultSeed
}
if cfg . TopK == nil {
cfg . TopK = & defaultTopK
}
2024-04-06 20:56:45 +00:00
if cfg . TypicalP == nil {
cfg . TypicalP = & defaultTypicalP
}
if cfg . TFZ == nil {
cfg . TFZ = & defaultTFZ
}
2024-03-13 09:05:30 +00:00
if cfg . MMap == nil {
// MMap is enabled by default
cfg . MMap = & trueV
2024-03-01 15:19:53 +00:00
}
2024-03-13 09:05:30 +00:00
if cfg . MMlock == nil {
// MMlock is disabled by default
cfg . MMlock = & falseV
}
if cfg . TopP == nil {
cfg . TopP = & defaultTopP
}
if cfg . Temperature == nil {
cfg . Temperature = & defaultTemp
}
if cfg . Maxtokens == nil {
2024-04-21 14:34:00 +00:00
cfg . Maxtokens = & defaultZero
2024-03-13 09:05:30 +00:00
}
if cfg . Mirostat == nil {
cfg . Mirostat = & defaultMirostat
}
if cfg . MirostatETA == nil {
cfg . MirostatETA = & defaultMirostatETA
}
if cfg . MirostatTAU == nil {
cfg . MirostatTAU = & defaultMirostatTAU
}
if cfg . NGPULayers == nil {
2024-04-20 18:20:10 +00:00
cfg . NGPULayers = & defaultHigh
2024-03-13 09:05:30 +00:00
}
if cfg . LowVRAM == nil {
cfg . LowVRAM = & falseV
}
// Value passed by the top level are treated as default (no implicit defaults)
// defaults are set by the user
if ctx == 0 {
ctx = 1024
}
if cfg . ContextSize == nil {
cfg . ContextSize = & ctx
}
if threads == 0 {
// Threads can't be 0
threads = 4
}
if cfg . Threads == nil {
cfg . Threads = & threads
}
if cfg . F16 == nil {
cfg . F16 = & f16
}
2024-03-18 17:59:39 +00:00
if cfg . Debug == nil {
cfg . Debug = & falseV
}
2024-03-13 09:05:30 +00:00
if debug {
2024-03-18 17:59:39 +00:00
cfg . Debug = & trueV
2024-03-01 15:19:53 +00:00
}
}
2024-04-17 21:33:49 +00:00
////// Config Loader ////////
// BackendConfigLoader keeps the loaded model configurations in memory, keyed
// by configuration name. Its methods take the embedded mutex before touching
// the map.
type BackendConfigLoader struct {
// configs maps a configuration's Name to its value.
configs map [ string ] BackendConfig
// Embedded so that Lock/Unlock are called directly on the loader.
sync . Mutex
}
// ConfigLoaderOption is a functional option that mutates a LoadOptions value.
type ConfigLoaderOption func(*LoadOptions)

// LoadOptions collects the values supplied through ConfigLoaderOption
// functions; SetDefaults reads them when filling in unset fields.
type LoadOptions struct {
	debug   bool
	threads int
	ctxSize int
	f16     bool
}

// Apply runs every option against lo, in the order given, so a later option
// overrides an earlier one that sets the same field.
func (lo *LoadOptions) Apply(options ...ConfigLoaderOption) {
	for idx := range options {
		options[idx](lo)
	}
}

// LoadOptionDebug returns an option that sets the debug flag.
func LoadOptionDebug(debug bool) ConfigLoaderOption {
	setDebug := func(target *LoadOptions) { target.debug = debug }
	return setDebug
}

// LoadOptionThreads returns an option that sets the thread count.
func LoadOptionThreads(threads int) ConfigLoaderOption {
	setThreads := func(target *LoadOptions) { target.threads = threads }
	return setThreads
}

// LoadOptionContextSize returns an option that sets the context size.
func LoadOptionContextSize(ctxSize int) ConfigLoaderOption {
	setCtxSize := func(target *LoadOptions) { target.ctxSize = ctxSize }
	return setCtxSize
}

// LoadOptionF16 returns an option that sets the f16 flag.
func LoadOptionF16(f16 bool) ConfigLoaderOption {
	setF16 := func(target *LoadOptions) { target.f16 = f16 }
	return setF16
}
// LoadBackendConfigFileByName returns the configuration for modelName with
// defaults applied.
//
// It first looks for an already loaded configuration. Failing that, it tries
// to load "<modelPath>/<modelName>.yaml" when that file exists. If neither
// yields a configuration it falls back to a bare configuration whose model is
// modelName. The returned value is a copy; changing it does not affect the
// loader's stored configuration.
//
// An error is returned only when the YAML file exists but cannot be loaded;
// the underlying error is wrapped and can be inspected with errors.Is/As.
func (cl *BackendConfigLoader) LoadBackendConfigFileByName(modelName, modelPath string, opts ...ConfigLoaderOption) (*BackendConfig, error) {
	// Fallback used when nothing is registered and nothing is found on disk.
	cfg := &BackendConfig{
		PredictionOptions: schema.PredictionOptions{
			Model: modelName,
		},
	}

	cfgExisting, exists := cl.GetBackendConfig(modelName)
	if !exists {
		// Try loading a model config file
		modelConfig := filepath.Join(modelPath, modelName+".yaml")
		if _, err := os.Stat(modelConfig); err == nil {
			if err := cl.LoadBackendConfig(modelConfig, opts...); err != nil {
				return nil, fmt.Errorf("failed loading model config (%s) %w", modelConfig, err)
			}
			// The file's "name" key may differ from modelName, so look it
			// up again rather than assuming the load registered it.
			cfgExisting, exists = cl.GetBackendConfig(modelName)
		}
	}
	if exists {
		cfg = &cfgExisting
	}

	cfg.SetDefaults(opts...)
	return cfg, nil
}
// NewBackendConfigLoader returns a loader with an empty, ready-to-use
// configuration map.
func NewBackendConfigLoader() *BackendConfigLoader {
	loader := new(BackendConfigLoader)
	loader.configs = map[string]BackendConfig{}
	return loader
}
// ReadBackendConfigFile reads file as a YAML list of model configurations and
// returns them with defaults applied via SetDefaults(opts...).
//
// Null list items (for example a bare "-") decode to nil pointers; they are
// dropped from the result instead of causing a nil dereference in SetDefaults.
// Read and parse failures are returned wrapped.
func ReadBackendConfigFile(file string, opts ...ConfigLoaderOption) ([]*BackendConfig, error) {
	f, err := os.ReadFile(file)
	if err != nil {
		return nil, fmt.Errorf("cannot read config file: %w", err)
	}

	parsed := []*BackendConfig{}
	if err := yaml.Unmarshal(f, &parsed); err != nil {
		return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
	}

	configs := make([]*BackendConfig, 0, len(parsed))
	for _, cc := range parsed {
		if cc == nil {
			continue
		}
		cc.SetDefaults(opts...)
		configs = append(configs, cc)
	}

	return configs, nil
}
// ReadBackendConfig reads file as a single YAML model configuration and
// returns it with defaults applied via SetDefaults(opts...). Read and parse
// failures are returned wrapped.
func ReadBackendConfig(file string, opts ...ConfigLoaderOption) (*BackendConfig, error) {
	f, err := os.ReadFile(file)
	if err != nil {
		return nil, fmt.Errorf("cannot read config file: %w", err)
	}

	c := &BackendConfig{}
	if err := yaml.Unmarshal(f, c); err != nil {
		return nil, fmt.Errorf("cannot unmarshal config file: %w", err)
	}

	// SetDefaults applies opts itself; the LoadOptions value that used to be
	// built here was never read.
	c.SetDefaults(opts...)
	return c, nil
}
// LoadBackendConfigFile reads a YAML file holding a list of configurations and
// stores each one under its Name, replacing any entry already stored under
// that name. The loader lock is held for the whole call.
func (cl *BackendConfigLoader) LoadBackendConfigFile(file string, opts ...ConfigLoaderOption) error {
	cl.Lock()
	defer cl.Unlock()

	loaded, readErr := ReadBackendConfigFile(file, opts...)
	if readErr != nil {
		return fmt.Errorf("cannot load config file: %w", readErr)
	}

	for idx := range loaded {
		entry := loaded[idx]
		cl.configs[entry.Name] = *entry
	}
	return nil
}
// LoadBackendConfig reads a YAML file holding a single configuration and
// stores it under its Name, replacing any entry already stored under that
// name. The loader lock is held for the whole call.
func (cl *BackendConfigLoader) LoadBackendConfig(file string, opts ...ConfigLoaderOption) error {
	cl.Lock()
	defer cl.Unlock()

	loaded, readErr := ReadBackendConfig(file, opts...)
	if readErr != nil {
		return fmt.Errorf("cannot read config file: %w", readErr)
	}

	cl.configs[loaded.Name] = *loaded
	return nil
}
// GetBackendConfig returns a copy of the configuration stored under m and
// whether such an entry exists.
func (cl *BackendConfigLoader) GetBackendConfig(m string) (BackendConfig, bool) {
	cl.Lock()
	defer cl.Unlock()

	if found, ok := cl.configs[m]; ok {
		return found, true
	}
	return BackendConfig{}, false
}
// GetAllBackendConfigs returns copies of all stored configurations ordered by
// Name. The result is nil when nothing is stored.
func (cl *BackendConfigLoader) GetAllBackendConfigs() []BackendConfig {
	cl.Lock()
	defer cl.Unlock()

	var all []BackendConfig
	for key := range cl.configs {
		all = append(all, cl.configs[key])
	}

	byName := func(a, b int) bool { return all[a].Name < all[b].Name }
	sort.SliceStable(all, byName)
	return all
}
// ListBackendConfigs returns the names of all stored configurations in
// ascending order. The result is nil when nothing is stored.
//
// The names used to come back in Go's randomized map iteration order; they
// are now sorted so that repeated calls agree with each other and with
// GetAllBackendConfigs.
func (cl *BackendConfigLoader) ListBackendConfigs() []string {
	cl.Lock()
	defer cl.Unlock()

	var res []string
	for k := range cl.configs {
		res = append(res, k)
	}
	sort.Strings(res)
	return res
}
// Preload prepare models if they are not local but url or huggingface repositories
func ( cl * BackendConfigLoader ) Preload ( modelPath string ) error {
cl . Lock ( )
defer cl . Unlock ( )
status := func ( fileName , current , total string , percent float64 ) {
utils . DisplayDownloadFunction ( fileName , current , total , percent )
}
log . Info ( ) . Msgf ( "Preloading models from %s" , modelPath )
renderMode := "dark"
if os . Getenv ( "COLOR" ) != "" {
renderMode = os . Getenv ( "COLOR" )
}
glamText := func ( t string ) {
out , err := glamour . Render ( t , renderMode )
if err == nil && os . Getenv ( "NO_COLOR" ) == "" {
fmt . Println ( out )
} else {
fmt . Println ( t )
}
}
for i , config := range cl . configs {
// Download files and verify their SHA
2024-04-23 07:22:58 +00:00
for i , file := range config . DownloadFiles {
2024-04-17 21:33:49 +00:00
log . Debug ( ) . Msgf ( "Checking %q exists and matches SHA" , file . Filename )
if err := utils . VerifyPath ( file . Filename , modelPath ) ; err != nil {
return err
}
// Create file path
filePath := filepath . Join ( modelPath , file . Filename )
2024-04-23 07:22:58 +00:00
if err := downloader . DownloadFile ( file . URI , filePath , file . SHA256 , i , len ( config . DownloadFiles ) , status ) ; err != nil {
2024-04-17 21:33:49 +00:00
return err
}
}
modelURL := config . PredictionOptions . Model
modelURL = downloader . ConvertURL ( modelURL )
if downloader . LooksLikeURL ( modelURL ) {
// md5 of model name
md5Name := utils . MD5 ( modelURL )
// check if file exists
if _ , err := os . Stat ( filepath . Join ( modelPath , md5Name ) ) ; errors . Is ( err , os . ErrNotExist ) {
2024-04-23 07:22:58 +00:00
err := downloader . DownloadFile ( modelURL , filepath . Join ( modelPath , md5Name ) , "" , 0 , 0 , status )
2024-04-17 21:33:49 +00:00
if err != nil {
return err
}
}
cc := cl . configs [ i ]
c := & cc
c . PredictionOptions . Model = md5Name
cl . configs [ i ] = * c
}
if cl . configs [ i ] . Name != "" {
glamText ( fmt . Sprintf ( "**Model name**: _%s_" , cl . configs [ i ] . Name ) )
}
if cl . configs [ i ] . Description != "" {
//glamText("**Description**")
glamText ( cl . configs [ i ] . Description )
}
if cl . configs [ i ] . Usage != "" {
//glamText("**Usage**")
glamText ( cl . configs [ i ] . Usage )
}
}
return nil
}
// LoadBackendConfigsFromPath reads all the configurations of the models from a
// path (non-recursive).
//
// Only regular entries whose name contains ".yaml" or ".yml" are considered;
// each is parsed as a single configuration and stored under its Name.
// Loading is best-effort: a file that cannot be read or parsed is logged and
// skipped rather than aborting the scan. An error is returned only when the
// directory itself cannot be listed or an entry cannot be stat'ed.
func (cl *BackendConfigLoader) LoadBackendConfigsFromPath(path string, opts ...ConfigLoaderOption) error {
	cl.Lock()
	defer cl.Unlock()

	entries, err := os.ReadDir(path)
	if err != nil {
		return err
	}

	files := make([]fs.FileInfo, 0, len(entries))
	for _, entry := range entries {
		info, err := entry.Info()
		if err != nil {
			return err
		}
		files = append(files, info)
	}

	for _, file := range files {
		// Directories can never be configuration files, even when their name
		// happens to contain a YAML extension.
		if file.IsDir() {
			continue
		}
		// Skip everything that is not a YAML file (templates, .keep files, ...).
		if !strings.Contains(file.Name(), ".yaml") && !strings.Contains(file.Name(), ".yml") {
			continue
		}

		c, err := ReadBackendConfig(filepath.Join(path, file.Name()), opts...)
		if err != nil {
			// Keep going, but make the failure visible instead of dropping
			// it silently.
			log.Error().Err(err).Msgf("cannot read config file: %s", file.Name())
			continue
		}
		cl.configs[c.Name] = *c
	}

	return nil
}