feat(functions): better free string matching, allow to expect strings after JSON (#2445)

Free strings now accept any character except NUL, and free-form text may appear both before (prefix) and after (suffix) the JSON when mixed grammars are enabled

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
Ettore Di Giacinto 2024-05-31 09:36:27 +02:00 committed by GitHub
parent 5dc6bace49
commit 3f7212c660
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 43 additions and 15 deletions

View File

@ -67,9 +67,10 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
return true return true
}) })
textContentToReturn = functions.ParseTextContent(result, config.FunctionsConfig)
result = functions.CleanupLLMResult(result, config.FunctionsConfig) result = functions.CleanupLLMResult(result, config.FunctionsConfig)
results := functions.ParseFunctionCall(result, config.FunctionsConfig) results := functions.ParseFunctionCall(result, config.FunctionsConfig)
textContentToReturn = functions.ParseTextContent(result, config.FunctionsConfig) log.Debug().Msgf("Text content to return: %s", textContentToReturn)
noActionToRun := len(results) > 0 && results[0].Name == noAction || len(results) == 0 noActionToRun := len(results) > 0 && results[0].Name == noAction || len(results) == 0
switch { switch {
@ -136,7 +137,8 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
Model: req.Model, // we have to return what the user sent here, due to OpenAI spec. Model: req.Model, // we have to return what the user sent here, due to OpenAI spec.
Choices: []schema.Choice{{ Choices: []schema.Choice{{
Delta: &schema.Message{ Delta: &schema.Message{
Role: "assistant", Role: "assistant",
Content: &textContentToReturn,
ToolCalls: []schema.ToolCall{ ToolCalls: []schema.ToolCall{
{ {
Index: i, Index: i,
@ -477,9 +479,10 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
return return
} }
textContentToReturn = functions.ParseTextContent(s, config.FunctionsConfig)
s = functions.CleanupLLMResult(s, config.FunctionsConfig) s = functions.CleanupLLMResult(s, config.FunctionsConfig)
results := functions.ParseFunctionCall(s, config.FunctionsConfig) results := functions.ParseFunctionCall(s, config.FunctionsConfig)
textContentToReturn = functions.ParseTextContent(s, config.FunctionsConfig) log.Debug().Msgf("Text content to return: %s", textContentToReturn)
noActionsToRun := len(results) > 0 && results[0].Name == noActionName || len(results) == 0 noActionsToRun := len(results) > 0 && results[0].Name == noActionName || len(results) == 0
switch { switch {
@ -507,6 +510,7 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
if len(input.Tools) > 0 { if len(input.Tools) > 0 {
// If we are using tools, we condense the function calls into // If we are using tools, we condense the function calls into
// a single response choice with all the tools // a single response choice with all the tools
toolChoice.Message.Content = textContentToReturn
toolChoice.Message.ToolCalls = append(toolChoice.Message.ToolCalls, toolChoice.Message.ToolCalls = append(toolChoice.Message.ToolCalls,
schema.ToolCall{ schema.ToolCall{
ID: id, ID: id,
@ -522,7 +526,8 @@ func ChatEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, startup
*c = append(*c, schema.Choice{ *c = append(*c, schema.Choice{
FinishReason: "function_call", FinishReason: "function_call",
Message: &schema.Message{ Message: &schema.Message{
Role: "assistant", Role: "assistant",
Content: &textContentToReturn,
FunctionCall: map[string]interface{}{ FunctionCall: map[string]interface{}{
"name": name, "name": name,
"arguments": args, "arguments": args,

View File

@ -54,7 +54,7 @@ var (
// however, if we don't have it, the grammar will be ambiguous and // however, if we don't have it, the grammar will be ambiguous and
// empirically results are way worse. // empirically results are way worse.
"freestring": `( "freestring": `(
[^"\\] | [^\x00] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
)* space`, )* space`,
"null": `"null" space`, "null": `"null" space`,
@ -131,7 +131,7 @@ func (sc *JSONSchemaConverter) finalizeGrammar(options ...func(*GrammarOption))
grammarOpts := &GrammarOption{} grammarOpts := &GrammarOption{}
grammarOpts.Apply(options...) grammarOpts.Apply(options...)
suffix := grammarOpts.Suffix prefix := grammarOpts.Prefix
maybeArray := grammarOpts.MaybeArray maybeArray := grammarOpts.MaybeArray
disableParallelNewLines := grammarOpts.DisableParallelNewLines disableParallelNewLines := grammarOpts.DisableParallelNewLines
maybeString := grammarOpts.MaybeString maybeString := grammarOpts.MaybeString
@ -139,7 +139,7 @@ func (sc *JSONSchemaConverter) finalizeGrammar(options ...func(*GrammarOption))
var lines []string var lines []string
swapRoot := maybeArray || maybeString || suffix != "" swapRoot := maybeArray || maybeString || prefix != ""
// write down the computed rules. // write down the computed rules.
// if maybeArray is true, we need to add the array rule and slightly tweak the root rule // if maybeArray is true, we need to add the array rule and slightly tweak the root rule
@ -164,9 +164,9 @@ func (sc *JSONSchemaConverter) finalizeGrammar(options ...func(*GrammarOption))
freestringRule = "freestring" freestringRule = "freestring"
} }
if suffix != "" { if prefix != "" {
// quote newlines in suffix // quote newlines in suffix
suffix = utils.EscapeNewLines(suffix) prefix = utils.EscapeNewLines(prefix)
if maybeArray && maybeString { if maybeArray && maybeString {
newRoot = "(" + newRoot + ")" newRoot = "(" + newRoot + ")"
@ -174,9 +174,9 @@ func (sc *JSONSchemaConverter) finalizeGrammar(options ...func(*GrammarOption))
if maybeString { if maybeString {
//newRoot = "( (\"" + suffix + "\" " + newRoot + ") | freestring ) " //newRoot = "( (\"" + suffix + "\" " + newRoot + ") | freestring ) "
newRoot = "( \"" + suffix + "\" " + newRoot + " | " + freestringRule + " ) " newRoot = "( \"" + prefix + "\" " + newRoot + " | " + freestringRule + " ) "
} else { } else {
newRoot = "\"" + suffix + "\" " + "" + newRoot + "" newRoot = "\"" + prefix + "\" " + "" + newRoot + ""
} }
} else if maybeString { } else if maybeString {
if maybeArray { if maybeArray {
@ -194,9 +194,17 @@ func (sc *JSONSchemaConverter) finalizeGrammar(options ...func(*GrammarOption))
} }
if maybeArray { if maybeArray {
lines = append(lines, `mixedstring ::= freestring | freestring arr | freestring realvalue | realvalue | arr`) if grammarOpts.ExpectStringsAfterJSON {
lines = append(lines, `mixedstring ::= freestring | freestring arr freestring | (freestring realvalue freestring)* | realvalue | arr`)
} else {
lines = append(lines, `mixedstring ::= freestring | freestring arr | freestring realvalue | realvalue | arr`)
}
} else { } else {
lines = append(lines, `mixedstring ::= freestring | freestring realvalue | realvalue`) if grammarOpts.ExpectStringsAfterJSON {
lines = append(lines, `mixedstring ::= freestring | (freestring realvalue freestring)* | realvalue`)
} else {
lines = append(lines, `mixedstring ::= freestring | freestring realvalue | realvalue`)
}
} }
return strings.Join(lines, "\n") return strings.Join(lines, "\n")

View File

@ -2,11 +2,12 @@ package functions
type GrammarOption struct { type GrammarOption struct {
PropOrder string PropOrder string
Suffix string Prefix string
MaybeArray bool MaybeArray bool
DisableParallelNewLines bool DisableParallelNewLines bool
MaybeString bool MaybeString bool
NoMixedFreeString bool NoMixedFreeString bool
ExpectStringsAfterJSON bool
} }
func (o *GrammarOption) Apply(options ...func(*GrammarOption)) { func (o *GrammarOption) Apply(options ...func(*GrammarOption)) {
@ -31,8 +32,13 @@ var NoMixedFreeString func(*GrammarOption) = func(o *GrammarOption) {
o.NoMixedFreeString = true o.NoMixedFreeString = true
} }
// ExpectStringsAfterJSON is a grammar option that sets
// GrammarOption.ExpectStringsAfterJSON. When set, the generated mixedstring
// rule also accepts a freestring after the JSON value, not only before it.
var ExpectStringsAfterJSON func(*GrammarOption) = func(o *GrammarOption) {
o.ExpectStringsAfterJSON = true
}
func SetPrefix(suffix string) func(*GrammarOption) { func SetPrefix(suffix string) func(*GrammarOption) {
return func(o *GrammarOption) { return func(o *GrammarOption) {
o.Suffix = suffix o.Prefix = suffix
} }
} }

View File

@ -29,6 +29,9 @@ type GrammarConfig struct {
// Prefix is the string prepended to the grammar when it is generated // Prefix is the string prepended to the grammar when it is generated
// This is useful when models prepend a tag before returning JSON // This is useful when models prepend a tag before returning JSON
Prefix string `yaml:"prefix"` Prefix string `yaml:"prefix"`
// ExpectStringsAfterJSON makes the mixed grammar also accept free-form text after the JSON value
ExpectStringsAfterJSON bool `yaml:"expect_strings_after_json"`
} }
// FunctionsConfig is the configuration for the tool/function call. // FunctionsConfig is the configuration for the tool/function call.
@ -98,6 +101,9 @@ func (g GrammarConfig) Options() []func(o *GrammarOption) {
if g.NoMixedFreeString { if g.NoMixedFreeString {
opts = append(opts, NoMixedFreeString) opts = append(opts, NoMixedFreeString)
} }
if g.ExpectStringsAfterJSON {
opts = append(opts, ExpectStringsAfterJSON)
}
return opts return opts
} }
@ -116,6 +122,9 @@ func CleanupLLMResult(llmresult string, functionConfig FunctionsConfig) string {
} }
func ParseTextContent(llmresult string, functionConfig FunctionsConfig) string { func ParseTextContent(llmresult string, functionConfig FunctionsConfig) string {
log.Debug().Msgf("ParseTextContent: %s", llmresult)
log.Debug().Msgf("CaptureLLMResult: %s", functionConfig.CaptureLLMResult)
for _, r := range functionConfig.CaptureLLMResult { for _, r := range functionConfig.CaptureLLMResult {
// We use a regex to extract the JSON object from the response // We use a regex to extract the JSON object from the response
var respRegex = regexp.MustCompile(r) var respRegex = regexp.MustCompile(r)