package functions

import (
	"encoding/json"
	"regexp"

	"github.com/go-skynet/LocalAI/pkg/utils"
	"github.com/rs/zerolog/log"
)
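
// FunctionsConfig is the YAML configuration that controls whether and how
// function (tool) calls are requested from the model and parsed back out of
// its output.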
type FunctionsConfig struct {
	DisableNoAction         bool   `yaml:"disable_no_action"`
	NoActionFunctionName    string `yaml:"no_action_function_name"`
	NoActionDescriptionName string `yaml:"no_action_description_name"`
	ParallelCalls           bool   `yaml:"parallel_calls"`
	NoGrammar               bool   `yaml:"no_grammar"`
	ResponseRegex           string `yaml:"response_regex"`

	// FunctionName enables the LLM to return { "name": "function_name", "arguments": { "arg1": "value1", "arg2": "value2" } }
	// instead of { "function": "function_name", "arguments": { "arg1": "value1", "arg2": "value2" } }.
	// This might be useful for certain models trained with the function name as the first token.
	FunctionName bool `yaml:"return_name_in_function_response"`
}
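
// FuncCallResults holds a single parsed function call: the function name and
// its arguments serialized as a JSON string.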
type FuncCallResults struct {
	Name      string
	Arguments string
}
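
// ParseFunctionCall parses llmresult into zero or more function calls,
// according to functionConfig. For example, with grammars enabled (the
// default) and parallel calls disabled, a result such as
//
//	{"function": "get_weather", "arguments": {"city": "Rome"}}
//
// is parsed into FuncCallResults{Name: "get_weather", Arguments: `{"city":"Rome"}`}.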
func ParseFunctionCall(llmresult string, functionConfig FunctionsConfig) []FuncCallResults {
	multipleResults := functionConfig.ParallelCalls
	useGrammars := !functionConfig.NoGrammar
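
	// The grammar emits the function name under the "function" key, while
	// OpenAI-style responses use "name" (see FunctionsConfig.FunctionName).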
	functionNameKey := "function"
	if functionConfig.FunctionName {
		functionNameKey = "name"
	}

	results := []FuncCallResults{}

	// if no grammar is used, we have to extract function and arguments from the result
	if !useGrammars {
		// the response is a string that we have to parse
		result := make(map[string]string)

		if functionConfig.ResponseRegex != "" {
			// We use named regexes here to extract the function name and arguments
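			// (for example, a hypothetical config value such as
			//   response_regex: (?P<function>\w+)\((?P<arguments>.*)\)
			// would capture the two fields via the named groups "function" and "arguments").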
			// obviously, this expects the LLM to be stable and return correctly formatted JSON
			// TODO: optimize this and pre-compile it
			respRegex := regexp.MustCompile(functionConfig.ResponseRegex)
			match := respRegex.FindStringSubmatch(llmresult)
			for i, name := range respRegex.SubexpNames() {
				if i != 0 && name != "" && len(match) > i {
					result[name] = match[i]
				}
			}

			// TODO: open point about multiple results and/or mixed with chat messages
			// This is not handled for now; we only expect one function call per response
			functionName := result[functionNameKey]
			if functionName == "" {
				return results
			}
		} else {
			// We expect the result to be a JSON object with a function name and arguments
			err := json.Unmarshal([]byte(llmresult), &result)
			if err != nil {
				log.Error().Err(err).Str("llmresult", llmresult).Msg("unable to unmarshal llm result")
				return results
			}
		}

		return append(results, FuncCallResults{Name: result[functionNameKey], Arguments: result["arguments"]})
	}

	// with grammars
	// TODO: use generics to avoid this code duplication
	if multipleResults {
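		// Here the LLM result is expected to be a JSON array of calls, e.g.
		//   [{"function": "f1", "arguments": {...}}, {"function": "f2", "arguments": {...}}]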
		ss := []map[string]interface{}{}
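		// This prevents newlines from breaking JSON parsing for clients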
		s := utils.EscapeNewLines(llmresult)
		err := json.Unmarshal([]byte(s), &ss)
		if err != nil {
			log.Error().Err(err).Str("escapedLLMResult", s).Msg("multiple results: unable to unmarshal llm result")
		}
		log.Debug().Msgf("Function return: %s %+v", s, ss)
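
		// Each element should be an object carrying the function name and its
		// arguments; entries missing either key, or with a non-string name, are skipped.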
		for _, s := range ss {
			fnName, ok := s[functionNameKey]
			if !ok {
				continue
			}
			args, ok := s["arguments"]
			if !ok {
				continue
			}
			d, _ := json.Marshal(args)
			funcName, ok := fnName.(string)
			if !ok {
				continue
			}
			results = append(results, FuncCallResults{Name: funcName, Arguments: string(d)})
		}
	} else {
		// As we have to change the result before processing, we can't stream the answer token-by-token (yet?)
		ss := map[string]interface{}{}
		// This prevents newlines from breaking JSON parsing for clients
		s := utils.EscapeNewLines(llmresult)
		err := json.Unmarshal([]byte(s), &ss)
		if err != nil {
			log.Error().Err(err).Str("escapedLLMResult", s).Msg("unable to unmarshal llm result")
		}
		log.Debug().Msgf("Function return: %s %+v", s, ss)

		// The grammar defines the function name as "function", while OpenAI returns "name"
		fnName, ok := ss[functionNameKey]
		if !ok {
			return results
		}
		// Similarly, while arguments is a map[string]interface{} here, OpenAI actually wants a stringified object
		args, ok := ss["arguments"] // arguments needs to be a string, but we return an object from the grammar result (TODO: fix)
		if !ok {
			return results
		}
		d, _ := json.Marshal(args)
		funcName, ok := fnName.(string)
		if !ok {
			return results
		}
		results = append(results, FuncCallResults{Name: funcName, Arguments: string(d)})
	}

	return results
}