LocalAI/core/http/endpoints/openai/chat.go


package openai

import (
	"bufio"
	"bytes"
	"encoding/json"
	"fmt"

	fiberContext "github.com/go-skynet/LocalAI/core/http/ctx"
	"github.com/go-skynet/LocalAI/core/schema"
	"github.com/go-skynet/LocalAI/core/services"
	"github.com/gofiber/fiber/v2"
	"github.com/rs/zerolog/log"
	"github.com/valyala/fasthttp"
)

// ChatEndpoint is the OpenAI Chat Completion API endpoint https://platform.openai.com/docs/api-reference/chat/create
// @Summary Generate a chat completion for a given prompt and model.
// @Param request body schema.OpenAIRequest true "query params"
// @Success 200 {object} schema.OpenAIResponse "Response"
// @Router /v1/chat/completions [post]
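//
// A typical streaming request looks like the following (the model name is
// illustrative; any model configured in LocalAI works):
//
//	curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
//	  "model": "gpt-4",
//	  "messages": [{"role": "user", "content": "How are you?"}],
//	  "stream": true
//	}'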
func ChatEndpoint(fce *fiberContext.FiberContextExtractor, oais *services.OpenAIService) func(c *fiber.Ctx) error {
	return func(c *fiber.Ctx) error {
		_, request, err := fce.OpenAIRequestFromContext(c, false)
		if err != nil {
			return fmt.Errorf("failed reading parameters from request: %w", err)
		}

		traceID, finalResultChannel, _, tokenChannel, err := oais.Chat(request, false, request.Stream)
		if err != nil {
			return err
		}

		if request.Stream {
			log.Debug().Msg("Chat Stream request received")

			c.Context().SetContentType("text/event-stream")
			// c.Response().Header.SetContentType(fiber.MIMETextHTMLCharsetUTF8)
			c.Set("Cache-Control", "no-cache")
			c.Set("Connection", "keep-alive")
			c.Set("Transfer-Encoding", "chunked")
			c.Context().SetBodyStreamWriter(fasthttp.StreamWriter(func(w *bufio.Writer) {
				usage := &schema.OpenAIUsage{}
				toolsCalled := false
				for ev := range tokenChannel {
					if ev.Error != nil {
						log.Debug().Err(ev.Error).Msg("chat streaming responseChannel error")
						request.Cancel()
						break
					}
					usage = &ev.Value.Usage // Copy a pointer to the latest usage chunk so that the stop message can reference it
					if len(ev.Value.Choices) > 0 && len(ev.Value.Choices[0].Delta.ToolCalls) > 0 {
						toolsCalled = true
					}

					var buf bytes.Buffer
					enc := json.NewEncoder(&buf)
					enc.Encode(ev.Value) // an error event would already have hit the break above
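					// Encode appends a trailing newline and Fprintf adds one more,
					// producing the blank line that terminates each SSE event.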
log.Debug().Msgf("chat streaming sending chunk: %s", buf.String())
_, err := fmt.Fprintf(w, "data: %v\n", buf.String())
					if err != nil {
						log.Debug().Err(err).Msg("Sending chunk failed")
						request.Cancel()
						break
					}
					err = w.Flush()
					if err != nil {
						log.Debug().Msg("error while flushing, closing connection")
						request.Cancel()
						break
					}
				}
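
				// Report why generation stopped: "tool_calls" for the tools API,
				// "function_call" for the legacy functions API, "stop" otherwise,
				// matching the finish_reason values in the OpenAI spec.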
				finishReason := "stop"
				if toolsCalled && len(request.Tools) > 0 {
					finishReason = "tool_calls"
				} else if toolsCalled {
					finishReason = "function_call"
				}

				resp := &schema.OpenAIResponse{
					ID:      traceID.ID,
					Created: traceID.Created,
					Model:   request.Model, // we have to return what the user sent here, due to OpenAI spec.
					Choices: []schema.Choice{
						{
							FinishReason: finishReason,
							Index:        0,
							Delta:        &schema.Message{Content: ""},
						}},
					Object: "chat.completion.chunk",
					Usage:  *usage,
				}
				respData, _ := json.Marshal(resp)
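
				// Send the final chunk, then the [DONE] sentinel that tells
				// OpenAI-compatible clients the stream is complete.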
				w.WriteString(fmt.Sprintf("data: %s\n\n", respData))
				w.WriteString("data: [DONE]\n\n")
				w.Flush()
			}))
			return nil
		}

		// TODO is this proper to have exclusive from Stream, or do we need to issue both responses?
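		// Non-streaming path: block until the service delivers the final result.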
		rawResponse := <-finalResultChannel
		if rawResponse.Error != nil {
			return rawResponse.Error
		}

		jsonResult, _ := json.Marshal(rawResponse.Value)
		log.Debug().Str("jsonResult", string(jsonResult)).Msg("Chat Final Response")

		// Return the prediction in the response body
		return c.JSON(rawResponse.Value)
	}
}