syntax = "proto3";

option go_package = "github.com/go-skynet/LocalAI/pkg/grpc/proto";
option java_multiple_files = true;
option java_package = "io.skynet.localai.backend";
option java_outer_classname = "LocalAIBackend";

package backend;

// Backend is the gRPC service declared by this file. Every RPC is unary
// except PredictStream, which streams Reply messages back to the caller.
service Backend {
  // Takes an empty HealthMessage and returns a Reply.
  rpc Health(HealthMessage) returns (Reply) {}
  // Takes PredictOptions and returns a single Reply.
  rpc Predict(PredictOptions) returns (Reply) {}
  // Takes ModelOptions and returns a Result (message + success flag).
  rpc LoadModel(ModelOptions) returns (Result) {}
  // Same request type as Predict, but the response is a stream of Reply.
  rpc PredictStream(PredictOptions) returns (stream Reply) {}
  // Takes PredictOptions and returns an EmbeddingResult.
  rpc Embedding(PredictOptions) returns (EmbeddingResult) {}
  // Takes a GenerateImageRequest and returns a Result.
  rpc GenerateImage(GenerateImageRequest) returns (Result) {}
  // Takes a TranscriptRequest and returns a TranscriptResult.
  rpc AudioTranscription(TranscriptRequest) returns (TranscriptResult) {}
  // Takes a TTSRequest and returns a Result.
  rpc TTS(TTSRequest) returns (Result) {}
}

// Request message for the Health RPC. It declares no fields.
message HealthMessage {}

// Request message shared by the Predict, PredictStream and Embedding RPCs.
//
// Field names use PascalCase rather than the usual lower_snake_case; they are
// kept as-is because renaming would change generated code and JSON keys.
//
// NOTE(review): field number 24 is not assigned in this message. If it
// belonged to a removed field, consider adding `reserved 24;` — confirm
// against the file history before doing so.
message PredictOptions {
  string Prompt = 1;
  int32 Seed = 2;
  int32 Threads = 3;
  int32 Tokens = 4;
  int32 TopK = 5;
  int32 Repeat = 6;
  int32 Batch = 7;
  int32 NKeep = 8;
  float Temperature = 9;
  float Penalty = 10;
  bool F16KV = 11;
  // NOTE(review): both DebugMode (12) and Debug (34) exist in this message;
  // how they differ is not visible here — confirm with the backend code.
  bool DebugMode = 12;
  repeated string StopPrompts = 13;
  bool IgnoreEOS = 14;
  float TailFreeSamplingZ = 15;
  float TypicalP = 16;
  float FrequencyPenalty = 17;
  float PresencePenalty = 18;
  int32 Mirostat = 19;
  float MirostatETA = 20;
  float MirostatTAU = 21;
  bool PenalizeNL = 22;
  string LogitBias = 23;
  // Numbering jumps from 23 to 25 here (see the note on the message).
  bool MLock = 25;
  bool MMap = 26;
  bool PromptCacheAll = 27;
  bool PromptCacheRO = 28;
  string Grammar = 29;
  string MainGPU = 30;
  string TensorSplit = 31;
  float TopP = 32;
  string PromptCachePath = 33;
  bool Debug = 34;
  repeated int32 EmbeddingTokens = 35;
  // A string here, whereas ModelOptions.Embeddings is a bool.
  string Embeddings = 36;
  float RopeFreqBase = 37;
  float RopeFreqScale = 38;
  float NegativePromptScale = 39;
  string NegativePrompt = 40;
}

// Response message returned by Health and Predict, and streamed by
// PredictStream. The payload is carried as raw bytes.
message Reply {
  bytes message = 1;
}

// Request message for the LoadModel RPC.
message ModelOptions {
  string Model = 1;
  int32 ContextSize = 2;
  int32 Seed = 3;
  int32 NBatch = 4;
  bool F16Memory = 5;
  bool MLock = 6;
  bool MMap = 7;
  bool VocabOnly = 8;
  bool LowVRAM = 9;
  bool Embeddings = 10;
  bool NUMA = 11;
  int32 NGPULayers = 12;
  string MainGPU = 13;
  string TensorSplit = 14;
  int32 Threads = 15;
  string LibrarySearchPath = 16;
  float RopeFreqBase = 17;
  float RopeFreqScale = 18;
  float RMSNormEps = 19;
  int32 NGQA = 20;
}

// Generic outcome message returned by LoadModel, GenerateImage and TTS:
// a text message plus a success flag.
message Result {
  string message = 1;
  bool success = 2;
}

// Response message for the Embedding RPC: a list of floats.
message EmbeddingResult {
  repeated float embeddings = 1;
}

// Request message for the AudioTranscription RPC.
//
// NOTE(review): numbering starts at 2; field number 1 is not assigned. If it
// belonged to a removed field, consider adding `reserved 1;` — confirm
// against the file history before doing so.
message TranscriptRequest {
  // NOTE(review): presumably a file path; SOURCE does not say whether it is
  // an input or an output location — confirm with the backend code.
  string dst = 2;
  string language = 3;
  uint32 threads = 4;
}

// Response message for the AudioTranscription RPC: the individual segments
// plus a text field.
message TranscriptResult {
  repeated TranscriptSegment segments = 1;
  string text = 2;
}

// One element of TranscriptResult.segments.
message TranscriptSegment {
  int32 id = 1;
  // NOTE(review): the unit of start/end is not stated in this file — confirm
  // with the producer before interpreting these values.
  int64 start = 2;
  int64 end = 3;
  string text = 4;
  repeated int32 tokens = 5;
}

// Request message for the GenerateImage RPC.
message GenerateImageRequest {
  int32 height = 1;
  int32 width = 2;
  int32 mode = 3;
  int32 step = 4;
  int32 seed = 5;
  string positive_prompt = 6;
  string negative_prompt = 7;
  // NOTE(review): presumably the output location for the generated image —
  // confirm with the backend code.
  string dst = 8;
}

// Request message for the TTS RPC.
message TTSRequest {
  string text = 1;
  string model = 2;
  // NOTE(review): presumably the output location for the generated audio —
  // confirm with the backend code.
  string dst = 3;
}