LocalAI/pkg/grpc/proto/llmserver.proto

// Schema dialect used by every definition in this file.
syntax = "proto3";

// Proto package: all types below are addressed as llm.<Name>.
package llm;

// Go: import path of the generated package.
option go_package = "github.com/go-skynet/LocalAI/pkg/grpc/proto";

// Java: emit one source file per top-level type, placed in java_package,
// with LLMServer as the outer class for file-level descriptors.
option java_package = "io.skynet.localai.llmserver";
option java_outer_classname = "LLMServer";
option java_multiple_files = true;

// gRPC service covering health checking, model loading, text prediction
// (unary and server-streaming) and embedding generation.
service LLM {
  // Unary. Takes the field-less HealthMessage and answers with a Reply.
  rpc Health(HealthMessage) returns (Reply);

  // Unary. Takes PredictOptions and answers with a single Reply.
  rpc Predict(PredictOptions) returns (Reply);

  // Unary. Takes ModelOptions and answers with a Result
  // (text message plus success flag).
  rpc LoadModel(ModelOptions) returns (Result);

  // Server-streaming. Same request type as Predict, but the response is a
  // stream of Reply messages instead of a single one.
  rpc PredictStream(PredictOptions) returns (stream Reply);

  // Unary. Reuses PredictOptions as its request and answers with an
  // EmbeddingResult.
  rpc Embedding(PredictOptions) returns (EmbeddingResult);
}

// Request message for the Health RPC. It declares no fields.
message HealthMessage {}

// Request message shared by the Predict, PredictStream and Embedding RPCs.
//
// Field numbers are the wire contract: never renumber or reuse them.
// NOTE(review): the meaning of each option is defined by the backend that
// consumes this message, which is not visible in this file. The comments
// below state only what the declarations themselves show.
message PredictOptions {
  // Tag 24 is skipped by the numbering below (23 is followed by 25).
  // Reserving it prevents a future field from claiming the number.
  // NOTE(review): presumably a field that was removed; confirm in history.
  reserved 24;

  // Prompt text.
  string Prompt = 1;

  // Integer-valued options.
  int32 Seed = 2;
  int32 Threads = 3;
  int32 Tokens = 4;
  int32 TopK = 5;
  int32 Repeat = 6;
  int32 Batch = 7;
  int32 NKeep = 8;

  // Floating-point options.
  float Temperature = 9;
  float Penalty = 10;

  bool F16KV = 11;
  // Note: a second, separate debug flag exists as Debug (tag 34). Which of
  // the two a given backend reads is not determined by this file.
  bool DebugMode = 12;

  // Zero or more stop strings.
  repeated string StopPrompts = 13;
  bool IgnoreEOS = 14;

  float TailFreeSamplingZ = 15;
  float TypicalP = 16;
  float FrequencyPenalty = 17;
  float PresencePenalty = 18;

  int32 Mirostat = 19;
  float MirostatETA = 20;
  float MirostatTAU = 21;

  bool PenalizeNL = 22;
  // Carried as a string; the expected format is not defined in this file.
  string LogitBias = 23;

  // MLock, MMap, MainGPU and TensorSplit also exist on ModelOptions with the
  // same names and types (under different field numbers).
  bool MLock = 25;
  bool MMap = 26;

  bool PromptCacheAll = 27;
  bool PromptCacheRO = 28;

  string Grammar = 29;

  // Both carried as strings; the expected formats are not defined here.
  string MainGPU = 30;
  string TensorSplit = 31;

  float TopP = 32;
  string PromptCachePath = 33;
  // See the note on DebugMode (tag 12).
  bool Debug = 34;

  // Two distinct embedding inputs: a list of int32 values and a string.
  // NOTE(review): which one the Embedding RPC consumes is not visible here.
  repeated int32 EmbeddingTokens = 35;
  string Embeddings = 36;
}

// Response message returned by the Health and Predict RPCs, and the element
// type of the stream returned by PredictStream.
message Reply {
  // Text payload of the reply.
  string message = 1;
}

// Request message for the LoadModel RPC.
//
// NOTE(review): the meaning of each option is defined by the backend that
// consumes this message, which is not visible in this file. Comments below
// state only what the declarations show.
message ModelOptions {
  // Model identifier, carried as a string.
  // NOTE(review): presumably a file path or model name; confirm with caller.
  string Model = 1;
  int32 ContextSize = 2;
  // Seed, MLock, MMap, MainGPU, TensorSplit and Threads also exist on
  // PredictOptions with the same names and types (different field numbers).
  int32 Seed = 3;
  int32 NBatch = 4;
  bool F16Memory = 5;
  bool MLock = 6;
  bool MMap = 7;
  bool VocabOnly = 8;
  bool LowVRAM = 9;
  // A bool here, whereas PredictOptions.Embeddings is a string; the two
  // fields share a name only.
  bool Embeddings = 10;
  bool NUMA = 11;
  int32 NGPULayers = 12;
  // Both carried as strings; the expected formats are not defined here.
  string MainGPU = 13;
  string TensorSplit = 14;
  int32 Threads = 15;
  // Tag 16 and above take a two-byte tag on the wire (1-15 take one byte).
  string LibrarySearchPath = 16;
}

// Response message for the LoadModel RPC.
message Result {
  // Free-form text accompanying the outcome.
  string message = 1;
  // Outcome flag.
  // NOTE(review): presumably true when the model loaded; confirm with server.
  bool success = 2;
}

// Response message for the Embedding RPC.
message EmbeddingResult {
  // Embedding values as 32-bit floats. Repeated numeric fields use packed
  // encoding by default in proto3.
  repeated float embeddings = 1;
}
feat: add falcon ggllm via grpc client Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2023-07-14 23:19:43 +00:00			`syntax = "proto3";`

			`option go_package = "github.com/go-skynet/LocalAI/pkg/grpc/proto";`
			`option java_multiple_files = true;`
			`option java_package = "io.skynet.localai.llmserver";`
			`option java_outer_classname = "LLMServer";`

			`package llm;`

			`service LLM {`
			`rpc Health(HealthMessage) returns (Reply) {}`
			`rpc Predict(PredictOptions) returns (Reply) {}`
			`rpc LoadModel(ModelOptions) returns (Result) {}`
			`rpc PredictStream(PredictOptions) returns (stream Reply) {}`
feat: move llama to a grpc Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2023-07-14 23:19:43 +00:00			`rpc Embedding(PredictOptions) returns (EmbeddingResult) {}`
feat: add falcon ggllm via grpc client Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2023-07-14 23:19:43 +00:00			`}`

			`message HealthMessage {}`

			`// The request message containing the user's name.`
			`message PredictOptions {`
			`string Prompt = 1;`
			`int32 Seed = 2;`
			`int32 Threads = 3;`
			`int32 Tokens = 4;`
			`int32 TopK = 5;`
			`int32 Repeat = 6;`
			`int32 Batch = 7;`
			`int32 NKeep = 8;`
			`float Temperature = 9;`
			`float Penalty = 10;`
			`bool F16KV = 11;`
			`bool DebugMode = 12;`
			`repeated string StopPrompts = 13;`
			`bool IgnoreEOS = 14;`
			`float TailFreeSamplingZ = 15;`
			`float TypicalP = 16;`
			`float FrequencyPenalty = 17;`
			`float PresencePenalty = 18;`
			`int32 Mirostat = 19;`
			`float MirostatETA = 20;`
			`float MirostatTAU = 21;`
			`bool PenalizeNL = 22;`
			`string LogitBias = 23;`
			`bool MLock = 25;`
			`bool MMap = 26;`
			`bool PromptCacheAll = 27;`
			`bool PromptCacheRO = 28;`
			`string Grammar = 29;`
			`string MainGPU = 30;`
			`string TensorSplit = 31;`
			`float TopP = 32;`
			`string PromptCachePath = 33;`
			`bool Debug = 34;`
feat: move llama to a grpc Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2023-07-14 23:19:43 +00:00			`repeated int32 EmbeddingTokens = 35;`
			`string Embeddings = 36;`
feat: add falcon ggllm via grpc client Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2023-07-14 23:19:43 +00:00			`}`

			`// The response message containing the result`
			`message Reply {`
			`string message = 1;`
			`}`

			`message ModelOptions {`
			`string Model = 1;`
			`int32 ContextSize = 2;`
			`int32 Seed = 3;`
			`int32 NBatch = 4;`
			`bool F16Memory = 5;`
			`bool MLock = 6;`
			`bool MMap = 7;`
			`bool VocabOnly = 8;`
			`bool LowVRAM = 9;`
			`bool Embeddings = 10;`
			`bool NUMA = 11;`
			`int32 NGPULayers = 12;`
			`string MainGPU = 13;`
			`string TensorSplit = 14;`
feat: move gpt4all to a grpc service Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2023-07-14 23:19:43 +00:00			`int32 Threads = 15;`
			`string LibrarySearchPath = 16;`
feat: add falcon ggllm via grpc client Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2023-07-14 23:19:43 +00:00			`}`

			`message Result {`
			`string message = 1;`
			`bool success = 2;`
feat: move llama to a grpc Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2023-07-14 23:19:43 +00:00			`}`

			`message EmbeddingResult {`
			`repeated float embeddings = 1;`
feat: add falcon ggllm via grpc client Signed-off-by: Ettore Di Giacinto <mudler@localai.io> 2023-07-14 23:19:43 +00:00			`}`