// Schema for the LLM gRPC service and its request/response messages.
syntax = "proto3";

package llm;

option go_package = "github.com/go-skynet/LocalAI/pkg/grpc/proto";
option java_multiple_files = true;
option java_package = "io.skynet.localai.llmserver";
option java_outer_classname = "LLMServer";
|
|
|
|
|
|
|
|
// LLM is the gRPC service exposed by a model backend.
//
// All RPCs are unary except PredictStream, which is server-streaming.
service LLM {
  // Health takes an empty HealthMessage and returns a single Reply.
  // NOTE(review): presumably used as a liveness probe; the content of the
  // Reply is not defined in this file.
  rpc Health(HealthMessage) returns (Reply) {}

  // Predict takes PredictOptions and returns the result as one Reply.
  rpc Predict(PredictOptions) returns (Reply) {}

  // LoadModel takes ModelOptions and returns a Result carrying a success flag
  // and a message.
  rpc LoadModel(ModelOptions) returns (Result) {}

  // PredictStream takes the same PredictOptions as Predict but returns a
  // stream of Reply messages instead of a single one.
  rpc PredictStream(PredictOptions) returns (stream Reply) {}

  // Embedding takes PredictOptions and returns an EmbeddingResult (a list of
  // floats).
  rpc Embedding(PredictOptions) returns (EmbeddingResult) {}
}
|
|
|
|
|
|
|
|
// Request message of the Health RPC. It intentionally declares no fields.
message HealthMessage {
}
|
|
|
|
|
|
|
|
// Per-request options. PredictOptions is the request message of the Predict,
// PredictStream and Embedding RPCs.
//
// Field names are PascalCase; generated accessors and JSON keys are derived
// from them, so they must not be renamed without coordinating with callers.
//
// NOTE(review): this file only fixes each field's name, type and number. What
// a backend does with a given value is defined by the backend implementation;
// the notes below that go beyond type/number are assumptions to confirm.
message PredictOptions {
  // Text input of the request.
  string Prompt = 1;
  int32 Seed = 2;
  int32 Threads = 3;
  int32 Tokens = 4;
  int32 TopK = 5;
  int32 Repeat = 6;
  int32 Batch = 7;
  int32 NKeep = 8;
  float Temperature = 9;
  float Penalty = 10;
  bool F16KV = 11;
  // Note: a second debug flag, Debug (34), also exists in this message. The
  // relationship between the two is not defined here.
  bool DebugMode = 12;
  // List of strings; presumably sequences that end generation (confirm
  // against the backend).
  repeated string StopPrompts = 13;
  bool IgnoreEOS = 14;
  float TailFreeSamplingZ = 15;
  float TypicalP = 16;
  float FrequencyPenalty = 17;
  float PresencePenalty = 18;
  int32 Mirostat = 19;
  float MirostatETA = 20;
  float MirostatTAU = 21;
  bool PenalizeNL = 22;
  // Carried as a plain string; its encoding is not specified in this file.
  string LogitBias = 23;
  // Field number 24 is not assigned in this message.
  // NOTE(review): if 24 belonged to a field that was removed, declare
  // `reserved 24;` so the number cannot be reused with a different meaning.
  bool MLock = 25;
  bool MMap = 26;
  bool PromptCacheAll = 27;
  bool PromptCacheRO = 28;
  string Grammar = 29;
  // MainGPU and TensorSplit are strings here, matching the fields of the same
  // name in ModelOptions; their format is not specified in this file.
  string MainGPU = 30;
  string TensorSplit = 31;
  float TopP = 32;
  string PromptCachePath = 33;
  bool Debug = 34;
  repeated int32 EmbeddingTokens = 35;
  // A string in this message, whereas ModelOptions.Embeddings is a bool.
  string Embeddings = 36;
}
|
|
|
|
|
|
|
|
// Response message of the Health and Predict RPCs, and the element type of
// the PredictStream response stream.
message Reply {
  // Result text of the call.
  string message = 1;
}
|
|
|
|
|
|
|
|
// Request message of the LoadModel RPC.
//
// Several fields (Seed, Threads, MLock, MMap, MainGPU, TensorSplit) share
// their names with fields of PredictOptions but use different field numbers;
// the two messages are independent on the wire.
//
// NOTE(review): this file only fixes each field's name, type and number. How
// a backend interprets a value is defined by the backend implementation.
message ModelOptions {
  // Identifies the model to load. NOTE(review): whether this is a path or a
  // name is not specified here - confirm against the backend.
  string Model = 1;
  int32 ContextSize = 2;
  int32 Seed = 3;
  int32 NBatch = 4;
  bool F16Memory = 5;
  bool MLock = 6;
  bool MMap = 7;
  bool VocabOnly = 8;
  bool LowVRAM = 9;
  // A bool in this message, whereas PredictOptions.Embeddings is a string.
  bool Embeddings = 10;
  bool NUMA = 11;
  int32 NGPULayers = 12;
  // Carried as strings; their format is not specified in this file.
  string MainGPU = 13;
  string TensorSplit = 14;
  int32 Threads = 15;
  string LibrarySearchPath = 16;
}
|
|
|
|
|
|
|
|
// Response message of the LoadModel RPC.
message Result {
  // Text accompanying the outcome. NOTE(review): presumably an error or
  // status description - confirm against the backend.
  string message = 1;
  // Outcome flag. As a proto3 bool without `optional`, an unset value is
  // indistinguishable from false.
  bool success = 2;
}
|
|
|
|
|
|
|
|
// Response message of the Embedding RPC.
message EmbeddingResult {
  // The returned values as a flat list of floats. An empty list is
  // indistinguishable from an unset field.
  repeated float embeddings = 1;
}
|