Update version back to 0.3.3
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
This commit is contained in:
parent
cea6dd868f
commit
ab12752c83
@@ -15,7 +15,7 @@ spec:
|
||||
spec:
|
||||
containers:
|
||||
- name: vllm-inference-server
|
||||
image: vllm/vllm-openai
|
||||
image: vllm/vllm-openai:v0.3.3
|
||||
imagePullPolicy: IfNotPresent
|
||||
|
||||
resources:
|
||||
@@ -28,16 +28,16 @@ spec:
|
||||
value: /.cache
|
||||
- name: shm-size
|
||||
value: 1g
|
||||
command: ["/bin/bash", "-c"]
|
||||
args:
|
||||
- while true; do sleep 2600; done
|
||||
#command: ["python3", "-m", "vllm.entrypoints.openai.api_server"]
|
||||
#args: ["--model=meta-llama/Llama-2-7b-hf",
|
||||
# "--gpu-memory-utilization=0.95",
|
||||
# "--disable-log-requests",
|
||||
# "--trust-remote-code",
|
||||
# "--port=8000",
|
||||
# "--tensor-parallel-size=1"]
|
||||
#command: ["/bin/bash", "-c"]
|
||||
#args:
|
||||
#- while true; do sleep 2600; done
|
||||
command: ["python3", "-m", "vllm.entrypoints.openai.api_server"]
|
||||
args: ["--model=meta-llama/Llama-2-7b-hf",
|
||||
"--gpu-memory-utilization=0.95",
|
||||
"--disable-log-requests",
|
||||
"--trust-remote-code",
|
||||
"--port=8000",
|
||||
"--tensor-parallel-size=1"]
|
||||
ports:
|
||||
- containerPort: 8000
|
||||
name: http
|
||||
|
Loading…
Reference in New Issue
Block a user