Update version back to version 0.3.3
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
This commit is contained in:
parent
cea6dd868f
commit
ab12752c83
@@ -15,7 +15,7 @@ spec:
|
|||||||
spec:
|
spec:
|
||||||
containers:
|
containers:
|
||||||
- name: vllm-inference-server
|
- name: vllm-inference-server
|
||||||
image: vllm/vllm-openai
|
image: vllm/vllm-openai:v0.3.3
|
||||||
imagePullPolicy: IfNotPresent
|
imagePullPolicy: IfNotPresent
|
||||||
|
|
||||||
resources:
|
resources:
|
||||||
@@ -28,16 +28,16 @@ spec:
|
|||||||
value: /.cache
|
value: /.cache
|
||||||
- name: shm-size
|
- name: shm-size
|
||||||
value: 1g
|
value: 1g
|
||||||
command: ["/bin/bash", "-c"]
|
#command: ["/bin/bash", "-c"]
|
||||||
args:
|
#args:
|
||||||
- while true; do sleep 2600; done
|
#- while true; do sleep 2600; done
|
||||||
#command: ["python3", "-m", "vllm.entrypoints.openai.api_server"]
|
command: ["python3", "-m", "vllm.entrypoints.openai.api_server"]
|
||||||
#args: ["--model=meta-llama/Llama-2-7b-hf",
|
args: ["--model=meta-llama/Llama-2-7b-hf",
|
||||||
# "--gpu-memory-utilization=0.95",
|
"--gpu-memory-utilization=0.95",
|
||||||
# "--disable-log-requests",
|
"--disable-log-requests",
|
||||||
# "--trust-remote-code",
|
"--trust-remote-code",
|
||||||
# "--port=8000",
|
"--port=8000",
|
||||||
# "--tensor-parallel-size=1"]
|
"--tensor-parallel-size=1"]
|
||||||
ports:
|
ports:
|
||||||
- containerPort: 8000
|
- containerPort: 8000
|
||||||
name: http
|
name: http
|
||||||
|
Loading…
Reference in New Issue
Block a user