Update version back to version 0.3.3

Tyler Perkins 2024-03-30 22:35:21 -04:00
parent cea6dd868f
commit ab12752c83
Signed by: tyler
GPG Key ID: 03B27509E17EFDC8


@@ -15,7 +15,7 @@ spec:
     spec:
       containers:
       - name: vllm-inference-server
-        image: vllm/vllm-openai
+        image: vllm/vllm-openai:v0.3.3
         imagePullPolicy: IfNotPresent
         resources:
@@ -28,16 +28,16 @@ spec:
           value: /.cache
         - name: shm-size
           value: 1g
-        command: ["/bin/bash", "-c"]
-        args:
-        - while true; do sleep 2600; done
-        #command: ["python3", "-m", "vllm.entrypoints.openai.api_server"]
-        #args: ["--model=meta-llama/Llama-2-7b-hf",
-        #       "--gpu-memory-utilization=0.95",
-        #       "--disable-log-requests",
-        #       "--trust-remote-code",
-        #       "--port=8000",
-        #       "--tensor-parallel-size=1"]
+        #command: ["/bin/bash", "-c"]
+        #args:
+        #- while true; do sleep 2600; done
+        command: ["python3", "-m", "vllm.entrypoints.openai.api_server"]
+        args: ["--model=meta-llama/Llama-2-7b-hf",
+               "--gpu-memory-utilization=0.95",
+               "--disable-log-requests",
+               "--trust-remote-code",
+               "--port=8000",
+               "--tensor-parallel-size=1"]
         ports:
         - containerPort: 8000
           name: http
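
With the sleep loop commented out and the API server command restored, the container should expose vLLM's OpenAI-compatible HTTP API on port 8000, serving the model named in the args. A minimal smoke-test sketch, assuming port 8000 has been forwarded to the local machine and the Python requests library is installed; the Deployment name in the port-forward comment is illustrative, since only the container name appears in this manifest:

import requests

# Assumes the pod's port 8000 has been forwarded locally, e.g.:
#   kubectl port-forward deploy/vllm-inference-server 8000:8000
# (Deployment name is an assumption for illustration.)
BASE_URL = "http://localhost:8000/v1"

# The server should list meta-llama/Llama-2-7b-hf, matching --model in the args.
print(requests.get(f"{BASE_URL}/models").json())

# Minimal completion request against the served model.
resp = requests.post(
    f"{BASE_URL}/completions",
    json={
        "model": "meta-llama/Llama-2-7b-hf",
        "prompt": "Kubernetes is",
        "max_tokens": 32,
    },
    timeout=60,
)
print(resp.json()["choices"][0]["text"])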