Update vllm-openai image version back to v0.3.3

2024-03-30 22:35:21 -04:00 · 2024-03-30 22:35:21 -04:00 · ab12752c83
commit ab12752c83
parent cea6dd868f
1 changed file with 11 additions and 11 deletions
--- a/vllm/vllm-deployment.yaml
+++ b/vllm/vllm-deployment.yaml
@@ -15,7 +15,7 @@ spec:
    spec:
      containers:
        - name: vllm-inference-server
-          image: vllm/vllm-openai
+          image: vllm/vllm-openai:v0.3.3
          imagePullPolicy: IfNotPresent
          resources:
@@ -28,16 +28,16 @@ spec:
              value: /.cache
            - name: shm-size
              value: 1g
-          command: ["/bin/bash", "-c"]
+                #command: ["/bin/bash", "-c"]
-          args:
+                #args:
-            - while true; do sleep 2600; done
+                #- while true; do sleep 2600; done
-                #command: ["python3", "-m", "vllm.entrypoints.openai.api_server"]
+          command: ["python3", "-m", "vllm.entrypoints.openai.api_server"]
-                #args: ["--model=meta-llama/Llama-2-7b-hf",
+          args: ["--model=meta-llama/Llama-2-7b-hf",
-                #       "--gpu-memory-utilization=0.95",
+                 "--gpu-memory-utilization=0.95",
-                #       "--disable-log-requests",
+                 "--disable-log-requests",
-                #       "--trust-remote-code",
+                 "--trust-remote-code",
-                #       "--port=8000",
+                 "--port=8000",
-                #       "--tensor-parallel-size=1"]
+                 "--tensor-parallel-size=1"]
          ports:
            - containerPort: 8000
              name: http