From d0717f588d4cc181cde9761305e5ae71c9c4b272 Mon Sep 17 00:00:00 2001
From: Tyler Perkins
Date: Sat, 30 Mar 2024 22:22:51 -0400
Subject: [PATCH] Remove vllm

---
 vllm/vllm-deployment.yaml | 54 ---------------------------------------
 vllm/vllm-service.yaml    | 12 ---------
 2 files changed, 66 deletions(-)
 delete mode 100644 vllm/vllm-deployment.yaml
 delete mode 100644 vllm/vllm-service.yaml

diff --git a/vllm/vllm-deployment.yaml b/vllm/vllm-deployment.yaml
deleted file mode 100644
index 0bb9857..0000000
--- a/vllm/vllm-deployment.yaml
+++ /dev/null
@@ -1,54 +0,0 @@
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: vllm-server
-  namespace: vllm-ns
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: vllm-inference-server
-  template:
-    metadata:
-      labels:
-        app: vllm-inference-server
-    spec:
-      containers:
-      - name: vllm-inference-server
-        image: vllm/vllm-openai
-        imagePullPolicy: IfNotPresent
-
-        resources:
-          limits:
-            nvidia.com/gpu: 1
-        env:
-        - name: HUGGING_FACE_HUB_TOKEN
-          value: ""
-        - name: TRANSFORMERS_CACHE
-          value: /.cache
-        - name: shm-size
-          value: 1g
-        command: ["watch", "ls"]
-        #command: ["python3", "-m", "vllm.entrypoints.openai.api_server"]
-        #args: ["--model=meta-llama/Llama-2-7b-hf",
-        #       "--gpu-memory-utilization=0.95",
-        #       "--disable-log-requests",
-        #       "--trust-remote-code",
-        #       "--port=8000",
-        #       "--tensor-parallel-size=1"]
-        ports:
-        - containerPort: 8000
-          name: http
-        securityContext:
-          runAsUser: 1000
-        volumeMounts:
-        - mountPath: /dev/shm
-          name: dshm
-        - mountPath: /.cache
-          name: cache
-      volumes:
-      - name: cache
-        emptyDir: {}
-      - name: dshm
-        emptyDir:
-          medium: Memory
diff --git a/vllm/vllm-service.yaml b/vllm/vllm-service.yaml
deleted file mode 100644
index 26ce96a..0000000
--- a/vllm/vllm-service.yaml
+++ /dev/null
@@ -1,12 +0,0 @@
-apiVersion: v1
-kind: Service
-metadata:
-  name: vllm-inference-server
-  namespace: vllm-ns
-spec:
-  selector:
-    app: vllm-inference-server
-  type: LoadBalancer
-  ports:
-  - port: 8000
-    targetPort: http