From c7a613c9cae348786551fcdc10b121560acdd982 Mon Sep 17 00:00:00 2001 From: Tyler Perkins Date: Sat, 30 Mar 2024 23:07:47 -0400 Subject: [PATCH] Remove vllm --- vllm/vllm-deployment.yaml | 58 --------------------------------------- vllm/vllm-service.yaml | 12 -------- 2 files changed, 70 deletions(-) delete mode 100644 vllm/vllm-deployment.yaml delete mode 100644 vllm/vllm-service.yaml diff --git a/vllm/vllm-deployment.yaml b/vllm/vllm-deployment.yaml deleted file mode 100644 index 3c93a25..0000000 --- a/vllm/vllm-deployment.yaml +++ /dev/null @@ -1,58 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: vllm-server - namespace: vllm-ns -spec: - replicas: 1 - selector: - matchLabels: - app: vllm-inference-server - template: - metadata: - labels: - app: vllm-inference-server - spec: - runtimeClassName: nvidia - containers: - - name: vllm-inference-server - image: vllm/vllm-openai:latest - imagePullPolicy: IfNotPresent - - resources: - limits: - nvidia.com/gpu: 2 - env: - - name: HUGGING_FACE_HUB_TOKEN - value: "" - - name: TRANSFORMERS_CACHE - value: /.cache - - name: shm-size - value: 1g - #command: ["/bin/bash", "-c"] - #args: - #- while true; do sleep 2600; done - command: ["python3", "-m", "vllm.entrypoints.openai.api_server"] - args: ["--model=openai-community/gpt2", - "--gpu-memory-utilization=0.95", - "--disable-log-requests", - "--trust-remote-code", - "--port=8000", - "--dtype=half", - "--tensor-parallel-size=2"] - ports: - - containerPort: 8000 - name: http - securityContext: - runAsUser: 1000 - volumeMounts: - - mountPath: /dev/shm - name: dshm - - mountPath: /.cache - name: cache - volumes: - - name: cache - emptyDir: {} - - name: dshm - emptyDir: - medium: Memory diff --git a/vllm/vllm-service.yaml b/vllm/vllm-service.yaml deleted file mode 100644 index 26ce96a..0000000 --- a/vllm/vllm-service.yaml +++ /dev/null @@ -1,12 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: vllm-inference-server - namespace: vllm-ns -spec: - selector: - app: vllm-inference-server - type: LoadBalancer - ports: - - port: 8000 - targetPort: http