Remove vllm

2024-03-30 22:22:51 -04:00 · 2024-03-30 22:22:51 -04:00 · d0717f588d
commit d0717f588d
parent 17e076e0c8
2 changed files with 0 additions and 66 deletions
--- a/vllm/vllm-deployment.yaml
+++ b/vllm/vllm-deployment.yaml
@@ -1,54 +0,0 @@
 # Deployment for one replica of the vllm/vllm-openai image in namespace
 # vllm-ns, requesting a single NVIDIA GPU.
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: vllm-server
  namespace: vllm-ns
 spec:
  replicas: 1
  selector:
    matchLabels:
      app: vllm-inference-server
  template:
    metadata:
      labels:
        # Must stay identical to spec.selector.matchLabels above.
        app: vllm-inference-server
    spec:
      containers:
        - name: vllm-inference-server
          # NOTE(review): no tag is given, so this resolves to :latest; with
          # IfNotPresent a node keeps whichever build it pulled first.
          # Consider pinning an explicit version tag.
          image: vllm/vllm-openai
          imagePullPolicy: IfNotPresent
          resources:
            limits:
              nvidia.com/gpu: 1
          env:
            # NOTE(review): empty placeholder. Supply the real token from a
            # Secret (valueFrom.secretKeyRef) rather than committing it here.
            - name: HUGGING_FACE_HUB_TOKEN
              value: ""
            # Points at the "cache" emptyDir mounted at /.cache below.
            - name: TRANSFORMERS_CACHE
              value: /.cache
            # The former "shm-size" env entry was removed: it mirrored
            # Docker's --shm-size flag, which is not an environment variable.
            # The /dev/shm size is now set via sizeLimit on the "dshm" volume.
          # Placeholder command that only keeps the container alive; the
          # API server is NOT started. The intended invocation is kept
          # commented out below.
          command: ["watch", "ls"]
          # command: ["python3", "-m", "vllm.entrypoints.openai.api_server"]
          # args:
          #   - "--model=meta-llama/Llama-2-7b-hf"
          #   - "--gpu-memory-utilization=0.95"
          #   - "--disable-log-requests"
          #   - "--trust-remote-code"
          #   - "--port=8000"
          #   - "--tensor-parallel-size=1"
          ports:
            # Named port; the Service refers to it as targetPort "http".
            - containerPort: 8000
              name: http
          securityContext:
            runAsUser: 1000
          volumeMounts:
            - mountPath: /dev/shm
              name: dshm
            - mountPath: /.cache
              name: cache
      volumes:
        - name: cache
          emptyDir: {}
        # RAM-backed volume mounted at /dev/shm, capped at 1Gi (the value the
        # removed "shm-size: 1g" entry was trying to express).
        - name: dshm
          emptyDir:
            medium: Memory
            sizeLimit: 1Gi
--- a/vllm/vllm-service.yaml
+++ b/vllm/vllm-service.yaml
@@ -1,12 +0,0 @@
 # LoadBalancer Service in namespace vllm-ns that routes port 8000 to pods
 # labelled app=vllm-inference-server.
 apiVersion: v1
 kind: Service
 metadata:
  namespace: vllm-ns
  name: vllm-inference-server
 spec:
  type: LoadBalancer
  ports:
    # targetPort is a port *name*; it resolves to whichever container port
    # the selected pods declare as "http".
    - targetPort: http
      port: 8000
  selector:
    app: vllm-inference-server