Add back localai

2024-04-05 11:33:13 -04:00 · 2024-04-05 11:33:13 -04:00 · 0c3d9ad4b6
commit 0c3d9ad4b6
parent 7a291f3b04
4 changed files with 200 additions and 0 deletions
--- a/localai/localai-deployment.yaml
+++ b/localai/localai-deployment.yaml
@ -0,0 +1,32 @@
+# LocalAI inference server: a single replica on the NVIDIA runtime, models on localai-pvc.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: localai
+  namespace: localai-ns
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: localai
+  template:
+    metadata:
+      labels:
+        app: localai
+    spec:
+      runtimeClassName: nvidia
+      containers:
+      - name: localai
+        image: quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-12
+        ports:
+        - containerPort: 8080  # LocalAI's default listen address is :8080
+        resources:  # extended resources such as GPUs are requested per container
+          limits:
+            nvidia.com/gpu: 2
+        volumeMounts:
+        - mountPath: "/models"
+          name: models
+      volumes:
+      - name: models
+        persistentVolumeClaim:
+          claimName: localai-pvc
--- a/localai/localai-helm-release.yaml.off
+++ b/localai/localai-helm-release.yaml.off
@ -0,0 +1,144 @@
+apiVersion: helm.toolkit.fluxcd.io/v2beta1
+kind: HelmRelease  # Flux-managed install of the local-ai chart
+metadata:
+  namespace: localai-ns
+  name: localai
+spec:
+  releaseName: localai
+  timeout: 5m
+  interval: 15m0s
+  chart:
+    spec:
+      sourceRef:  # chart repository object, looked up in flux-system
+        namespace: flux-system
+        kind: HelmRepository
+        name: go-skynet
+      chart: local-ai
+  values:  # overrides handed to the chart
+    replicaCount: 1
+
+    deployment:
+      # GPU nodes need the NVIDIA runtime class.
+      runtimeClassName: nvidia
+      image:
+        # Any registry path can be used here, e.g. "docker.io/myapp".
+        repository: quay.io/go-skynet/local-ai
+        tag: latest
+      pullPolicy: IfNotPresent
+      imagePullSecrets: []
+      # - name: secret-names
+      env:
+        threads: 4
+        context_size: 1024
+        debug: "true"
+      modelsPath: /models
+      # busybox may be swapped for a cloud-registry (e.g. AWS) image, such as
+      # 1234356789.dkr.ecr.us-REGION-X.amazonaws.com/busybox
+      download_model:
+        image: busybox
+      prompt_templates:
+        image: busybox
+
+    resources:
+      # The chart recommends shipping without default resources so that it can
+      # also run on small environments such as Minikube; the limits below are
+      # an explicit choice for this release.
+      limits:
+        nvidia.com/gpu: 3
+        memory: 2048Mi
+        cpu: 100m
+      # NOTE(review): a 100m CPU limit next to env.threads: 4 looks tight and no
+      # requests are set — confirm these numbers are intended.
+      # requests:
+      #   cpu: 100m
+      #   memory: 128Mi
+
+    # Prompt templates to include.
+    # Note: each key of this map becomes the name of a prompt template file.
+    promptTemplates: {}
+    # ggml-gpt4all-j.tmpl: |
+    #   The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
+    #   ### Prompt:
+    #   {{.Input}}
+    #   ### Response:
+
+    # Models to download at runtime
+    models:
+      # Whether to force download models even if they already exist
+      forceDownload: false
+
+      # The list of URLs to download models from
+      # Note: the name of the file will be the name of the loaded model
+      # Written as an explicit empty list; a bare `list:` parses as null.
+      list: []
+      # - url: "https://gpt4all.io/models/ggml-gpt4all-j.bin"
+      #   basicAuth: base64EncodedCredentials
+
+    sidecarContainers: []
+    # Example sidecar:
+    # - name: model-file-browser
+    #   image: my-sidecar-image
+    #   imagePullPolicy: IfNotPresent
+    #   ports:
+    #     - containerPort: 1234
+    initContainers: []
+    # Example init container:
+    # - name: my-init-container
+    #   image: my-init-image
+    #   imagePullPolicy: IfNotPresent
+    #   command: ["/bin/sh", "-c", "echo init"]
+    #   volumeMounts:
+    #     - name: my-volume
+    #       mountPath: /path/to/mount
+
+    # Where models and prompt templates are kept. A PVC and a HostPath cannot
+    # both be used: when both are enabled the PVC configuration wins, and
+    # when neither is enabled the storage is ephemeral.
+    persistence:
+      # Volume mounted at /models.
+      models:
+        enabled: true
+        globalMount: /models
+        size: 50Gi
+        storageClass: longhorn
+        accessModes: ReadWriteMany
+        annotations: {}
+      output:
+        enabled: true
+        globalMount: /tmp/generated
+        size: 1Gi
+        storageClass: longhorn
+        accessModes: ReadWriteMany
+        annotations: {}
+
+    service:
+      port: 80
+      type: LoadBalancer
+      # For an internal-only load balancer, enable:
+      # externalTrafficPolicy: Local
+      annotations: {}
+      # Behind an AWS load balancer, raise its default 60s idle timeout with:
+      # service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"
+
+    # Ingress is switched off; the host below looks like a chart placeholder.
+    ingress:
+      enabled: false
+      className: ''
+      annotations: {}
+      # kubernetes.io/ingress.class: nginx
+      # kubernetes.io/tls-acme: "true"
+      tls: []
+      # - secretName: chart-example-tls
+      #   hosts:
+      #     - chart-example.local
+      hosts:
+        - host: chart-example.local
+          paths:
+            - pathType: ImplementationSpecific
+              path: /
+
+    # No scheduling constraints are applied.
+    affinity: {}
+
+    nodeSelector: {}
+    tolerations: []
--- a/localai/localai-pvc.yaml
+++ b/localai/localai-pvc.yaml
@ -0,0 +1,11 @@
+# Claim used by the localai Deployment for its /models mount (20Gi, RWX).
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  namespace: localai-ns
+  name: localai-pvc
+spec:
+  resources:
+    requests:
+      storage: 20Gi
+  accessModes: [ReadWriteMany]
--- a/localai/localai-service.yaml
+++ b/localai/localai-service.yaml
@ -0,0 +1,13 @@
+apiVersion: v1
+kind: Service  # exposes the app=localai pods through a LoadBalancer on port 80
+metadata:
+  name: localai
+  namespace: localai-ns
+spec:
+  type: LoadBalancer
+  selector:
+    app: localai
+  ports:
+  - port: 80
+    targetPort: 8080  # LocalAI listens on :8080 by default, not 80
+    protocol: TCP