Add back localai

2024-04-05 11:33:13 -04:00 · 2024-04-05 11:33:13 -04:00 · 0c3d9ad4b6
commit 0c3d9ad4b6
parent 7a291f3b04
4 changed files with 200 additions and 0 deletions
--- a/localai/localai-deployment.yaml
+++ b/localai/localai-deployment.yaml
@ -0,0 +1,32 @@
 # Single-replica LocalAI Deployment running on the NVIDIA runtime with the
 # models directory backed by the `localai-pvc` claim.
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: localai
  namespace: localai-ns
 spec:
  replicas: 1
  selector:
    matchLabels:
      app: localai
  template:
    metadata:
      labels:
        app: localai
    spec:
      # Use the NVIDIA container runtime for this pod.
      runtimeClassName: nvidia
      containers:
      - name: localai
        image: quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-12
        ports:
        # LocalAI serves its HTTP API on 8080 by default and no address
        # override is configured for this container.
        - containerPort: 8080
        # Extended resources such as nvidia.com/gpu are requested per
        # container; a pod-level `resources` block does not request GPUs.
        resources:
          limits:
            nvidia.com/gpu: 2
        volumeMounts:
        - mountPath: "/models"
          name: models
      volumes:
      - name: models
        persistentVolumeClaim:
          claimName: localai-pvc
--- a/localai/localai-helm-release.yaml.off
+++ b/localai/localai-helm-release.yaml.off
@ -0,0 +1,144 @@
 # Flux HelmRelease installing the `local-ai` chart from the `go-skynet`
 # HelmRepository into the `localai-ns` namespace.
 apiVersion: helm.toolkit.fluxcd.io/v2beta1
 kind: HelmRelease
 metadata:
  name: localai
  namespace: localai-ns
 spec:
  chart:
    spec:
      chart: local-ai
      sourceRef:
        kind: HelmRepository
        name: go-skynet
        namespace: flux-system
  interval: 15m0s
  timeout: 5m
  releaseName: localai
  values:
    replicaCount: 1
    deployment:
      image:
        repository: quay.io/go-skynet/local-ai  # Example: "docker.io/myapp"
        # NOTE(review): `latest` is presumably not a CUDA build; with the
        # nvidia runtime class and GPU limits below, confirm the intended tag.
        tag: latest
      env:
        threads: 4
        context_size: 1024
        debug: "true"
      modelsPath: "/models"
      download_model:
        # To use cloud provided (eg AWS) image, provide it like: 1234356789.dkr.ecr.us-REGION-X.amazonaws.com/busybox
        image: busybox
      prompt_templates:
        # To use cloud provided (eg AWS) image, provide it like: 1234356789.dkr.ecr.us-REGION-X.amazonaws.com/busybox
        image: busybox
      pullPolicy: IfNotPresent
      imagePullSecrets: []
      # - name: secret-names
      ## Needed for GPU Nodes
      runtimeClassName: nvidia
    resources:
      # Explicit limits for the LocalAI container. The chart itself leaves
      # resources unset by default so that it also runs on small
      # environments such as Minikube.
      # NOTE(review): a 100m CPU limit combined with `threads: 4` above
      # looks very tight — confirm this is intended.
      limits:
        cpu: 100m
        memory: 2048Mi
        nvidia.com/gpu: 3
      # requests:
      #   cpu: 100m
      #   memory: 128Mi
    # Prompt templates to include
    # Note: the keys of this map will be the names of the prompt template files
    promptTemplates: {}
      # ggml-gpt4all-j.tmpl: |
      #   The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
      #   ### Prompt:
      #   {{.Input}}
      #   ### Response:
    # Models to download at runtime
    models:
      # Whether to force download models even if they already exist
      forceDownload: false
      # The list of URLs to download models from
      # Note: the name of the file will be the name of the loaded model
      # An explicit empty list is used instead of a bare key: a bare key is
      # null, which does not reliably override the chart's default list.
      list: []
      #  - url: "https://gpt4all.io/models/ggml-gpt4all-j.bin"
      #    basicAuth: base64EncodedCredentials
    initContainers: []
    # Example:
    # - name: my-init-container
    #   image: my-init-image
    #   imagePullPolicy: IfNotPresent
    #   command: ["/bin/sh", "-c", "echo init"]
    #   volumeMounts:
    #     - name: my-volume
    #       mountPath: /path/to/mount
    sidecarContainers: []
      #- name: model-file-browser
      #  image: my-sidecar-image
      #  imagePullPolicy: IfNotPresent
      #  ports:
      #    - containerPort: 1234
    # Persistent storage for models and prompt templates.
    # PVC and HostPath are mutually exclusive. If both are enabled,
    # PVC configuration takes precedence. If neither are enabled, ephemeral
    # storage is used.
    persistence:
      models:
        enabled: true
        annotations: {}
        storageClass: "longhorn"
        accessModes: ReadWriteMany
        size: 50Gi
        globalMount: /models
      output:
        enabled: true
        annotations: {}
        storageClass: "longhorn"
        accessModes: ReadWriteMany
        size: 1Gi
        globalMount: /tmp/generated
    service:
      type: LoadBalancer
      # If deferring to an internal only load balancer
      # externalTrafficPolicy: Local
      port: 80
      annotations: {}
      # If using an AWS load balancer, you'll need to override the default 60s load balancer idle timeout
      # service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"
    ingress:
      enabled: false
      className: ""
      annotations: {}
        # kubernetes.io/ingress.class: nginx
        # kubernetes.io/tls-acme: "true"
      hosts:
        - host: chart-example.local
          paths:
            - path: /
              pathType: ImplementationSpecific
      tls: []
      #  - secretName: chart-example-tls
      #    hosts:
      #      - chart-example.local
    nodeSelector: {}
    tolerations: []
    affinity: {}
--- a/localai/localai-pvc.yaml
+++ b/localai/localai-pvc.yaml
@ -0,0 +1,11 @@
 # Volume claim referenced by name (`localai-pvc`) from workloads in the
 # `localai-ns` namespace.
 apiVersion: v1
 kind: PersistentVolumeClaim
 metadata:
  namespace: localai-ns
  name: localai-pvc
 spec:
  # No storageClassName is given, so the cluster's default class (if any)
  # decides how the volume is provisioned.
  resources:
    requests:
      storage: "20Gi"
  # Mountable read-write by multiple nodes at once.
  accessModes: [ReadWriteMany]
--- a/localai/localai-service.yaml
+++ b/localai/localai-service.yaml
@ -0,0 +1,13 @@
 # LoadBalancer Service exposing pods labelled `app: localai` on port 80.
 apiVersion: v1
 kind: Service
 metadata:
  name: localai
  namespace: localai-ns
 spec:
  type: LoadBalancer
  selector:
    app: localai
  ports:
  - port: 80
    # Forward to 8080, the port LocalAI's HTTP API listens on by default;
    # targeting 80 would reach nothing unless the container is reconfigured.
    targetPort: 8080
    protocol: TCP