apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
  name: localai
  namespace: localai-ns
spec:
  chart:
    spec:
      chart: local-ai
      sourceRef:
        kind: GitRepository
        name: go-skynet
        namespace: flux-system
  interval: 15m0s
  timeout: 5m
  releaseName: localai
  values:
    replicaCount: 1
    deployment:
      image:
        repository: quay.io/go-skynet/local-ai # Example: "docker.io/myapp"
        tag: latest
      env:
        threads: 4
        context_size: 1024
        debug: "true"
      modelsPath: "/models"
      download_model:
        # To use a cloud-provided (e.g. AWS) image, specify it like: 1234356789.dkr.ecr.us-REGION-X.amazonaws.com/busybox
        image: busybox
      prompt_templates:
        # To use a cloud-provided (e.g. AWS) image, specify it like: 1234356789.dkr.ecr.us-REGION-X.amazonaws.com/busybox
        image: busybox
      pullPolicy: IfNotPresent
      imagePullSecrets: []
      # - name: secret-names
      ## Needed for GPU nodes
      runtimeClassName: nvidia
    resources:
      # We usually recommend not to specify default resources and to leave this as a
      # conscious choice for the user. This also increases the chances that charts run
      # in environments with limited resources, such as Minikube. Limits are set below
      # because the nvidia runtime class is enabled; adjust them as necessary.
      limits:
        cpu: 100m
        memory: 2048Mi
        nvidia.com/gpu: 3
      # requests:
      #   cpu: 100m
      #   memory: 128Mi
    # Prompt templates to include.
    # Note: the keys of this map will be the names of the prompt template files.
    promptTemplates: {}
    # ggml-gpt4all-j.tmpl: |
    #   The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
    #   ### Prompt:
    #   {{.Input}}
    #   ### Response:
    # Models to download at runtime.
    models:
      # Whether to force-download models even if they already exist.
      forceDownload: false
      # The list of URLs to download models from.
      # Note: the name of the file will be the name of the loaded model.
      list: []
      # - url: "https://gpt4all.io/models/ggml-gpt4all-j.bin"
      #   basicAuth: base64EncodedCredentials
    initContainers: []
    # Example:
    # - name: my-init-container
    #   image: my-init-image
    #   imagePullPolicy: IfNotPresent
    #   command: ["/bin/sh", "-c", "echo init"]
    #   volumeMounts:
    #     - name: my-volume
    #       mountPath: /path/to/mount
    sidecarContainers: []
    # - name: model-file-browser
    #   image: my-sidecar-image
    #   imagePullPolicy: IfNotPresent
    #   ports:
    #     - containerPort: 1234
    # Persistent storage for models and prompt templates.
    # PVC and HostPath are mutually exclusive. If both are enabled,
    # the PVC configuration takes precedence. If neither is enabled,
    # ephemeral storage is used.
    persistence:
      models:
        enabled: true
        annotations: {}
        storageClass: "longhorn"
        accessModes: ReadWriteMany
        size: 50Gi
        globalMount: /models
      output:
        enabled: true
        annotations: {}
        storageClass: "longhorn"
        accessModes: ReadWriteMany
        size: 1Gi
        globalMount: /tmp/generated
    service:
      type: LoadBalancer
      # If deferring to an internal-only load balancer:
      # externalTrafficPolicy: Local
      port: 80
      annotations: {}
      # If using an AWS load balancer, you'll need to override the default 60s idle timeout:
      # service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"
    ingress:
      enabled: false
      className: ""
      annotations: {}
      # kubernetes.io/ingress.class: nginx
      # kubernetes.io/tls-acme: "true"
      hosts:
        - host: chart-example.local
          paths:
            - path: /
              pathType: ImplementationSpecific
      tls: []
      # - secretName: chart-example-tls
      #   hosts:
      #     - chart-example.local
    nodeSelector: {}
    tolerations: []
    affinity: {}
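---
# The HelmRelease above references a GitRepository named "go-skynet" in the
# flux-system namespace, which must exist for Flux to resolve the chart. Below is
# a minimal companion sketch; the url, branch, and apiVersion are assumptions
# (the local-ai chart is commonly published from go-skynet/helm-charts) and
# should be adjusted to match your Flux version and chart source.
apiVersion: source.toolkit.fluxcd.io/v1beta2
kind: GitRepository
metadata:
  name: go-skynet
  namespace: flux-system
spec:
  interval: 15m0s
  url: https://github.com/go-skynet/helm-charts
  ref:
    branch: main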