diff --git a/local-ai/helmrelease-local-ai.yaml b/local-ai/helmrelease-local-ai.yaml
new file mode 100644
index 0000000..79abb37
--- /dev/null
+++ b/local-ai/helmrelease-local-ai.yaml
@@ -0,0 +1,122 @@
+apiVersion: helm.toolkit.fluxcd.io/v2beta1
+kind: HelmRelease
+metadata:
+  name: local-ai
+  namespace: local-ai-ns
+spec:
+  chart:
+    spec:
+      chart: local-ai
+      sourceRef:
+        kind: HelmRepository
+        name: local-ai
+        namespace: flux-system
+  interval: 15m0s
+  timeout: 5m
+  releaseName: local-ai
+  values:
+    replicaCount: 1
+    deployment:
+      image: quay.io/go-skynet/local-ai:latest
+      env:
+        threads: 4
+        context_size: 512
+      modelsPath: "/models"
+      download_model:
+        # To use a cloud-provided (e.g. AWS) image, specify it like: 1234356789.dkr.ecr.us-REGION-X.amazonaws.com/busybox
+        image: busybox
+      prompt_templates:
+        # To use a cloud-provided (e.g. AWS) image, specify it like: 1234356789.dkr.ecr.us-REGION-X.amazonaws.com/busybox
+        image: busybox
+      pullPolicy: IfNotPresent
+      imagePullSecrets: []
+      # - name: secret-names
+
+    resources:
+      {}
+      # We usually recommend not to specify default resources and to leave this as a conscious
+      # choice for the user. This also increases the chances charts run on environments with little
+      # resources, such as Minikube. If you do want to specify resources, uncomment the following
+      # lines, adjust them as necessary, and remove the curly braces after 'resources:'.
+      # limits:
+      #   cpu: 100m
+      #   memory: 128Mi
+      # requests:
+      #   cpu: 100m
+      #   memory: 128Mi
+
+    # Prompt templates to include
+    # Note: the keys of this map will be the names of the prompt template files
+    promptTemplates:
+      {}
+      # ggml-gpt4all-j.tmpl: |
+      #   The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
+      #   ### Prompt:
+      #   {{.Input}}
+      #   ### Response:
+
+    # Models to download at runtime
+    models:
+      # Whether to force download models even if they already exist
+      forceDownload: false
+
+      # The list of URLs to download models from
+      # Note: the name of the file will be the name of the loaded model
+      list:
+      # - url: "https://gpt4all.io/models/ggml-gpt4all-j.bin"
+      #   basicAuth: base64EncodedCredentials
+
+      # Persistent storage for models and prompt templates.
+      # PVC and HostPath are mutually exclusive. If both are enabled,
+      # PVC configuration takes precedence. If neither is enabled, ephemeral
+      # storage is used.
+      persistence:
+        pvc:
+          enabled: true
+          size: 6Gi
+          accessModes:
+            - ReadWriteOnce
+
+          annotations: {}
+
+          # Optional
+          storageClass: longhorn
+
+        hostPath:
+          enabled: false
+          path: "/models"
+
+    service:
+      type: LoadBalancer
+      # If preferring an internal-only load balancer:
+      # externalTrafficPolicy: Local
+      port: 80
+      annotations: {}
+      # If using an AWS load balancer, you'll need to override the default 60s load balancer idle timeout
+      # service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"
+
+    ingress:
+      enabled: false
+      className: ""
+      annotations:
+        {}
+        # kubernetes.io/ingress.class: nginx
+        # kubernetes.io/tls-acme: "true"
+      hosts:
+        - host: chart-example.local
+          paths:
+            - path: /
+              pathType: ImplementationSpecific
+      tls: []
+      # - secretName: chart-example-tls
+      #   hosts:
+      #     - chart-example.local
+
+    nodeSelector: {}
+
+    tolerations: []
+
+    affinity: {}
+
+    image:
+      pullPolicy: IfNotPresent
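
Note: this HelmRelease's sourceRef expects a HelmRepository named local-ai to already exist in the flux-system namespace, which this diff does not create. A minimal sketch of that companion resource, assuming the chart is consumed from the go-skynet chart index (verify the URL against your setup):

apiVersion: source.toolkit.fluxcd.io/v1beta2
kind: HelmRepository
metadata:
  name: local-ai
  namespace: flux-system
spec:
  interval: 15m0s
  url: https://go-skynet.github.io/helm-charts/

The local-ai-ns namespace must also exist before reconciliation, since without a targetNamespace the chart installs into the HelmRelease's own namespace.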