Gluttony-Cluster/local-ai/helmrelease-local-ai.yaml

123 lines
3.5 KiB
YAML
Raw Normal View History

2023-12-14 02:07:42 +00:00
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
name: local-ai
namespace: local-ai-ns
spec:
chart:
spec:
chart: local-ai
sourceRef:
kind: HelmRepository
name: local-ai
namespace: flux-system
interval: 15m0s
timeout: 5m
releaseName: local-ai
values:
replicaCount: 1
deployment:
image: quay.io/go-skynet/local-ai:latest
env:
threads: 4
context_size: 512
modelsPath: "/models"
download_model:
# To use cloud provided (eg AWS) image, provide it like: 1234356789.dkr.ecr.us-REGION-X.amazonaws.com/busybox
image: busybox
prompt_templates:
# To use cloud provided (eg AWS) image, provide it like: 1234356789.dkr.ecr.us-REGION-X.amazonaws.com/busybox
image: busybox
pullPolicy: IfNotPresent
imagePullSecrets: []
# - name: secret-names
resources:
{}
# We usually recommend not to specify default resources and to leave this as a conscious
# choice for the user. This also increases chances charts run on environments with little
# resources, such as Minikube. If you do want to specify resources, uncomment the following
# lines, adjust them as necessary, and remove the curly braces after 'resources:'.
# limits:
# cpu: 100m
# memory: 128Mi
# requests:
# cpu: 100m
# memory: 128Mi
# Prompt templates to include
# Note: the keys of this map will be the names of the prompt template files
promptTemplates:
{}
# ggml-gpt4all-j.tmpl: |
# The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
# ### Prompt:
# {{.Input}}
# ### Response:
# Models to download at runtime
models:
# Whether to force download models even if they already exist
forceDownload: false
# The list of URLs to download models from
# Note: the name of the file will be the name of the loaded model
list:
# - url: "https://gpt4all.io/models/ggml-gpt4all-j.bin"
# basicAuth: base64EncodedCredentials
# Persistent storage for models and prompt templates.
# PVC and HostPath are mutually exclusive. If both are enabled,
# PVC configuration takes precedence. If neither are enabled, ephemeral
# storage is used.
persistence:
pvc:
enabled: true
size: 6Gi
accessModes:
- ReadWriteOnce
annotations: {}
# Optional
storageClass: longhorn
hostPath:
enabled: false
path: "/models"
service:
type: LoadBalancer
# If deferring to an internal only load balancer
# externalTrafficPolicy: Local
port: 80
annotations: {}
# If using an AWS load balancer, you'll need to override the default 60s load balancer idle timeout
# service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"
ingress:
enabled: false
className: ""
annotations:
{}
# kubernetes.io/ingress.class: nginx
# kubernetes.io/tls-acme: "true"
hosts:
- host: chart-example.local
paths:
- path: /
pathType: ImplementationSpecific
tls: []
# - secretName: chart-example-tls
# hosts:
# - chart-example.local
nodeSelector: {}
tolerations: []
affinity: {}
image:
pullPolicy: IfNotPresent