feat: Add helm chart (#56)

2024-06-07 19:40:48 +00:00 · 2023-04-21 13:22:03 -07:00 · 2023-04-21 13:22:03 -07:00 · bf20cc34f6
commit bf20cc34f6
parent 5cba71de70
10 changed files with 207 additions and 89 deletions
--- a/.gitignore
+++ b/.gitignore
@ -5,6 +5,8 @@ go-gpt4all-j
 # LocalAI build binary
 LocalAI
 local-ai
 # Re-include the Helm chart directory, which the `local-ai` binary rule above would otherwise ignore
 !charts/*
 # Ignore models
 models/*.bin
--- a/README.md
+++ b/README.md
@ -63,6 +63,26 @@ curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d
   }'
 ```
 ## Helm Chart Installation (run LocalAI in Kubernetes)
 The local-ai Helm chart supports two options for the LocalAI server's models directory:
 1. Basic deployment with no persistent volume. You must manually update the Deployment to configure your own models directory.
    Install the chart with `.Values.deployment.volume.enabled == false` and `.Values.dataVolume.enabled == false`.
 2. Advanced, two-phase deployment to provision the models directory using a DataVolume. Requires [Containerized Data Importer (CDI)](https://github.com/kubevirt/containerized-data-importer) to be pre-installed in your cluster.
    First, install the chart with `.Values.deployment.volume.enabled == false` and `.Values.dataVolume.enabled == true`:
    ```bash
    helm install local-ai charts/local-ai -n local-ai --create-namespace
    ```
    Wait for CDI to create an importer Pod for the DataVolume and for the importer Pod to finish provisioning the model archive inside the PV.
    Once the PV is provisioned and the importer Pod removed, set `.Values.deployment.volume.enabled == true` and `.Values.dataVolume.enabled == false` and upgrade the chart:
    ```bash
    helm upgrade local-ai -n local-ai charts/local-ai
    ```
    This will update the local-ai Deployment to mount the PV that was provisioned by the DataVolume.
 ## Prompt templates 
 The API doesn't inject a default prompt for talking to the model. You have to use a prompt similar to what's described in the stanford-alpaca docs: https://github.com/tatsu-lab/stanford_alpaca#data-release.
@ -184,10 +204,6 @@ python 828bddec6162a023114ce19146cb2b82/gistfile1.txt models tokenizer.model
 It should work, however you need to make sure you give enough resources to the container. See https://github.com/go-skynet/LocalAI/issues/2
 ### Kubernetes
 You can run the API in Kubernetes, see an example deployment in [kubernetes](https://github.com/go-skynet/LocalAI/tree/master/kubernetes)
 ### Build locally
 Pre-built images might fit well for most of the modern hardware, however you can and might need to build the images manually.
--- a/charts/local-ai/Chart.yaml
+++ b/charts/local-ai/Chart.yaml
@ -0,0 +1,6 @@
 # Helm chart metadata for LocalAI.
 apiVersion: v2
 name: local-ai
 description: 'A Helm chart for LocalAI'
 type: application
 # Version of the chart itself.
 version: '1.0.0'
 # Version of the application the chart deploys; quoted so it is always read as a string.
 appVersion: '0.1.0'
--- a/charts/local-ai/templates/_helpers.tpl
+++ b/charts/local-ai/templates/_helpers.tpl
@ -0,0 +1,44 @@
 {{/*
 Short chart name used in labels and selectors: .Values.nameOverride when it is
 set, otherwise .Chart.Name. The result is cut to 63 characters and a trailing
 "-" is removed.
 */}}
 {{- define "local-ai.name" -}}
 {{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" }}
 {{- end }}
 {{/*
 Fully qualified app name used to name the chart's resources.
 Resolution order:
   1. .Values.fullnameOverride, when set.
   2. The release name alone, when it already contains the chart name
      (or .Values.nameOverride).
   3. "<release name>-<chart name>" otherwise.
 Every result is cut to 63 characters, because some Kubernetes name fields are
 limited to this (by the DNS naming spec), and a trailing "-" is removed.
 */}}
 {{- define "local-ai.fullname" -}}
 {{- $chartName := .Values.nameOverride | default .Chart.Name }}
 {{- if .Values.fullnameOverride }}
 {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
 {{- else if contains $chartName .Release.Name }}
 {{- .Release.Name | trunc 63 | trimSuffix "-" }}
 {{- else }}
 {{- printf "%s-%s" .Release.Name $chartName | trunc 63 | trimSuffix "-" }}
 {{- end }}
 {{- end }}
 {{/*
 Value of the helm.sh/chart label: "<chart name>-<chart version>" with every
 "+" replaced by "_", cut to 63 characters, without a trailing "-".
 */}}
 {{- define "local-ai.chart" -}}
 {{- $nameAndVersion := printf "%s-%s" .Chart.Name .Chart.Version }}
 {{- $nameAndVersion | replace "+" "_" | trunc 63 | trimSuffix "-" }}
 {{- end }}
 {{/*
 Common labels attached to the metadata of every resource in this chart.
 Every value is quoted: label values must be strings, and an unquoted value
 made up only of digits (for example a release named "123") or a word such as
 "true" would otherwise be rendered as a YAML number or boolean.
 The version label is emitted only when the chart declares an appVersion.
 */}}
 {{- define "local-ai.labels" -}}
 helm.sh/chart: {{ include "local-ai.chart" . | quote }}
 app.kubernetes.io/name: {{ include "local-ai.name" . | quote }}
 app.kubernetes.io/instance: {{ .Release.Name | quote }}
 app.kubernetes.io/managed-by: {{ .Release.Service | quote }}
 {{- if .Chart.AppVersion }}
 app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
 {{- end }}
 {{- end }}
--- a/charts/local-ai/templates/data-volume.yaml
+++ b/charts/local-ai/templates/data-volume.yaml
@ -0,0 +1,39 @@
 {{- /*
 DataVolume that imports a model archive into a PVC, plus an optional Secret
 with the credentials for the archive source.
 Rendered only when .Values.dataVolume.enabled is true; the Secret additionally
 requires .Values.dataVolume.secret.enabled.
 */}}
 {{- if .Values.dataVolume.enabled }}
 apiVersion: cdi.kubevirt.io/v1beta1
 kind: DataVolume
 metadata:
  name: {{ template "local-ai.fullname" . }}
  namespace: {{ .Release.Namespace | quote }}
  labels:
    {{- include "local-ai.labels" . | nindent 4 }}
 spec:
  contentType: archive
  source:
    {{ .Values.dataVolume.source.type }}:
      url: {{ .Values.dataVolume.source.url | quote }}
      {{- /* Reference the Secret only when this chart creates it, so the
             DataVolume never points at a Secret that does not exist. */}}
      {{- if .Values.dataVolume.secret.enabled }}
      secretRef: {{ template "local-ai.fullname" . }}
      {{- end }}
      {{- if and (eq .Values.dataVolume.source.type "http") .Values.dataVolume.source.secretExtraHeaders }}
      {{- /* Lists must go through toYaml: printing them directly yields
             "[a b]", which YAML reads as one string once there are 2+ items. */}}
      secretExtraHeaders:
        {{- toYaml .Values.dataVolume.source.secretExtraHeaders | nindent 8 }}
      {{- end }}
      {{- if .Values.dataVolume.source.caCertConfigMap }}
      caCertConfigMap: {{ .Values.dataVolume.source.caCertConfigMap | quote }}
      {{- end }}
  pvc:
    accessModes:
      {{- toYaml .Values.dataVolume.pvc.accessModes | nindent 6 }}
    resources:
      requests:
        storage: {{ .Values.dataVolume.pvc.size }}
 {{- if .Values.dataVolume.secret.enabled }}
 ---
 apiVersion: v1
 kind: Secret
 metadata:
  name: {{ template "local-ai.fullname" . }}
  namespace: {{ .Release.Namespace | quote }}
  labels:
    {{- include "local-ai.labels" . | nindent 4 }}
 data:
  # Values are expected to be base64 encoded already (see values.yaml).
  accessKeyId: {{ .Values.dataVolume.secret.username | quote }}
  secretKey: {{ .Values.dataVolume.secret.password | quote }}
 {{- end }}
 {{- end }}
--- a/charts/local-ai/templates/deployment.yaml
+++ b/charts/local-ai/templates/deployment.yaml
@ -0,0 +1,39 @@
 {{- /*
 Deployment running one replica of the LocalAI container.
 THREADS, CONTEXT_SIZE and MODELS_PATH come from .Values.deployment.env.
 When .Values.deployment.volume.enabled is true, the PVC named after the chart
 fullname is mounted at the models path.
 Templated scalars are quoted so that values made up only of digits (e.g. a
 release named "123") are still rendered as the strings Kubernetes expects.
 */}}
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: {{ template "local-ai.fullname" . }}
  namespace: {{ .Release.Namespace | quote }}
  labels:
    {{- include "local-ai.labels" . | nindent 4 }}
 spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: {{ include "local-ai.name" . | quote }}
      app.kubernetes.io/instance: {{ .Release.Name | quote }}
  replicas: 1
  template:
    metadata:
      name: {{ template "local-ai.fullname" . }}
      labels:
        # Must stay identical to spec.selector.matchLabels above.
        app.kubernetes.io/name: {{ include "local-ai.name" . | quote }}
        app.kubernetes.io/instance: {{ .Release.Name | quote }}
    spec:
      containers:
        - name: {{ template "local-ai.fullname" . }}
          image: {{ .Values.deployment.image | quote }}
          env:
          - name: THREADS
            value: {{ .Values.deployment.env.threads | quote }}
          - name: CONTEXT_SIZE
            value: {{ .Values.deployment.env.contextSize | quote }}
          - name: MODELS_PATH
            value: {{ .Values.deployment.env.modelsPath | quote }}
 {{- if .Values.deployment.volume.enabled }}
          volumeMounts:
          - mountPath: {{ .Values.deployment.env.modelsPath | quote }}
            name: models
      volumes:
      - name: models
        persistentVolumeClaim:
          claimName: {{ template "local-ai.fullname" . }}
 {{- end }}
--- a/charts/local-ai/templates/service.yaml
+++ b/charts/local-ai/templates/service.yaml
@ -0,0 +1,19 @@
 {{- /*
 Service exposing the LocalAI pods on TCP port 8080.
 The selector uses both the name and the instance label, matching the pod
 labels set by the Deployment, so that a second release of this chart in the
 same namespace is not selected by this Service.
 */}}
 apiVersion: v1
 kind: Service
 metadata:
  name: {{ template "local-ai.fullname" . }}
  namespace: {{ .Release.Namespace | quote }}
  labels:
    {{- include "local-ai.labels" . | nindent 4 }}
  {{- /* nindent keeps every annotation at the same depth; mixing literal
         leading spaces with indent would push the first key deeper than the
         rest and break the YAML as soon as there are two annotations. */}}
  {{- with .Values.service.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
 spec:
  selector:
    app.kubernetes.io/name: {{ include "local-ai.name" . | quote }}
    app.kubernetes.io/instance: {{ .Release.Name | quote }}
  type: {{ .Values.service.type | quote }}
  ports:
    - protocol: TCP
      port: 8080
      targetPort: 8080
--- a/charts/local-ai/values.yaml
+++ b/charts/local-ai/values.yaml
@ -0,0 +1,38 @@
 # Settings for the LocalAI Deployment.
 deployment:
  image: 'quay.io/go-skynet/local-ai:latest'
  # Passed to the container as the THREADS, CONTEXT_SIZE and MODELS_PATH environment variables.
  env:
    threads: 14
    contextSize: 512
    modelsPath: '/models'
  # When enabled, the chart's PVC is mounted at env.modelsPath.
  volume:
    enabled: false

 # Settings for the Service in front of the Deployment.
 service:
  type: 'ClusterIP'
  # If using an AWS load balancer, you'll need to override the default 60s load balancer idle timeout, e.g.
  #   service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"
  annotations: {}

 # Optionally create a PVC containing a model binary, sourced from an arbitrary HTTP server or S3 bucket
 # (requires https://github.com/kubevirt/containerized-data-importer)
 dataVolume:
  enabled: false
  source:
    # Source type. One of: [ http | s3 ]
    type: 'http'
    # Location of the model archive, e.g. koala-7B-4bit-128g.GGML.tar
    url: 'http://<model_server>/<model_archive>'
    # caCertConfigMap is an optional ConfigMap reference, containing a Certificate Authority (CA) public key
    # and a base64 encoded pem certificate
    caCertConfigMap: ''
    # secretExtraHeaders is an optional list of Secret references, each containing an extra HTTP header
    # that may include sensitive information. Only applicable for the http source type.
    secretExtraHeaders: []
  pvc:
    accessModes:
      - 'ReadWriteOnce'
    size: '5Gi'
  # Optional credentials Secret for the source.
  secret:
    enabled: false
    # base64 encoded
    username: ''
    # base64 encoded
    password: ''
--- a/kubernetes/data-volume.yaml
+++ b/kubernetes/data-volume.yaml
@ -1,28 +0,0 @@
 # Create a PVC containing a model binary, sourced from an arbitrary HTTP server
 # (requires https://github.com/kubevirt/containerized-data-importer)
 apiVersion: cdi.kubevirt.io/v1beta1
 kind: DataVolume
 metadata:
  name: models
  namespace: local-ai
 spec:
  # The source is an archive (the example URL below points at a .tar file).
  contentType: archive
  source:
    http:
      # Replace <model_server> with the host that serves the model archive.
      url: http://<model_server>/koala-7B-4bit-128g.GGML.tar
      # Name of the Secret defined in the second document of this file.
      secretRef: model-secret
  pvc:
    accessModes:
    - ReadWriteOnce
    resources:
      requests:
        storage: 5Gi
 ---
 # Credentials for the model server, referenced by the DataVolume's secretRef.
 apiVersion: v1
 kind: Secret
 metadata:
  name: model-secret
  namespace: local-ai
 data:
  # Replace both placeholders with base64-encoded values.
  accessKeyId: <model_server_username_base64_encoded>
  secretKey: <model_server_password_base64_encoded>
--- a/kubernetes/deployment.yaml
+++ b/kubernetes/deployment.yaml
@ -1,57 +0,0 @@
 # Namespace used by the Deployment and Service defined later in this file.
 apiVersion: v1
 kind: Namespace
 metadata:
  name: local-ai
 ---
 # Single-replica Deployment of the LocalAI container with the "models" PVC
 # mounted at /models.
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: local-ai
  namespace: local-ai
  labels:
    app: local-ai
 spec:
  selector:
    matchLabels:
      app: local-ai
  replicas: 1
  template:
    metadata:
      labels:
        app: local-ai
      name: local-ai
    spec:
      containers:
        - name: local-ai
          image: quay.io/go-skynet/local-ai:latest
          env:
          - name: THREADS
            value: "14"
          - name: CONTEXT_SIZE
            value: "512"
          # Same path as the volume mount below.
          - name: MODELS_PATH
            value: /models
          volumeMounts:
          - mountPath: /models
            name: models
      volumes:
      - name: models
        persistentVolumeClaim:
          # Same name as the DataVolume in kubernetes/data-volume.yaml.
          claimName: models
 ---
 # LoadBalancer Service forwarding TCP port 8080 to the pods labelled app: local-ai.
 apiVersion: v1
 kind: Service
 metadata:
  name: local-ai
  namespace: local-ai
  # If using AWS, you'll need to override the default 60s load balancer idle timeout
  # annotations:
  #   service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"
 spec:
  selector:
    app: local-ai
  type: LoadBalancer
  ports:
    - protocol: TCP
      port: 8080
      targetPort: 8080