mirror of
https://github.com/mudler/LocalAI.git
synced 2024-06-07 19:40:48 +00:00
feat: Add helm chart (#56)
This commit is contained in:
parent
5cba71de70
commit
bf20cc34f6
2
.gitignore
vendored
2
.gitignore
vendored
@ -5,6 +5,8 @@ go-gpt4all-j
|
|||||||
# LocalAI build binary
|
# LocalAI build binary
|
||||||
LocalAI
|
LocalAI
|
||||||
local-ai
|
local-ai
|
||||||
|
# prevent above rules from omitting the helm chart
|
||||||
|
!charts/*
|
||||||
|
|
||||||
# Ignore models
|
# Ignore models
|
||||||
models/*.bin
|
models/*.bin
|
||||||
|
24
README.md
24
README.md
@ -63,6 +63,26 @@ curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d
|
|||||||
}'
|
}'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Helm Chart Installation (run LocalAI in Kubernetes)
|
||||||
|
The local-ai Helm chart supports two options for the LocalAI server's models directory:
|
||||||
|
1. Basic deployment with no persistent volume. You must manually update the Deployment to configure your own models directory.
|
||||||
|
|
||||||
|
Install the chart with `.Values.deployment.volume.enabled == false` and `.Values.dataVolume.enabled == false`.
|
||||||
|
|
||||||
|
2. Advanced, two-phase deployment to provision the models directory using a DataVolume. Requires the [Containerized Data Importer (CDI)](https://github.com/kubevirt/containerized-data-importer) to be pre-installed in your cluster.
|
||||||
|
|
||||||
|
First, install the chart with `.Values.deployment.volume.enabled == false` and `.Values.dataVolume.enabled == true`:
|
||||||
|
```bash
|
||||||
|
helm install local-ai charts/local-ai -n local-ai --create-namespace
|
||||||
|
```
|
||||||
|
Wait for CDI to create an importer Pod for the DataVolume and for the importer pod to finish provisioning the model archive inside the PV.
|
||||||
|
|
||||||
|
Once the PV is provisioned and the importer Pod has been removed, set `.Values.deployment.volume.enabled == true` and `.Values.dataVolume.enabled == false` and upgrade the chart:
|
||||||
|
```bash
|
||||||
|
helm upgrade local-ai -n local-ai charts/local-ai
|
||||||
|
```
|
||||||
|
This will update the local-ai Deployment to mount the PV that was provisioned by the DataVolume.
|
||||||
|
|
||||||
## Prompt templates
|
## Prompt templates
|
||||||
|
|
||||||
The API doesn't inject a default prompt for talking to the model. You have to use a prompt similar to what's described in the stanford-alpaca docs: https://github.com/tatsu-lab/stanford_alpaca#data-release.
|
The API doesn't inject a default prompt for talking to the model. You have to use a prompt similar to what's described in the stanford-alpaca docs: https://github.com/tatsu-lab/stanford_alpaca#data-release.
|
||||||
@ -184,10 +204,6 @@ python 828bddec6162a023114ce19146cb2b82/gistfile1.txt models tokenizer.model
|
|||||||
|
|
||||||
It should work, however you need to make sure you give enough resources to the container. See https://github.com/go-skynet/LocalAI/issues/2
|
It should work, however you need to make sure you give enough resources to the container. See https://github.com/go-skynet/LocalAI/issues/2
|
||||||
|
|
||||||
### Kubernetes
|
|
||||||
|
|
||||||
You can run the API in Kubernetes, see an example deployment in [kubernetes](https://github.com/go-skynet/LocalAI/tree/master/kubernetes)
|
|
||||||
|
|
||||||
### Build locally
|
### Build locally
|
||||||
|
|
||||||
Pre-built images might fit well for most of the modern hardware, however you can and might need to build the images manually.
|
Pre-built images might fit well for most of the modern hardware, however you can and might need to build the images manually.
|
||||||
|
6
charts/local-ai/Chart.yaml
Normal file
6
charts/local-ai/Chart.yaml
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
# Helm chart metadata for LocalAI.
apiVersion: v2
name: local-ai
description: A Helm chart for LocalAI
type: application

# Chart version and the application version it ships; quoted so they are
# always read as strings.
version: "1.0.0"
appVersion: "0.1.0"
|
44
charts/local-ai/templates/_helpers.tpl
Normal file
44
charts/local-ai/templates/_helpers.tpl
Normal file
@ -0,0 +1,44 @@
|
|||||||
|
{{/*
Short chart name: .Values.nameOverride when set, otherwise .Chart.Name.
The result is cut to 63 characters and a trailing "-" is removed.
*/}}
{{- define "local-ai.name" -}}
{{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" }}
{{- end }}
|
||||||
|
|
||||||
|
{{/*
Fully qualified app name used for resource names.
Resolution order:
  1. .Values.fullnameOverride, if set.
  2. The release name alone, when it already contains the chart name.
  3. "<release name>-<chart name>" otherwise.
Every variant is cut to 63 characters (some Kubernetes name fields are limited
to this by the DNS naming spec) and a trailing "-" is removed.
*/}}
{{- define "local-ai.fullname" -}}
{{- with .Values.fullnameOverride }}
{{- . | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $base := .Values.nameOverride | default .Chart.Name }}
{{- $release := .Release.Name }}
{{- if not (contains $base $release) }}
{{- printf "%s-%s" $release $base | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $release | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}
|
||||||
|
|
||||||
|
{{/*
Value for the helm.sh/chart label: "<chart name>-<chart version>", with "+"
replaced by "_", cut to 63 characters and stripped of a trailing "-".
*/}}
{{- define "local-ai.chart" -}}
{{- $nameAndVersion := printf "%s-%s" .Chart.Name .Chart.Version }}
{{- $nameAndVersion | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}
|
||||||
|
|
||||||
|
{{/*
Labels shared by every resource in this chart.
The version label is only emitted when the chart declares an appVersion.
*/}}
{{- define "local-ai.labels" -}}
helm.sh/chart: {{ include "local-ai.chart" . }}
app.kubernetes.io/name: {{ include "local-ai.name" . }}
app.kubernetes.io/instance: {{ .Release.Name | quote }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- with .Chart.AppVersion }}
app.kubernetes.io/version: {{ . | quote }}
{{- end }}
{{- end }}
|
39
charts/local-ai/templates/data-volume.yaml
Normal file
39
charts/local-ai/templates/data-volume.yaml
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
{{/*
Optional CDI DataVolume that imports a model archive into a PVC, plus an
optional Secret holding the credentials for the model source.
Rendered only when .Values.dataVolume.enabled is true.
*/}}
{{- if .Values.dataVolume.enabled }}
{{- $source := .Values.dataVolume.source }}
apiVersion: cdi.kubevirt.io/v1beta1
kind: DataVolume
metadata:
  name: {{ template "local-ai.fullname" . }}
  namespace: {{ .Release.Namespace | quote }}
  labels:
    {{- include "local-ai.labels" . | nindent 4 }}
spec:
  contentType: archive
  source:
    {{ $source.type }}:
      url: {{ $source.url | quote }}
      {{- /* Only reference the Secret when this chart actually creates it. */}}
      {{- if .Values.dataVolume.secret.enabled }}
      secretRef: {{ template "local-ai.fullname" . }}
      {{- end }}
      {{- if and (eq $source.type "http") $source.secretExtraHeaders }}
      secretExtraHeaders:
        {{- toYaml $source.secretExtraHeaders | nindent 8 }}
      {{- end }}
      {{- /* The values key stays caCertConfigMap; the CDI source field is certConfigMap. */}}
      {{- with $source.caCertConfigMap }}
      certConfigMap: {{ . | quote }}
      {{- end }}
  pvc:
    accessModes:
      {{- toYaml .Values.dataVolume.pvc.accessModes | nindent 6 }}
    resources:
      requests:
        storage: {{ .Values.dataVolume.pvc.size | quote }}
{{- if .Values.dataVolume.secret.enabled }}
---
apiVersion: v1
kind: Secret
metadata:
  name: {{ template "local-ai.fullname" . }}
  namespace: {{ .Release.Namespace | quote }}
  labels:
    {{- include "local-ai.labels" . | nindent 4 }}
data:
  # Both values are expected to be base64 encoded already (see values.yaml).
  accessKeyId: {{ .Values.dataVolume.secret.username | quote }}
  secretKey: {{ .Values.dataVolume.secret.password | quote }}
{{- end }}
{{- end }}
|
39
charts/local-ai/templates/deployment.yaml
Normal file
39
charts/local-ai/templates/deployment.yaml
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
# Deployment running one LocalAI replica.
# The models volume (a PVC named after the release's full name) is mounted at
# .Values.deployment.env.modelsPath only when .Values.deployment.volume.enabled
# is true.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ template "local-ai.fullname" . }}
  namespace: {{ .Release.Namespace | quote }}
  labels:
    {{- include "local-ai.labels" . | nindent 4 }}
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: {{ include "local-ai.name" . }}
      # Quoted so an all-digit release name is still rendered as a string.
      app.kubernetes.io/instance: {{ .Release.Name | quote }}
  replicas: 1
  template:
    metadata:
      name: {{ template "local-ai.fullname" . }}
      labels:
        app.kubernetes.io/name: {{ include "local-ai.name" . }}
        app.kubernetes.io/instance: {{ .Release.Name | quote }}
    spec:
      containers:
        - name: {{ template "local-ai.fullname" . }}
          image: {{ .Values.deployment.image | quote }}
          env:
            # Environment variable values must be strings, hence the quoting.
            - name: THREADS
              value: {{ .Values.deployment.env.threads | quote }}
            - name: CONTEXT_SIZE
              value: {{ .Values.deployment.env.contextSize | quote }}
            - name: MODELS_PATH
              value: {{ .Values.deployment.env.modelsPath | quote }}
          {{- if .Values.deployment.volume.enabled }}
          volumeMounts:
            - mountPath: {{ .Values.deployment.env.modelsPath | quote }}
              name: models
      volumes:
        - name: models
          persistentVolumeClaim:
            claimName: {{ template "local-ai.fullname" . }}
          {{- end }}
|
19
charts/local-ai/templates/service.yaml
Normal file
19
charts/local-ai/templates/service.yaml
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
# Service exposing the LocalAI API on TCP port 8080.
apiVersion: v1
kind: Service
metadata:
  name: {{ template "local-ai.fullname" . }}
  namespace: {{ .Release.Namespace | quote }}
  labels:
    {{- include "local-ai.labels" . | nindent 4 }}
  {{- with .Values.service.annotations }}
  annotations:
    {{- toYaml . | nindent 4 }}
  {{- end }}
spec:
  selector:
    app.kubernetes.io/name: {{ include "local-ai.name" . }}
    # Also match on the release so two releases of this chart in one
    # namespace do not route traffic to each other's pods.
    app.kubernetes.io/instance: {{ .Release.Name | quote }}
  type: {{ .Values.service.type | quote }}
  ports:
    - protocol: TCP
      port: 8080
      targetPort: 8080
|
38
charts/local-ai/values.yaml
Normal file
38
charts/local-ai/values.yaml
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
# Settings for the LocalAI Deployment.
deployment:
  image: 'quay.io/go-skynet/local-ai:latest'
  # Passed to the container as THREADS, CONTEXT_SIZE and MODELS_PATH.
  env:
    threads: 14
    contextSize: 512
    modelsPath: '/models'
  # When enabled, the Deployment mounts a PVC at modelsPath.
  volume:
    enabled: false

# Settings for the Service in front of the Deployment.
service:
  type: 'ClusterIP'
  annotations: {}
  # If using an AWS load balancer, you'll need to override the default 60s
  # load balancer idle timeout:
  # service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"

# Optionally create a PVC containing a model binary, sourced from an arbitrary
# HTTP server or S3 bucket
# (requires https://github.com/kubevirt/containerized-data-importer)
dataVolume:
  enabled: false

  source:
    # Source type. One of: [ http | s3 ]
    type: 'http'
    # e.g. koala-7B-4bit-128g.GGML.tar
    url: 'http://<model_server>/<model_archive>'

    # Optional ConfigMap reference, containing a Certificate Authority (CA)
    # public key and a base64 encoded pem certificate.
    caCertConfigMap: ''

    # Optional list of Secret references, each containing an extra HTTP header
    # that may include sensitive information. Only applicable for the http
    # source type.
    secretExtraHeaders: []

  pvc:
    accessModes:
      - 'ReadWriteOnce'
    size: '5Gi'

  # Credentials for the model source; both values must be base64 encoded.
  secret:
    enabled: false
    username: ''
    password: ''
|
@ -1,28 +0,0 @@
|
|||||||
# Create a PVC containing a model binary, sourced from an arbitrary HTTP server
|
|
||||||
# (requires https://github.com/kubevirt/containerized-data-importer)
|
|
||||||
apiVersion: cdi.kubevirt.io/v1beta1
|
|
||||||
kind: DataVolume
|
|
||||||
metadata:
|
|
||||||
name: models
|
|
||||||
namespace: local-ai
|
|
||||||
spec:
|
|
||||||
contentType: archive
|
|
||||||
source:
|
|
||||||
http:
|
|
||||||
url: http://<model_server>/koala-7B-4bit-128g.GGML.tar
|
|
||||||
secretRef: model-secret
|
|
||||||
pvc:
|
|
||||||
accessModes:
|
|
||||||
- ReadWriteOnce
|
|
||||||
resources:
|
|
||||||
requests:
|
|
||||||
storage: 5Gi
|
|
||||||
---
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Secret
|
|
||||||
metadata:
|
|
||||||
name: model-secret
|
|
||||||
namespace: local-ai
|
|
||||||
data:
|
|
||||||
accessKeyId: <model_server_username_base64_encoded>
|
|
||||||
secretKey: <model_server_password_base64_encoded>
|
|
@ -1,57 +0,0 @@
|
|||||||
apiVersion: v1
|
|
||||||
kind: Namespace
|
|
||||||
metadata:
|
|
||||||
name: local-ai
|
|
||||||
---
|
|
||||||
apiVersion: apps/v1
|
|
||||||
kind: Deployment
|
|
||||||
metadata:
|
|
||||||
name: local-ai
|
|
||||||
namespace: local-ai
|
|
||||||
labels:
|
|
||||||
app: local-ai
|
|
||||||
spec:
|
|
||||||
selector:
|
|
||||||
matchLabels:
|
|
||||||
app: local-ai
|
|
||||||
replicas: 1
|
|
||||||
template:
|
|
||||||
metadata:
|
|
||||||
labels:
|
|
||||||
app: local-ai
|
|
||||||
name: local-ai
|
|
||||||
spec:
|
|
||||||
containers:
|
|
||||||
- name: local-ai
|
|
||||||
image: quay.io/go-skynet/local-ai:latest
|
|
||||||
env:
|
|
||||||
- name: THREADS
|
|
||||||
value: "14"
|
|
||||||
- name: CONTEXT_SIZE
|
|
||||||
value: "512"
|
|
||||||
- name: MODELS_PATH
|
|
||||||
value: /models
|
|
||||||
volumeMounts:
|
|
||||||
- mountPath: /models
|
|
||||||
name: models
|
|
||||||
volumes:
|
|
||||||
- name: models
|
|
||||||
persistentVolumeClaim:
|
|
||||||
claimName: models
|
|
||||||
---
|
|
||||||
apiVersion: v1
|
|
||||||
kind: Service
|
|
||||||
metadata:
|
|
||||||
name: local-ai
|
|
||||||
namespace: local-ai
|
|
||||||
# If using AWS, you'll need to override the default 60s load balancer idle timeout
|
|
||||||
# annotations:
|
|
||||||
# service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"
|
|
||||||
spec:
|
|
||||||
selector:
|
|
||||||
app: local-ai
|
|
||||||
type: LoadBalancer
|
|
||||||
ports:
|
|
||||||
- protocol: TCP
|
|
||||||
port: 8080
|
|
||||||
targetPort: 8080
|
|
Loading…
Reference in New Issue
Block a user