This commit is contained in:
parent
7a291f3b04
commit
0c3d9ad4b6
32
localai/localai-deployment.yaml
Normal file
32
localai/localai-deployment.yaml
Normal file
@ -0,0 +1,32 @@
|
||||
---
# Deployment running a single LocalAI replica under the `nvidia`
# RuntimeClass, with /models backed by the `localai-pvc` claim.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: localai
  namespace: localai-ns
spec:
  replicas: 1
  selector:
    matchLabels:
      app: localai
  template:
    metadata:
      labels:
        app: localai
    spec:
      runtimeClassName: nvidia
      containers:
        - name: localai
          image: quay.io/go-skynet/local-ai:latest-aio-gpu-nvidia-cuda-12
          # GPU limits must be declared on the container; extended
          # resources such as nvidia.com/gpu are not honoured when placed
          # directly under the pod spec.
          resources:
            limits:
              nvidia.com/gpu: 2
          ports:
            # NOTE(review): LocalAI's documented default listen port is
            # 8080 and no address override is configured here; confirm.
            - containerPort: 8080
          # Explicit empty list instead of a bare `env:` (which is null).
          env: []
          volumeMounts:
            - mountPath: "/models"
              name: models
      volumes:
        - name: models
          persistentVolumeClaim:
            claimName: localai-pvc
|
144
localai/localai-helm-release.yaml.off
Normal file
144
localai/localai-helm-release.yaml.off
Normal file
@ -0,0 +1,144 @@
|
||||
---
# Flux HelmRelease for the `local-ai` chart from the `go-skynet`
# HelmRepository. The file carries a `.off` suffix in this commit.
apiVersion: helm.toolkit.fluxcd.io/v2beta1
kind: HelmRelease
metadata:
  name: localai
  namespace: localai-ns
spec:
  chart:
    spec:
      chart: local-ai
      sourceRef:
        kind: HelmRepository
        name: go-skynet
        namespace: flux-system
  interval: 15m0s
  timeout: 5m
  releaseName: localai
  values:
    replicaCount: 1

    deployment:
      image:
        repository: quay.io/go-skynet/local-ai  # Example: "docker.io/myapp"
        tag: latest
      env:
        threads: 4
        context_size: 1024
        debug: "true"
      modelsPath: /models
      # For a cloud-hosted (e.g. AWS ECR) image, give the full reference, e.g.
      # 1234356789.dkr.ecr.us-REGION-X.amazonaws.com/busybox
      download_model:
        image: busybox
      # For a cloud-hosted (e.g. AWS ECR) image, give the full reference, e.g.
      # 1234356789.dkr.ecr.us-REGION-X.amazonaws.com/busybox
      prompt_templates:
        image: busybox
      pullPolicy: IfNotPresent
      imagePullSecrets: []
      # - name: secret-names

      ## Needed for GPU nodes
      runtimeClassName: nvidia

    # Container resource limits. The chart's default is `{}` so that it
    # also runs on small environments such as Minikube; limits are set
    # explicitly here.
    # NOTE(review): 100m CPU is a tight limit for an inference workload,
    # and the GPU count (3) differs from the plain Deployment manifest in
    # this commit (2) - confirm both are intended.
    resources:
      limits:
        cpu: 100m
        memory: 2048Mi
        nvidia.com/gpu: 3
      # requests:
      #   cpu: 100m
      #   memory: 128Mi

    # Prompt templates to include.
    # The keys of this map become the names of the prompt template files.
    promptTemplates: {}
    #  ggml-gpt4all-j.tmpl: |
    #    The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
    #    ### Prompt:
    #    {{.Input}}
    #    ### Response:

    # Models to download at runtime.
    models:
      # Whether to force download models even if they already exist.
      forceDownload: false

      # URLs to download models from; the file name becomes the name of
      # the loaded model. Left empty here.
      list:
      # - url: "https://gpt4all.io/models/ggml-gpt4all-j.bin"
      #   basicAuth: base64EncodedCredentials

    initContainers: []
    # Example:
    # - name: my-init-container
    #   image: my-init-image
    #   imagePullPolicy: IfNotPresent
    #   command: ["/bin/sh", "-c", "echo init"]
    #   volumeMounts:
    #     - name: my-volume
    #       mountPath: /path/to/mount

    sidecarContainers: []
    # - name: model-file-browser
    #   image: my-sidecar-image
    #   imagePullPolicy: IfNotPresent
    #   ports:
    #     - containerPort: 1234

    # Persistent storage for models and prompt templates.
    # PVC and HostPath are mutually exclusive. If both are enabled, the
    # PVC configuration takes precedence. If neither is enabled,
    # ephemeral storage is used.
    persistence:
      models:
        enabled: true
        annotations: {}
        storageClass: longhorn
        accessModes: ReadWriteMany
        size: 50Gi
        globalMount: /models
      output:
        enabled: true
        annotations: {}
        storageClass: longhorn
        accessModes: ReadWriteMany
        size: 1Gi
        globalMount: /tmp/generated

    service:
      type: LoadBalancer
      # If deferring to an internal-only load balancer:
      # externalTrafficPolicy: Local
      port: 80
      annotations: {}
      # With an AWS load balancer, override the default 60s idle timeout:
      # service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "1200"

    ingress:
      enabled: false
      className: ""
      annotations: {}
      # kubernetes.io/ingress.class: nginx
      # kubernetes.io/tls-acme: "true"
      hosts:
        - host: chart-example.local
          paths:
            - path: /
              pathType: ImplementationSpecific
      tls: []
      # - secretName: chart-example-tls
      #   hosts:
      #     - chart-example.local

    nodeSelector: {}

    tolerations: []

    affinity: {}
|
11
localai/localai-pvc.yaml
Normal file
11
localai/localai-pvc.yaml
Normal file
@ -0,0 +1,11 @@
|
||||
---
# Claim for 20Gi of ReadWriteMany storage in the localai-ns namespace.
# No storageClassName is set in this manifest.
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  namespace: localai-ns
  name: localai-pvc
spec:
  resources:
    requests:
      storage: 20Gi
  accessModes: [ReadWriteMany]
|
13
localai/localai-service.yaml
Normal file
13
localai/localai-service.yaml
Normal file
@ -0,0 +1,13 @@
|
||||
---
# LoadBalancer Service exposing pods labelled `app: localai` on port 80.
apiVersion: v1
kind: Service
metadata:
  name: localai
  namespace: localai-ns
spec:
  type: LoadBalancer
  selector:
    app: localai
  ports:
    - port: 80
      # NOTE(review): LocalAI's documented default listen port is 8080;
      # forwarding to 80 reaches nothing unless the server address is
      # overridden in the pod. Confirm against the running container.
      targetPort: 8080
      protocol: TCP
|
Loading…
Reference in New Issue
Block a user