docs/examples: enhancements (#1572)

* docs: re-order sections

* fix references

* Add mixtral-instruct, tinyllama-chat, dolphin-2.5-mixtral-8x7b

* Fix link

* Minor corrections

* fix: models is a StringSlice, not a String

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>

* WIP: switch docs theme

* content

* Fix GH link

* enhancements

* enhancements

* Fixed how to link

Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>

* fixups

* logo fix

* more fixups

* final touches

---------

Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
Signed-off-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>
Co-authored-by: lunamidori5 <118759930+lunamidori5@users.noreply.github.com>
This commit is contained in:
Ettore Di Giacinto 2024-01-18 19:41:08 +01:00 committed by GitHub
parent b5c93f176a
commit 6ca4d38a01
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
79 changed files with 1826 additions and 3546 deletions

View File

@ -2,9 +2,7 @@
name: Bug report
about: Create a report to help us improve
title: ''
labels: bug
assignees: mudler
labels: bug, unconfirmed, up-for-grabs
---
<!-- Thanks for helping us to improve LocalAI! We welcome all bug reports. Please fill out each area of the template so we can better help you. Comments like this will be hidden when you post but you can delete them if you wish. -->

View File

@ -2,9 +2,7 @@
name: Feature request
about: Suggest an idea for this project
title: ''
labels: enhancement
assignees: mudler
labels: enhancement, up-for-grabs
---
<!-- Thanks for helping us to improve LocalAI! We welcome all feature requests. Please fill out each area of the template so we can better help you. Comments like this will be hidden when you post but you can delete them if you wish. -->

3
.gitmodules vendored
View File

@ -1,3 +1,6 @@
[submodule "docs/themes/hugo-theme-relearn"]
path = docs/themes/hugo-theme-relearn
url = https://github.com/McShelby/hugo-theme-relearn.git
[submodule "docs/themes/lotusdocs"]
path = docs/themes/lotusdocs
url = https://github.com/colinwilson/lotusdocs

View File

@ -1,6 +1,6 @@
MIT License
Copyright (c) 2023 Ettore Di Giacinto
Copyright (c) 2023-2024 Ettore Di Giacinto (mudler@localai.io)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal

11
docs/assets/jsconfig.json Normal file
View File

@ -0,0 +1,11 @@
{
"compilerOptions": {
"baseUrl": ".",
"paths": {
"*": [
"../../../../.cache/hugo_cache/modules/filecache/modules/pkg/mod/github.com/gohugoio/hugo-mod-jslibs-dist/popperjs/v2@v2.21100.20000/package/dist/cjs/popper.js/*",
"../../../../.cache/hugo_cache/modules/filecache/modules/pkg/mod/github.com/twbs/bootstrap@v5.3.2+incompatible/js/*"
]
}
}
}

View File

@ -1,133 +1,178 @@
# this is a required setting for this theme to appear on https://themes.gohugo.io/
# change this to a value appropriate for you; if your site is served from a subdirectory
# set it like "https://example.com/mysite/"
baseURL = "https://localai.io/"
languageCode = "en-GB"
contentDir = "content"
enableEmoji = true
enableGitInfo = true # N.B. .GitInfo does not currently function with git submodule content directories
# canonicalization will only be used for the sitemap.xml and index.xml files;
# if set to false, a site served from a subdirectory will generate wrong links
# inside of the above mentioned files; if you serve the page from the servers root
# you are free to set the value to false as recommended by the official Hugo documentation
canonifyURLs = true # true -> all relative URLs would instead be canonicalized using baseURL
# required value to serve this page from a webserver AND the file system;
# if you don't want to serve your page from the file system, you can also set this value
# to false
relativeURLs = true # true -> rewrite all relative URLs to be relative to the current content
# if you set uglyURLs to false, this theme will append 'index.html' to any branch bundle link
# so your page can be also served from the file system; if you don't want that,
# set disableExplicitIndexURLs=true in the [params] section
uglyURLs = false # true -> basic/index.html -> basic.html
defaultContentLanguage = 'en'
# the directory where Hugo reads the themes from; this is specific to your
# installation and most certainly needs be deleted or changed
#themesdir = "../.."
# yeah, well, obviously a mandatory setting for your site, if you want to
# use this theme ;-)
theme = "hugo-theme-relearn"
# the main language of this site; also an automatic pirrrate translation is
# available in this showcase
languageCode = "en"
# make sure your defaultContentLanguage is the first one in the [languages]
# array below, as the theme needs to make assumptions on it
defaultContentLanguage = "en"
# the site's title of this showcase; you should change this ;-)
title = "LocalAI Documentation"
# We disable this for testing the exampleSite; you must do so too
# if you want to use the themes parameter disableGeneratorVersion=true;
# otherwise Hugo will create a generator tag on your home page
disableHugoGeneratorInject = true
[outputs]
# add JSON to the home to support Lunr search; This is a mandatory setting
# for the search functionality
# add PRINT to home, section and page to activate the feature to print whole
# chapters
home = ["HTML", "RSS", "PRINT", "SEARCH", "SEARCHPAGE"]
section = ["HTML", "RSS", "PRINT"]
page = ["HTML", "RSS", "PRINT"]
[markup]
[markup.highlight]
# if `guessSyntax = true`, there will be no unstyled code even if no language
# was given BUT Mermaid and Math codefences will not work anymore! So this is a
# mandatory setting for your site if you want to use Mermaid or Math codefences
guessSyntax = true
defaultMarkdownHandler = "goldmark"
[markup.tableOfContents]
endLevel = 3
startLevel = 1
[markup.goldmark]
[markup.goldmark.renderer]
unsafe = true # https://jdhao.github.io/2019/12/29/hugo_html_not_shown/
# [markup.highlight]
# codeFences = false # disables Hugo's default syntax highlighting
# [markup.goldmark.parser]
# [markup.goldmark.parser.attribute]
# block = true
# title = true
# here in this showcase we use our own modified chroma syntax highlightning style
# which is imported in theme-relearn-light.css / theme-relearn-dark.css;
# if you want to use a predefined style instead:
# - remove the following `noClasses`
# - set the following `style` to a predefined style name
# - remove the `@import` of the self-defined chroma stylesheet from your CSS files
# (here eg.: theme-relearn-light.css / theme-relearn-dark.css)
noClasses = false
style = "tango"
[markup.goldmark.renderer]
# activated for this showcase to use HTML and JavaScript; decide on your own needs;
# if in doubt, remove this line
unsafe = true
# allows `hugo server` to display this showcase in IE11; this is used for testing, as we
# are still supporting IE11 - although with degraded experience; if you don't care about
# `hugo server` or browsers of ancient times, fell free to remove this whole block
[server]
[[server.headers]]
for = "**.html"
[server.headers.values]
X-UA-Compatible = "IE=edge"
[params]
google_fonts = [
["Inter", "300, 400, 600, 700"],
["Fira Code", "500, 700"]
]
sans_serif_font = "Inter" # Default is System font
secondary_font = "Inter" # Default is System font
mono_font = "Fira Code" # Default is System font
[params.footer]
copyright = "© 2023-2024 <a href='https://mudler.pm' target=_blank>Ettore Di Giacinto</a>"
version = true # includes git commit info
[params.social]
github = "mudler/LocalAI" # YOUR_GITHUB_ID or YOUR_GITHUB_URL
twitter = "LocalAI_API" # YOUR_TWITTER_ID
    discord = "uJAeKSAGDy"
# instagram = "colinwilson" # YOUR_INSTAGRAM_ID
rss = true # show rss icon with link
[params.docs] # Parameters for the /docs 'template'
logo = "https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd"
logo_text = "LocalAI"
title = "LocalAI documentation" # default html title for documentation pages/sections
pathName = "docs" # path name for documentation site | default "docs"
# themeColor = "cyan" # (optional) - Set theme accent colour. Options include: blue (default), green, red, yellow, emerald, cardinal, magenta, cyan
darkMode = true # enable dark mode option? default false
prism = true # enable syntax highlighting via Prism
prismTheme = "solarized-light" # (optional) - Set theme for PrismJS. Options include: lotusdocs (default), solarized-light, twilight, lucario
# gitinfo
repoURL = "https://github.com/mudler/LocalAI" # Git repository URL for your site [support for GitHub, GitLab, and BitBucket]
repoBranch = "master"
editPage = true # enable 'Edit this page' feature - default false
lastMod = true # enable 'Last modified' date on pages - default false
lastModRelative = true # format 'Last modified' time as relative - default true
sidebarIcons = true # enable sidebar icons? default false
breadcrumbs = true # default is true
backToTop = true # enable back-to-top button? default true
# ToC
toc = true # enable table of contents? default is true
tocMobile = true # enable table of contents in mobile view? default is true
scrollSpy = true # enable scrollspy on ToC? default is true
# front matter
descriptions = true # enable front matter descriptions under content title?
titleIcon = true # enable front matter icon title prefix? default is false
# content navigation
navDesc = true # include front matter descriptions in Prev/Next navigation cards
navDescTrunc = 30 # Number of characters by which to truncate the Prev/Next descriptions
listDescTrunc = 100 # Number of characters by which to truncate the list card description
# Link behaviour
intLinkTooltip = true # Enable a tooltip for internal links that displays info about the destination? default false
# extLinkNewTab = false # Open external links in a new Tab? default true
# logoLinkURL = "" # Set a custom URL destination for the top header logo link.
[params.flexsearch] # Parameters for FlexSearch
enabled = true
# tokenize = "full"
# optimize = true
# cache = 100
# minQueryChar = 3 # default is 0 (disabled)
# maxResult = 5 # default is 5
# searchSectionsIndex = []
[params.docsearch] # Parameters for DocSearch
# appID = "" # Algolia Application ID
# apiKey = "" # Algolia Search-Only API (Public) Key
# indexName = "" # Index Name to perform search on (or set env variable HUGO_PARAM_DOCSEARCH_indexName)
[params.analytics] # Parameters for Analytics (Google, Plausible)
# plausibleURL = "/docs/s" # (or set via env variable HUGO_PARAM_ANALYTICS_plausibleURL)
# plausibleAPI = "/docs/s" # optional - (or set via env variable HUGO_PARAM_ANALYTICS_plausibleAPI)
# plausibleDomain = "" # (or set via env variable HUGO_PARAM_ANALYTICS_plausibleDomain)
# [params.feedback]
# enabled = true
# emoticonTpl = true
# eventDest = ["plausible","google"]
# emoticonEventName = "Feedback"
# positiveEventName = "Positive Feedback"
# negativeEventName = "Negative Feedback"
# positiveFormTitle = "What did you like?"
# negativeFormTitle = "What went wrong?"
# successMsg = "Thank you for helping to improve Lotus Docs' documentation!"
# errorMsg = "Sorry! There was an error while attempting to submit your feedback!"
# positiveForm = [
# ["Accurate", "Accurately describes the feature or option."],
# ["Solved my problem", "Helped me resolve an issue."],
# ["Easy to understand", "Easy to follow and comprehend."],
# ["Something else"]
# ]
# negativeForm = [
# ["Inaccurate", "Doesn't accurately describe the feature or option."],
# ["Couldn't find what I was looking for", "Missing important information."],
# ["Hard to understand", "Too complicated or unclear."],
# ["Code sample errors", "One or more code samples are incorrect."],
# ["Something else"]
# ]
[menu]
[[menu.primary]]
name = "Docs"
url = "docs/"
identifier = "docs"
weight = 10
[[menu.primary]]
name = "Discord"
url = "https://discord.gg/uJAeKSAGDy"
identifier = "discord"
weight = 20
# showcase of the menu shortcuts; you can use relative URLs linking
# to your content or use fully-quallified URLs to link outside of
# your project
[languages]
[languages.en]
title = "LocalAI documentation"
weight = 1
languageName = "English"
[languages.en.params]
landingPageName = "<i class='fas fa-home'></i> Home"
[[languages.en.menu.shortcuts]]
name = "<i class='fas fa-home'></i> Home"
url = "/"
weight = 1
[[languages.en.menu.shortcuts]]
name = "<i class='fab fa-fw fa-github'></i> GitHub repo"
identifier = "ds"
url = "https://github.com/go-skynet/LocalAI"
weight = 10
# [languages.fr]
# title = "LocalAI documentation"
# languageName = "Français"
# contentDir = "content/fr"
# weight = 20
# [languages.de]
# title = "LocalAI documentation"
# languageName = "Deutsch"
# contentDir = "content/de"
# weight = 30
[[languages.en.menu.shortcuts]]
name = "<i class='fas fa-fw fa-camera'></i> Examples"
url = "https://github.com/go-skynet/LocalAI/tree/master/examples/"
weight = 11
[[languages.en.menu.shortcuts]]
name = "<i class='fas fa-fw fa-images'></i> Model Gallery"
url = "https://github.com/go-skynet/model-gallery"
weight = 12
[[languages.en.menu.shortcuts]]
name = "<i class='fas fa-fw fa-download'></i> Container images"
url = "https://quay.io/repository/go-skynet/local-ai"
weight = 20
#[[languages.en.menu.shortcuts]]
# name = "<i class='fas fa-fw fa-bullhorn'></i> Credits"
# url = "more/credits/"
# weight = 30
[[languages.en.menu.shortcuts]]
name = "<i class='fas fa-fw fa-tags'></i> Releases"
url = "https://github.com/go-skynet/LocalAI/releases"
weight = 40
# mounts are only needed in this showcase to access the publicly available screenshots;
# remove this section if you don't need further mounts
[module]
replacements = "github.com/colinwilson/lotusdocs -> lotusdocs"
[[module.mounts]]
source = 'archetypes'
target = 'archetypes'
@ -152,30 +197,11 @@ disableHugoGeneratorInject = true
[[module.mounts]]
source = 'static'
target = 'static'
# settings specific to this theme's features; choose to your likings and
# consult this documentation for explaination
[params]
editURL = "https://github.com/mudler/LocalAI/edit/master/docs/content/"
description = "Documentation for LocalAI"
author = "Ettore Di Giacinto"
showVisitedLinks = true
collapsibleMenu = true
disableBreadcrumb = false
disableInlineCopyToClipBoard = true
disableNextPrev = false
disableLandingPageButton = true
breadcrumbSeparator = ">"
titleSeparator = "::"
themeVariant = [ "auto", "relearn-bright", "relearn-light", "relearn-dark", "learn", "neon", "blue", "green", "red" ]
themeVariantAuto = [ "relearn-light", "relearn-dark" ]
disableSeoHiddenPages = true
# this is to index search for your native language in other languages, too (eg.
# pir in this showcase)
additionalContentLanguage = [ "en" ]
# this is for the stylesheet generator to allow for interactivity in Mermaid
# graphs; you usually will not need it and you should remove this for
# security reasons
mermaidInitialize = "{ \"securityLevel\": \"loose\" }"
mermaidZoom = true
# uncomment line below for temporary local development of module
# or when using a 'theme' as a git submodule
[[module.imports]]
path = "github.com/colinwilson/lotusdocs"
disable = false
[[module.imports]]
path = "github.com/gohugoio/hugo-mod-bootstrap-scss/v5"
disable = false

View File

@ -1,37 +0,0 @@
+++
disableToc = false
title = "Development documentation"
weight = 7
+++
{{% notice note %}}
This section is for developers and contributors. If you are looking for the user documentation, this is not the right place!
{{% /notice %}}
This section will collect how-to, notes and development documentation
## Contributing
We use conventional commits and semantic versioning. Please follow the [conventional commits](https://www.conventionalcommits.org/en/v1.0.0/) specification when writing commit messages.
## Creating a gRPC backend
LocalAI backends are `gRPC` servers.
In order to create a new backend you need:
- If there are changes required to the protobuf code, modify the [proto](https://github.com/go-skynet/LocalAI/blob/master/pkg/grpc/proto/backend.proto) file and re-generate the code with `make protogen`.
- Modify the `Makefile` to add your new backend and re-generate the client code with `make protogen` if necessary.
- Create a new `gRPC` server in `extra/grpc` if it's not written in go: [link](https://github.com/go-skynet/LocalAI/tree/master/extra/grpc), and create the specific implementation.
- Golang `gRPC` servers should be added in the [pkg/backend](https://github.com/go-skynet/LocalAI/tree/master/pkg/backend) directory given their type. See [piper](https://github.com/go-skynet/LocalAI/blob/master/pkg/backend/tts/piper.go) as an example.
- Golang servers needs a respective `cmd/grpc` binary that must be created too, see also [cmd/grpc/piper](https://github.com/go-skynet/LocalAI/tree/master/cmd/grpc/piper) as an example, update also the Makefile accordingly to build the binary during build time.
- Update the Dockerfile: if the backend is written in another language, update the `Dockerfile` default *EXTERNAL_GRPC_BACKENDS* variable by listing the new binary [link](https://github.com/go-skynet/LocalAI/blob/c2233648164f67cdb74dd33b8d46244e14436ab3/Dockerfile#L14).
Once you are done, you can either re-build `LocalAI` with your backend or you can try it out by running the `gRPC` server manually and specifying the host and IP to LocalAI with `--external-grpc-backends` or using (`EXTERNAL_GRPC_BACKENDS` environment variable, comma separated list of `name:host:port` tuples, e.g. `my-awesome-backend:host:port`):
```bash
./local-ai --debug --external-grpc-backends "my-awesome-backend:host:port" ...
```

View File

@ -0,0 +1,11 @@
---
weight: 20
title: "Advanced"
description: "Advanced usage"
icon: science
lead: ""
date: 2020-10-06T08:49:15+00:00
lastmod: 2020-10-06T08:49:15+00:00
draft: false
images: []
---

View File

@ -1,8 +1,9 @@
+++
disableToc = false
title = "Advanced"
weight = 6
title = "Advanced usage"
weight = 21
url = '/advanced'
+++
### Advanced configuration with YAML files
@ -309,7 +310,7 @@ prompt_cache_all: true
By default LocalAI will try to autoload the model by trying all the backends. This might work for most models, but some of the backends are NOT configured to autoload.
The available backends are listed in the [model compatibility table]({{%relref "model-compatibility" %}}).
The available backends are listed in the [model compatibility table]({{%relref "docs/reference/compatibility-table" %}}).
In order to specify a backend for your models, create a model config file in your `models` directory specifying the backend:
@ -343,6 +344,19 @@ Or a remote URI:
./local-ai --debug --external-grpc-backends "my-awesome-backend:host:port"
```
For example, to start vllm manually after compiling LocalAI (assuming you run the command from the root of the repository):
```bash
./local-ai --external-grpc-backends "vllm:$PWD/backend/python/vllm/run.sh"
```
Note that it is first necessary to create the conda environment with:
```bash
make -C backend/python/vllm
```
### Environment variables
When LocalAI runs in a container,
@ -419,11 +433,11 @@ RUN PATH=$PATH:/opt/conda/bin make -C backend/python/diffusers
ENV EXTERNAL_GRPC_BACKENDS="diffusers:/build/backend/python/diffusers/run.sh"
```
{{% notice note %}}
{{% alert note %}}
You can specify remote external backends or path to local files. The syntax is `backend-name:/path/to/backend` or `backend-name:host:port`.
{{% /notice %}}
{{% /alert %}}
#### In runtime

View File

@ -2,12 +2,12 @@
+++
disableToc = false
title = "Fine-tuning LLMs for text generation"
weight = 3
weight = 22
+++
{{% notice note %}}
{{% alert note %}}
Section under construction
{{% /notice %}}
{{% /alert %}}
This section covers how to fine-tune a language model for text generation and consume it in LocalAI.

View File

@ -2,7 +2,8 @@
+++
disableToc = false
title = "FAQ"
weight = 9
weight = 24
icon = "quiz"
+++
## Frequently asked questions
@ -12,25 +13,13 @@ Here are answers to some of the most common questions.
### How do I get models?
<details>
Most gguf-based models should work, but newer models may require additions to the API. If a model doesn't work, please feel free to open up issues. However, be cautious about downloading models from the internet and directly onto your machine, as there may be security vulnerabilities in llama.cpp or ggml that could be maliciously exploited. Some models can be found on Hugging Face: https://huggingface.co/models?search=gguf, or models from gpt4all are compatible too: https://github.com/nomic-ai/gpt4all.
</details>
### What's the difference with Serge, or XXX?
<details>
LocalAI is a multi-model solution that doesn't focus on a specific model type (e.g., llama.cpp or alpaca.cpp), and it handles all of these internally for faster inference, making it easy to set up locally and to deploy to Kubernetes.
</details>
### Everything is slow, how come?
<details>
### Everything is slow, how is it possible?
There are a few situations why this could occur. Some tips are:
- Don't use HDD to store your models. Prefer SSD over HDD. If you are stuck with an HDD, disable `mmap` in the model config file so the model is fully loaded into memory.
@ -38,61 +27,31 @@ There are few situation why this could occur. Some tips are:
- Run LocalAI with `DEBUG=true`. This gives more information, including stats on the token inference speed.
- Check that you are actually getting an output: run a simple curl request with `"stream": true` to see how fast the model is responding (see the example below).
</details>
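For the streaming check mentioned above, a minimal request sketch (the model name is a placeholder for whatever model you have configured):
```bash
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
  "model": "your-model-name",
  "messages": [{"role": "user", "content": "How are you?"}],
  "stream": true
}'
```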
### Can I use it with a Discord bot, or XXX?
<details>
Yes! If the client uses OpenAI and supports setting a different base URL to send requests to, you can use the LocalAI endpoint. This allows LocalAI to be used with every application that was built to work with OpenAI, without changing the application!
</details>
### Can this leverage GPUs?
<details>
There is partial GPU support, see build instructions above.
</details>
There is GPU support, see {{%relref "docs/features/GPU-acceleration" %}}.
### Where is the webUI?
<details>
There is the availability of localai-webui and chatbot-ui in the examples section and can be setup as per the instructions. However as LocalAI is an API you can already plug it into existing projects that provides are UI interfaces to OpenAI's APIs. There are several already on github, and should be compatible with LocalAI already (as it mimics the OpenAI API)
</details>
The examples section includes localai-webui and chatbot-ui, which can be set up as per the instructions. However, as LocalAI is an API, you can already plug it into existing projects that provide UI frontends to OpenAI's APIs. There are several on GitHub that should already be compatible with LocalAI (as it mimics the OpenAI API)
### Does it work with AutoGPT?
<details>
Yes, see the [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/)!
</details>
### How can I troubleshoot when something is wrong?
<details>
Enable the debug mode by setting `DEBUG=true` in the environment variables. This will give you more information on what's going on.
You can also specify `--debug` in the command line.
</details>
### I'm getting 'invalid pitch' error when running with CUDA, what's wrong?
<details>
This typically happens when your prompt exceeds the context size. Try to reduce the prompt size, or increase the context size.
</details>
### I'm getting a 'SIGILL' error, what's wrong?
<details>
Your CPU probably does not have support for certain instructions that are compiled by default in the pre-built binaries. If you are running in a container, try setting `REBUILD=true` and disable the CPU instructions that are not compatible with your CPU. For instance: `CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make build`
</details>
Your CPU probably does not have support for certain instructions that are compiled by default in the pre-built binaries. If you are running in a container, try setting `REBUILD=true` and disable the CPU instructions that are not compatible with your CPU. For instance: `CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF" make build`

View File

@ -1,22 +1,23 @@
+++
disableToc = false
title = "⚡ GPU acceleration"
weight = 2
weight = 9
+++
{{% notice note %}}
{{% alert context="warning" %}}
Section under construction
{{% /notice %}}
{{% /alert %}}
This section contains instruction on how to use LocalAI with GPU acceleration.
{{% notice note %}}
For accelleration for AMD or Metal HW there are no specific container images, see the [build]({{%relref "build/#acceleration" %}})
{{% /notice %}}
{{% alert icon="⚡" context="warning" %}}
For acceleration on AMD or Metal hardware there are no specific container images, see the [build]({{%relref "docs/getting-started/build#Acceleration" %}})
{{% /alert %}}
### CUDA(NVIDIA) acceleration
#### Requirements
Requirement: nvidia-container-toolkit (installation instructions [1](https://www.server-world.info/en/note?os=Ubuntu_22.04&p=nvidia&f=2) [2](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html))
To check which CUDA version you need, you can run either `nvidia-smi` or `nvcc --version`.
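As a sketch, once `nvidia-container-toolkit` is installed you can expose the GPUs to the container with `--gpus all` (the image tag below is only an example; pick the CUDA 11 or CUDA 12 variant matching your driver):
```bash
docker run --rm -ti --gpus all -p 8080:8080 \
  -e DEBUG=true -e MODELS_PATH=/models -e THREADS=4 \
  -v $PWD/models:/models \
  quay.io/go-skynet/local-ai:master-cublas-cuda12
```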

View File

@ -0,0 +1,7 @@
+++
disableToc = false
title = "Features"
weight = 8
icon = "feature_search"
+++

View File

@ -1,10 +1,12 @@
+++
disableToc = false
title = "🔈 Audio to text"
weight = 2
weight = 16
+++
The transcription endpoint allows to convert audio files to text. The endpoint is based on [whisper.cpp](https://github.com/ggerganov/whisper.cpp), a C++ library for audio transcription. The endpoint supports the audio formats supported by `ffmpeg`.
Audio to text models are models that can generate text from an audio file.
The transcription endpoint allows you to convert audio files to text. The endpoint is based on [whisper.cpp](https://github.com/ggerganov/whisper.cpp), a C++ library for audio transcription. The endpoint input supports all the audio formats supported by `ffmpeg`.
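For instance, a transcription request sketch (assuming a whisper model configured under the name `whisper-1` and an audio file `gb1.ogg` in the current directory):
```bash
curl http://localhost:8080/v1/audio/transcriptions \
  -H "Content-Type: multipart/form-data" \
  -F file="@$PWD/gb1.ogg" \
  -F model="whisper-1"
```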
## Usage

View File

@ -2,20 +2,20 @@
+++
disableToc = false
title = "✍️ Constrained grammars"
weight = 6
weight = 15
+++
The chat endpoint accepts an additional `grammar` parameter which takes a [BNF defined grammar](https://en.wikipedia.org/wiki/Backus%E2%80%93Naur_form).
This constrains the LLM output to a user-defined schema, making it possible to generate `JSON`, `YAML`, and anything else that can be defined with a BNF grammar.
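As a minimal sketch (the model name is a placeholder for a `llama.cpp`-compatible model you have configured), a grammar restricting the answer to yes/no can be passed directly in the request:
```bash
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
  "model": "your-model-name",
  "messages": [{"role": "user", "content": "Do you like apples?"}],
  "grammar": "root ::= (\"yes\" | \"no\")"
}'
```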
{{% notice note %}}
This feature works only with models compatible with the [llama.cpp](https://github.com/ggerganov/llama.cpp) backend (see also [Model compatibility]({{%relref "model-compatibility" %}})). For details on how it works, see the upstream PRs: https://github.com/ggerganov/llama.cpp/pull/1773, https://github.com/ggerganov/llama.cpp/pull/1887
{{% /notice %}}
{{% alert note %}}
This feature works only with models compatible with the [llama.cpp](https://github.com/ggerganov/llama.cpp) backend (see also [Model compatibility]({{%relref "docs/reference/compatibility-table" %}})). For details on how it works, see the upstream PRs: https://github.com/ggerganov/llama.cpp/pull/1773, https://github.com/ggerganov/llama.cpp/pull/1887
{{% /alert %}}
## Setup
Follow the setup instructions from the [LocalAI functions]({{%relref "features/openai-functions" %}}) page.
Follow the setup instructions from the [LocalAI functions]({{%relref "docs/features/openai-functions" %}}) page.
## 💡 Usage example

View File

@ -2,7 +2,7 @@
+++
disableToc = false
title = "🧠 Embeddings"
weight = 2
weight = 13
+++
LocalAI supports generating embeddings for text or lists of tokens.
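For example, a request sketch against the OpenAI-compatible embeddings endpoint (assuming an embedding-capable model is configured under the name used below):
```bash
curl http://localhost:8080/embeddings -X POST -H "Content-Type: application/json" -d '{
  "input": "Your text string goes here",
  "model": "text-embedding-ada-002"
}'
```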
@ -73,7 +73,7 @@ parameters:
The `sentencetransformers` backend uses Python [sentence-transformers](https://github.com/UKPLab/sentence-transformers). For a list of all pre-trained models available see here: https://github.com/UKPLab/sentence-transformers#pre-trained-models
{{% notice note %}}
{{% alert note %}}
- The `sentencetransformers` backend is an optional backend of LocalAI and uses Python. If you are running `LocalAI` from the containers you are good to go and should be already configured for use.
- If you are running `LocalAI` manually you must install the python dependencies (`make prepare-extra-conda-environments`). This requires `conda` to be installed.
@ -82,7 +82,7 @@ The `sentencetransformers` backend uses Python [sentence-transformers](https://g
- The `sentencetransformers` backend supports only embeddings of text, not of tokens. If you need to embed tokens you can use the `bert` backend or `llama.cpp`.
- No models are required to be downloaded before using the `sentencetransformers` backend. The models will be downloaded automatically the first time the API is used.
{{% /notice %}}
{{% /alert %}}
## Llama.cpp embeddings

View File

@ -2,12 +2,12 @@
+++
disableToc = false
title = "🆕 GPT Vision"
weight = 2
weight = 14
+++
{{% notice note %}}
{{% alert note %}}
Available only on `master` builds
{{% /notice %}}
{{% /alert %}}
LocalAI supports understanding images by using [LLaVA](https://llava.hliu.cc/), and implements the [GPT Vision API](https://platform.openai.com/docs/guides/vision) from OpenAI.
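A request sketch following the OpenAI Vision API shape (assuming a LLaVA-based model configured under the name `llava`; the image URL is a placeholder):
```bash
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
  "model": "llava",
  "messages": [{"role": "user", "content": [
    {"type": "text", "text": "What is in the image?"},
    {"type": "image_url", "image_url": {"url": "https://example.com/image.jpg"}}
  ]}],
  "max_tokens": 200
}'
```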

View File

@ -2,13 +2,13 @@
+++
disableToc = false
title = "🎨 Image generation"
weight = 2
weight = 12
+++
![anime_girl](https://github.com/go-skynet/LocalAI/assets/2420543/8aaca62a-e864-4011-98ae-dcc708103928)
(Generated with [AnimagineXL](https://huggingface.co/Linaqruf/animagine-xl))
LocalAI supports generating images with Stable diffusion, running on CPU using a C++ implementation, [Stable-Diffusion-NCNN](https://github.com/EdVince/Stable-Diffusion-NCNN) ([binding](https://github.com/mudler/go-stable-diffusion)) and [🧨 Diffusers]({{%relref "model-compatibility/diffusers" %}}).
LocalAI supports generating images with Stable Diffusion, running on CPU using C++ and Python implementations.
## Usage
@ -35,7 +35,9 @@ curl http://localhost:8080/v1/images/generations -H "Content-Type: application/j
}'
```
## stablediffusion-cpp
## Backends
### stablediffusion-cpp
| mode=0 | mode=1 (winograd/sgemm) |
|------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------|
@ -45,7 +47,7 @@ curl http://localhost:8080/v1/images/generations -H "Content-Type: application/j
Note: the image generator supports images up to 512x512. You can however use other tools to upscale the image, for instance: https://github.com/upscayl/upscayl.
### Setup
#### Setup
Note: In order to use the `images/generation` endpoint with the `stablediffusion` C++ backend, you need to build LocalAI with `GO_TAGS=stablediffusion`. If you are using the container images, it is already enabled.
@ -128,11 +130,14 @@ models
{{< /tabs >}}
## Diffusers
### Diffusers
This is an extra backend - in the container is already available and there is nothing to do for the setup.
[Diffusers](https://huggingface.co/docs/diffusers/index) is the go-to library for state-of-the-art pretrained diffusion models for generating images, audio, and even 3D structures of molecules. LocalAI has a diffusers backend which allows image generation using the `diffusers` library.
### Model setup
![anime_girl](https://github.com/go-skynet/LocalAI/assets/2420543/8aaca62a-e864-4011-98ae-dcc708103928)
(Generated with [AnimagineXL](https://huggingface.co/Linaqruf/animagine-xl))
#### Model setup
The models will be downloaded the first time you use the backend from `huggingface` automatically.
@ -150,3 +155,198 @@ diffusers:
  cuda: false # Enable for GPU usage (CUDA)
  scheduler_type: euler_a
```
#### Dependencies
This is an extra backend: it is already available in the container images and there is nothing to do for the setup. Do not use *core* images (ending with `-core`). If you are building manually, see the [build instructions]({{%relref "docs/getting-started/build" %}}).
#### Model setup
The models will be downloaded automatically from `huggingface` the first time you use the backend.
Create a model configuration file in the `models` directory, for instance to use `Linaqruf/animagine-xl` with CUDA:
```yaml
name: animagine-xl
parameters:
  model: Linaqruf/animagine-xl
backend: diffusers
cuda: true
f16: true
diffusers:
  scheduler_type: euler_a
```
#### Local models
You can also use local models, or modify some parameters like `clip_skip`, `scheduler_type`, for instance:
```yaml
name: stablediffusion
parameters:
  model: toonyou_beta6.safetensors
backend: diffusers
step: 30
f16: true
cuda: true
diffusers:
  pipeline_type: StableDiffusionPipeline
  enable_parameters: "negative_prompt,num_inference_steps,clip_skip"
  scheduler_type: "k_dpmpp_sde"
  cfg_scale: 8
  clip_skip: 11
```
#### Configuration parameters
The following parameters are available in the configuration file:
| Parameter | Description | Default |
| --- | --- | --- |
| `f16` | Force the usage of `float16` instead of `float32` | `false` |
| `step` | Number of steps to run the model for | `30` |
| `cuda` | Enable CUDA acceleration | `false` |
| `enable_parameters` | Parameters to enable for the model | `negative_prompt,num_inference_steps,clip_skip` |
| `scheduler_type` | Scheduler type | `k_dpp_sde` |
| `cfg_scale` | Classifier-free guidance scale | `8` |
| `clip_skip` | Clip skip | None |
| `pipeline_type` | Pipeline type | `AutoPipelineForText2Image` |
Several scheduler types are available:
| Scheduler | Description |
| --- | --- |
| `ddim` | DDIM |
| `pndm` | PNDM |
| `heun` | Heun |
| `unipc` | UniPC |
| `euler` | Euler |
| `euler_a` | Euler a |
| `lms` | LMS |
| `k_lms` | LMS Karras |
| `dpm_2` | DPM2 |
| `k_dpm_2` | DPM2 Karras |
| `dpm_2_a` | DPM2 a |
| `k_dpm_2_a` | DPM2 a Karras |
| `dpmpp_2m` | DPM++ 2M |
| `k_dpmpp_2m` | DPM++ 2M Karras |
| `dpmpp_sde` | DPM++ SDE |
| `k_dpmpp_sde` | DPM++ SDE Karras |
| `dpmpp_2m_sde` | DPM++ 2M SDE |
| `k_dpmpp_2m_sde` | DPM++ 2M SDE Karras |
Available pipeline types:
| Pipeline type | Description |
| --- | --- |
| `StableDiffusionPipeline` | Stable diffusion pipeline |
| `StableDiffusionImg2ImgPipeline` | Stable diffusion image to image pipeline |
| `StableDiffusionDepth2ImgPipeline` | Stable diffusion depth to image pipeline |
| `DiffusionPipeline` | Diffusion pipeline |
| `StableDiffusionXLPipeline` | Stable diffusion XL pipeline |
#### Usage
##### Text to Image
Use the `image` generation endpoint with the `model` name from the configuration file:
```bash
curl http://localhost:8080/v1/images/generations \
-H "Content-Type: application/json" \
-d '{
"prompt": "<positive prompt>|<negative prompt>",
"model": "animagine-xl",
"step": 51,
"size": "1024x1024"
}'
```
##### Image to Image
https://huggingface.co/docs/diffusers/using-diffusers/img2img
An example model (GPU):
```yaml
name: stablediffusion-edit
parameters:
  model: nitrosocke/Ghibli-Diffusion
backend: diffusers
step: 25
cuda: true
f16: true
diffusers:
  pipeline_type: StableDiffusionImg2ImgPipeline
  enable_parameters: "negative_prompt,num_inference_steps,image"
```
```bash
IMAGE_PATH=/path/to/your/image
(echo -n '{"file": "'; base64 $IMAGE_PATH; echo '", "prompt": "a sky background","size": "512x512","model":"stablediffusion-edit"}') |
curl -H "Content-Type: application/json" -d @- http://localhost:8080/v1/images/generations
```
##### Depth to Image
https://huggingface.co/docs/diffusers/using-diffusers/depth2img
```yaml
name: stablediffusion-depth
parameters:
  model: stabilityai/stable-diffusion-2-depth
backend: diffusers
step: 50
# GPU acceleration (set f16/cuda to false to force CPU usage)
f16: true
cuda: true
diffusers:
  pipeline_type: StableDiffusionDepth2ImgPipeline
  enable_parameters: "negative_prompt,num_inference_steps,image"
  cfg_scale: 6
```
```bash
(echo -n '{"file": "'; base64 ~/path/to/image.jpeg; echo '", "prompt": "a sky background","size": "512x512","model":"stablediffusion-depth"}') |
curl -H "Content-Type: application/json" -d @- http://localhost:8080/v1/images/generations
```
##### img2vid
```yaml
name: img2vid
parameters:
  model: stabilityai/stable-video-diffusion-img2vid
backend: diffusers
step: 25
# GPU acceleration (set f16/cuda to false to force CPU usage)
f16: true
cuda: true
diffusers:
  pipeline_type: StableVideoDiffusionPipeline
```
```bash
(echo -n '{"file": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/svd/rocket.png?download=true","size": "512x512","model":"img2vid"}') |
curl -H "Content-Type: application/json" -X POST -d @- http://localhost:8080/v1/images/generations
```
##### txt2vid
```yaml
name: txt2vid
parameters:
  model: damo-vilab/text-to-video-ms-1.7b
backend: diffusers
step: 25
# GPU acceleration (set f16/cuda to false to force CPU usage)
f16: true
cuda: true
diffusers:
  pipeline_type: VideoDiffusionPipeline
  cuda: true
```
```bash
(echo -n '{"prompt": "spiderman surfing","size": "512x512","model":"txt2vid"}') |
curl -H "Content-Type: application/json" -X POST -d @- http://localhost:8080/v1/images/generations
```

View File

@ -2,7 +2,9 @@
+++
disableToc = false
title = "🖼️ Model gallery"
weight = 7
weight = 18
url = '/models'
+++
<h1 align="center">
@ -15,13 +17,13 @@ The model gallery is a (experimental!) collection of models configurations for [
To ease model installation, LocalAI provides a way to preload models on start and to download and install them at runtime. You can install models manually by copying them into the `models` directory, or use the API to configure, download and verify the model assets for you. As the UI is still a work in progress, you will find here the documentation about the API endpoints.
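For instance, a sketch of preloading a gallery model at startup via the `PRELOAD_MODELS` environment variable (the gallery entry and name below are examples):
```bash
PRELOAD_MODELS='[{"url": "github:go-skynet/model-gallery/gpt4all-j.yaml", "name": "gpt4all-j"}]' local-ai
```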
{{% notice note %}}
{{% alert note %}}
The models in this gallery are not directly maintained by LocalAI. If you find a model that is not working, please open an issue on the model gallery repository.
{{% /notice %}}
{{% /alert %}}
{{% notice note %}}
{{% alert note %}}
GPT and text generation models might have a license which is not permissive for commercial use or might be questionable or without any license at all. Please check the model license before using it. The official gallery contains only open licensed models.
{{% /notice %}}
{{% /alert %}}
## Useful Links and resources
@ -48,7 +50,7 @@ GALLERIES=[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.
where `github:go-skynet/model-gallery/index.yaml` will be expanded automatically to `https://raw.githubusercontent.com/go-skynet/model-gallery/main/index.yaml`.
{{% notice note %}}
{{% alert note %}}
As this feature is experimental, you need to run `local-ai` with a list of `GALLERIES`. Currently there are two galleries:
@ -63,19 +65,19 @@ GALLERIES=[{"name":"model-gallery", "url":"github:go-skynet/model-gallery/index.
If running with `docker-compose`, simply edit the `.env` file and uncomment the `GALLERIES` variable, and add the one you want to use.
{{% /notice %}}
{{% /alert %}}
{{% notice note %}}
{{% alert note %}}
You might not find all the models in this gallery. The gallery is updated automatically by CI. However, you can find most of the models on Hugging Face (https://huggingface.co/); generally a model should be available `~24h` after upload.
Under no circumstances are LocalAI and its developers responsible for the models in this gallery, as CI just indexes them and provides a convenient way to install them with an automatic configuration and a consistent API. Don't install models from authors you don't trust, and check the appropriate license for your use case. Models are automatically indexed and hosted on Hugging Face (https://huggingface.co/). For any issue with a model, please open an issue on the model gallery repository if it's a LocalAI misconfiguration; otherwise refer to the Hugging Face repository. If you think a model should not be listed, please reach out to us and we will remove it from the gallery.
{{% /notice %}}
{{% /alert %}}
{{% notice note %}}
{{% alert note %}}
There is no documentation yet on how to build a gallery or a repository - but you can find an example in the [model-gallery](https://github.com/go-skynet/model-gallery) repository.
{{% /notice %}}
{{% /alert %}}
### List Models
@ -117,7 +119,7 @@ where:
- `bert-embeddings` is the model name in the gallery
(read its [config here](https://github.com/go-skynet/model-gallery/blob/main/bert-embeddings.yaml)).
{{% notice note %}}
{{% alert note %}}
If the `huggingface` model gallery is enabled (it's enabled by default),
and the model has an entry in the model gallery's associated YAML config
(for `huggingface`, see [`model-gallery/huggingface.yaml`](https://github.com/go-skynet/model-gallery/blob/main/huggingface.yaml)),
@ -132,7 +134,7 @@ curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{
```
Note that the `id` can be used similarly when pre-loading models at start.
{{% /notice %}}
{{% /alert %}}
## How to install a model (without a gallery)
@ -217,7 +219,7 @@ YAML:
</details>
{{% notice note %}}
{{% alert note %}}
You can find already some open licensed models in the [model gallery](https://github.com/go-skynet/model-gallery).
@ -241,7 +243,7 @@ curl $LOCALAI/models/apply -H "Content-Type: application/json" -d '{
</details>
{{% /notice %}}
{{% /alert %}}
## Installing a model with a different name

View File

@ -2,7 +2,7 @@
+++
disableToc = false
title = "🔥 OpenAI functions"
weight = 2
weight = 17
+++
LocalAI supports running OpenAI functions with `llama.cpp` compatible models.
@ -67,13 +67,13 @@ response = openai.ChatCompletion.create(
# ...
```
{{% notice note %}}
{{% alert note %}}
When running the python script, be sure to:
- Set `OPENAI_API_KEY` environment variable to a random string (the OpenAI api key is NOT required!)
- Set `OPENAI_API_BASE` to point to your LocalAI service, for example `OPENAI_API_BASE=http://localhost:8080`
{{% /notice %}}
{{% /alert %}}
## Advanced

View File

@ -0,0 +1,263 @@
+++
disableToc = false
title = "📖 Text generation (GPT)"
weight = 10
+++
LocalAI supports generating text with GPT using `llama.cpp` and other backends (such as `rwkv.cpp`); see also the [Model compatibility]({{%relref "docs/reference/compatibility-table" %}}) for an up-to-date list of the supported model families.
Note:
- You can also specify the model name as part of the OpenAI token (see the example below).
- If only one model is available, the API will use it for all the requests.
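A sketch of the first note above, assuming the model name is passed as the value of the OpenAI token (i.e. the `Authorization: Bearer` header); `luna-chat` is a hypothetical configured model name:
```bash
curl http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer luna-chat" \
  -d '{"messages": [{"role": "user", "content": "Say this is a test!"}], "temperature": 0.7}'
```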
## API Reference
### Chat completions
https://platform.openai.com/docs/api-reference/chat
For example, to generate a chat completion, you can send a POST request to the `/v1/chat/completions` endpoint with the instruction as the request body:
```bash
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "ggml-koala-7b-model-q4_0-r2.bin",
"messages": [{"role": "user", "content": "Say this is a test!"}],
"temperature": 0.7
}'
```
Available additional parameters: `top_p`, `top_k`, `max_tokens`
### Edit completions
https://platform.openai.com/docs/api-reference/edits
To generate an edit completion you can send a POST request to the `/v1/edits` endpoint with the instruction as the request body:
```bash
curl http://localhost:8080/v1/edits -H "Content-Type: application/json" -d '{
"model": "ggml-koala-7b-model-q4_0-r2.bin",
"instruction": "rephrase",
"input": "Black cat jumped out of the window",
"temperature": 0.7
}'
```
Available additional parameters: `top_p`, `top_k`, `max_tokens`.
### Completions
https://platform.openai.com/docs/api-reference/completions
To generate a completion, you can send a POST request to the `/v1/completions` endpoint with the prompt as the request body:
```bash
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
"model": "ggml-koala-7b-model-q4_0-r2.bin",
"prompt": "A long time ago in a galaxy far, far away",
"temperature": 0.7
}'
```
Available additional parameters: `top_p`, `top_k`, `max_tokens`
### List models
You can list all the models available with:
```bash
curl http://localhost:8080/v1/models
```
## Backends
### AutoGPTQ
[AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ) is an easy-to-use LLM quantization package with user-friendly APIs, based on the GPTQ algorithm.
#### Prerequisites
This is an extra backend: it is already available in the container images and there is nothing to do for the setup.
If you are building LocalAI locally, you need to install [AutoGPTQ manually](https://github.com/PanQiWei/AutoGPTQ#quick-installation).
#### Model setup
The models are automatically downloaded from `huggingface` the first time if not already present. It is possible to define models via a `YAML` config file, or just by querying the endpoint with the `huggingface` repository model name. For example, create a `YAML` config file in `models/`:
```
name: orca
backend: autogptq
model_base_name: "orca_mini_v2_13b-GPTQ-4bit-128g.no-act.order"
parameters:
model: "TheBloke/orca_mini_v2_13b-GPTQ"
# ...
```
Test with:
```bash
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "orca",
"messages": [{"role": "user", "content": "How are you?"}],
"temperature": 0.1
}'
```
### RWKV
A full example on how to run a rwkv model is in the [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/rwkv).
Note: rwkv models need to specify the `rwkv` backend in the YAML config file and have an associated tokenizer that must be provided along with the model:
```
36464540 -rw-r--r-- 1 mudler mudler 1.2G May 3 10:51 rwkv_small
36464543 -rw-r--r-- 1 mudler mudler 2.4M May 3 10:51 rwkv_small.tokenizer.json
```
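A minimal configuration sketch for the files above (the config name is arbitrary; the tokenizer JSON just needs to sit next to the model file in the `models` directory):
```bash
cat > models/rwkv.yaml <<'EOF'
name: rwkv
backend: rwkv
parameters:
  # relative to the models path; rwkv_small.tokenizer.json must be alongside it
  model: rwkv_small
EOF
```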
### llama.cpp
[llama.cpp](https://github.com/ggerganov/llama.cpp) is a popular port of Facebook's LLaMA model in C/C++.
{{% alert note %}}
The `ggml` file format has been deprecated. If you are using `ggml` models and you are configuring your model with a YAML file, use the `llama-ggml` backend instead. If you are relying on automatic detection of the model, you should be fine. For `gguf` models, use the `llama` backend. The Go backend is deprecated as well but still available as `go-llama`. The Go backend still supports features not available in the mainline: speculative sampling and embeddings.
{{% /alert %}}
#### Features
The `llama.cpp` model supports the following features:
- [📖 Text generation (GPT)]({{%relref "docs/features/text-generation" %}})
- [🧠 Embeddings]({{%relref "docs/features/embeddings" %}})
- [🔥 OpenAI functions]({{%relref "docs/features/openai-functions" %}})
- [✍️ Constrained grammars]({{%relref "docs/features/constrained_grammars" %}})
#### Setup
LocalAI supports `llama.cpp` models out of the box. You can use `llama.cpp` models in the same way as any other model.
##### Manual setup
It is sufficient to copy the `ggml` or `gguf` model files into the `models` folder. You can refer to the model in the `model` parameter in the API calls.
[You can optionally create an associated YAML]({{%relref "docs/advanced" %}}) model config file to tune the model's parameters or apply a template to the prompt.
Prompt templates are useful for models that are fine-tuned towards a specific prompt.
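A rough sketch of a manual setup with a prompt template (model file, config name and template content are hypothetical; templates are Go templates where `{{.Input}}` is replaced with the prompt):
```bash
# hypothetical gguf model already copied into models/
cat > models/luna-chat.yaml <<'EOF'
name: luna-chat
backend: llama
parameters:
  model: luna-ai-llama2-uncensored.Q4_K_M.gguf
template:
  chat: luna-chat-message
EOF

# template applied to the chat prompt; {{.Input}} carries the rendered messages
cat > models/luna-chat-message.tmpl <<'EOF'
USER: {{.Input}}
ASSISTANT:
EOF
```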
##### Automatic setup
LocalAI supports model galleries which are indexes of models. For instance, the huggingface gallery contains a large curated index of models from the huggingface model hub for `ggml` or `gguf` models.
For instance, if you have the galleries enabled and LocalAI already running, you can just start chatting with models in huggingface by running:
```bash
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "TheBloke/WizardLM-13B-V1.2-GGML/wizardlm-13b-v1.2.ggmlv3.q2_K.bin",
"messages": [{"role": "user", "content": "Say this is a test!"}],
"temperature": 0.1
}'
```
LocalAI will automatically download and configure the model in the `models` directory.
Models can also be preloaded or downloaded on demand. To learn about model galleries, check out the [model gallery documentation]({{%relref "docs/features/model-gallery" %}}).
#### YAML configuration
To use the `llama.cpp` backend, specify `llama` as the backend in the YAML file:
```yaml
name: llama
backend: llama
parameters:
  # Relative to the models path
  model: file.gguf.bin
```
In the example above we specify `llama` as the backend to restrict loading to `gguf` models only.
For instance, to use the `llama-ggml` backend for `ggml` models:
```yaml
name: llama
backend: llama-ggml
parameters:
  # Relative to the models path
  model: file.ggml.bin
```
#### Reference
- [llama](https://github.com/ggerganov/llama.cpp)
- [binding](https://github.com/go-skynet/go-llama.cpp)
### exllama/2
[Exllama](https://github.com/turboderp/exllama) is "a more memory-efficient rewrite of the HF transformers implementation of Llama for use with quantized weights". Both `exllama` and `exllama2` are supported.
#### Model setup
Download the model as a folder inside the `models` directory and create a YAML file specifying the `exllama` backend. For instance with the `TheBloke/WizardLM-7B-uncensored-GPTQ` model:
```
$ git lfs install
$ cd models && git clone https://huggingface.co/TheBloke/WizardLM-7B-uncensored-GPTQ
$ ls models/
.keep WizardLM-7B-uncensored-GPTQ/ exllama.yaml
$ cat models/exllama.yaml
name: exllama
parameters:
  model: WizardLM-7B-uncensored-GPTQ
backend: exllama
# Note: you can also specify "exllama2" if it's an exllama2 model here
# ...
```
Test with:
```bash
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "exllama",
"messages": [{"role": "user", "content": "How are you?"}],
"temperature": 0.1
}'
```
### vLLM
[vLLM](https://github.com/vllm-project/vllm) is a fast and easy-to-use library for LLM inference.
LocalAI has a built-in integration with vLLM, and it can be used to run models. You can check out `vllm` performance [here](https://github.com/vllm-project/vllm#performance).
#### Setup
Create a YAML file for the model you want to use with `vllm`.
To set up a model, you just need to specify the model name in the YAML config file:
```yaml
name: vllm
backend: vllm
parameters:
model: "facebook/opt-125m"
# Decomment to specify a quantization method (optional)
# quantization: "awq"
```
The backend will automatically download the required files in order to run the model.
#### Usage
Use the `completions` endpoint by specifying the `vllm` backend:
```
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
"model": "vllm",
"prompt": "Hello, my name is",
"temperature": 0.1, "top_p": 0.1
}'
```

View File

@ -0,0 +1,158 @@
+++
disableToc = false
title = "🗣 Text to audio (TTS)"
weight = 11
+++
The `/tts` endpoint can be used to generate speech from text.
## Usage
Input: `input`, `model`
For example, to generate an audio file, you can send a POST request to the `/tts` endpoint with the instruction as the request body:
```bash
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
"input": "Hello world",
"model": "tts"
}'
```
Returns an `audio/wav` file.
## Backends
### 🐸 Coqui
Required: don't use `LocalAI` images ending with the `-core` tag. Python dependencies are required in order to use this backend.
Coqui works without any configuration; to test it, you can run the following curl command:
```
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
"backend": "coqui",
"model": "tts_models/en/ljspeech/glow-tts",
"input":"Hello, this is a test!"
}'
```
### Bark
[Bark](https://github.com/suno-ai/bark) allows generating audio from text prompts.
This is an extra backend: it is already available in the container images and there is nothing to do for the setup.
#### Model setup
There is nothing to be done for the model setup. You can start using bark right away; the models will be downloaded the first time you use the backend.
#### Usage
Use the `tts` endpoint by specifying the `bark` backend:
```
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
"backend": "bark",
"input":"Hello!"
}' | aplay
```
To specify a voice from https://github.com/suno-ai/bark#-voice-presets ( https://suno-ai.notion.site/8b8e8749ed514b0cbf3f699013548683?v=bc67cff786b04b50b3ceb756fd05f68c ), use the `model` parameter:
```
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
"backend": "bark",
"input":"Hello!",
"model": "v2/en_speaker_4"
}' | aplay
```
### Piper
To install the `piper` audio models manually:
- Download Voices from https://github.com/rhasspy/piper/releases/tag/v0.0.2
- Extract the `.tar.tgz` files (.onnx,.json) inside `models`
- Run the following command to test the model is working
To use the tts endpoint, run the following command. You can specify a backend with the `backend` parameter. For example, to use the `piper` backend:
```bash
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
"model":"it-riccardo_fasol-x-low.onnx",
"backend": "piper",
"input": "Ciao, sono Ettore"
}' | aplay
```
Note:
- `aplay` is a Linux command. You can use other tools to play the audio file.
- The model name is the filename with the extension.
- The model name is case sensitive.
- LocalAI must be compiled with the `GO_TAGS=tts` flag.
### Transformers-musicgen
LocalAI also has experimental support for `transformers-musicgen` for the generation of short musical compositions. Currently, this is implemented via the same requests used for text to speech:
```
curl --request POST \
--url http://localhost:8080/tts \
--header 'Content-Type: application/json' \
--data '{
"backend": "transformers-musicgen",
"model": "facebook/musicgen-medium",
"input": "Cello Rave"
}' | aplay
```
Future versions of LocalAI will expose additional control over audio generation beyond the text prompt.
### Vall-E-X
[VALL-E-X](https://github.com/Plachtaa/VALL-E-X) is an open source implementation of Microsoft's VALL-E X zero-shot TTS model.
#### Setup
The backend will automatically download the required files in order to run the model.
This is an extra backend: it is already available in the container images and there is nothing to do for the setup. If you are building LocalAI locally, you need to install Vall-E-X first.
#### Usage
Use the tts endpoint by specifying the vall-e-x backend:
```
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
"backend": "vall-e-x",
"input":"Hello!"
}' | aplay
```
#### Voice cloning
In order to use voice cloning capabilities you must create a `YAML` configuration file to set up a model:
```yaml
name: cloned-voice
backend: vall-e-x
parameters:
model: "cloned-voice"
vall-e:
# The path to the audio file to be cloned
# relative to the models directory
audio_path: "path-to-wav-source.wav"
```
Then you can specify the model name in the requests:
```
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
"backend": "vall-e-x",
"model": "cloned-voice",
"input":"Hello!"
}' | aplay
```

View File

@ -0,0 +1,7 @@
+++
disableToc = false
title = "Getting started"
weight = 2
icon = "rocket_launch"
+++

View File

@ -1,14 +1,20 @@
+++
disableToc = false
title = "Build"
weight = 5
title = "Build LocalAI from source"
weight = 6
url = '/basics/build/'
ico = "rocket_launch"
+++
### Build
LocalAI can be built as a container image or as a single, portable binary. Note that some model architectures might require Python libraries, which are not included in the binary. The binary contains only the core backends written in Go and C++.
LocalAI's extensible architecture allows you to add your own backends, which can be written in any language, and as such the container images also contain the Python dependencies to run all the available backends (for example, in order to run backends like __Diffusers__, which allows generating images and videos from text).
In some cases you might want to re-build LocalAI from source (for instance to leverage Apple Silicon acceleration), or to build a custom container image with your own backends. This section contains instructions on how to build LocalAI from source.
#### Container image
Requirements:
@ -23,7 +29,9 @@ docker build -t localai .
docker run localai
```
#### Locally
#### Build LocalAI locally
##### Requirements
In order to build LocalAI locally, you need the following requirements:
@ -34,22 +42,22 @@ In order to build LocalAI locally, you need the following requirements:
To install the dependencies follow the instructions below:
{{< tabs >}}
{{% tab name="Apple" %}}
{{< tabs tabTotal="3" >}}
{{% tab tabName="Apple" %}}
```bash
brew install abseil cmake go grpc protobuf wget
```
{{% /tab %}}
{{% tab name="Debian" %}}
{{% tab tabName="Debian" %}}
```bash
apt install golang protobuf-compiler-grpc libgrpc-dev make cmake
```
{{% /tab %}}
{{% tab name="From source" %}}
{{% tab tabName="From source" %}}
Specify `BUILD_GRPC_FOR_BACKEND_LLAMA=true` to build automatically the gRPC dependencies
@ -60,7 +68,7 @@ make ... BUILD_GRPC_FOR_BACKEND_LLAMA=true build
{{% /tab %}}
{{< /tabs >}}
##### Build
To build LocalAI with `make`:
```
@ -71,7 +79,7 @@ make build
This should produce the binary `local-ai`
{{% notice note %}}
{{% alert note %}}
#### CPU flagset compatibility
@ -89,7 +97,7 @@ docker run quay.io/go-skynet/localai
docker run --rm -ti -p 8080:8080 -e DEBUG=true -e MODELS_PATH=/models -e THREADS=1 -e REBUILD=true -e CMAKE_ARGS="-DLLAMA_F16C=OFF -DLLAMA_AVX512=OFF -DLLAMA_AVX2=OFF -DLLAMA_AVX=OFF -DLLAMA_FMA=OFF" -v $PWD/models:/models quay.io/go-skynet/local-ai:latest
```
{{% /notice %}}
{{% /alert %}}
### Example: Build on mac
@ -133,7 +141,7 @@ curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/jso
**Requirements**: OpenCV, Gomp
Image generation is experimental and requires `GO_TAGS=stablediffusion` to be set during build:
Image generation requires `GO_TAGS=stablediffusion` or `GO_TAGS=tinydream` to be set during build:
```
make GO_TAGS=stablediffusion build
@ -156,7 +164,7 @@ List of the variables available to customize the build:
| Variable | Default | Description |
| ---------------------| ------- | ----------- |
| `BUILD_TYPE` | None | Build type. Available: `cublas`, `openblas`, `clblas`, `metal`,`hipblas` |
| `GO_TAGS` | `tts stablediffusion` | Go tags. Available: `stablediffusion`, `tts` |
| `GO_TAGS` | `tts stablediffusion` | Go tags. Available: `stablediffusion`, `tts`, `tinydream` |
| `CLBLAST_DIR` | | Specify a CLBlast directory |
| `CUDA_LIBPATH` | | Specify a CUDA library path |
@ -216,7 +224,7 @@ make BUILD_TYPE=clblas build
To specify a clblast dir set: `CLBLAST_DIR`
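For instance, a minimal sketch (the path below is a placeholder for wherever CLBlast is installed on your system):
```bash
make BUILD_TYPE=clblas CLBLAST_DIR=/path/to/CLBlast build
```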
### Metal (Apple Silicon)
#### Metal (Apple Silicon)
```
make BUILD_TYPE=metal build
@ -225,7 +233,16 @@ make BUILD_TYPE=metal build
# Note: only models quantized with q4_0 are supported!
```
### Build only a single backend
### Windows compatibility
Make sure to give enough resources to the running container. See https://github.com/go-skynet/LocalAI/issues/2
### Examples
More advanced build options are available, for instance to build only a single backend.
#### Build only a single backend
You can control the backends that are built by setting the `GRPC_BACKENDS` environment variable. For instance, to build only the `llama-cpp` backend:
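```bash
make GRPC_BACKENDS=backend-assets/grpc/llama-cpp build
```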
@ -235,14 +252,10 @@ make GRPC_BACKENDS=backend-assets/grpc/llama-cpp build
By default, all the backends are built.
### Specific llama.cpp version
#### Specific llama.cpp version
To build with a specific version of llama.cpp, set `CPPLLAMA_VERSION` to the desired tag or commit SHA:
```
CPPLLAMA_VERSION=<sha> make build
```
### Windows compatibility
Make sure to give enough resources to the running container. See https://github.com/go-skynet/LocalAI/issues/2


@ -0,0 +1,71 @@
+++
disableToc = false
title = "Customizing the Model"
weight = 4
icon = "rocket_launch"
+++
To customize the prompt template or the default settings of the model, a configuration file is utilized. This file must adhere to the LocalAI YAML configuration standards. For comprehensive syntax details, refer to the [advanced documentation]({{%relref "docs/advanced" %}}). The configuration file can be located either remotely (such as in a GitHub Gist or any other remote URL) or within the local filesystem.
LocalAI can be started using either its container image or binary, with a command that includes URLs of model config files or uses a shorthand format (like `huggingface://` or `github://`), which is then expanded into complete URLs.
The configuration can also be set via an environment variable. For instance:
```
# Command-Line Arguments
local-ai github://owner/repo/file.yaml@branch
# Environment Variable
MODELS="github://owner/repo/file.yaml@branch,github://owner/repo/file.yaml@branch" local-ai
```
Here's an example that starts the **phi-2** model:
```bash
docker run -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core https://gist.githubusercontent.com/mudler/ad601a0488b497b69ec549150d9edd18/raw/a8a8869ef1bb7e3830bf5c0bae29a0cce991ff8d/phi-2.yaml
```
{{% alert icon="" %}}
The model configurations used in the quickstart are accessible here: [https://github.com/mudler/LocalAI/tree/master/embedded/models](https://github.com/mudler/LocalAI/tree/master/embedded/models). Contributions are welcome; please feel free to submit a Pull Request.
The `phi-2` model configuration from the quickstart is expanded from [https://github.com/mudler/LocalAI/blob/master/examples/configurations/phi-2.yaml](https://github.com/mudler/LocalAI/blob/master/examples/configurations/phi-2.yaml).
{{% /alert %}}
## Example: Customizing the Prompt Template
To modify the prompt template, create a Github gist or a Pastebin file, and copy the content from [https://github.com/mudler/LocalAI/blob/master/examples/configurations/phi-2.yaml](https://github.com/mudler/LocalAI/blob/master/examples/configurations/phi-2.yaml). Alter the fields as needed:
```yaml
name: phi-2
context_size: 2048
f16: true
threads: 11
gpu_layers: 90
mmap: true
parameters:
# Reference any HF model or a local file here
model: huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf
temperature: 0.2
top_k: 40
top_p: 0.95
template:
chat: &template |
Instruct: {{.Input}}
Output:
# Modify the prompt template here ^^^ as per your requirements
completion: *template
```
Then, launch LocalAI using your gist's URL:
```bash
## Important! Substitute with your gist's URL!
docker run -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core https://gist.githubusercontent.com/xxxx/phi-2.yaml
```
## Next Steps
- Visit the [advanced section]({{%relref "docs/advanced" %}}) for more insights on prompt templates and configuration files.
- To learn about fine-tuning an LLM model, check out the [fine-tuning section]({{%relref "docs/advanced/fine-tuning" %}}).


@ -0,0 +1,150 @@
+++
disableToc = false
title = "Run models manually"
weight = 5
icon = "rocket_launch"
+++
1. Ensure you have a model file, a configuration YAML file, or both. Customize model defaults and specific settings with a configuration file. For advanced configurations, refer to the [Advanced Documentation](docs/advanced).
2. For GPU Acceleration instructions, visit [GPU acceleration](docs/features/gpu-acceleration).
{{< tabs tabTotal="5" >}}
{{% tab tabName="Docker" %}}
```bash
# Prepare the models in the `models` directory
mkdir models
# copy your models to it
cp your-model.gguf models/
# run the LocalAI container
docker run -p 8080:8080 -v $PWD/models:/models -ti --rm quay.io/go-skynet/local-ai:latest --models-path /models --context-size 700 --threads 4
# You should see:
#
# ┌───────────────────────────────────────────────────┐
# │ Fiber v2.42.0 │
# │ http://127.0.0.1:8080 │
# │ (bound on host 0.0.0.0 and port 8080) │
# │ │
# │ Handlers ............. 1 Processes ........... 1 │
# │ Prefork ....... Disabled PID ................. 1 │
# └───────────────────────────────────────────────────┘
# Try the endpoint with curl
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
"model": "your-model.gguf",
"prompt": "A long time ago in a galaxy far, far away",
"temperature": 0.7
}'
```
{{% alert note %}}
- If running on Apple Silicon (ARM) it is **not** suggested to run on Docker due to emulation. Follow the [build instructions]({{%relref "docs/getting-started/build" %}}) to use Metal acceleration for full GPU support.
- If you are running Apple x86_64 you can use `docker`; there is no additional benefit in building it from source.
{{% /alert %}}
{{% /tab %}}
{{% tab tabName="Docker compose" %}}
```bash
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI
cd LocalAI
# (optional) Checkout a specific LocalAI tag
# git checkout -b build <TAG>
# copy your models to models/
cp your-model.gguf models/
# (optional) Edit the .env file to set things like context size and threads
# vim .env
# start with docker compose
docker compose up -d --pull always
# or you can build the images with:
# docker compose up -d --build
# Now API is accessible at localhost:8080
curl http://localhost:8080/v1/models
# {"object":"list","data":[{"id":"your-model.gguf","object":"model"}]}
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
"model": "your-model.gguf",
"prompt": "A long time ago in a galaxy far, far away",
"temperature": 0.7
}'
```
Note: If you are on Windows, please make sure the project is on the Linux filesystem, otherwise loading models might be slow. For more info, see the [Microsoft Docs](https://learn.microsoft.com/en-us/windows/wsl/filesystems).
{{% /tab %}}
{{% tab tabName="Kubernetes" %}}
For installing LocalAI in Kubernetes, you can use the following helm chart:
```bash
# Install the helm repository
helm repo add go-skynet https://go-skynet.github.io/helm-charts/
# Update the repositories
helm repo update
# Get the values
helm show values go-skynet/local-ai > values.yaml
# Edit the values if needed
# vim values.yaml ...
# Install the helm chart
helm install local-ai go-skynet/local-ai -f values.yaml
```
{{% /tab %}}
{{% tab tabName="From binary" %}}
LocalAI binary releases are available on [GitHub](https://github.com/go-skynet/LocalAI/releases).
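A minimal sketch of running a downloaded binary directly (the version and asset name in the URL are placeholders; pick the release asset matching your OS and CPU architecture from the releases page):
```bash
# Download a release asset (replace <version> and <asset> with real values from the releases page)
curl -L -o local-ai "https://github.com/go-skynet/LocalAI/releases/download/<version>/<asset>"
chmod +x local-ai

# Point the binary at a local models directory (same flags as in the Docker examples)
./local-ai --models-path ./models --context-size 700 --threads 4
```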
{{% /tab %}}
{{% tab tabName="From source" %}}
See the [build section]({{%relref "docs/getting-started/build" %}}).
{{% /tab %}}
{{< /tabs >}}
### Example (Docker)
```bash
mkdir models
# Download luna-ai-llama2 to models/
wget https://huggingface.co/TheBloke/Luna-AI-Llama2-Uncensored-GGUF/resolve/main/luna-ai-llama2-uncensored.Q4_0.gguf -O models/luna-ai-llama2
# Use a template from the examples
cp -rf prompt-templates/getting_started.tmpl models/luna-ai-llama2.tmpl
docker run -p 8080:8080 -v $PWD/models:/models -ti --rm quay.io/go-skynet/local-ai:latest --models-path /models --context-size 700 --threads 4
# Now API is accessible at localhost:8080
curl http://localhost:8080/v1/models
# {"object":"list","data":[{"id":"luna-ai-llama2","object":"model"}]}
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "luna-ai-llama2",
"messages": [{"role": "user", "content": "How are you?"}],
"temperature": 0.9
}'
# {"model":"luna-ai-llama2","choices":[{"message":{"role":"assistant","content":"I'm doing well, thanks. How about you?"}}]}
```
For more model configurations, visit the [Examples Section](https://github.com/mudler/LocalAI/tree/master/examples/configurations).


@ -0,0 +1,187 @@
+++
disableToc = false
title = "Quickstart"
weight = 3
url = '/basics/getting_started/'
icon = "rocket_launch"
+++
**LocalAI** is the free, Open Source OpenAI alternative. LocalAI acts as a drop-in replacement REST API that's compatible with the OpenAI API specifications for local inferencing. It allows you to run [LLMs]({{%relref "docs/features/text-generation" %}}), generate images, audio (and more) locally or on-prem with consumer-grade hardware, supporting multiple model families and architectures.
## Installation Methods
LocalAI is available as a container image and binary, compatible with various container engines like Docker, Podman, and Kubernetes. Container images are published on [quay.io](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest) and [Dockerhub](https://hub.docker.com/r/localai/localai). Binaries can be downloaded from [GitHub](https://github.com/mudler/LocalAI/releases).
{{% alert icon="💡" %}}
**Hardware Requirements:** The hardware requirements for LocalAI vary based on the model size and quantization method used. For performance benchmarks with different backends, such as `llama.cpp`, visit [this link](https://github.com/ggerganov/llama.cpp#memorydisk-requirements). The `rwkv` backend is noted for its lower resource consumption.
{{% /alert %}}
## Prerequisites
Before you begin, ensure you have a container engine installed if you are not using the binaries. Suitable options include Docker or Podman. For installation instructions, refer to the following guides:
- [Install Docker Desktop (Mac, Windows, Linux)](https://docs.docker.com/get-docker/)
- [Install Podman (Linux)](https://podman.io/getting-started/installation)
- [Install Docker engine (Servers)](https://docs.docker.com/engine/install/#get-started)
## Running Models
> _Do you already have a model file? Skip to [Run models manually]({{%relref "docs/getting-started/manual" %}})_.
LocalAI allows one-click runs with popular models. It downloads the model and starts the API with the model loaded.
There are different categories of models: [LLMs]({{%relref "docs/features/text-generation" %}}), [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) , [Embeddings]({{%relref "docs/features/embeddings" %}}), [Audio to Text]({{%relref "docs/features/audio-to-text" %}}), and [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) depending on the backend being used and the model architecture.
{{% alert icon="💡" %}}
To customize the models, see [Model customization]({{%relref "docs/getting-started/customize-model" %}}). For more model configurations, visit the [Examples Section](https://github.com/mudler/LocalAI/tree/master/examples/configurations).
{{% /alert %}}
{{< tabs tabTotal="3" >}}
{{% tab tabName="CPU-only" %}}
> 💡 Don't need GPU acceleration? Use the CPU images, which are lighter and do not have Nvidia dependencies.
| Model | Category | Docker command |
| --- | --- | --- |
| [phi-2](https://huggingface.co/microsoft/phi-2) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core phi-2``` |
| [llava](https://github.com/SkunkworksAI/BakLLaVA) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava``` |
| [mistral-openorca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core mistral-openorca``` |
| [bert-cpp](https://github.com/skeskinen/bert.cpp) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core bert-cpp``` |
| [all-minilm-l6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg all-minilm-l6-v2``` |
| whisper-base | [Audio to Text]({{%relref "docs/features/audio-to-text" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core whisper-base``` |
| rhasspy-voice-en-us-amy | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core rhasspy-voice-en-us-amy``` |
| coqui | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg coqui``` |
| bark | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg bark``` |
| vall-e-x | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg vall-e-x``` |
| mixtral-instruct Mixtral-8x7B-Instruct-v0.1 | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core mixtral-instruct``` |
| [tinyllama-chat](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF) [original model](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.3) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core tinyllama-chat``` |
| [dolphin-2.5-mixtral-8x7b](https://huggingface.co/TheBloke/dolphin-2.5-mixtral-8x7b-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core dolphin-2.5-mixtral-8x7b``` |
{{% /tab %}}
{{% tab tabName="GPU (CUDA 11)" %}}
> To check which CUDA version you have available, you can run `nvidia-smi` or `nvcc --version`; see also [GPU acceleration]({{%relref "docs/features/gpu-acceleration" %}}).
| Model | Category | Docker command |
| --- | --- | --- |
| [phi-2](https://huggingface.co/microsoft/phi-2) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core phi-2``` |
| [llava](https://github.com/SkunkworksAI/BakLLaVA) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core llava``` |
| [mistral-openorca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core mistral-openorca``` |
| [bert-cpp](https://github.com/skeskinen/bert.cpp) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core bert-cpp``` |
| [all-minilm-l6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 all-minilm-l6-v2``` |
| whisper-base | [Audio to Text]({{%relref "docs/features/audio-to-text" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core whisper-base``` |
| rhasspy-voice-en-us-amy | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core rhasspy-voice-en-us-amy``` |
| coqui | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 coqui``` |
| bark | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 bark``` |
| vall-e-x | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 vall-e-x``` |
| mixtral-instruct Mixtral-8x7B-Instruct-v0.1 | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core mixtral-instruct``` |
| [tinyllama-chat](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF) [original model](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.3) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core tinyllama-chat``` |
| [dolphin-2.5-mixtral-8x7b](https://huggingface.co/TheBloke/dolphin-2.5-mixtral-8x7b-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core dolphin-2.5-mixtral-8x7b``` |
{{% /tab %}}
{{% tab tabName="GPU (CUDA 12)" %}}
> To check which CUDA version you have available, you can run `nvidia-smi` or `nvcc --version`; see also [GPU acceleration]({{%relref "docs/features/gpu-acceleration" %}}).
| Model | Category | Docker command |
| --- | --- | --- |
| [phi-2](https://huggingface.co/microsoft/phi-2) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core phi-2``` |
| [llava](https://github.com/SkunkworksAI/BakLLaVA) | [Multimodal LLM]({{%relref "docs/features/gpt-vision" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core llava``` |
| [mistral-openorca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core mistral-openorca``` |
| [bert-cpp](https://github.com/skeskinen/bert.cpp) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core bert-cpp``` |
| [all-minilm-l6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | [Embeddings]({{%relref "docs/features/embeddings" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 all-minilm-l6-v2``` |
| whisper-base | [Audio to Text]({{%relref "docs/features/audio-to-text" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core whisper-base``` |
| rhasspy-voice-en-us-amy | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core rhasspy-voice-en-us-amy``` |
| coqui | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 coqui``` |
| bark | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 bark``` |
| vall-e-x | [Text to Audio]({{%relref "docs/features/text-to-audio" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 vall-e-x``` |
| mixtral-instruct Mixtral-8x7B-Instruct-v0.1 | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core mixtral-instruct``` |
| [tinyllama-chat](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF) [original model](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.3) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core tinyllama-chat``` |
| [dolphin-2.5-mixtral-8x7b](https://huggingface.co/TheBloke/dolphin-2.5-mixtral-8x7b-GGUF) | [LLM]({{%relref "docs/features/text-generation" %}}) | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core dolphin-2.5-mixtral-8x7b``` |
{{% /tab %}}
{{< /tabs >}}
{{% alert icon="💡" %}}
**Tip:** You can specify multiple models to start an instance with all of them loaded; for example, to have both llava and phi-2 configured:
```bash
docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava phi-2
```
{{% /alert %}}
## Container images
LocalAI provides a variety of images to support different environments. These images are available on [quay.io](https://quay.io/repository/go-skynet/local-ai?tab=tags) and [Dockerhub](https://hub.docker.com/r/localai/localai).
For GPU acceleration support on Nvidia graphics cards, use the Nvidia/CUDA images; if you don't have a GPU, use the CPU images. If you have AMD or Apple Silicon, see the [build section]({{%relref "docs/getting-started/build" %}}).
{{% alert icon="💡" %}}
**Available Image Types**:
- Images ending with `-core` are smaller images without the pre-downloaded Python dependencies. Use these images if you plan to use the `llama.cpp`, `stablediffusion-ncn`, `tinydream` or `rwkv` backends - if you are not sure which one to use, do **not** use these images.
- FFMpeg is **not** included in the default images due to [its licensing](https://www.ffmpeg.org/legal.html). If you need FFMpeg, use the images ending with `-ffmpeg`. Note that `ffmpeg` is needed when using LocalAI's `audio-to-text` features.
- If you are using old or outdated CPUs without a GPU, you might need to set `REBUILD` to `true` as an environment variable, along with options to disable the flags your CPU does not support; note, however, that inference will be slow. See also [flagset compatibility]({{%relref "docs/getting-started/build#cpu-flagset-compatibility" %}}).
{{% /alert %}}
{{< tabs tabTotal="3" >}}
{{% tab tabName="Vanilla / CPU Images" %}}
| Description | Quay | Dockerhub |
| --- | --- | --- |
| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master` | `localai/localai:master` |
| Latest tag | `quay.io/go-skynet/local-ai:latest` | `localai/localai:latest` |
| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}` | `localai/localai:{{< version >}}` |
| Versioned image including FFMpeg| `quay.io/go-skynet/local-ai:{{< version >}}-ffmpeg` | `localai/localai:{{< version >}}-ffmpeg` |
| Versioned image including FFMpeg, no python | `quay.io/go-skynet/local-ai:{{< version >}}-ffmpeg-core` | `localai/localai:{{< version >}}-ffmpeg-core` |
{{% /tab %}}
{{% tab tabName="GPU Images CUDA 11" %}}
| Description | Quay | Dockerhub |
| --- | --- | --- |
| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-cublas-cuda11` | `localai/localai:master-cublas-cuda11` |
| Latest tag | `quay.io/go-skynet/local-ai:latest-cublas-cuda11` | `localai/localai:latest-cublas-cuda11` |
| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda11` | `localai/localai:{{< version >}}-cublas-cuda11` |
| Versioned image including FFMpeg| `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda11-ffmpeg` | `localai/localai:{{< version >}}-cublas-cuda11-ffmpeg` |
| Versioned image including FFMpeg, no python | `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda11-ffmpeg-core` | `localai/localai:{{< version >}}-cublas-cuda11-ffmpeg-core` |
{{% /tab %}}
{{% tab tabName="GPU Images CUDA 12" %}}
| Description | Quay | Dockerhub |
| --- | --- | --- |
| Latest images from the branch (development) | `quay.io/go-skynet/local-ai:master-cublas-cuda12` | `localai/localai:master-cublas-cuda12` |
| Latest tag | `quay.io/go-skynet/local-ai:latest-cublas-cuda12` | `localai/localai:latest-cublas-cuda12` |
| Versioned image | `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12` | `localai/localai:{{< version >}}-cublas-cuda12` |
| Versioned image including FFMpeg| `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12-ffmpeg` | `localai/localai:{{< version >}}-cublas-cuda12-ffmpeg` |
| Versioned image including FFMpeg, no python | `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12-ffmpeg-core` | `localai/localai:{{< version >}}-cublas-cuda12-ffmpeg-core` |
{{% /tab %}}
{{< /tabs >}}
## What's next?
Explore further resources and community contributions:
- [Community How to's](https://io.midori-ai.xyz/howtos/)
- [Examples](https://github.com/mudler/LocalAI/tree/master/examples#examples)
[![Screenshot from 2023-04-26 23-59-55](https://user-images.githubusercontent.com/2420543/234715439-98d12e03-d3ce-4f94-ab54-2b256808e05e.png)](https://github.com/mudler/LocalAI/tree/master/examples#examples)


@ -0,0 +1,34 @@
+++
disableToc = false
title = "Integrations"
weight = 19
icon = "rocket_launch"
+++
## Community integrations
List of projects that use LocalAI directly behind the scenes:
- https://github.com/sozercan/aikit
- https://github.com/aorumbayev/autogpt4all
- https://github.com/mudler/LocalAGI
## The following software has out-of-the-box integrations with LocalAI
LocalAI can be used as a drop-in replacement; however, the following projects provide specific integrations with LocalAI:
- [AnythingLLM](https://github.com/Mintplex-Labs/anything-llm)
- [Logseq GPT3 OpenAI plugin](https://github.com/briansunter/logseq-plugin-gpt3-openai) allows setting a base URL, and works with LocalAI.
- https://github.com/longy2k/obsidian-bmo-chatbot
- https://github.com/FlowiseAI/Flowise
- https://github.com/k8sgpt-ai/k8sgpt
- https://github.com/kairos-io/kairos
- https://github.com/langchain4j/langchain4j
- https://github.com/henomis/lingoose
- https://github.com/trypromptly/LLMStack
- https://github.com/mattermost/openops
- https://github.com/charmbracelet/mods
- https://github.com/cedriking/spark
Feel free to open an [issue](https://github.com/go-skynet/localai-website/issues) to get a page made for your project, or if you see an error on one of the pages!


@ -1,8 +1,21 @@
+++
archetype = "home"
title = "LocalAI"
title = "Overview"
weight = 1
toc = true
description = "What is LocalAI?"
tags = ["Beginners"]
categories = [""]
author = "Ettore Di Giacinto"
# This allows to overwrite the landing page
url = '/'
icon = "info"
+++
<p align="center">
<a href="https://localai.io"><img width=512 src="https://github.com/go-skynet/LocalAI/assets/2420543/0966aa2a-166e-4f99-a3e5-6c915fc997dd"></a>
</p>
<p align="center">
<a href="https://github.com/go-skynet/LocalAI/fork" target="blank">
<img src="https://img.shields.io/github/forks/go-skynet/LocalAI?style=for-the-badge" alt="LocalAI forks"/>
@ -21,11 +34,11 @@ title = "LocalAI"
[<img src="https://img.shields.io/badge/dockerhub-images-important.svg?logo=Docker">](https://hub.docker.com/r/localai/localai)
[<img src="https://img.shields.io/badge/quay.io-images-important.svg?">](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest)
> 💡 Get help - [❓FAQ](https://localai.io/faq/) [❓How tos](https://localai.io/howtos/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [💭Discord](https://discord.gg/uJAeKSAGDy)
> 💡 Get help - [❓FAQ](https://localai.io/faq/) [❓How tos](https://io.midori-ai.xyz/howtos/) [💭Discussions](https://github.com/go-skynet/LocalAI/discussions) [💭Discord](https://discord.gg/uJAeKSAGDy)
>
> [💻 Quickstart](https://localai.io/basics/getting_started/) [📣 News](https://localai.io/basics/news/) [ 🛫 Examples ](https://github.com/go-skynet/LocalAI/tree/master/examples/) [ 🖼️ Models ](https://localai.io/models/) [ 🚀 Roadmap ](https://github.com/mudler/LocalAI/issues?q=is%3Aissue+is%3Aopen+label%3Aroadmap)
**LocalAI** is the free, Open Source OpenAI alternative. LocalAI act as a drop-in replacement REST API that's compatible with OpenAI API specifications for local inferencing. It allows you to run LLMs, generate images, audio (and not only) locally or on-prem with consumer grade hardware, supporting multiple model families that are compatible with the ggml format. Does not require GPU. It is maintained by [mudler](https://github.com/mudler).
**LocalAI** is the free, Open Source OpenAI alternative. LocalAI acts as a drop-in replacement REST API that's compatible with the OpenAI API specifications for local inferencing. It allows you to run LLMs, generate images, audio (and more) locally or on-prem with consumer-grade hardware, supporting multiple model families and architectures. It does not require a GPU. It is maintained by [mudler](https://github.com/mudler).
<p align="center">
<a href="https://twitter.com/LocalAI_API" target="blank">
@ -65,7 +78,7 @@ Note that this started just as a fun weekend project by [mudler](https://github.
LocalAI is an API written in Go that serves as an OpenAI shim, enabling software already developed with OpenAI SDKs to seamlessly integrate with LocalAI. It can be effortlessly implemented as a substitute, even on consumer-grade hardware. This capability is achieved by employing various C++ backends, including [ggml](https://github.com/ggerganov/ggml), to perform inference on LLMs using both CPU and, if desired, GPU. Internally, LocalAI backends are just gRPC servers; indeed, you can specify and build your own gRPC server and extend LocalAI at runtime as well. It is possible to specify external gRPC servers and/or binaries that LocalAI will manage internally.
LocalAI uses a mixture of backends written in various languages (C++, Golang, Python, ...). You can check [the model compatibility table]({{%relref "model-compatibility" %}}) to learn about all the components of LocalAI.
LocalAI uses a mixture of backends written in various languages (C++, Golang, Python, ...). You can check [the model compatibility table]({{%relref "docs/reference/compatibility-table" %}}) to learn about all the components of LocalAI.
![localai](https://github.com/go-skynet/localai-website/assets/2420543/6492e685-8282-4217-9daa-e229a31548bc)


@ -0,0 +1,11 @@
---
weight: 23
title: "References"
description: "Reference"
icon: science
lead: ""
date: 2020-10-06T08:49:15+00:00
lastmod: 2020-10-06T08:49:15+00:00
draft: false
images: []
---


@ -1,29 +1,21 @@
+++
disableToc = false
title = "Model compatibility"
weight = 4
title = "Model compatibility table"
weight = 24
+++
LocalAI is compatible with the models supported by [llama.cpp](https://github.com/ggerganov/llama.cpp), and also supports [GPT4ALL-J](https://github.com/nomic-ai/gpt4all) and [cerebras-GPT with ggml](https://huggingface.co/lxe/Cerebras-GPT-2.7B-Alpaca-SP-ggml).
{{% notice note %}}
LocalAI will attempt to automatically load models which are not explicitly configured for a specific backend. You can specify the backend to use by configuring a model with a YAML file. See [the advanced section]({{%relref "advanced" %}}) for more details.
{{% /notice %}}
### Hardware requirements
Depending on the model you are attempting to run, you might need more RAM or CPU resources. See also [here](https://github.com/ggerganov/llama.cpp#memorydisk-requirements) for the memory/disk requirements of `gguf`-based backends. `rwkv` is less expensive on resources.
### Model compatibility table
Besides llama-based models, LocalAI is also compatible with other architectures. The table below lists all the compatible model families and the associated binding repositories.
{{% alert note %}}
LocalAI will attempt to automatically load models which are not explicitly configured for a specific backend. You can specify the backend to use by configuring a model with a YAML file. See [the advanced section]({{%relref "docs/advanced" %}}) for more details.
{{% /alert %}}
| Backend and Bindings | Compatible models | Completion/Chat endpoint | Capability | Embeddings support | Token stream support | Acceleration |
|----------------------------------------------------------------------------------|-----------------------|--------------------------|---------------------------|-----------------------------------|----------------------|--------------|
| [llama.cpp]({{%relref "model-compatibility/llama-cpp" %}}) | Vicuna, Alpaca, LLaMa | yes | GPT and Functions | yes** | yes | CUDA, openCL, cuBLAS, Metal |
| [llama.cpp]({{%relref "docs/features/text-generation#llama.cpp" %}}) | Vicuna, Alpaca, LLaMa | yes | GPT and Functions | yes** | yes | CUDA, openCL, cuBLAS, Metal |
| [gpt4all-llama](https://github.com/nomic-ai/gpt4all) | Vicuna, Alpaca, LLaMa | yes | GPT | no | yes | N/A |
| [gpt4all-mpt](https://github.com/nomic-ai/gpt4all) | MPT | yes | GPT | no | yes | N/A |
| [gpt4all-j](https://github.com/nomic-ai/gpt4all) | GPT4ALL-J | yes | GPT | no | yes | N/A |
@ -56,30 +48,8 @@ Besides llama based models, LocalAI is compatible also with other architectures.
| `coqui` | Coqui | no | Audio generation and Voice cloning | no | no | CPU/CUDA |
| `petals` | Various GPTs and quantization formats | yes | GPT | no | no | CPU/CUDA |
Note: any backend name listed above can be used in the `backend` field of the model configuration file (See [the advanced section]({{%relref "advanced" %}})).
Note: any backend name listed above can be used in the `backend` field of the model configuration file (See [the advanced section]({{%relref "docs/advanced" %}})).
- \* 7b ONLY
- ** doesn't seem to be accurate
- *** 7b and 40b with the `ggccv` format, for instance: https://huggingface.co/TheBloke/WizardLM-Uncensored-Falcon-40B-GGML
Tested with:
- [X] Automatically by CI with OpenLLAMA and GPT4ALL.
- [X] LLaMA 🦙
- [X] [Vicuna](https://github.com/ggerganov/llama.cpp/discussions/643#discussioncomment-5533894)
- [Alpaca](https://github.com/ggerganov/llama.cpp#instruction-mode-with-alpaca)
- [X] [GPT4ALL](https://gpt4all.io) (see also [using GPT4All](https://github.com/ggerganov/llama.cpp#using-gpt4all))
- [X] [GPT4ALL-J](https://gpt4all.io/models/ggml-gpt4all-j.bin) (no changes required)
- [X] [Koala](https://bair.berkeley.edu/blog/2023/04/03/koala/) 🐨
- [X] Cerebras-GPT
- [X] [WizardLM](https://github.com/nlpxucan/WizardLM)
- [X] [RWKV](https://github.com/BlinkDL/RWKV-LM) models with [rwkv.cpp](https://github.com/saharNooby/rwkv.cpp)
- [X] [bloom.cpp](https://github.com/NouamaneTazi/bloomz.cpp)
- [X] [Chinese LLaMA / Alpaca](https://github.com/ymcui/Chinese-LLaMA-Alpaca)
- [X] [Vigogne (French)](https://github.com/bofenghuang/vigogne)
- [X] [OpenBuddy 🐶 (Multilingual)](https://github.com/OpenBuddy/OpenBuddy)
- [X] [Pygmalion 7B / Metharme 7B](https://github.com/ggerganov/llama.cpp#using-pygmalion-7b--metharme-7b)
- [X] [HuggingFace Inference](https://huggingface.co/inference-api) models available through API
- [X] Falcon
Note: You might need to convert some models from older models to the new format, for indications, see [the README in llama.cpp](https://github.com/ggerganov/llama.cpp#using-gpt4all) for instance to run `gpt4all`.
- *** 7b and 40b with the `ggccv` format, for instance: https://huggingface.co/TheBloke/WizardLM-Uncensored-Falcon-40B-GGML


@ -1,11 +1,17 @@
+++
disableToc = false
title = "🆕 What's New"
weight = 2
title = "News"
weight = 7
url = '/basics/news/'
icon = "newspaper"
+++
Release notes have now been moved completely to GitHub releases.
You can see the release notes [here](https://github.com/mudler/LocalAI/releases).
# Older release notes
## 04-12-2023: __v2.0.0__
This release brings a major overhaul in some backends.
@ -68,7 +74,7 @@ From this release the `llama` backend supports only `gguf` files (see {{< pr "94
### Image generation enhancements
The [Diffusers]({{%relref "model-compatibility/diffusers" %}}) backend got now various enhancements, including support to generate images from images, longer prompts, and support for more kernels schedulers. See the [Diffusers]({{%relref "model-compatibility/diffusers" %}}) documentation for more information.
The [Diffusers]({{%relref "docs/features/image-generation" %}}) backend now has various enhancements, including support for generating images from images, longer prompts, and more kernel schedulers. See the [Diffusers]({{%relref "docs/features/image-generation" %}}) documentation for more information.
### Lora adapters
@ -80,7 +86,7 @@ It is now possible for single-devices with one GPU to specify `--single-active-b
### Community spotlight
![2023_08_26_15_09_27](https://github.com/go-skynet/localai-website/assets/2420543/f0204f8f-7462-4cdd-9154-4538683c1eef)
#### Resources management
@ -89,7 +95,7 @@ There is an ongoing effort in the community to better handling of resources. See
#### New how-to section
Thanks to the community efforts now we have a new [how-to section]({{%relref "howtos" %}}) with various examples on how to use LocalAI. This is a great starting point for new users! We are currently working on improving it, a huge shout out to {{< github "lunamidori5" >}} from the community for the impressive efforts on this!
Thanks to community efforts, we now have a new [how-to website](https://io.midori-ai.xyz/howtos/) with various examples of how to use LocalAI. This is a great starting point for new users! We are currently working on improving it; a huge shout out to {{< github "lunamidori5" >}} from the community for the impressive efforts on this!
#### 💡 More examples!
@ -131,7 +137,7 @@ The full changelog is available [here](https://github.com/go-skynet/LocalAI/rele
## 🔥🔥🔥🔥 12-08-2023: __v1.24.0__ 🔥🔥🔥🔥
This is release brings four(!) new additional backends to LocalAI: [🐶 Bark]({{%relref "model-compatibility/bark" %}}), 🦙 [AutoGPTQ]({{%relref "model-compatibility/autogptq" %}}), [🧨 Diffusers]({{%relref "model-compatibility/diffusers" %}}), 🦙 [exllama]({{%relref "model-compatibility/exllama" %}}) and a lot of improvements!
This release brings four(!) new backends to LocalAI: [🐶 Bark]({{%relref "docs/features/text-to-audio#bark" %}}), 🦙 [AutoGPTQ]({{%relref "docs/features/text-generation#autogptq" %}}), [🧨 Diffusers]({{%relref "docs/features/image-generation" %}}), 🦙 [exllama]({{%relref "docs/features/text-generation#exllama" %}}) and a lot of improvements!
### Major improvements:
@ -143,23 +149,23 @@ This is release brings four(!) new additional backends to LocalAI: [🐶 Bark]({
### 🐶 Bark
[Bark]({{%relref "model-compatibility/bark" %}}) is a text-prompted generative audio model - it combines GPT techniques to generate Audio from text. It is a great addition to LocalAI, and it's available in the container images by default.
[Bark]({{%relref "docs/features/text-to-audio#bark" %}}) is a text-prompted generative audio model - it combines GPT techniques to generate audio from text. It is a great addition to LocalAI, and it's available in the container images by default.
It can also generate music, see the example: [lion.webm](https://user-images.githubusercontent.com/5068315/230684766-97f5ea23-ad99-473c-924b-66b6fab24289.webm)
### 🦙 AutoGPTQ
[AutoGPTQ]({{%relref "model-compatibility/autogptq" %}}) is an easy-to-use LLMs quantization package with user-friendly apis, based on GPTQ algorithm.
[AutoGPTQ]({{%relref "docs/features/text-generation#autogptq" %}}) is an easy-to-use LLM quantization package with user-friendly APIs, based on the GPTQ algorithm.
It is targeted mainly for GPU usage only. Check out the [AutoGPTQ documentation]({{%relref "model-compatibility/autogptq" %}}) for usage.
It is targeted mainly at GPU usage. Check out the [documentation]({{%relref "docs/features/text-generation" %}}) for usage.
### 🦙 Exllama
[Exllama]({{%relref "model-compatibility/exllama" %}}) is a "A more memory-efficient rewrite of the HF transformers implementation of Llama for use with quantized weights". It is a faster alternative to run LLaMA models on GPU.Check out the [Exllama documentation]({{%relref "model-compatibility/exllama" %}}) for usage.
[Exllama]({{%relref "docs/features/text-generation#exllama" %}}) is "a more memory-efficient rewrite of the HF transformers implementation of Llama for use with quantized weights". It is a faster alternative for running LLaMA models on GPU. Check out the [Exllama documentation]({{%relref "docs/features/text-generation#exllama" %}}) for usage.
### 🧨 Diffusers
[Diffusers]({{%relref "model-compatibility/diffusers" %}}) is the go-to library for state-of-the-art pretrained diffusion models for generating images, audio, and even 3D structures of molecules. Currently it is experimental, and supports generation only of images so you might encounter some issues on models which weren't tested yet. Check out the [Diffusers documentation]({{%relref "model-compatibility/diffusers" %}}) for usage.
[Diffusers]({{%relref "docs/features/image-generation#diffusers" %}}) is the go-to library for state-of-the-art pretrained diffusion models for generating images, audio, and even 3D structures of molecules. Currently it is experimental and supports only image generation, so you might encounter issues with models that haven't been tested yet. Check out the [Diffusers documentation]({{%relref "docs/features/image-generation" %}}) for usage.
### 🔑 API Keys
@ -195,11 +201,11 @@ Most notably, this release brings important fixes for CUDA (and not only):
* fix: select function calls if 'name' is set in the request by {{< github "mudler" >}} in {{< pr "827" >}}
* fix: symlink libphonemize in the container by {{< github "mudler" >}} in {{< pr "831" >}}
{{% notice note %}}
{{% alert note %}}
From this release [OpenAI functions]({{%relref "features/openai-functions" %}}) are available in the `llama` backend. The `llama-grammar` has been deprecated. See also [OpenAI functions]({{%relref "features/openai-functions" %}}).
From this release [OpenAI functions]({{%relref "docs/features/openai-functions" %}}) are available in the `llama` backend. The `llama-grammar` has been deprecated. See also [OpenAI functions]({{%relref "docs/features/openai-functions" %}}).
{{% /notice %}}
{{% /alert %}}
The full [changelog is available here](https://github.com/go-skynet/LocalAI/releases/tag/v1.23.0)
@ -213,15 +219,15 @@ The full [changelog is available here](https://github.com/go-skynet/LocalAI/rele
* feat: backends improvements by {{< github "mudler" >}} in {{< pr "778" >}}
* feat(llama2): add template for chat messages by {{< github "dave-gray101" >}} in {{< pr "782" >}}
{{% notice note %}}
{{% alert note %}}
From this release to use the OpenAI functions you need to use the `llama-grammar` backend. It has been added a `llama` backend for tracking `llama.cpp` master and `llama-grammar` for the grammar functionalities that have not been merged yet upstream. See also [OpenAI functions]({{%relref "features/openai-functions" %}}). Until the feature is merged we will have two llama backends.
From this release, to use the OpenAI functions you need to use the `llama-grammar` backend. A `llama` backend has been added for tracking `llama.cpp` master, and a `llama-grammar` backend for the grammar functionalities that have not been merged upstream yet. See also [OpenAI functions]({{%relref "docs/features/openai-functions" %}}). Until the feature is merged we will have two llama backends.
{{% /notice %}}
{{% /alert %}}
## Huggingface embeddings
In this release is now possible to specify to LocalAI external `gRPC` backends that can be used for inferencing {{< pr "778" >}}. It is now possible to write internal backends in any language, and a `huggingface-embeddings` backend is now available in the container image to be used with https://github.com/UKPLab/sentence-transformers. See also [Embeddings]({{%relref "features/embeddings" %}}).
In this release it is now possible to specify external `gRPC` backends to LocalAI that can be used for inferencing {{< pr "778" >}}. It is now possible to write internal backends in any language, and a `huggingface-embeddings` backend is now available in the container image to be used with https://github.com/UKPLab/sentence-transformers. See also [Embeddings]({{%relref "docs/features/embeddings" %}}).
## LLaMa 2 has been released!
@ -266,7 +272,7 @@ The former, ggml-based backend has been renamed to `falcon-ggml`.
### Default pre-compiled binaries
From this release the default behavior of images has changed. Compilation is not triggered on start automatically, to recompile `local-ai` from scratch on start and switch back to the old behavior, you can set `REBUILD=true` in the environment variables. Rebuilding can be necessary if your CPU and/or architecture is old and the pre-compiled binaries are not compatible with your platform. See the [build section]({{%relref "build" %}}) for more information.
From this release the default behavior of images has changed. Compilation is not triggered on start automatically; to recompile `local-ai` from scratch on start and switch back to the old behavior, you can set `REBUILD=true` in the environment variables. Rebuilding can be necessary if your CPU and/or architecture is old and the pre-compiled binaries are not compatible with your platform. See the [build section]({{%relref "docs/getting-started/build" %}}) for more information.
[Full release changelog](https://github.com/go-skynet/LocalAI/releases/tag/v1.21.0)
@ -276,8 +282,8 @@ From this release the default behavior of images has changed. Compilation is not
### Exciting New Features 🎉
* Add Text-to-Audio generation with `go-piper` by {{< github "mudler" >}} in {{< pr "649" >}} See [API endpoints]({{%relref "features/text-to-audio" %}}) in our documentation.
* Add gallery repository by {{< github "mudler" >}} in {{< pr "663" >}}. See [models]({{%relref "models" %}}) for documentation.
* Add Text-to-Audio generation with `go-piper` by {{< github "mudler" >}} in {{< pr "649" >}} See [API endpoints]({{%relref "docs/features/text-to-audio" %}}) in our documentation.
* Add gallery repository by {{< github "mudler" >}} in {{< pr "663" >}}. See [models]({{%relref "docs/features/model-gallery" %}}) for documentation.
### Container images
- Standard (GPT + `stablediffusion`): `quay.io/go-skynet/local-ai:v1.20.0`
@ -289,7 +295,7 @@ From this release the default behavior of images has changed. Compilation is not
Updates to `llama.cpp`, `go-transformers`, `gpt4all.cpp` and `rwkv.cpp`.
The NUMA option was enabled by {{< github "mudler" >}} in {{< pr "684" >}}, along with many new parameters (`mmap`,`mmlock`, ..). See [advanced]({{%relref "advanced" %}}) for the full list of parameters.
The NUMA option was enabled by {{< github "mudler" >}} in {{< pr "684" >}}, along with many new parameters (`mmap`,`mmlock`, ..). See [advanced]({{%relref "docs/advanced" %}}) for the full list of parameters.
### Gallery repositories
@ -313,13 +319,13 @@ or a `tts` voice with:
curl http://localhost:8080/models/apply -H "Content-Type: application/json" -d '{ "id": "model-gallery@voice-en-us-kathleen-low" }'
```
See also [models]({{%relref "models" %}}) for a complete documentation.
See also [models]({{%relref "docs/features/model-gallery" %}}) for a complete documentation.
### Text to Audio
Now `LocalAI` uses [piper](https://github.com/rhasspy/piper) and [go-piper](https://github.com/mudler/go-piper) to generate audio from text. This is an experimental feature, and it requires `GO_TAGS=tts` to be set during build. It is enabled by default in the pre-built container images.
To setup audio models, you can use the new galleries, or setup the models manually as described in [the API section of the documentation]({{%relref "features/text-to-audio" %}}).
To setup audio models, you can use the new galleries, or setup the models manually as described in [the API section of the documentation]({{%relref "docs/features/text-to-audio" %}}).
You can check the full changelog in [Github](https://github.com/go-skynet/LocalAI/releases/tag/v1.20.0)
@ -347,7 +353,7 @@ We now support a vast variety of models, while being backward compatible with pr
### New features
- ✨ Added support for `falcon`-based model families (7b) ( [mudler](https://github.com/mudler) )
- ✨ Experimental support for Metal Apple Silicon GPU - ( [mudler](https://github.com/mudler) and thanks to [Soleblaze](https://github.com/Soleblaze) for testing! ). See the [build section]({{%relref "build#Acceleration" %}}).
- ✨ Experimental support for Metal Apple Silicon GPU - ( [mudler](https://github.com/mudler) and thanks to [Soleblaze](https://github.com/Soleblaze) for testing! ). See the [build section]({{%relref "docs/getting-started/build#Acceleration" %}}).
- ✨ Support for token stream in the `/v1/completions` endpoint ( [samm81](https://github.com/samm81) )
- ✨ Added huggingface backend ( [Evilfreelancer](https://github.com/EvilFreelancer) )
- 📷 Stablediffusion now can output `2048x2048` images size with `esrgan`! ( [mudler](https://github.com/mudler) )
@ -388,7 +394,7 @@ Two new projects offer now direct integration with LocalAI!
Support for OpenCL has been added while building from sources.
You can now build LocalAI from source with `BUILD_TYPE=clblas` to have an OpenCL build. See also the [build section]({{%relref "build#Acceleration" %}}).
You can now build LocalAI from source with `BUILD_TYPE=clblas` to have an OpenCL build. See also the [build section]({{%relref "docs/getting-started/build#Acceleration" %}}).
For instructions on how to install OpenCL/CLBlast see [here](https://github.com/ggerganov/llama.cpp#blas-build).
@ -418,7 +424,7 @@ prompt_cache_path: "alpaca-cache"
prompt_cache_all: true
```
See also the [advanced section]({{%relref "advanced" %}}).
See also the [advanced section]({{%relref "docs/advanced" %}}).
## Media, Blogs, Social
@ -431,7 +437,7 @@ See also the [advanced section]({{%relref "advanced" %}}).
- 23-05-2023: __v1.15.0__ released. `go-gpt2.cpp` backend got renamed to `go-ggml-transformers.cpp` updated including https://github.com/ggerganov/llama.cpp/pull/1508 which breaks compatibility with older models. This impacts RedPajama, GptNeoX, MPT(not `gpt4all-mpt`), Dolly, GPT2 and Starcoder based models. [Binary releases available](https://github.com/go-skynet/LocalAI/releases), various fixes, including {{< pr "341" >}} .
- 21-05-2023: __v1.14.0__ released. Minor updates to the `/models/apply` endpoint, `llama.cpp` backend updated including https://github.com/ggerganov/llama.cpp/pull/1508 which breaks compatibility with older models. `gpt4all` is still compatible with the old format.
- 19-05-2023: __v1.13.0__ released! 🔥🔥 updates to the `gpt4all` and `llama` backend, consolidated CUDA support ( {{< pr "310" >}} thanks to @bubthegreat and @Thireus ), preliminar support for [installing models via API]({{%relref "advanced#" %}}).
- 19-05-2023: __v1.13.0__ released! 🔥🔥 updates to the `gpt4all` and `llama` backend, consolidated CUDA support ( {{< pr "310" >}} thanks to @bubthegreat and @Thireus ), preliminary support for [installing models via API]({{%relref "docs/advanced#" %}}).
- 17-05-2023: __v1.12.0__ released! 🔥🔥 Minor fixes, plus CUDA ({{< pr "258" >}}) support for `llama.cpp`-compatible models and image generation ({{< pr "272" >}}).
- 16-05-2023: 🔥🔥🔥 Experimental support for CUDA ({{< pr "258" >}}) in the `llama.cpp` backend and Stable diffusion CPU image generation ({{< pr "272" >}}) in `master`.


@ -1,17 +0,0 @@
+++
disableToc = false
title = "Features"
weight = 3
+++
This section contains the documentation for the features supported by LocalAI.
- [📖 Text generation (GPT)]({{%relref "features/text-generation" %}})
- [🗣 Text to Audio]({{%relref "features/text-to-audio" %}})
- [🔈 Audio to text]({{%relref "features/audio-to-text" %}})
- [🎨 Image generation]({{%relref "features/image-generation" %}})
- [🧠 Embeddings]({{%relref "features/embeddings" %}})
- [🔥 OpenAI functions]({{%relref "features/openai-functions" %}})
- [🆕 GPT Vision API]({{%relref "features/gpt-vision" %}})
- [✍️ Constrained grammars]({{%relref "features/constrained_grammars" %}})


@ -1,70 +0,0 @@
+++
disableToc = false
title = "📖 Text generation (GPT)"
weight = 2
+++
LocalAI supports generating text with GPT with `llama.cpp` and other backends (such as `rwkv.cpp`); see also the [Model compatibility]({{%relref "model-compatibility" %}}) page for an up-to-date list of the supported model families.
Note:
- You can also specify the model name as part of the OpenAI token.
- If only one model is available, the API will use it for all the requests.
### Chat completions
https://platform.openai.com/docs/api-reference/chat
For example, to generate a chat completion, you can send a POST request to the `/v1/chat/completions` endpoint with the instruction as the request body:
```bash
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "ggml-koala-7b-model-q4_0-r2.bin",
"messages": [{"role": "user", "content": "Say this is a test!"}],
"temperature": 0.7
}'
```
Available additional parameters: `top_p`, `top_k`, `max_tokens`
### Edit completions
https://platform.openai.com/docs/api-reference/edits
To generate an edit completion you can send a POST request to the `/v1/edits` endpoint with the instruction as the request body:
```bash
curl http://localhost:8080/v1/edits -H "Content-Type: application/json" -d '{
"model": "ggml-koala-7b-model-q4_0-r2.bin",
"instruction": "rephrase",
"input": "Black cat jumped out of the window",
"temperature": 0.7
}'
```
Available additional parameters: `top_p`, `top_k`, `max_tokens`.
### Completions
https://platform.openai.com/docs/api-reference/completions
To generate a completion, you can send a POST request to the `/v1/completions` endpoint with the instruction as per the request body:
```bash
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
"model": "ggml-koala-7b-model-q4_0-r2.bin",
"prompt": "A long time ago in a galaxy far, far away",
"temperature": 0.7
}'
```
Available additional parameters: `top_p`, `top_k`, `max_tokens`
### List models
You can list all the models available with:
```bash
curl http://localhost:8080/v1/models
```


@ -1,77 +0,0 @@
+++
disableToc = false
title = "🗣 Text to audio (TTS)"
weight = 2
+++
The `/tts` endpoint can be used to generate speech from text.
Input: `input`, `model`
For example, to generate an audio file, you can send a POST request to the `/tts` endpoint with the instruction as the request body:
```bash
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
"input": "Hello world",
"model": "tts"
}'
```
Returns an `audio/wav` file.
#### Setup
LocalAI supports [bark]({{%relref "model-compatibility/bark" %}}) , `piper` and `vall-e-x`:
{{% notice note %}}
The `piper` backend is used for `onnx` models and requires the modules to be downloaded first.
To install the `piper` audio models manually:
- Download Voices from https://github.com/rhasspy/piper/releases/tag/v0.0.2
- Extract the `.tar.tgz` files (.onnx,.json) inside `models`
- Run the following command to test the model is working
{{% /notice %}}
To use the tts endpoint, run the following command. You can specify a backend with the `backend` parameter. For example, to use the `piper` backend:
```bash
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
"model":"it-riccardo_fasol-x-low.onnx",
"backend": "piper",
"input": "Ciao, sono Ettore"
}' | aplay
```
Note:
- `aplay` is a Linux command. You can use other tools to play the audio file.
- The model name is the filename with the extension.
- The model name is case sensitive.
- LocalAI must be compiled with the `GO_TAGS=tts` flag.
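If you are building LocalAI from source, the TTS backends are enabled through that Go tag; as a rough sketch (assuming the standard Makefile build described in the build section):
```bash
# build LocalAI from source with the tts backends enabled
make GO_TAGS=tts build
```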
LocalAI also has experimental support for `transformers-musicgen` for the generation of short musical compositions. Currently, this is implemented via the same requests used for text to speech:
```bash
curl --request POST \
--url http://localhost:8080/tts \
--header 'Content-Type: application/json' \
--data '{
"backend": "transformers-musicgen",
"model": "facebook/musicgen-medium",
"input": "Cello Rave"
}' | aplay
```
Future versions of LocalAI will expose additional control over audio generation beyond the text prompt.
#### Configuration
Audio models can be configured via `YAML` files. This allows you to configure specific settings for each backend. For instance, a backend might require a particular voice or support voice cloning, which must be specified in the configuration file.
```yaml
name: tts
backend: vall-e-x
parameters: ...
```
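As a minimal sketch, a `piper` voice could be configured along these lines (the voice file name is illustrative and must match an `.onnx` model you downloaded into `models`):
```yaml
name: english-voice
backend: piper
parameters:
  model: en-us-amy-low.onnx
```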

View File

@ -1,323 +0,0 @@
+++
disableToc = false
title = "Getting started"
weight = 1
url = '/basics/getting_started/'
+++
`LocalAI` is available as a container image and binary. It can be used with Docker, Podman, Kubernetes and any container engine.
Container images are published to [quay.io](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest) and [Dockerhub](https://hub.docker.com/r/localai/localai).
[<img src="https://img.shields.io/badge/dockerhub-images-important.svg?logo=Docker">](https://hub.docker.com/r/localai/localai)
[<img src="https://img.shields.io/badge/quay.io-images-important.svg?">](https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest)
See also our [How to](https://io.midori-ai.xyz/howtos/) section for end-to-end guided examples curated by the community.
### How to get started
The easiest way to run LocalAI is by using [`docker compose`](https://docs.docker.com/compose/install/) or with [Docker](https://docs.docker.com/engine/install/) (to build locally, see the [build section]({{%relref "build" %}})).
LocalAI needs at least a model file to work, or a configuration YAML file, or both. You can customize further model defaults and specific settings with a configuration file (see [advanced]({{%relref "advanced" %}})).
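As a minimal sketch, a model configuration file dropped into the models directory might look like the following (the file and model names are illustrative; see [advanced]({{%relref "advanced" %}}) for the full list of fields):
```yaml
# models/your-model.yaml
name: your-model
parameters:
  model: your-model.gguf
context_size: 1024
threads: 4
```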
{{% notice note %}}
To run with GPU acceleration, see [GPU acceleration]({{%relref "features/gpu-acceleration" %}}).
{{% /notice %}}
{{< tabs >}}
{{% tab name="Docker" %}}
```bash
# Prepare the models into the `model` directory
mkdir models
# copy your models to it
cp your-model.gguf models/
# run the LocalAI container
docker run -p 8080:8080 -v $PWD/models:/models -ti --rm quay.io/go-skynet/local-ai:latest --models-path /models --context-size 700 --threads 4
# You should see:
#
# ┌───────────────────────────────────────────────────┐
# │ Fiber v2.42.0 │
# │ http://127.0.0.1:8080 │
# │ (bound on host 0.0.0.0 and port 8080) │
# │ │
# │ Handlers ............. 1 Processes ........... 1 │
# │ Prefork ....... Disabled PID ................. 1 │
# └───────────────────────────────────────────────────┘
# Try the endpoint with curl
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
"model": "your-model.gguf",
"prompt": "A long time ago in a galaxy far, far away",
"temperature": 0.7
}'
```
{{% notice note %}}
- If running on Apple Silicon (ARM) it is **not** suggested to run on Docker due to emulation. Follow the [build instructions]({{%relref "build" %}}) to use Metal acceleration for full GPU support.
- If you are running Apple x86_64 you can use `docker`; there is no additional gain in building it from source.
{{% /notice %}}
{{% /tab %}}
{{% tab name="Docker compose" %}}
```bash
# Clone LocalAI
git clone https://github.com/go-skynet/LocalAI
cd LocalAI
# (optional) Checkout a specific LocalAI tag
# git checkout -b build <TAG>
# copy your models to models/
cp your-model.gguf models/
# (optional) Edit the .env file to set things like context size and threads
# vim .env
# start with docker compose
docker compose up -d --pull always
# or you can build the images with:
# docker compose up -d --build
# Now API is accessible at localhost:8080
curl http://localhost:8080/v1/models
# {"object":"list","data":[{"id":"your-model.gguf","object":"model"}]}
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
"model": "your-model.gguf",
"prompt": "A long time ago in a galaxy far, far away",
"temperature": 0.7
}'
```
Note: If you are on Windows, please make sure the project is on the Linux filesystem, otherwise loading models might be slow. For more info: [Microsoft Docs](https://learn.microsoft.com/en-us/windows/wsl/filesystems)
{{% /tab %}}
{{% tab name="Kubernetes" %}}
For installing LocalAI in Kubernetes, you can use the following helm chart:
```bash
# Install the helm repository
helm repo add go-skynet https://go-skynet.github.io/helm-charts/
# Update the repositories
helm repo update
# Get the values
helm show values go-skynet/local-ai > values.yaml
# Edit the values if needed
# vim values.yaml ...
# Install the helm chart
helm install local-ai go-skynet/local-ai -f values.yaml
```
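Once the chart is installed, you can reach the API by port-forwarding the service; a rough sketch (the service name may differ depending on your release name and values):
```bash
# forward the LocalAI service to localhost and test the API
kubectl port-forward svc/local-ai 8080:8080
curl http://localhost:8080/v1/models
```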
{{% /tab %}}
{{% tab name="From binary" %}}
LocalAI binary releases are available on [GitHub](https://github.com/go-skynet/LocalAI/releases).
{{% /tab %}}
{{% tab name="From source" %}}
See the [build section]({{%relref "build" %}}).
{{% /tab %}}
{{< /tabs >}}
### Running Popular models (one-click!)
You can run `local-ai` directly with a model name, and it will download the model and start the API with the model loaded.
> Don't need GPU acceleration? Use the CPU images, which are lighter and do not have Nvidia dependencies
> To check which version of CUDA you have available, run `nvidia-smi` or `nvcc --version`
{{< tabs >}}
{{% tab name="CPU-only" %}}
| Model | Category | Docker command |
| --- | --- | --- |
| [phi-2](https://huggingface.co/microsoft/phi-2) | LLM | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core phi-2``` |
| [llava](https://github.com/SkunkworksAI/BakLLaVA) | Multimodal LLM | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core llava``` |
| [mistral-openorca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) | LLM | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core mistral-openorca``` |
| [bert-cpp](https://github.com/skeskinen/bert.cpp) | Embeddings | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core bert-cpp``` |
| all-minilm-l6-v2 | Embeddings | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg all-minilm-l6-v2``` |
| whisper-base | Audio to Text | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core whisper-base``` |
| rhasspy-voice-en-us-amy | Text to Audio | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core rhasspy-voice-en-us-amy``` |
| coqui | Text to Audio | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg coqui``` |
| bark | Text to Audio | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg bark``` |
| vall-e-x | Text to Audio | ```docker run -ti -p 8080:8080 localai/localai:{{< version >}}-ffmpeg vall-e-x``` |
{{% /tab %}}
{{% tab name="GPU (CUDA 11)" %}}
| Model | Category | Docker command |
| --- | --- | --- |
| [phi-2](https://huggingface.co/microsoft/phi-2) | LLM | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core phi-2``` |
| [llava](https://github.com/SkunkworksAI/BakLLaVA) | Multimodal LLM | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core llava``` |
| [mistral-openorca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) | LLM | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core mistral-openorca``` |
| [bert-cpp](https://github.com/skeskinen/bert.cpp) | Embeddings | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core bert-cpp``` |
| [all-minilm-l6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | Embeddings | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 all-minilm-l6-v2``` |
| whisper-base | Audio to Text | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core whisper-base``` |
| rhasspy-voice-en-us-amy | Text to Audio | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11-core rhasspy-voice-en-us-amy``` |
| coqui | Text to Audio | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 coqui``` |
| bark | Text to Audio | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 bark``` |
| vall-e-x | Text to Audio | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda11 vall-e-x``` |
{{% /tab %}}
{{% tab name="GPU (CUDA 12)" %}}
| Model | Category | Docker command |
| --- | --- | --- |
| [phi-2](https://huggingface.co/microsoft/phi-2) | LLM | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core phi-2``` |
| [llava](https://github.com/SkunkworksAI/BakLLaVA) | Multimodal LLM | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core llava``` |
| [mistral-openorca](https://huggingface.co/Open-Orca/Mistral-7B-OpenOrca) | LLM | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core mistral-openorca``` |
| bert-cpp | Embeddings | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core bert-cpp``` |
| all-minilm-l6-v2 | Embeddings | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 all-minilm-l6-v2``` |
| whisper-base | Audio to Text | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core whisper-base``` |
| rhasspy-voice-en-us-amy | Text to Audio | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12-core rhasspy-voice-en-us-amy``` |
| coqui | Text to Audio | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 coqui``` |
| bark | Text to Audio | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 bark``` |
| vall-e-x | Text to Audio | ```docker run -ti -p 8080:8080 --gpus all localai/localai:{{< version >}}-cublas-cuda12 vall-e-x``` |
{{% /tab %}}
{{< /tabs >}}
{{% notice note %}}
LocalAI can be started (either the container image or the binary) with a list of model config file URLs or our short-hand format (e.g. `huggingface://`, `github://`). It works by passing the URLs as arguments or as an environment variable, for example:
```
local-ai github://owner/repo/file.yaml@branch
# Env
MODELS="github://owner/repo/file.yaml@branch,github://owner/repo/file.yaml@branch" local-ai
# Args
local-ai --models github://owner/repo/file.yaml@branch --models github://owner/repo/file.yaml@branch
```
For example, to start LocalAI with phi-2, you can also point it at a full config file hosted in a gist:
```bash
docker run -p 8080:8080 localai/localai:{{< version >}}-ffmpeg-core https://gist.githubusercontent.com/mudler/ad601a0488b497b69ec549150d9edd18/raw/a8a8869ef1bb7e3830bf5c0bae29a0cce991ff8d/phi-2.yaml
```
The file should be a valid LocalAI YAML configuration file, for the full syntax see [advanced]({{%relref "advanced" %}}).
{{% /notice %}}
### Container images
LocalAI has a set of images to support CUDA, ffmpeg and 'vanilla' (CPU-only). The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=tags):
{{< tabs >}}
{{% tab name="Vanilla / CPU Images" %}}
- `master`
- `latest`
- `{{< version >}}`
- `{{< version >}}-ffmpeg`
- `{{< version >}}-ffmpeg-core`
Core Images - Smaller images without pre-downloaded Python dependencies
{{% /tab %}}
{{% tab name="GPU Images CUDA 11" %}}
Images with Nvidia acceleration support
> If you do not know which version of CUDA you have available, you can check with `nvidia-smi` or `nvcc --version`
- `master-cublas-cuda11`
- `master-cublas-cuda11-core`
- `{{< version >}}-cublas-cuda11`
- `{{< version >}}-cublas-cuda11-core`
- `{{< version >}}-cublas-cuda11-ffmpeg`
- `{{< version >}}-cublas-cuda11-ffmpeg-core`
Core Images - Smaller images without pre-downloaded Python dependencies
{{% /tab %}}
{{% tab name="GPU Images CUDA 12" %}}
Images with Nvidia acceleration support
> If you do not know which version of CUDA you have available, you can check with `nvidia-smi` or `nvcc --version`
- `master-cublas-cuda12`
- `master-cublas-cuda12-core`
- `{{< version >}}-cublas-cuda12`
- `{{< version >}}-cublas-cuda12-core`
- `{{< version >}}-cublas-cuda12-ffmpeg`
- `{{< version >}}-cublas-cuda12-ffmpeg-core`
Core Images - Smaller images without pre-downloaded Python dependencies
{{% /tab %}}
{{< /tabs >}}
Example:
- Standard (GPT + `stablediffusion`): `quay.io/go-skynet/local-ai:latest`
- FFmpeg: `quay.io/go-skynet/local-ai:{{< version >}}-ffmpeg`
- CUDA 11+FFmpeg: `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda11-ffmpeg`
- CUDA 12+FFmpeg: `quay.io/go-skynet/local-ai:{{< version >}}-cublas-cuda12-ffmpeg`
{{% notice note %}}
Note: the binary inside the image is pre-compiled, and might not suit all CPUs.
To enable CPU optimizations for the execution environment,
the default behavior is to rebuild when starting the container.
To disable this auto-rebuild behavior,
set the environment variable `REBUILD` to `false`.
See [docs on all environment variables]({{%relref "advanced#environment-variables" %}})
for more info.
{{% /notice %}}
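For example, to start the container without the automatic rebuild (a sketch; adjust the image tag and volumes to your setup):
```bash
# skip the CPU-optimization rebuild on startup
docker run -p 8080:8080 -e REBUILD=false -v $PWD/models:/models -ti --rm quay.io/go-skynet/local-ai:latest --models-path /models
```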
### Example: Use luna-ai-llama2 model with `docker`
```bash
mkdir models
# Download luna-ai-llama2 to models/
wget https://huggingface.co/TheBloke/Luna-AI-Llama2-Uncensored-GGUF/resolve/main/luna-ai-llama2-uncensored.Q4_0.gguf -O models/luna-ai-llama2
# Use a template from the examples
cp -rf prompt-templates/getting_started.tmpl models/luna-ai-llama2.tmpl
docker run -p 8080:8080 -v $PWD/models:/models -ti --rm quay.io/go-skynet/local-ai:latest --models-path /models --context-size 700 --threads 4
# Now API is accessible at localhost:8080
curl http://localhost:8080/v1/models
# {"object":"list","data":[{"id":"luna-ai-llama2","object":"model"}]}
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "luna-ai-llama2",
"messages": [{"role": "user", "content": "How are you?"}],
"temperature": 0.9
}'
# {"model":"luna-ai-llama2","choices":[{"message":{"role":"assistant","content":"I'm doing well, thanks. How about you?"}}]}
```
To see other model configurations, see also the example section [here](https://github.com/mudler/LocalAI/tree/master/examples/configurations).
### Examples
![Screenshot from 2023-04-26 23-59-55](https://user-images.githubusercontent.com/2420543/234715439-98d12e03-d3ce-4f94-ab54-2b256808e05e.png)
To see other examples on how to integrate with other projects for instance for question answering or for using it with chatbot-ui, see: [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/).

View File

@ -1,178 +0,0 @@
+++
disableToc = false
title = "AIKit"
description="AI + BuildKit = AIKit: Build and deploy large language models easily"
weight = 2
+++
GitHub Link - https://github.com/sozercan/aikit
[AIKit](https://github.com/sozercan/aikit) is a quick, easy, and local or cloud-agnostic way to get started hosting and deploying large language models (LLMs) for inference. No GPU, internet access or additional tools are needed to get started except for [Docker](https://docs.docker.com/desktop/install/linux-install/)!
AIKit uses [LocalAI](https://localai.io/) under-the-hood to run inference. LocalAI provides a drop-in replacement REST API that is OpenAI API compatible, so you can use any OpenAI API compatible client, such as [Kubectl AI](https://github.com/sozercan/kubectl-ai), [Chatbot-UI](https://github.com/sozercan/chatbot-ui) and many more, to send requests to open-source LLMs powered by AIKit!
> At this time, AIKit is tested with LocalAI `llama` backend. Other backends may work but are not tested. Please open an issue if you'd like to see support for other backends.
## Features
- 🐳 No GPU, Internet access or additional tools needed except for [Docker](https://docs.docker.com/desktop/install/linux-install/)!
- 🤏 Minimal image size, resulting in less vulnerabilities and smaller attack surface with a custom [distroless](https://github.com/GoogleContainerTools/distroless)-based image
- 🚀 Easy to use declarative configuration
- ✨ OpenAI API compatible to use with any OpenAI API compatible client
- 🚢 Kubernetes deployment ready
- 📦 Supports multiple models with a single image
- 🖥️ Supports GPU-accelerated inferencing with NVIDIA GPUs
- 🔐 Signed images for `aikit` and pre-made models
## Pre-made Models
AIKit comes with pre-made models that you can use out-of-the-box!
### CPU
- 🦙 Llama 2 7B Chat: `ghcr.io/sozercan/llama2:7b`
- 🦙 Llama 2 13B Chat: `ghcr.io/sozercan/llama2:13b`
- 🐬 Orca 2 13B: `ghcr.io/sozercan/orca2:13b`
### NVIDIA CUDA
- 🦙 Llama 2 7B Chat (CUDA): `ghcr.io/sozercan/llama2:7b-cuda`
- 🦙 Llama 2 13B Chat (CUDA): `ghcr.io/sozercan/llama2:13b-cuda`
- 🐬 Orca 2 13B (CUDA): `ghcr.io/sozercan/orca2:13b-cuda`
> CUDA models include CUDA v12. They are used with [NVIDIA GPU acceleration](#gpu-acceleration-support).
## Quick Start
### Creating an image
> This section shows how to create a custom image with models of your choosing. If you want to use one of the pre-made models, skip to [running models](#running-models).
>
> Please see [models folder](./models/) for pre-made model definitions. You can find more model examples at [go-skynet/model-gallery](https://github.com/go-skynet/model-gallery).
Create an `aikitfile.yaml` with the following structure:
```yaml
#syntax=ghcr.io/sozercan/aikit:latest
apiVersion: v1alpha1
models:
- name: llama-2-7b-chat
source: https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf
```
> This is the simplest way to get started to build an image. For full `aikitfile` specification, see [specs](docs/specs.md).
First, create a buildx buildkit instance. Alternatively, if you are using Docker v24 with [containerd image store](https://docs.docker.com/storage/containerd/) enabled, you can skip this step.
```bash
docker buildx create --use --name aikit-builder
```
Then build your image with:
```bash
docker buildx build . -t my-model -f aikitfile.yaml --load
```
This will build a local container image with your model(s). You can see the image with:
```bash
docker images
REPOSITORY TAG IMAGE ID CREATED SIZE
my-model latest e7b7c5a4a2cb About an hour ago 5.51GB
```
### Running models
You can start the inferencing server for your models with:
```bash
# for pre-made models, replace "my-model" with the image name
docker run -d --rm -p 8080:8080 my-model
```
You can then send requests to `localhost:8080` to run inference from your models. For example:
```bash
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "llama-2-7b-chat",
"messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]
}'
{"created":1701236489,"object":"chat.completion","id":"dd1ff40b-31a7-4418-9e32-42151ab6875a","model":"llama-2-7b-chat","choices":[{"index":0,"finish_reason":"stop","message":{"role":"assistant","content":"\nKubernetes is a container orchestration system that automates the deployment, scaling, and management of containerized applications in a microservices architecture."}}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
```
## Kubernetes Deployment
It is easy to get started deploying your models to Kubernetes!
Make sure you have a Kubernetes cluster running and `kubectl` is configured to talk to it, and your model images are accessible from the cluster.
> You can use [kind](https://kind.sigs.k8s.io/) to create a local Kubernetes cluster for testing purposes.
```bash
# create a deployment
# for pre-made models, replace "my-model" with the image name
kubectl create deployment my-llm-deployment --image=my-model
# expose it as a service
kubectl expose deployment my-llm-deployment --port=8080 --target-port=8080 --name=my-llm-service
# easy to scale up and down as needed
kubectl scale deployment my-llm-deployment --replicas=3
# port-forward for testing locally
kubectl port-forward service/my-llm-service 8080:8080
# send requests to your model
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "llama-2-7b-chat",
"messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]
}'
{"created":1701236489,"object":"chat.completion","id":"dd1ff40b-31a7-4418-9e32-42151ab6875a","model":"llama-2-7b-chat","choices":[{"index":0,"finish_reason":"stop","message":{"role":"assistant","content":"\nKubernetes is a container orchestration system that automates the deployment, scaling, and management of containerized applications in a microservices architecture."}}],"usage":{"prompt_tokens":0,"completion_tokens":0,"total_tokens":0}}
```
> For an example Kubernetes deployment and service YAML, see the [kubernetes folder](./kubernetes/). Please note that these are examples; you may need to customize them (for instance with properly configured resource requests and limits) based on your needs.
## GPU Acceleration Support
> At this time, only NVIDIA GPU acceleration is supported. Please open an issue if you'd like to see support for other GPU vendors.
### NVIDIA
AIKit supports GPU accelerated inferencing with [NVIDIA Container Toolkit](https://github.com/NVIDIA/nvidia-container-toolkit). You must also have [NVIDIA Drivers](https://www.nvidia.com/en-us/drivers/unix/) installed on your host machine.
For Kubernetes, [NVIDIA GPU Operator](https://github.com/NVIDIA/gpu-operator) provides a streamlined way to install the NVIDIA drivers and container toolkit to configure your cluster to use GPUs.
To get started with GPU-accelerated inferencing, make sure to set the following in your `aikitfile` and build your model.
```yaml
runtime: cuda # use NVIDIA CUDA runtime
f16: true # use float16 precision
gpu_layers: 35 # number of layers to offload to GPU
low_vram: true # for devices with low VRAM
```
> Make sure to customize these values based on your model and GPU specs.
After building the model, you can run it with [`--gpus all`](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/docker-specialized.html#gpu-enumeration) flag to enable GPU support:
```bash
# for pre-made models, replace "my-model" with the image name
docker run --rm --gpus all -p 8080:8080 my-model
```
If GPU acceleration is working, you'll see output similar to the following in the debug logs:
```bash
5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr ggml_init_cublas: found 1 CUDA devices:
5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr Device 0: Tesla T4, compute capability 7.5
...
5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr llm_load_tensors: using CUDA for GPU acceleration
5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr llm_load_tensors: mem required = 70.41 MB (+ 2048.00 MB per state)
5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr llm_load_tensors: offloading 32 repeating layers to GPU
5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr llm_load_tensors: offloading non-repeating layers to GPU
5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr llm_load_tensors: offloading v cache to GPU
5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr llm_load_tensors: offloading k cache to GPU
5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr llm_load_tensors: offloaded 35/35 layers to GPU
5:32AM DBG GRPC(llama-2-7b-chat.Q4_K_M.gguf-127.0.0.1:43735): stderr llm_load_tensors: VRAM used: 5869 MB
```

View File

@ -1,70 +0,0 @@
+++
disableToc = false
title = "AnythingLLM"
description="Integrate your LocalAI LLM and embedding models into AnythingLLM by Mintplex Labs"
weight = 2
+++
AnythingLLM is an open source ChatGPT equivalent tool for chatting with documents and more in a secure environment by [Mintplex Labs Inc](https://github.com/Mintplex-Labs).
![image](https://github.com/Mintplex-Labs/anything-llm/raw/master/images/screenshots/chatting.gif)
⭐ Star on Github - https://github.com/Mintplex-Labs/anything-llm
* Chat with your LocalAI models (or hosted models like OpenAI, Anthropic, and Azure)
* Embed documents (txt, pdf, json, and more) using your LocalAI Sentence Transformers
* Select any vector database you want (Chroma, Pinecone, Qdrant, Weaviate) or use the embedded on-instance vector database (LanceDB)
* Supports single or multi-user tenancy with built-in permissions
* Full developer API
* Locally running SQLite db for minimal setup.
AnythingLLM is a fully transparent tool to deliver a customized, white-label ChatGPT equivalent experience using only the models and services you or your organization are comfortable using.
### Why AnythingLLM?
AnythingLLM aims to enable you to quickly and comfortably get a ChatGPT equivalent experience using your proprietary documents for your organization with zero compromise on security or comfort.
### What does AnythingLLM include?
- Full UI
- Full admin console and panel for managing users, chats, model selection, vector db connection, and embedder selection
- Multi-user support and logins
- Supports both desktop and mobile view ports
- Built in vector database where no data leaves your instance at all
- Docker support
## Install
### Local via docker
Running via docker and integrating with your LocalAI instance is a breeze.
First, pull in the latest AnythingLLM Docker image
`docker pull mintplexlabs/anythingllm:master`
Next, run the image on a container exposing port `3001`.
`docker run -d -p 3001:3001 mintplexlabs/anythingllm:master`
Now open `http://localhost:3001` and you will be guided through onboarding to set up your AnythingLLM instance to your comfort level.
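Putting those steps together:
```bash
# pull the latest AnythingLLM image and expose it on port 3001
docker pull mintplexlabs/anythingllm:master
docker run -d -p 3001:3001 mintplexlabs/anythingllm:master
```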
## Integration with your LocalAI instance
There are two areas where you can leverage your models loaded into LocalAI - LLM and Embedding. Any LLM models should be ready to run a chat completion.
### LLM model selection
During onboarding and from the sidebar settings you can select `LocalAI` as your LLM. Here you can set both the model and the token limit of the specific model. The dropdown will automatically populate once your URL is set.
The URL should look like `http://localhost:8000/v1` or wherever your LocalAI instance is being served from. Non-localhost URLs are permitted if hosting LocalAI on cloud services.
![localai-setup](https://github.com/Mintplex-Labs/anything-llm/raw/master/images/LLMproviders/localai-setup.png)
### LLM embedding model selection
During onboarding and from the sidebar settings you can select `LocalAI` as your preferred embedding engine. This model will be used when you upload any kind of document via AnythingLLM. Here you can set the model from the models available via the LocalAI API. The dropdown will automatically populate once your URL is set.
The URL should look like `http://localhost:8000/v1` or wherever your LocalAI instance is being served from. Non-localhost URLs are permitted if hosting LocalAI on cloud services.
![localai-setup](https://github.com/Mintplex-Labs/anything-llm/raw/master/images/LLMproviders/localai-embedding.png)

View File

@ -1,58 +0,0 @@
+++
disableToc = false
title = "BMO Chatbo"
weight = 2
+++
Generate and brainstorm ideas while creating your notes in Obsidian, using Large Language Models (LLMs) such as OpenAI's "gpt-3.5-turbo" and "gpt-4".
![](https://raw.githubusercontent.com/longy2k/obsidian-bmo-chatbot/main/README_images/Screenshot-1.png)
Github Link - https://github.com/longy2k/obsidian-bmo-chatbot
## Features
- **Chat from anywhere in Obsidian:** Chat with your bot from anywhere within Obsidian.
- **Chat with current note:** Use your chatbot to reference and engage within your current note.
- **Chatbot responds in Markdown:** Receive formatted responses in Markdown for consistency.
- **Customizable bot name:** Personalize the chatbot's name.
- **System role prompt:** Configure the chatbot to prompt for user roles before responding to messages.
- **Set Max Tokens and Temperature:** Customize the length and randomness of the chatbot's responses with Max Tokens and Temperature settings.
- **System theme color accents:** Seamlessly matches the chatbot's interface with your system's color scheme.
- **Interact with self-hosted Large Language Models (LLMs):** Use the REST API URL provided to interact with self-hosted Large Language Models (LLMs) using [LocalAI](https://localai.io/howtos/).
## Requirements
To use this plugin with [LocalAI](https://localai.io/howtos/), you will need to have the self-hosted API set up and running. You can follow the instructions provided by the self-hosted API provider to get it up and running.
Once you have the REST API URL for your self-hosted API, you can use it with this plugin to interact with your models.
Explore some ``GGUF`` models at [theBloke](https://huggingface.co/TheBloke).
## How to activate the plugin
Two methods:
Obsidian Community plugins (**Recommended**):
1. Search for "BMO Chatbot" in the Obsidian Community plugins.
2. Enable "BMO Chatbot" in the settings.
To activate the plugin from this repo:
1. Navigate to the plugin's folder in your terminal.
2. Run `npm install` to install any necessary dependencies for the plugin.
3. Once the dependencies have been installed, run `npm run build` to build the plugin.
4. Once the plugin has been built, it should be ready to activate.
## Getting Started
To start using the plugin, enable it in your settings menu and enter your OpenAI API key. After completing these steps, you can access the bot panel by clicking on the bot icon in the left sidebar.
If you want to clear the chat history, simply click on the bot icon again in the left ribbon bar.
## Supported Models
- OpenAI
- gpt-3.5-turbo
- gpt-3.5-turbo-16k
- gpt-4
- Anthropic
- claude-instant-1.2
- claude-2.0
- Any self-hosted models using [LocalAI](https://localai.io/howtos/)
## Other Notes
"BMO" is a tag name for this project, inspired by the character BMO from the animated TV show "Adventure Time."

View File

@ -1,103 +0,0 @@
+++
disableToc = false
title = "BionicGPT"
weight = 2
+++
BionicGPT is an on-premise replacement for ChatGPT, offering the advantages of Generative AI while maintaining strict data confidentiality. It can run on your laptop or scale into the data center.
![](https://raw.githubusercontent.com/purton-tech/bionicgpt/main/website/static/github-readme.png)
BionicGPT Homepage - https://bionic-gpt.com
Github link - https://github.com/purton-tech/bionicgpt
<!-- Try it out -->
## Try it out
Cut and paste the following into a `docker-compose.yaml` file, run `docker-compose up -d`, then access the user interface at http://localhost:7800/auth/sign_up
This has been tested on an AMD 2700x with 16GB of RAM. The included `ggml-gpt4all-j` model runs on CPU only.
**Warning** - The images in this `docker-compose` are large due to having the model weights pre-loaded for convenience.
```yaml
services:
# LocalAI with pre-loaded ggml-gpt4all-j
local-ai:
image: ghcr.io/purton-tech/bionicgpt-model-api:llama-2-7b-chat
# Handles parsing of multiple documents types.
unstructured:
image: downloads.unstructured.io/unstructured-io/unstructured-api:db264d8
ports:
- "8000:8000"
# Handles routing between the application, barricade and the LLM API
envoy:
image: ghcr.io/purton-tech/bionicgpt-envoy:1.1.10
ports:
- "7800:7700"
# Postgres pre-loaded with pgVector
db:
image: ankane/pgvector
environment:
POSTGRES_PASSWORD: testpassword
POSTGRES_USER: postgres
POSTGRES_DB: finetuna
healthcheck:
test: ["CMD-SHELL", "pg_isready -U postgres"]
interval: 10s
timeout: 5s
retries: 5
# Sets up our database tables
migrations:
image: ghcr.io/purton-tech/bionicgpt-db-migrations:1.1.10
environment:
DATABASE_URL: postgresql://postgres:testpassword@db:5432/postgres?sslmode=disable
depends_on:
db:
condition: service_healthy
# Barricade handles all /auth routes for user sign up and sign in.
barricade:
image: purtontech/barricade
environment:
# This secret key is used to encrypt cookies.
SECRET_KEY: 190a5bf4b3cbb6c0991967ab1c48ab30790af876720f1835cbbf3820f4f5d949
DATABASE_URL: postgresql://postgres:testpassword@db:5432/postgres?sslmode=disable
FORWARD_URL: app
FORWARD_PORT: 7703
REDIRECT_URL: /app/post_registration
depends_on:
db:
condition: service_healthy
migrations:
condition: service_completed_successfully
# Our axum server delivering our user interface
embeddings-job:
image: ghcr.io/purton-tech/bionicgpt-embeddings-job:1.1.10
environment:
APP_DATABASE_URL: postgresql://ft_application:testpassword@db:5432/postgres?sslmode=disable
depends_on:
db:
condition: service_healthy
migrations:
condition: service_completed_successfully
# Our axum server delivering our user interface
app:
image: ghcr.io/purton-tech/bionicgpt:1.1.10
environment:
APP_DATABASE_URL: postgresql://ft_application:testpassword@db:5432/postgres?sslmode=disable
depends_on:
db:
condition: service_healthy
migrations:
condition: service_completed_successfully
```
## Kubernetes Ready
BionicGPT is optimized to run on Kubernetes and implements the full pipeline of LLM fine tuning from data acquisition to user interface.

View File

@ -1,54 +0,0 @@
+++
disableToc = false
title = "Flowise"
weight = 2
+++
Build LLM Apps Easily
![Flowise](https://raw.githubusercontent.com/FlowiseAI/Flowise/main/images/flowise.png)
Github Link - https://github.com/FlowiseAI/Flowise
## ⚡Local Install
Download and Install [NodeJS](https://nodejs.org/en/download) >= 18.15.0
1. Install Flowise
```bash
npm install -g flowise
```
2. Start Flowise
```bash
npx flowise start
```
3. Open [http://localhost:3000](http://localhost:3000)
## 🐳 Docker
### Docker Compose
1. Go to `docker` folder at the root of the project
2. Copy `.env.example` file, paste it into the same location, and rename to `.env`
3. `docker-compose up -d`
4. Open [http://localhost:3000](http://localhost:3000)
5. You can bring the containers down (and remove the images) with `docker-compose down --rmi all`
### Docker Compose (Flowise + LocalAI)
1. In a command line Run ``git clone https://github.com/go-skynet/LocalAI``
2. Then run ``cd LocalAI/examples/flowise``
3. Then run ``docker-compose up -d --pull always``
4. Open [http://localhost:3000](http://localhost:3000)
5. You can bring the containers down (and remove the images) with `docker-compose down --rmi all`
## 🌱 Env Variables
Flowise supports different environment variables to configure your instance. You can specify the following variables in the `.env` file inside the `packages/server` folder. Read [more](https://github.com/FlowiseAI/Flowise/blob/main/CONTRIBUTING.md#-env-variables)
## 📖 Documentation
[Flowise Docs](https://docs.flowiseai.com/)

View File

@ -1,466 +0,0 @@
+++
disableToc = false
title = "k8sgpt"
weight = 2
+++
`k8sgpt` is a tool for scanning your Kubernetes clusters, diagnosing and triaging issues in simple English.
![](https://raw.githubusercontent.com/k8sgpt-ai/k8sgpt/main/images/banner-white.png)
It has SRE experience codified into its analyzers and helps to pull out the most relevant information to enrich it with AI.
Github Link - https://github.com/k8sgpt-ai/k8sgpt
## CLI Installation
### Linux/Mac via brew
```
brew tap k8sgpt-ai/k8sgpt
brew install k8sgpt
```
<details>
<summary>RPM-based installation (RedHat/CentOS/Fedora)</summary>
**32 bit:**
<!---x-release-please-start-version-->
```
curl -LO https://github.com/k8sgpt-ai/k8sgpt/releases/download/v0.3.18/k8sgpt_386.rpm
sudo rpm -ivh k8sgpt_386.rpm
```
<!---x-release-please-end-->
**64 bit:**
<!---x-release-please-start-version-->
```
curl -LO https://github.com/k8sgpt-ai/k8sgpt/releases/download/v0.3.18/k8sgpt_amd64.rpm
sudo rpm -ivh k8sgpt_amd64.rpm
```
<!---x-release-please-end-->
</details>
<details>
<summary>DEB-based installation (Ubuntu/Debian)</summary>
**32 bit:**
<!---x-release-please-start-version-->
```
curl -LO https://github.com/k8sgpt-ai/k8sgpt/releases/download/v0.3.18/k8sgpt_386.deb
sudo dpkg -i k8sgpt_386.deb
```
<!---x-release-please-end-->
**64 bit:**
<!---x-release-please-start-version-->
```
curl -LO https://github.com/k8sgpt-ai/k8sgpt/releases/download/v0.3.18/k8sgpt_amd64.deb
sudo dpkg -i k8sgpt_amd64.deb
```
<!---x-release-please-end-->
</details>
<details>
<summary>APK-based installation (Alpine)</summary>
**32 bit:**
<!---x-release-please-start-version-->
```
curl -LO https://github.com/k8sgpt-ai/k8sgpt/releases/download/v0.3.18/k8sgpt_386.apk
apk add k8sgpt_386.apk
```
<!---x-release-please-end-->
**64 bit:**
<!---x-release-please-start-version-->
```
curl -LO https://github.com/k8sgpt-ai/k8sgpt/releases/download/v0.3.18/k8sgpt_amd64.apk
apk add k8sgpt_amd64.apk
```
<!---x-release-please-end-->
</details>
<details>
<summary>Failing Installation on WSL or Linux (missing gcc)</summary>
When installing Homebrew on WSL or Linux, you may encounter the following error:
```
==> Installing k8sgpt from k8sgpt-ai/k8sgpt Error: The following formula cannot be installed from a bottle and must be
built from the source. k8sgpt Install Clang or run brew install gcc.
```
If you install gcc as suggested, the problem will persist. Therefore, you need to install the build-essential package.
```
sudo apt-get update
sudo apt-get install build-essential
```
</details>
### Windows
* Download the latest Windows binaries of **k8sgpt** from the [Release](https://github.com/k8sgpt-ai/k8sgpt/releases)
tab based on your system architecture.
* Extract the downloaded package to your desired location. Configure the system *path* variable with the binary location
## Operator Installation
To install within a Kubernetes cluster please use our `k8sgpt-operator` with installation instructions available [here](https://github.com/k8sgpt-ai/k8sgpt-operator)
_This mode of operation is ideal for continuous monitoring of your cluster and can integrate with your existing monitoring such as Prometheus and Alertmanager._
## Quick Start
* Currently the default AI provider is OpenAI, so you will need to generate an API key from [OpenAI](https://openai.com)
* You can do this by running `k8sgpt generate` to open a browser link to generate it
* Run `k8sgpt auth add` to set it in k8sgpt.
* You can provide the password directly using the `--password` flag.
* Run `k8sgpt filters` to manage the active filters used by the analyzer. By default, all filters are executed during analysis.
* Run `k8sgpt analyze` to run a scan.
* And use `k8sgpt analyze --explain` to get a more detailed explanation of the issues.
* You can also run `k8sgpt analyze --with-doc` (with or without the explain flag) to get the official documentation from Kubernetes.
## Analyzers
K8sGPT uses analyzers to triage and diagnose issues in your cluster. It has a set of analyzers that are built in, but
you will be able to write your own analyzers.
### Built in analyzers
#### Enabled by default
- [x] podAnalyzer
- [x] pvcAnalyzer
- [x] rsAnalyzer
- [x] serviceAnalyzer
- [x] eventAnalyzer
- [x] ingressAnalyzer
- [x] statefulSetAnalyzer
- [x] deploymentAnalyzer
- [x] cronJobAnalyzer
- [x] nodeAnalyzer
- [x] mutatingWebhookAnalyzer
- [x] validatingWebhookAnalyzer
#### Optional
- [x] hpaAnalyzer
- [x] pdbAnalyzer
- [x] networkPolicyAnalyzer
## Examples
_Run a scan with the default analyzers_
```
k8sgpt generate
k8sgpt auth add
k8sgpt analyze --explain
k8sgpt analyze --explain --with-doc
```
_Filter on resource_
```
k8sgpt analyze --explain --filter=Service
```
_Filter by namespace_
```
k8sgpt analyze --explain --filter=Pod --namespace=default
```
_Output to JSON_
```
k8sgpt analyze --explain --filter=Service --output=json
```
_Anonymize during explain_
```
k8sgpt analyze --explain --filter=Service --output=json --anonymize
```
<details>
<summary> Using filters </summary>
_List filters_
```
k8sgpt filters list
```
_Add default filters_
```
k8sgpt filters add [filter(s)]
```
### Examples :
- Simple filter : `k8sgpt filters add Service`
- Multiple filters : `k8sgpt filters add Ingress,Pod`
_Remove default filters_
```
k8sgpt filters remove [filter(s)]
```
### Examples :
- Simple filter : `k8sgpt filters remove Service`
- Multiple filters : `k8sgpt filters remove Ingress,Pod`
</details>
<details>
<summary> Additional commands </summary>
_List configured backends_
```
k8sgpt auth list
```
_Update configured backends_
```
k8sgpt auth update $MY_BACKEND1,$MY_BACKEND2..
```
_Remove configured backends_
```
k8sgpt auth remove $MY_BACKEND1,$MY_BACKEND2..
```
_List integrations_
```
k8sgpt integrations list
```
_Activate integrations_
```
k8sgpt integrations activate [integration(s)]
```
_Use integration_
```
k8sgpt analyze --filter=[integration(s)]
```
_Deactivate integrations_
```
k8sgpt integrations deactivate [integration(s)]
```
_Serve mode_
```
k8sgpt serve
```
_Analysis with serve mode_
```
curl -X GET "http://localhost:8080/analyze?namespace=k8sgpt&explain=false"
```
</details>
## Key Features
<details>
<summary> LocalAI provider </summary>
To run local models, it is possible to use OpenAI compatible APIs, for instance [LocalAI](https://github.com/go-skynet/LocalAI) which uses [llama.cpp](https://github.com/ggerganov/llama.cpp) to run inference on consumer-grade hardware. Models supported by LocalAI for instance are Vicuna, Alpaca, LLaMA, Cerebras, GPT4ALL, GPT4ALL-J, Llama2 and koala.
To run local inference, you need to download the models first; for instance, you can find `gguf`-compatible models on [huggingface.com](https://huggingface.co/models?search=gguf) (for example Vicuna, Alpaca and Koala).
### Start the API server
To start the API server, follow the instruction in [LocalAI](https://localai.io/howtos/).
### Run k8sgpt
To run k8sgpt, run `k8sgpt auth add` with the `localai` backend:
```
k8sgpt auth add --backend localai --model <model_name> --baseurl http://localhost:8080/v1 --temperature 0.7
```
Now you can analyze with the `localai` backend:
```
k8sgpt analyze --explain --backend localai
```
</details>
<details>
<summary>Setting a new default AI provider</summary>
There may be scenarios where you wish to have K8sGPT plugged into several default AI providers. In this case you may wish to use one as a new default, other than OpenAI which is the project default.
_To view available providers_
```
k8sgpt auth list
Default:
> openai
Active:
> openai
> azureopenai
Unused:
> localai
> noopai
```
_To set a new default provider_
```
k8sgpt auth default -p azureopenai
Default provider set to azureopenai
```
</details>
<details>
<summary> Anonymization </summary>
With this option, the data is anonymized before being sent to the AI Backend. During the analysis execution, `k8sgpt` retrieves sensitive data (Kubernetes object names, labels, etc.). This data is masked when sent to the AI backend and replaced by a key that can be used to de-anonymize the data when the solution is returned to the user.
1. Error reported during analysis:
```bash
Error: HorizontalPodAutoscaler uses StatefulSet/fake-deployment as ScaleTargetRef which does not exist.
```
2. Payload sent to the AI backend:
```bash
Error: HorizontalPodAutoscaler uses StatefulSet/tGLcCRcHa1Ce5Rs as ScaleTargetRef which does not exist.
```
3. Payload returned by the AI:
```bash
The Kubernetes system is trying to scale a StatefulSet named tGLcCRcHa1Ce5Rs using the HorizontalPodAutoscaler, but it cannot find the StatefulSet. The solution is to verify that the StatefulSet name is spelled correctly and exists in the same namespace as the HorizontalPodAutoscaler.
```
4. Payload returned to the user:
```bash
The Kubernetes system is trying to scale a StatefulSet named fake-deployment using the HorizontalPodAutoscaler, but it cannot find the StatefulSet. The solution is to verify that the StatefulSet name is spelled correctly and exists in the same namespace as the HorizontalPodAutoscaler.
```
Note: **Anonymization does not currently apply to events.**
### Further Details
**Anonymization does not currently apply to events.**
*In a few analyzers, such as Pod, we feed the event messages to the AI backend; these are not known beforehand, thus we are not masking them for the **time being**.*
- The following is the list of analysers in which data is **being masked**:-
- Statefulset
- Service
- PodDisruptionBudget
- Node
- NetworkPolicy
- Ingress
- HPA
- Deployment
- Cronjob
- The following is the list of analysers in which data is **not being masked**:-
- ReplicaSet
- PersistentVolumeClaim
- Pod
- **_*Events_**
***Note**:
- k8sgpt will not mask the above analyzers because they do not send any identifying information, except for the **Events** analyzer.
- Masking for **Events** analyzer is scheduled in the near future as seen in this [issue](https://github.com/k8sgpt-ai/k8sgpt/issues/560). _Further research has to be made to understand the patterns and be able to mask the sensitive parts of an event like pod name, namespace etc._
- The following is the list of fields which are not **being masked**:-
- Describe
- ObjectStatus
- Replicas
- ContainerStatus
- **_*Event Message_**
- ReplicaStatus
- Count (Pod)
***Note**:
- It is quite possible the payload of the event message might have something like "super-secret-project-pod-X crashed" which we don't currently redact _(scheduled in the near future as seen in this [issue](https://github.com/k8sgpt-ai/k8sgpt/issues/560))_.
### Proceed with care
- The k8sgpt team recommends using an entirely different backend **(a local model) in critical production environments**. By using a local model, you can rest assured that everything stays within your DMZ, and nothing is leaked.
- If there is any uncertainty about the possibility of sending data to a public LLM (open AI, Azure AI) and it poses a risk to business-critical operations, then, in such cases, the use of public LLM should be avoided based on personal assessment and the jurisdiction of risks involved.
</details>
<details>
<summary> Configuration management</summary>
`k8sgpt` stores config data in the `$XDG_CONFIG_HOME/k8sgpt/k8sgpt.yaml` file. The data is stored in plain text, including your OpenAI key.
Config file locations:
| OS | Path |
| ------- | ------------------------------------------------ |
| MacOS | ~/Library/Application Support/k8sgpt/k8sgpt.yaml |
| Linux | ~/.config/k8sgpt/k8sgpt.yaml |
| Windows | %LOCALAPPDATA%/k8sgpt/k8sgpt.yaml |
</details>
<details>
<summary> Remote caching </summary>
There may be scenarios where caching remotely is preferred.
In these scenarios K8sGPT supports AWS S3 Integration.
_As a prerequisite `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` are required as environmental variables._
_Adding a remote cache_
Note: this will create the bucket if it does not exist
```
k8sgpt cache add --region <aws region> --bucket <name>
```
_Listing cache items_
```
k8sgpt cache list
```
_Removing the remote cache_
Note: this will not delete the bucket
```
k8sgpt cache remove --bucket <name>
```
</details>
## Documentation
Find our official documentation available [here](https://docs.k8sgpt.ai)

View File

@ -1,39 +0,0 @@
+++
disableToc = false
title = "Kairos"
weight = 2
+++
![Kairos Logo](https://user-images.githubusercontent.com/2420543/193010398-72d4ba6e-7efe-4c2e-b7ba-d3a826a55b7d.png)
[Kairos](https://github.com/kairos-io/kairos) - Kubernetes-focused, Cloud Native Linux meta-distribution
The immutable Linux meta-distribution for edge Kubernetes.
Github Link - https://github.com/kairos-io/kairos
## Intro
With Kairos you can build immutable, bootable Kubernetes and OS images for your edge devices as easily as writing a Dockerfile. Optional P2P mesh with distributed ledger automates node bootstrapping and coordination. Updating nodes is as easy as CI/CD: push a new image to your container registry and let secure, risk-free A/B atomic upgrades do the rest. Kairos is part of the Secure Edge-Native Architecture (SENA) to securely run workloads at the Edge ([whitepaper](https://github.com/kairos-io/kairos/files/11250843/Secure-Edge-Native-Architecture-white-paper-20240417.3.pdf)).
Kairos (formerly `c3os`) is an open-source project which brings Edge, cloud, and bare metal lifecycle OS management into the same design principles with a unified Cloud Native API.
## At-a-glance:
- :bowtie: Community Driven
- :octocat: Open Source
- :lock: Linux immutable, meta-distribution
- :key: Secure
- :whale: Container-based
- :penguin: Distribution agnostic
## Kairos can be used to:
- Easily spin-up a Kubernetes cluster, with the Linux distribution of your choice :penguin:
- Create your Immutable infrastructure, no more infrastructure drift! :lock:
- Manage the cluster lifecycle with Kubernetes—from building to provisioning, and upgrading :rocket:
- Create a multi-node, single cluster that spans across regions :earth_africa:
For comprehensive docs, tutorials, and examples see our [documentation](https://kairos.io/docs/getting-started/).

View File

@ -1,60 +0,0 @@
+++
disableToc = false
title = "LLMStack"
weight = 2
+++
![LLMStack](https://llmstack.ai/img/llmstack-logo-light-white-bg.svg)
[LLMStack](https://github.com/trypromptly/LLMStack) - LLMStack is a no-code platform for building generative AI applications, chatbots, agents and connecting them to your data and business processes.
Github Link - https://github.com/trypromptly/LLMStack
## Overview
Build tailor-made generative AI applications, chatbots and agents that cater to your unique needs by chaining multiple LLMs. Seamlessly integrate your own data and GPT-powered models without any coding experience using LLMStack's no-code builder. Trigger your AI chains from Slack or Discord. Deploy to the cloud or on-premise.
![llmstack-quickstart](https://github.com/trypromptly/LLMStack/assets/431988/72de45f5-23f9-4cd3-91b0-7d5ae97534c3)
## Getting Started
LLMStack deployment comes with a default admin account whose credentials are `admin` and `promptly`. _Be sure to change the password from admin panel after logging in_.
## Features
**🔗 Chain multiple models**: LLMStack allows you to chain multiple LLMs together to build complex generative AI applications.
**📊 Use generative AI on your Data**: Import your data into your accounts and use it in AI chains. LLMStack allows importing various types (_CSV, TXT, PDF, DOCX, PPTX etc.,_) of data from a variety of sources (_gdrive, notion, websites, direct uploads etc.,_). Platform will take care of preprocessing and vectorization of your data and store it in the vector database that is provided out of the box.
**🛠️ No-code builder**: LLMStack comes with a no-code builder that allows you to build AI chains without any coding experience. You can chain multiple LLMs together and connect them to your data and business processes.
**☁️ Deploy to the cloud or on-premise**: LLMStack can be deployed to the cloud or on-premise. You can deploy it to your own infrastructure or use our cloud offering at [Promptly](https://trypromptly.com).
**🚀 API access**: Apps or chatbots built with LLMStack can be accessed via HTTP API. You can also trigger your AI chains from **_Slack_** or **_Discord_**.
**🏢 Multi-tenant**: LLMStack is multi-tenant. You can create multiple organizations and add users to them. Users can only access the data and AI chains that belong to their organization.
## What can you build with LLMStack?
Using LLMStack you can build a variety of generative AI applications, chatbots and agents. Here are some examples:
**📝 Text generation**: You can build apps that generate product descriptions, blog posts, news articles, tweets, emails, chat messages, etc., by using text generation models and optionally connecting your data. Check out this [marketing content generator](https://trypromptly.com/app/50ee8bae-712e-4b95-9254-74d7bcf3f0cb) for example
**🤖 Chatbots**: You can build chatbots trained on your data powered by ChatGPT, like [Promptly Help](https://trypromptly.com/app/f4d7cb50-1805-4add-80c5-e30334bce53c), which is embedded on the Promptly website
**🎨 Multimedia generation**: Build complex applications that can generate text, images, videos, audio, etc. from a prompt. This [story generator](https://trypromptly.com/app/9d6da897-67cf-4887-94ec-afd4b9362655) is an example
**🗣️ Conversational AI**: Build conversational AI systems that can have a conversation with a user. Check out this [Harry Potter character chatbot](https://trypromptly.com/app/bdeb9850-b32e-44cf-b2a8-e5d54dc5fba4)
**🔍 Search augmentation**: Build search augmentation systems that can augment search results with additional information using APIs. Sharebird uses LLMStack to augment search results with AI-generated answers from their content, similar to Bing's chatbot
**💬 Discord and Slack bots**: Apps built on LLMStack can be triggered from Slack or Discord. You can easily connect your AI chains to Slack or Discord from LLMStack's no-code app editor. Check out our [Discord server](https://discord.gg/3JsEzSXspJ) to interact with one such bot.
## Administration
Login to [http://localhost:3000/admin](http://localhost:3000/admin) using the admin account. You can add users and assign them to organizations in the admin panel.
## Documentation
Check out our documentation at [llmstack.ai/docs](https://llmstack.ai/docs/) to learn more about LLMStack.

View File

@ -1,74 +0,0 @@
+++
disableToc = false
title = "LinGoose"
weight = 2
+++
**LinGoose** (_Lingo + Go + Goose_ 🪿) aims to be a complete Go framework for creating LLM apps. 🤖 ⚙️
![lin](https://lingoose.io/assets/img/lingoose-small.png)
Github Link - https://github.com/henomis/lingoose
## Overview
**LinGoose** is a powerful Go framework for developing Large Language Model (LLM) based applications using pipelines. It is designed to be a complete solution and provides multiple components, including Prompts, Templates, Chat, Output Decoders, LLM, Pipelines, and Memory. With **LinGoose**, you can interact with LLM AI through prompts and generate complex templates. Additionally, it includes a chat feature, allowing you to create chatbots. The Output Decoders component enables you to extract specific information from the output of the LLM, while the LLM interface allows you to send prompts to various AI, such as the ones provided by OpenAI. You can chain multiple LLM steps together using Pipelines and store the output of each step in Memory for later retrieval. **LinGoose** also includes a Document component, which is used to store text, and a Loader component, which is used to load Documents from various sources. Finally, it includes TextSplitters, which are used to split text or Documents into multiple parts, Embedders, which are used to embed text or Documents into embeddings, and Indexes, which are used to store embeddings and documents and to perform searches.
## Components
**LinGoose** is composed of multiple components, each one with its own purpose.
| Component | Package | Description |
| ----------------- | ----------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| **Prompt** | [prompt](prompt/) | Prompts are the way to interact with LLM AI. They can be simple text, or more complex templates. Supports **Prompt Templates** and **[Whisper](https://openai.com) prompt** |
| **Chat Prompt** | [chat](chat/) | Chat is the way to interact with the chat LLM AI. It can be a simple text prompt, or a more complex chatbot. |
| **Decoders** | [decoder](decoder/) | Output decoders are used to decode the output of the LLM. They can be used to extract specific information from the output. Supports **JSONDecoder** and **RegExDecoder** |
| **LLMs** | [llm](llm/) | LLM is an interface to various AI such as the ones provided by OpenAI. It is responsible for sending the prompt to the AI and retrieving the output. Supports **[LocalAI](https://localai.io/howtos/)**, **[HuggingFace](https://huggingface.co)** and **[Llama.cpp](https://github.com/ggerganov/llama.cpp)**. |
| **Pipelines** | [pipeline](pipeline/) | Pipelines are used to chain multiple LLM steps together. |
| **Memory** | [memory](memory/) | Memory is used to store the output of each step. It can be used to retrieve the output of a previous step. Supports memory in **Ram** |
| **Document** | [document](document/) | Document is used to store a text |
| **Loaders** | [loader](loader/) | Loaders are used to load Documents from various sources. Supports **TextLoader**, **DirectoryLoader**, **PDFToTextLoader** and **PubMedLoader** . |
| **TextSplitters** | [textsplitter](textsplitter/) | TextSplitters are used to split text or Documents into multiple parts. Supports **RecursiveTextSplitter**. |
| **Embedders** | [embedder](embedder/) | Embedders are used to embed text or Documents into embeddings. Supports **[OpenAI](https://openai.com)** |
| **Indexes** | [index](index/) | Indexes are used to store embeddings and documents and to perform searches. Supports **SimpleVectorIndex**, **[Pinecone](https://pinecone.io)** and **[Qdrant](https://qdrant.tech)** |
## Usage
Please refer to the documentation at [lingoose.io](https://lingoose.io/docs/) to understand how to use LinGoose. If you prefer, the 👉 [examples directory](examples/) contains a lot of examples 🚀.
However, here is a **powerful** example of what **LinGoose** is capable of:
_Talk is cheap. Show me the [code](examples/)._ - Linus Torvalds
```go
package main

import (
    "context"

    openaiembedder "github.com/henomis/lingoose/embedder/openai"
    "github.com/henomis/lingoose/index/option"
    simplevectorindex "github.com/henomis/lingoose/index/simpleVectorIndex"
    "github.com/henomis/lingoose/llm/openai"
    "github.com/henomis/lingoose/loader"
    qapipeline "github.com/henomis/lingoose/pipeline/qa"
    "github.com/henomis/lingoose/textsplitter"
)

func main() {
    docs, _ := loader.NewPDFToTextLoader("./kb").WithPDFToTextPath("/opt/homebrew/bin/pdftotext").WithTextSplitter(textsplitter.NewRecursiveCharacterTextSplitter(2000, 200)).Load(context.Background())
    index := simplevectorindex.New("db", ".", openaiembedder.New(openaiembedder.AdaEmbeddingV2))
    index.LoadFromDocuments(context.Background(), docs)
    qapipeline.New(openai.NewChat().WithVerbose(true)).WithIndex(index).Query(context.Background(), "What is the NATO purpose?", option.WithTopK(1))
}
```
This is the _famous_ 4-lines **lingoose** knowledge base chatbot. 🤖
## Installation
Be sure to have a working Go environment, then run the following command:
```shell
go get github.com/henomis/lingoose
```

View File

@ -1,174 +0,0 @@
+++
disableToc = false
title = "LocalAGI"
weight = 2
+++
LocalAGI is a small 🤖 virtual assistant that you can run locally, made by the [LocalAI](https://github.com/go-skynet/LocalAI) author and powered by it.
![localagi](https://github.com/mudler/LocalAGI/assets/2420543/b69817ce-2361-4234-a575-8f578e159f33)
[AutoGPT](https://github.com/Significant-Gravitas/Auto-GPT), [babyAGI](https://github.com/yoheinakajima/babyagi), ... and now LocalAGI!
Github Link - https://github.com/mudler/LocalAGI
## Info
The goal is:
- Keep it simple, hackable and easy to understand
- No API keys needed, no cloud services needed, 100% local. Tailored for local use, yet still compatible with OpenAI.
- Smart-agent/virtual assistant that can do tasks
- Small set of dependencies
- Run with Docker/Podman/Containers
- Rather than trying to do everything, provide a good starting point for other projects
Note: Be warned! It was hacked in a weekend, and it's just an experiment to see what can be done with local LLMs.
![Screenshot from 2023-08-05 22-40-40](https://github.com/mudler/LocalAGI/assets/2420543/144da83d-3879-44f2-985c-efd690e2b136)
## 🚀 Features
- 🧠 LLM for intent detection
- 🧠 Uses functions for actions
- 📝 Write to long-term memory
- 📖 Read from long-term memory
- 🌐 Internet access for search
- :card_file_box: Write files
- 🔌 Plan steps to achieve a goal
- 🤖 Avatar creation with Stable Diffusion
- 🗨️ Conversational
- 🗣️ Voice synthesis with TTS
## :book: Quick start
No frills, just run docker-compose and start chatting with your virtual assistant:
```bash
# Modify the configuration
# nano .env
docker-compose run -i --rm localagi
```
## How to use it
By default localagi starts in interactive mode
### Examples
Road trip planner, limiting internet searches to 3 results only:
```bash
docker-compose run -i --rm localagi \
--skip-avatar \
--subtask-context \
--postprocess \
--search-results 3 \
--prompt "prepare a plan for my roadtrip to san francisco"
```
Limit planning to 3 steps:
```bash
docker-compose run -i --rm localagi \
--skip-avatar \
--subtask-context \
--postprocess \
--search-results 1 \
--prompt "do a plan for my roadtrip to san francisco" \
  --plan-message "The assistant replies with a plan of 3 steps to answer the request with a list of subtasks with logical steps. The reasoning includes a self-contained, detailed and descriptive instruction to fulfill the task."
```
### Advanced
localagi has several options in the CLI to tweak the experience (a combined example follows this list):
- `--system-prompt` is the system prompt to use. If not specified, it will use none.
- `--prompt` is the prompt to use for batch mode. If not specified, it will default to interactive mode.
- `--interactive` is the interactive mode. When used with `--prompt` will drop you in an interactive session after the first prompt is evaluated.
- `--skip-avatar` will skip avatar creation. Useful if you want to run it in a headless environment.
- `--re-evaluate` will re-evaluate if another action is needed or we have completed the user request.
- `--postprocess` will postprocess the reasoning for analysis.
- `--subtask-context` will include context in subtasks.
- `--search-results` is the number of search results to use.
- `--plan-message` is the message to use during planning. You can override the message for example to force a plan to have a different message.
- `--tts-api-base` is the TTS API base. Defaults to `http://api:8080`.
- `--localai-api-base` is the LocalAI API base. Defaults to `http://api:8080`.
- `--images-api-base` is the Images API base. Defaults to `http://api:8080`.
- `--embeddings-api-base` is the Embeddings API base. Defaults to `http://api:8080`.
- `--functions-model` is the functions model to use. Defaults to `functions`.
- `--embeddings-model` is the embeddings model to use. Defaults to `all-MiniLM-L6-v2`.
- `--llm-model` is the LLM model to use. Defaults to `gpt-4`.
- `--tts-model` is the TTS model to use. Defaults to `en-us-kathleen-low.onnx`.
- `--stablediffusion-model` is the Stable Diffusion model to use. Defaults to `stablediffusion`.
- `--stablediffusion-prompt` is the Stable Diffusion prompt to use. Defaults to `DEFAULT_PROMPT`.
- `--force-action` will force a specific action.
- `--debug` will enable debug mode.
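As a rough sketch, several of these flags can be combined in one invocation; the model names below are just the documented defaults, so adjust them to your own setup:

```bash
# Interactive session after the first prompt, with debug output and no avatar generation
docker-compose run -i --rm localagi \
  --interactive \
  --skip-avatar \
  --debug \
  --prompt "prepare a plan for my roadtrip to san francisco" \
  --llm-model gpt-4 \
  --tts-model en-us-kathleen-low.onnx
```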
### Customize
To use a different model, you can see the examples in the `config` folder.
To select a model, modify the `.env` file and change the `PRELOAD_MODELS_CONFIG` variable to use a different configuration file.
### Caveats
The "goodness" of a model has a big impact on how LocalAGI works. Currently `13b` models are powerful enough to actually able to perform multi-step tasks or do more actions. However, it is quite slow when running on CPU (no big surprise here).
The context size is a limitation - you can find in the `config` examples to run with superhot 8k context size, but the quality is not good enough to perform complex tasks.
## What is LocalAGI?
It is a dead simple experiment to show how to tie the various LocalAI functionalities to create a virtual assistant that can do tasks. It is simple on purpose, trying to be minimalistic and easy to understand and customize for everyone.
It is different from babyAGI or AutoGPT as it uses [LocalAI functions](https://localai.io/features/openai-functions/) - it is a from-scratch attempt built on purpose to run locally with [LocalAI](https://localai.io) (no API keys needed!) instead of expensive cloud services. It sets itself apart from other projects by striving to be small and easy to fork.
### How does it work?
`LocalAGI` just does the minimal around LocalAI functions to create a virtual assistant that can do generic tasks. It works as an endless loop of `intent detection`, `function invocation`, `self-evaluation` and `reply generation` (if it decides to reply! :)). The agent is capable of planning complex tasks by invoking multiple functions, and of remembering things from the conversation.
In a nutshell, it goes like this:
- It decides, based on the conversation history, if it needs to take an action by using functions. It uses the LLM to detect the intent from the conversation.
- If it needs to take an action (e.g. "remember something from the conversation") or handle a complex task (executing a chain of functions to achieve a goal), it invokes the functions.
- It re-evaluates if it needs to do any other action.
- It returns the result back to the LLM to generate a reply for the user.
Under the hood LocalAI converts functions to llama.cpp BNF grammars. While OpenAI fine-tuned a model to reply to functions, LocalAI constrains the LLM to follow grammars. This is a much more efficient way to do it, and it is also more flexible as you can define your own functions and grammars. For learning more about this, check out the [LocalAI documentation](https://localai.io/docs/llm) and my tweet that explains how it works under the hood: https://twitter.com/mudler_it/status/1675524071457533953.
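For illustration, this is roughly the shape of the OpenAI-style function-call request that gets sent to LocalAI under the hood (the `get_current_weather` function is a placeholder, and `functions` is just the default functions model name):

```bash
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
  "model": "functions",
  "messages": [{"role": "user", "content": "What is the weather like in Boston?"}],
  "functions": [{
    "name": "get_current_weather",
    "description": "Get the current weather in a given location",
    "parameters": {
      "type": "object",
      "properties": {
        "location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"}
      },
      "required": ["location"]
    }
  }],
  "function_call": "auto"
}'
```

The grammar constraint forces the model to answer with a well-formed function call instead of free text.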
### Agent functions
The intention of this project is to keep the agent minimal, so it can be built upon or forked. The agent is capable of doing the following functions:
- remember something from the conversation
- recall something from the conversation
- search something from the internet
- plan a complex task by invoking multiple functions
- write files to disk
## Roadmap
- [x] 100% Local, with Local AI. NO API KEYS NEEDED!
- [x] Create a simple virtual assistant
- [x] Make the virtual assistant do functions like store long-term memory and autonomously search between them when needed
- [x] Create the assistant avatar with Stable Diffusion
- [x] Give it a voice
- [ ] Use weaviate instead of Chroma
- [ ] Get voice input (push to talk or wakeword)
- [ ] Make a REST API (OpenAI compliant?) so can be plugged by e.g. a third party service
- [x] Take a system prompt so it can act with a "character" (e.g. "answer in rick and morty style")
## Development
Run docker-compose with main.py checked-out:
```bash
docker-compose run -v main.py:/app/main.py -i --rm localagi
```
## Notes
- a 13b model is enough for doing contextualized research and search/retrieve memory
- a 30b model is enough to generate a roadmap trip plan ( so cool! )
- With superhot models it loses its magic, but it may still be suitable for search
- Context size is your enemy. `--postprocess` sometimes helps, but not always
- It can be silly!
- It is slow on CPU; don't expect `7b` models to perform well, and while `13b` models perform better, they are still quite slow on CPU.

View File

@ -1,84 +0,0 @@
+++
disableToc = false
title = "Mattermost-OpenOps"
weight = 2
+++
OpenOps is an open source platform for applying generative AI to workflows in secure environments.
![image](https://github.com/azigler/zigmud/assets/7295363/91901757-923d-4fa3-a0e2-c884561aab7e)
Github Link - https://github.com/mattermost/openops
* Enables AI exploration with full data control in a multi-user pilot.
* Supports broad ecosystem of AI models from OpenAI and Microsoft to open source LLMs from Hugging Face.
* Speeds development of custom security, compliance and data custody policy from early evaluation to future scale.
Unlike closed source, vendor-controlled environments where data controls cannot be audited, OpenOps provides a transparent, open source, customer-controlled platform for developing, securing and auditing AI-accelerated workflows.
### Why OpenOps?
Everyone is in a race to deploy generative AI solutions, but everyone also needs to do so in a responsible and safe way. OpenOps lets you run powerful models in a safe sandbox to establish the right safety protocols before rolling out to users. Here's an example of an evaluation, implementation, and iterative rollout process:
- **Phase 1:** Set up the OpenOps collaboration sandbox, a self-hosted service providing multi-user chat and integration with GenAI. *(this repository)*
- **Phase 2:** Evaluate different GenAI providers, whether from public SaaS services like OpenAI or local open source models, based on your security and privacy requirements.
- **Phase 3:** Invite select early adopters (especially colleagues focusing on trust and safety) to explore and evaluate the GenAI based on their workflows. Observe behavior, record user feedback, and identify issues. Iterate on workflows and usage policies together in the sandbox. Consider issues such as data leakage, legal/copyright, privacy, response correctness and appropriateness as you apply AI at scale.
- **Phase 4:** Set and implement policies as availability is incrementally rolled out to your wider organization.
### What does OpenOps include?
Deploying the OpenOps sandbox includes the following components:
- 🏰 **Mattermost Server** - Open source, self-hosted alternative to Discord and Slack for strict security environments with playbooks/workflow automation, tools integration, real time 1-1 and group messaging, audio calling and screenshare.
- 📙 **PostgreSQL** - Database for storing private data from multi-user, chat collaboration discussions and audit history.
- 🤖 [**Mattermost AI plugin**](https://github.com/mattermost/mattermost-plugin-ai) - Extension of Mattermost platform for AI bot and generative AI integration.
- 🦙 **Open Source, Self-Hosted LLM models** - Models for evaluation and use case development from Hugging Face and other sources, including GPT4All (runs on a laptop in 4.2 GB) and Falcon LLM (example of leading scaled self-hosted models). Uses [LocalAI](https://github.com/go-skynet/LocalAI).
- 🔌🧠 ***(Configurable)* Closed Source, Vendor-Hosted AI models** - SaaS-based GenAI models from Azure AI, OpenAI, & Anthropic.
- 🔌📱 ***(Configurable)* Mattermost Mobile and Desktop Apps** - End-user apps for future production deployment.
## Install
### Local
***Rather watch a video?** 📽️ Check out our YouTube tutorial video for getting started with OpenOps: https://www.youtube.com/watch?v=20KSKBzZmik*
***Rather read a blog post?** 📝 Check out our Mattermost blog post for getting started with OpenOps: https://mattermost.com/blog/open-source-ai-framework/*
1. Clone the repository: `git clone https://github.com/mattermost/openops && cd openops`
2. Start docker services and configure plugin
   - **If using OpenAI:**
     - Run `env backend=openai ./init.sh`
     - Run `./configure_openai.sh sk-<your openai key>` to add your API credentials *or* use the Mattermost system console to configure the plugin
   - **If using LocalAI:** *(a consolidated command summary follows this list)*
     - Run `env backend=localai ./init.sh`
     - Run `env backend=localai ./download_model.sh` to download one *or* supply your own gguf formatted model in the `models` directory.
3. Access Mattermost and log in with the credentials provided in the terminal.
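For the LocalAI path, the steps above boil down to roughly the following (this is only a convenience summary of the commands already listed):

```bash
git clone https://github.com/mattermost/openops && cd openops
env backend=localai ./init.sh            # start the Docker services with the LocalAI backend
env backend=localai ./download_model.sh  # fetch a default gguf model (or place your own in ./models)
```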
When you log in, you will start out in a direct message with your AI Assistant bot. Now you can start exploring AI [usages](#usage).
### Gitpod
[![Open in Gitpod](https://gitpod.io/button/open-in-gitpod.svg)](https://gitpod.io/#backend=openai/https://github.com/mattermost/openops)
1. Click the above badge and start your Gitpod workspace
2. You will see VSCode interface and the workspace will configure itself automatically. Wait for the services to start and for your `root` login for Mattermost to be generated in the terminal
3. Run `./configure_openai.sh sk-<your openai key>` to add your API credentials *or* use the Mattermost system console to configure the plugin
4. Access Mattermost and log in with the credentials supplied in the terminal.
When you log in, you will start out in a direct message with your AI Assistant bot. Now you can start exploring AI [usages](#usage).
## Usage
There are many ways to integrate generative AI into confidential, self-hosted workplace discussions. To help you get started, here are some examples provided in OpenOps:
| Title | Image | Description |
| ---------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| **Streaming Conversation** | ![Streaming Conversation](https://github.com/mattermost/mattermost-plugin-ai/blob/master/img/summarize_thread.gif?raw=true) | The OpenOps platform reproduces streamed replies from popular GenAI chatbots creating a sense of responsiveness and conversational engagement, while masking actual wait times. |
| **Thread Summarization** | ![Thread Summarization](https://github.com/mattermost/mattermost-plugin-ai/blob/master/img/summarize_button.gif?raw=true) | Use the "Summarize Thread" menu option or the `/summarize` command to get a summary of the thread in a Direct Message from an AI bot. AI-generated summaries can be created from private, chat-based discussions to speed information flows and decision-making while reducing the time and cost required for organizations to stay up-to-date. |
| **Contextual Interrogation** | ![Contextual Interrogation](https://github.com/mattermost/mattermost-plugin-ai/blob/master/img/thread_interrogation.png?raw=true) | Users can ask follow-up questions to discussion summaries generated by AI bots to learn more about the underlying information without reviewing the raw input. |
| **Meeting Summarization** | ![Meeting Summarization](https://github.com/mattermost/mattermost-plugin-ai/blob/master/img/meeting_summary.png?raw=true) | Create meeting summaries! Designed to work with the [Mattermost Calls plugin](https://github.com/mattermost/mattermost-plugin-calls) recording feature. |
| **Chat with AI Bots** | ![Chat with AI Bots](https://github.com/mattermost/mattermost-plugin-ai/blob/master/img/chat_anywhere.png?raw=true) | End users can interact with the AI bot in any discussion thread by mentioning AI bot with an `@` prefix, as they would get the attention of a human user. The bot will receive the thread information as context for replying. |
| **Sentiment Analysis**                         | [![React for me](https://github.com/mattermost/openops/assets/3191642/56bf132a-b834-46a3-882c-9b1f38a9f9fc)](https://github.com/mattermost/mattermost-plugin-ai/assets/3191642/5282b066-86b5-478d-ae10-57c3cb3ba038) | Use the "React for me" menu option to have the AI bot analyze the sentiment of a message and use its conclusion to deliver an emoji reaction on the user's behalf. |
| **Reinforcement Learning from Human Feedback** | ![RLHF](https://github.com/mattermost/openops/assets/3191642/ec330f7e-2aba-4370-bf21-e585a793160e) | Bot posts are distinguished from human posts by having 👍 👎 icons available for human end users to signal whether the AI response was positive or problematic. The history of responses can be used in future to fine-tune the underlying AI models, as well as to potentially evaluate the responses of new models based on their correlation to positive and negative user ratings for past model responses. |

View File

@ -1,246 +0,0 @@
+++
disableToc = false
title = "Mods"
weight = 2
+++
<p>
<img src="https://github.com/charmbracelet/mods/assets/25087/5442bf46-b908-47af-bf4e-60f7c38951c4" width="630" alt="Mods product art and type treatment"/>
<br>
</p>
AI for the command line, built for pipelines.
<p><img src="https://vhs.charm.sh/vhs-5Uyj0U6Hlqi1LVIIRyYKM5.gif" width="900" alt="a GIF of mods running"></p>
LLM-based AI is really good at interpreting the output of commands and
returning the results in CLI-friendly text formats like Markdown. Mods is a
simple tool that makes it super easy to use AI on the command line and in your
pipelines. Mods works with [OpenAI](https://platform.openai.com/account/api-keys)
and [LocalAI](https://github.com/go-skynet/LocalAI).
To get started, [install Mods](#installation) and check out some of the
examples below. Since Mods has built-in Markdown formatting, you may also want
to grab [Glow](https://github.com/charmbracelet/glow) to give the output some
_pizzazz_.
Github Link - https://github.com/charmbracelet/mods
## What Can It Do?
Mods works by reading standard in and prefacing it with a prompt supplied in
the `mods` arguments. It sends the input text to an LLM and prints out the
result, optionally asking the LLM to format the response as Markdown. This
gives you a way to "question" the output of a command. Mods will also work on
standard in or an argument-supplied prompt individually.
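For example, a minimal pipeline in this spirit, assuming `mods` and [Glow](https://github.com/charmbracelet/glow) are installed:

```bash
# Ask the model about a command's output; -f requests Markdown, glow renders it
ls -la | mods -f "what can you tell me about these files?" | glow
```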
## Installation
Mods works with OpenAI compatible endpoints. By default, Mods is configured to
support OpenAI's official API and a LocalAI installation running on port 8080.
You can configure additional endpoints in your settings file by running
`mods --settings`.
### LocalAI
LocalAI allows you to run a multitude of models locally. Mods works with the
GPT4ALL-J model as setup in [this tutorial](https://github.com/go-skynet/LocalAI#example-use-gpt4all-j-model).
You can define more LocalAI models and endpoints with `mods --settings`.
### Install Mods
```bash
# macOS or Linux
brew install charmbracelet/tap/mods
# Arch Linux (btw)
yay -S mods
# Debian/Ubuntu
sudo mkdir -p /etc/apt/keyrings
curl -fsSL https://repo.charm.sh/apt/gpg.key | sudo gpg --dearmor -o /etc/apt/keyrings/charm.gpg
echo "deb [signed-by=/etc/apt/keyrings/charm.gpg] https://repo.charm.sh/apt/ * *" | sudo tee /etc/apt/sources.list.d/charm.list
sudo apt update && sudo apt install mods
# Fedora/RHEL
echo '[charm]
name=Charm
baseurl=https://repo.charm.sh/yum/
enabled=1
gpgcheck=1
gpgkey=https://repo.charm.sh/yum/gpg.key' | sudo tee /etc/yum.repos.d/charm.repo
sudo yum install mods
```
Or, download it:
- [Packages][releases] are available in Debian and RPM formats
- [Binaries][releases] are available for Linux, macOS, and Windows
[releases]: https://github.com/charmbracelet/mods/releases
Or, just install it with `go`:
```sh
go install github.com/charmbracelet/mods@latest
```
## Saving conversations
Conversations save automatically. They are identified by their latest prompt.
Similar to Git, conversations have a SHA-1 identifier and a title. Conversations
can be updated, maintaining their SHA-1 identifier but changing their title.
<p><img src="https://vhs.charm.sh/vhs-6MMscpZwgzohYYMfTrHErF.gif" width="900" alt="a GIF listing and showing saved conversations."></p>
## Settings
`--settings`
Mods lets you tune your query with a variety of settings. You can configure
Mods with `mods --settings` or pass the settings as environment variables
and flags.
#### Model
`-m`, `--model`, `MODS_MODEL`
Mods uses `gpt-4` with OpenAI by default, but you can specify any model as long
as your account has access to it or you have it installed locally with LocalAI.
You can add new models to the settings with `mods --settings`.
You can also specify a model and an API endpoint with `-m` and `-a`
to use models not in the settings file.
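As a sketch - the `localai` API name and `ggml-gpt4all-j` model here are assumptions, so use whatever you configured via `mods --settings`:

```bash
echo "why is the sky blue?" | mods -a localai -m ggml-gpt4all-j
```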
#### Title
`-t`, `--title`
Set a custom save title for the conversation.
#### Continue last
`-C`, `--continue-last`
Continues the previous conversation.
#### Continue
`-c`, `--continue`
Continue from the last response or a given title or SHA1.
#### List
`-l`, `--list`
Lists all saved conversations.
#### Show
`-s`, `--show`
Show the saved conversation with the given title or SHA1.
#### Delete
`--delete`
Deletes the saved conversation with the given title or SHA1.
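Putting the conversation flags together, a session might look like the following (the SHA1 prefix `a1b2c3d` is a placeholder for one of your own conversations):

```bash
mods -l                                   # list saved conversations
mods -s a1b2c3d                           # show one by SHA1 prefix (or title)
mods -c a1b2c3d "and how about Windows?"  # continue it with a follow-up prompt
mods --delete a1b2c3d                     # delete it when you are done
```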
#### Format As Markdown
`-f`, `--format`, `MODS_FORMAT`
Ask the LLM to format the response as Markdown. You can edit the text passed to
the LLM by running `mods --settings` and changing the `format-text` value.
#### Raw
`-r`, `--raw`, `MODS_RAW`
Print the raw response without syntax highlighting, even when connected to a TTY.
#### Max Tokens
`--max-tokens`, `MODS_MAX_TOKENS`
Max tokens tells the LLM to respond in less than this number of tokens. LLMs
are better at longer responses so values larger than 256 tend to work best.
#### Temperature
`--temp`, `MODS_TEMP`
Sampling temperature is a number between 0.0 and 2.0 and determines how
confident the model is in its choices. Higher values make the output more
random and lower values make it more deterministic.
#### TopP
`--topp`, `MODS_TOPP`
Top P is an alternative to sampling temperature. It's a number between 0.0 and
2.0 with smaller numbers narrowing the domain from which the model will create
its response.
#### No Limit
`--no-limit`, `MODS_NO_LIMIT`
By default Mods attempts to size the input to the maximum size allowed by
the model. You can potentially squeeze a few more tokens into the input by
setting this but also risk getting a max token exceeded error from the OpenAI API.
#### Include Prompt
`-P`, `--prompt`, `MODS_INCLUDE_PROMPT`
Include prompt will preface the response with the entire prompt, both standard
in and the prompt supplied by the arguments.
#### Include Prompt Args
`-p`, `--prompt-args`, `MODS_INCLUDE_PROMPT_ARGS`
Include prompt args will include _only_ the prompt supplied by the arguments.
This can be useful if your standard in content is long and you just want a
summary before the response.
#### Max Retries
`--max-retries`, `MODS_MAX_RETRIES`
The maximum number of retries for failed API calls. The retries happen with an
exponential backoff.
#### Fanciness
`--fanciness`, `MODS_FANCINESS`
Your desired level of fanciness.
#### Quiet
`-q`, `--quiet`, `MODS_QUIET`
Output nothing to standard err.
#### Reset Settings
`--reset-settings`
Backup your old settings file and reset everything to the defaults.
#### No Cache
`--no-cache`, `MODS_NO_CACHE`
Disables conversation saving.
#### HTTP Proxy
`-x`, `--http-proxy`, `MODS_HTTP_PROXY`
Use an HTTP proxy to connect to the API endpoints.

View File

@ -1,75 +0,0 @@
+++
disableToc = false
title = "Spark"
weight = 2
+++
an LLM-powered autonomous agent platform
![AI Spark](https://cdn.discordapp.com/attachments/1138961497691799693/1162248501120475227/unnamed.jpg?ex=653b3f44&is=6528ca44&hm=e0b2f74f2ba38097b8e78b80182e2e6beff4a3827847ddffb2693178a3dd6e05&)
A framework for autonomous agents who can work together to accomplish tasks using [LocalAI](https://github.com/go-skynet/LocalAI).
Github Link - https://github.com/cedriking/spark
## Setup
You will need at least Node 10.
[Download the repository](https://github.com/cedriking/spark), then install dependencies: `yarn` or `npm install`.
Rename the `.env.template` file at the root of the project to `.env` and add your secrets to it:
```
# the following are needed for the agent to be able to search the web:
GOOGLE_SEARCH_ENGINE_ID=... # create a custom search engine at https://cse.google.com/cse/all
GOOGLE_API_KEY=... # obtain from https://console.cloud.google.com/apis/credentials
AGENT_DELAY=... # optionally, a delay in milliseconds following every agent action
MODEL=... # any Llama.cpp LLM model
SERVER=... # optionally, a server to connect to (default http://localhost:8080)
```
You'll also need to enable the Google Custom Search API for your Google Cloud account, e.g. <https://console.cloud.google.com/apis/library/customsearch.googleapis.com>
## Running
Start the program:
```
yarn dev [# of agents]
```
or:
```
npm run dev [# of agents]
```
Interact with the agents through the console. Currently, anything you type will be sent as a message to all agents.
## Action errors
After spinning up a new agent, you will often see them make some mistakes which generate errors:
- Trying to use an action before they've asked for `help` on it to know what its parameters are
- Trying to just use a raw text response instead of a correctly-formatted action (or raw text wrapping a code block which contains a valid action)
- Trying to use a multi-line parameter value without wrapping it in the multiline delimiter (`% ff9d7713-0bb0-40d4-823c-5a66de48761b`)
This is a normal period of adjustment as they learn to operate themselves. They generally will learn from these mistakes and recover, although agents sometimes devolve into endless error loops and can't figure out what the problem is. It's highly advised to never leave an agent unattended.
## Agent state
Each agent stores its state under the `.store` directory. Agent 1, for example, has
```
.store/1/memory
.store/1/goals
.store/1/notes
```
You can simply delete any of these things, or the whole agent folder (or the whole `.store`) to selectively wipe whatever state you want between runs. Otherwise, agents will pick up where you left off on restart.
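For example, to reset only agent 1's memory between runs, or to start completely fresh:

```bash
rm -rf .store/1/memory   # wipe just agent 1's memory
# rm -rf .store          # or wipe the state of all agents
```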
A nice aspect of this is that when you want to debug a problem you ran into with a particular agent, you can delete the events in their memory subsequent to the point where the problem occurred, make changes to the code, and restart them to effectively replay that moment until you've fixed the bug. You can also ask an agent to implement a feature, and once they've done so you can restart, tell them that you've loaded the feature, and ask them to try it out.
Code based on [ai-legion](https://github.com/eumemic/ai-legion).

View File

@ -1,14 +0,0 @@
+++
disableToc = false
title = "Integrations"
weight = 8
+++
## The following software has out-of-the-box integrations with LocalAI
LocalAI can be used as a drop-in replacement; the projects listed here, however, provide specific integrations with LocalAI:
- [Logseq GPT3 OpenAI plugin](https://github.com/briansunter/logseq-plugin-gpt3-openai) allows setting a base URL, and works with LocalAI.
Feel free to open up an [issue](https://github.com/go-skynet/localai-website/issues) to get a page made for your project, or if you spot an error on one of the pages!

View File

@ -1,77 +0,0 @@
+++
disableToc = false
title = "AutoGPT4all"
weight = 2
+++
AutoGPT4All provides you with both bash and python scripts to set up and configure [AutoGPT](https://github.com/Significant-Gravitas/Auto-GPT.git) running with the [GPT4All](#) model on the [LocalAI](https://github.com/go-skynet/LocalAI) server. This setup allows you to run queries against an open-source licensed model without any limits, completely free and offline.
![photo](https://bafkreif7cbmuvhztfdlscnmgi3ob32d6ulkqgbjqy4cff2krth4dynwwhe.ipfs.nftstorage.link)
Github Link - https://github.com/aorumbayev/autogpt4all
## 🚀 Quickstart
### Using Bash Script:
```sh
git clone https://github.com/aorumbayev/autogpt4all.git
cd autogpt4all
chmod +x autogpt4all.sh
./autogpt4all.sh
```
### Using Python Script:
Make sure you have Python installed on your machine.
```sh
git clone https://github.com/aorumbayev/autogpt4all.git
cd autogpt4all
python autogpt4all.py
```
> ❗️ Please note this script has been primarily tested on macOS with an M1 processor. It should work on Linux and Windows, but it has not been thoroughly tested on these platforms. If you are not on macOS, install git, go and make before running the script.
## 🎛️ Script Options
### For the bash script:
`--custom_model_url` - Specify a custom URL for the model download step. By default, the script will use https://gpt4all.io/models/ggml-gpt4all-l13b-snoozy.bin.
Example:
```
./autogpt4all.sh --custom_model_url "https://example.com/path/to/model.bin"
```
`--uninstall` - Uninstall the projects from your local machine by deleting the LocalAI and Auto-GPT directories.
Example:
```
./autogpt4all.sh --uninstall
```
> A `--help` flag is also available for the bash script to recap the available commands.
### For the Python Script:
You can use similar options as the bash script:
`--custom_model_url` - Specify a custom URL for the model download step.
Example:
```sh
python autogpt4all.py --custom_model_url "https://example.com/path/to/model.bin"
```
`--uninstall` - Uninstall the projects from your local machine.
Example:
```sh
python autogpt4all.py --uninstall
```

View File

@ -1,399 +0,0 @@
+++
disableToc = false
title = "LangChain4j"
description="LangChain for Java: Supercharge your Java application with the power of LLMs"
weight = 2
+++
Github: https://github.com/langchain4j/langchain4j
[![](https://img.shields.io/twitter/follow/langchain4j)](https://twitter.com/intent/follow?screen_name=langchain4j)
[![](https://dcbadge.vercel.app/api/server/JzTFvyjG6R?compact=true&style=flat)](https://discord.gg/JzTFvyjG6R)
## Project goals
The goal of this project is to simplify the integration of AI/LLM capabilities into your Java application.
This can be achieved thanks to:
- **A simple and coherent layer of abstractions**, designed to ensure that your code does not depend on concrete implementations such as LLM providers, embedding store providers, etc. This allows for easy swapping of components.
- **Numerous implementations of the above-mentioned abstractions**, providing you with a variety of LLMs and embedding stores to choose from.
- **Range of in-demand features on top of LLMs, such as:**
- The capability to **ingest your own data** (documentation, codebase, etc.), allowing the LLM to act and respond based on your data.
- **Autonomous agents** for delegating tasks (defined on the fly) to the LLM, which will strive to complete them.
- **Prompt templates** to help you achieve the highest possible quality of LLM responses.
- **Memory** to provide context to the LLM for your current and past conversations.
- **Structured outputs** for receiving responses from the LLM with a desired structure as Java POJOs.
- **"AI Services"** for declaratively defining complex AI behavior behind a simple API.
- **Chains** to reduce the need for extensive boilerplate code in common use-cases.
- **Auto-moderation** to ensure that all inputs and outputs to/from the LLM are not harmful.
## News
12 November:
- Integration with [OpenSearch](https://opensearch.org/) by [@riferrei](https://github.com/riferrei)
- Add support for loading documents from S3 by [@jmgang](https://github.com/jmgang)
- Integration with [PGVector](https://github.com/pgvector/pgvector) by [@kevin-wu-os](https://github.com/kevin-wu-os)
- Integration with [Ollama](https://ollama.ai/) by [@Martin7-1](https://github.com/Martin7-1)
- Integration with [Amazon Bedrock](https://aws.amazon.com/bedrock/) by [@pascalconfluent](https://github.com/pascalconfluent)
- Adding Memory Id to Tool Method Call by [@benedictstrube](https://github.com/benedictstrube)
- [And more](https://github.com/langchain4j/langchain4j/releases/tag/0.24.0)
29 September:
- Updates to models API: return `Response<T>` instead of `T`. `Response<T>` contains token usage and finish reason.
- All model and embedding store integrations now live in their own modules
- Integration with [Vespa](https://vespa.ai/) by [@Heezer](https://github.com/Heezer)
- Integration with [Elasticsearch](https://www.elastic.co/) by [@Martin7-1](https://github.com/Martin7-1)
- Integration with [Redis](https://redis.io/) by [@Martin7-1](https://github.com/Martin7-1)
- Integration with [Milvus](https://milvus.io/) by [@IuriiKoval](https://github.com/IuriiKoval)
- Integration with [Astra DB](https://www.datastax.com/products/datastax-astra) and [Cassandra](https://cassandra.apache.org/) by [@clun](https://github.com/clun)
- Added support for overlap in document splitters
- Some bugfixes and smaller improvements
29 August:
- Offline [text classification with embeddings](https://github.com/langchain4j/langchain4j-examples/blob/main/other-examples/src/main/java/embedding/classification/EmbeddingModelTextClassifierExample.java)
- Integration with [Google Vertex AI](https://cloud.google.com/vertex-ai) by [@kuraleta](https://github.com/kuraleta)
- Reworked [document splitters](https://github.com/langchain4j/langchain4j/blob/main/langchain4j/src/main/java/dev/langchain4j/data/document/splitter/DocumentSplitters.java)
- In-memory embedding store can now be easily persisted
- [And more](https://github.com/langchain4j/langchain4j/releases/tag/0.22.0)
19 August:
- Integration with [Azure OpenAI](https://learn.microsoft.com/en-us/azure/ai-services/openai/overview) by [@kuraleta](https://github.com/kuraleta)
- Integration with Qwen models (DashScope) by [@jiangsier-xyz](https://github.com/jiangsier-xyz)
- [Integration with Chroma](https://github.com/langchain4j/langchain4j-examples/blob/main/other-examples/src/main/java/embedding/store/ChromaEmbeddingStoreExample.java) by [@kuraleta](https://github.com/kuraleta)
- [Support for persistent ChatMemory](https://github.com/langchain4j/langchain4j-examples/blob/main/other-examples/src/main/java/ServiceWithPersistentMemoryForEachUserExample.java)
10 August:
- [Integration with Weaviate](https://github.com/langchain4j/langchain4j-examples/blob/main/other-examples/src/main/java/embedding/store/WeaviateEmbeddingStoreExample.java) by [@Heezer](https://github.com/Heezer)
- [Support for DOC, XLS and PPT document types](https://github.com/langchain4j/langchain4j-examples/blob/main/other-examples/src/main/java/DocumentLoaderExamples.java) by [@oognuyh](https://github.com/oognuyh)
- [Separate chat memory for each user](https://github.com/langchain4j/langchain4j-examples/blob/main/other-examples/src/main/java/ServiceWithMemoryForEachUserExample.java)
- [Custom in-process embedding models](https://github.com/langchain4j/langchain4j-examples/blob/main/other-examples/src/main/java/embedding/model/InProcessEmbeddingModelExamples.java)
- Added lots of Javadoc
- [And more](https://github.com/langchain4j/langchain4j/releases/tag/0.19.0)
26 July:
- We've added integration with [LocalAI](https://localai.io/). Now, you can use LLMs hosted locally!
- Added support for [response streaming in AI Services](https://github.com/langchain4j/langchain4j-examples/blob/main/other-examples/src/main/java/ServiceWithStreamingExample.java).
21 July:
- Now, you can do [text embedding inside your JVM](https://github.com/langchain4j/langchain4j-examples/blob/main/other-examples/src/main/java/embedding/model/InProcessEmbeddingModelExamples.java).
17 July:
- You can now try out OpenAI's `gpt-3.5-turbo` and `text-embedding-ada-002` models with LangChain4j for free, without needing an OpenAI account and keys! Simply use the API key "demo".
15 July:
- Added EmbeddingStoreIngestor
- Redesigned document loaders (see FileSystemDocumentLoader)
- Simplified ConversationalRetrievalChain
- Renamed DocumentSegment into TextSegment
- Added output parsers for numeric types
- Added @UserName for AI Services
- Fixed [23](https://github.com/langchain4j/langchain4j/issues/23) and [24](https://github.com/langchain4j/langchain4j/issues/24)
11 July:
- Added ["Dynamic Tools"](https://github.com/langchain4j/langchain4j-examples/blob/main/other-examples/src/main/java/ServiceWithDynamicToolsExample.java):
Now, the LLM can generate code for tasks that require precise calculations, such as math and string manipulation. This will be dynamically executed in a style akin to GPT-4's code interpreter!
We use [Judge0, hosted by Rapid API](https://rapidapi.com/judge0-official/api/judge0-ce/pricing), for code execution. You can subscribe and receive 50 free executions per day.
5 July:
- Now you can [add your custom knowledge base to "AI Services"](https://github.com/langchain4j/langchain4j-examples/blob/main/spring-boot-example/src/test/java/dev/example/CustomerSupportApplicationTest.java).
Relevant information will be automatically retrieved and injected into the prompt. This way, the LLM will have a
context of your data and will answer based on it!
- The current date and time can now be automatically injected into the prompt using
special `{{current_date}}`, `{{current_time}}` and `{{current_date_time}}` placeholders.
3 July:
- Added support for Spring Boot 3
2 July:
- [Added Spring Boot Starter](https://github.com/langchain4j/langchain4j-examples/blob/main/spring-boot-example/src/test/java/dev/example/CustomerSupportApplicationTest.java)
- Added support for HuggingFace models
1 July:
- [Added "Tools"](https://github.com/langchain4j/langchain4j-examples/blob/main/other-examples/src/main/java/ServiceWithToolsExample.java) (support for OpenAI functions)
## Highlights
You can declaratively define concise "AI Services" that are powered by LLMs:
```java
interface Assistant {
    String chat(String userMessage);
}

Assistant assistant = AiServices.create(Assistant.class, model);
String answer = assistant.chat("Hello");
System.out.println(answer);
// Hello! How can I assist you today?
```
You can use LLM as a classifier:
```java
enum Sentiment {
    POSITIVE, NEUTRAL, NEGATIVE
}

interface SentimentAnalyzer {

    @UserMessage("Analyze sentiment of {{it}}")
    Sentiment analyzeSentimentOf(String text);

    @UserMessage("Does {{it}} have a positive sentiment?")
    boolean isPositive(String text);
}

SentimentAnalyzer sentimentAnalyzer = AiServices.create(SentimentAnalyzer.class, model);

Sentiment sentiment = sentimentAnalyzer.analyzeSentimentOf("It is good!");
// POSITIVE

boolean positive = sentimentAnalyzer.isPositive("It is bad!");
// false
```
You can easily extract structured information from unstructured data:
```java
class Person {
    private String firstName;
    private String lastName;
    private LocalDate birthDate;

    public String toString() {...}
}

interface PersonExtractor {

    @UserMessage("Extract information about a person from {{it}}")
    Person extractPersonFrom(String text);
}

PersonExtractor extractor = AiServices.create(PersonExtractor.class, model);

String text = "In 1968, amidst the fading echoes of Independence Day, "
        + "a child named John arrived under the calm evening sky. "
        + "This newborn, bearing the surname Doe, marked the start of a new journey.";

Person person = extractor.extractPersonFrom(text);
// Person { firstName = "John", lastName = "Doe", birthDate = 1968-07-04 }
```
You can define more sophisticated prompt templates using mustache syntax:
```java
interface Translator {

    @SystemMessage("You are a professional translator into {{language}}")
    @UserMessage("Translate the following text: {{text}}")
    String translate(@V("text") String text, @V("language") String language);
}

Translator translator = AiServices.create(Translator.class, model);

String translation = translator.translate("Hello, how are you?", "Italian");
// Ciao, come stai?
```
You can provide tools that LLMs can use! They can be anything: retrieving information from a DB, calling APIs, etc.
See example [here](https://github.com/langchain4j/langchain4j-examples/blob/main/other-examples/src/main/java/ServiceWithToolsExample.java).
## Compatibility
- Java: 8 or higher
- Spring Boot: 2 or 3
## Getting started
1. Add LangChain4j OpenAI dependency to your project:
- Maven:
```
<dependency>
    <groupId>dev.langchain4j</groupId>
    <artifactId>langchain4j-open-ai</artifactId>
    <version>0.24.0</version>
</dependency>
```
- Gradle:
```
implementation 'dev.langchain4j:langchain4j-open-ai:0.24.0'
```
2. Import your OpenAI API key:
```java
String apiKey = System.getenv("OPENAI_API_KEY");
```
You can use the API key "demo" to test OpenAI, which we provide for free.
[How to get an API key?](https://github.com/langchain4j/langchain4j#how-to-get-an-api-key)
3. Create an instance of a model and start interacting:
```java
OpenAiChatModel model = OpenAiChatModel.withApiKey(apiKey);
String answer = model.generate("Hello world!");
System.out.println(answer); // Hello! How can I assist you today?
```
## Disclaimer
Please note that the library is in active development and:
- Many features are still missing. We are working hard on implementing them ASAP.
- API might change at any moment. At this point, we prioritize good design in the future over backward compatibility
now. We hope for your understanding.
- We need your input! Please [let us know](https://github.com/langchain4j/langchain4j/issues/new/choose) what features you need and your concerns about the current implementation.
## Current capabilities:
- AI Services:
- [Simple](https://github.com/langchain4j/langchain4j-examples/blob/main/other-examples/src/main/java/SimpleServiceExample.java)
- [With Memory](https://github.com/langchain4j/langchain4j-examples/blob/main/other-examples/src/main/java/ServiceWithMemoryExample.java)
- [With Tools](https://github.com/langchain4j/langchain4j-examples/blob/main/other-examples/src/main/java/ServiceWithToolsExample.java)
- [With Streaming](https://github.com/langchain4j/langchain4j-examples/blob/main/other-examples/src/main/java/ServiceWithStreamingExample.java)
- [With Retriever](https://github.com/langchain4j/langchain4j-examples/blob/main/other-examples/src/main/java/ServiceWithRetrieverExample.java)
- [With Auto-Moderation](https://github.com/langchain4j/langchain4j-examples/blob/main/other-examples/src/main/java/ServiceWithAutoModerationExample.java)
- [With Structured Outputs, Structured Prompts, etc](https://github.com/langchain4j/langchain4j-examples/blob/main/other-examples/src/main/java/OtherServiceExamples.java)
- Integration with [OpenAI](https://platform.openai.com/docs/introduction) and [Azure OpenAI](https://learn.microsoft.com/en-us/azure/ai-services/openai/overview) for:
- [Chats](https://platform.openai.com/docs/guides/chat) (sync + streaming + functions)
- [Completions](https://platform.openai.com/docs/guides/completion) (sync + streaming)
- [Embeddings](https://platform.openai.com/docs/guides/embeddings)
- Integration with [Google Vertex AI](https://cloud.google.com/vertex-ai) for:
- [Chats](https://cloud.google.com/vertex-ai/docs/generative-ai/chat/chat-prompts)
- [Completions](https://cloud.google.com/vertex-ai/docs/generative-ai/text/text-overview)
- [Embeddings](https://cloud.google.com/vertex-ai/docs/generative-ai/embeddings/get-text-embeddings)
- Integration with [HuggingFace Inference API](https://huggingface.co/docs/api-inference/index) for:
- [Chats](https://huggingface.co/docs/api-inference/detailed_parameters#text-generation-task)
- [Completions](https://huggingface.co/docs/api-inference/detailed_parameters#text-generation-task)
- [Embeddings](https://huggingface.co/docs/api-inference/detailed_parameters#feature-extraction-task)
- Integration with [LocalAI](https://localai.io/) for:
- Chats (sync + streaming + functions)
- Completions (sync + streaming)
- Embeddings
- Integration with [DashScope](https://dashscope.aliyun.com/) for:
- Chats (sync + streaming)
- Completions (sync + streaming)
- Embeddings
- [Chat memory](https://github.com/langchain4j/langchain4j-examples/blob/main/other-examples/src/main/java/ChatMemoryExamples.java)
- [Persistent chat memory](https://github.com/langchain4j/langchain4j-examples/blob/main/other-examples/src/main/java/ServiceWithPersistentMemoryForEachUserExample.java)
- [Chat with Documents](https://github.com/langchain4j/langchain4j-examples/blob/main/other-examples/src/main/java/ChatWithDocumentsExamples.java)
- Integration with [Astra DB](https://www.datastax.com/products/datastax-astra) and [Cassandra](https://cassandra.apache.org/)
- [Integration](https://github.com/langchain4j/langchain4j-examples/blob/main/chroma-example/src/main/java/ChromaEmbeddingStoreExample.java) with [Chroma](https://www.trychroma.com/)
- [Integration](https://github.com/langchain4j/langchain4j-examples/blob/main/elasticsearch-example/src/main/java/ElasticsearchEmbeddingStoreExample.java) with [Elasticsearch](https://www.elastic.co/)
- [Integration](https://github.com/langchain4j/langchain4j-examples/blob/main/milvus-example/src/main/java/MilvusEmbeddingStoreExample.java) with [Milvus](https://milvus.io/)
- [Integration](https://github.com/langchain4j/langchain4j-examples/blob/main/pinecone-example/src/main/java/PineconeEmbeddingStoreExample.java) with [Pinecone](https://www.pinecone.io/)
- [Integration](https://github.com/langchain4j/langchain4j-examples/blob/main/redis-example/src/main/java/RedisEmbeddingStoreExample.java) with [Redis](https://redis.io/)
- [Integration](https://github.com/langchain4j/langchain4j-examples/blob/main/vespa-example/src/main/java/VespaEmbeddingStoreExample.java) with [Vespa](https://vespa.ai/)
- [Integration](https://github.com/langchain4j/langchain4j-examples/blob/main/weaviate-example/src/main/java/WeaviateEmbeddingStoreExample.java) with [Weaviate](https://weaviate.io/)
- [In-memory embedding store](https://github.com/langchain4j/langchain4j-examples/blob/main/other-examples/src/main/java/embedding/store/InMemoryEmbeddingStoreExample.java) (can be persisted)
- [Structured outputs](https://github.com/langchain4j/langchain4j-examples/blob/main/other-examples/src/main/java/OtherServiceExamples.java)
- [Prompt templates](https://github.com/langchain4j/langchain4j-examples/blob/main/other-examples/src/main/java/PromptTemplateExamples.java)
- [Structured prompt templates](https://github.com/langchain4j/langchain4j-examples/blob/main/other-examples/src/main/java/StructuredPromptTemplateExamples.java)
- [Streaming of LLM responses](https://github.com/langchain4j/langchain4j-examples/blob/main/other-examples/src/main/java/StreamingExamples.java)
- [Loading txt, html, pdf, doc, xls and ppt documents from the file system and via URL](https://github.com/langchain4j/langchain4j-examples/blob/main/other-examples/src/main/java/DocumentLoaderExamples.java)
- [Splitting documents into segments](https://github.com/langchain4j/langchain4j-examples/blob/main/other-examples/src/main/java/ChatWithDocumentsExamples.java):
- by paragraphs, lines, sentences, words, etc
- recursively
- with overlap
- Token count estimation (so that you can predict how much you will pay)
## Coming soon:
- Extending "AI Service" features
- Integration with more LLM providers (commercial and free)
- Integrations with more embedding stores (commercial and free)
- Support for more document types
- Long-term memory for chatbots and agents
- Chain-of-Thought and Tree-of-Thought
## Request features
Please [let us know](https://github.com/langchain4j/langchain4j/issues/new/choose) what features you need!
## Contribute
Please help us make this open-source library better by contributing.
Some guidelines:
1. Follow [Google's Best Practices for Java Libraries](https://jlbp.dev/).
2. Keep the code compatible with Java 8.
3. Avoid adding new dependencies as much as possible. If absolutely necessary, try to (re)use the same libraries which are already present.
4. Follow existing code styles present in the project.
5. Ensure to add Javadoc where necessary.
6. Provide unit and/or integration tests for your code.
7. Large features should be discussed with maintainers before implementation.
## Use cases
You might ask: why would I need all of this?
Here are a couple of examples:
- You want to implement a custom AI-powered chatbot that has access to your data and behaves the way you want it to:
- Customer support chatbot that can:
- politely answer customer questions
- take/change/cancel orders
- Educational assistant that can:
- Teach various subjects
- Explain unclear parts
- Assess user's understanding/knowledge
- You want to process a lot of unstructured data (files, web pages, etc) and extract structured information from them.
For example:
- extract insights from customer reviews and support chat history
- extract interesting information from the websites of your competitors
- extract insights from CVs of job applicants
- You want to generate information, for example:
- Emails tailored for each of your customers
- Content for your app/website:
- Blog posts
- Stories
- You want to transform information, for example:
- Summarize
- Proofread and rewrite
- Translate
## Best practices
We highly recommend
watching [this amazing 90-minute tutorial](https://www.deeplearning.ai/short-courses/chatgpt-prompt-engineering-for-developers/)
on prompt engineering best practices, presented by Andrew Ng (DeepLearning.AI) and Isa Fulford (OpenAI).
This course will teach you how to use LLMs efficiently and achieve the best possible results. Good investment of your
time!
Here are some best practices for using LLMs:
- Be responsible. Use AI for Good.
- Be specific. The more specific your query, the better the results you will get.
- Add a ["Let's think step by step" instruction](https://arxiv.org/pdf/2205.11916.pdf) to your prompt.
- Specify steps to achieve the desired goal yourself. This will make the LLM do what you want it to do.
- Provide examples. Sometimes it is best to show LLM a few examples of what you want instead of trying to explain it.
- Ask the LLM to provide structured output (JSON, XML, etc). This way you can parse the response more easily and distinguish
different parts of it.
- Use unusual delimiters, such as triple backticks, to help the LLM distinguish
data or input from instructions.
## How to get an API key
You will need an API key from OpenAI (paid) or HuggingFace (free) to use LLMs hosted by them.
We recommend using OpenAI LLMs (`gpt-3.5-turbo` and `gpt-4`) as they are by far the most capable and are reasonably priced.
It will cost approximately $0.01 to generate 10 pages (A4 format) of text with `gpt-3.5-turbo`. With `gpt-4`, the cost will be $0.30 to generate the same amount of text. However, for some use cases, this higher cost may be justified.
[How to get OpenAI API key](https://www.howtogeek.com/885918/how-to-get-an-openai-api-key/).
For embeddings, we recommend using one of the models from the [HuggingFace MTEB leaderboard](https://huggingface.co/spaces/mteb/leaderboard).
You'll have to find the best one for your specific use case.
Here's how to get a HuggingFace API key:
- Create an account on https://huggingface.co
- Go to https://huggingface.co/settings/tokens
- Generate a new access token

View File

@ -1,38 +0,0 @@
+++
disableToc = false
title = "🦙 AutoGPTQ"
weight = 3
+++
[AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ) is an easy-to-use LLM quantization package with user-friendly APIs, based on the GPTQ algorithm.
## Prerequisites
This is an extra backend - it is already available in the container images, so there is nothing to do for the setup.
If you are building LocalAI locally, you need to install [AutoGPTQ manually](https://github.com/PanQiWei/AutoGPTQ#quick-installation).
## Model setup
The models are automatically downloaded from `huggingface` the first time they are used, if not already present. It is possible to define models via a `YAML` config file, or just by querying the endpoint with the `huggingface` repository model name. For example, create a `YAML` config file in `models/`:
```
name: orca
backend: autogptq
model_base_name: "orca_mini_v2_13b-GPTQ-4bit-128g.no-act.order"
parameters:
  model: "TheBloke/orca_mini_v2_13b-GPTQ"
# ...
```
Test with:
```bash
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "orca",
"messages": [{"role": "user", "content": "How are you?"}],
"temperature": 0.1
}'
```
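Alternatively - as a sketch of the second option mentioned above - you can skip the `YAML` file and reference the Hugging Face repository directly as the model name:

```bash
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
     "model": "TheBloke/orca_mini_v2_13b-GPTQ",
     "messages": [{"role": "user", "content": "How are you?"}],
     "temperature": 0.1
   }'
```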

View File

@ -1,39 +0,0 @@
+++
disableToc = false
title = "🐶 Bark"
weight = 4
+++
[Bark](https://github.com/suno-ai/bark) allows you to generate audio from text prompts.
## Setup
This is an extra backend - it is already available in the container images, so there is nothing to do for the setup.
## Model setup
There is nothing to be done for the model setup. You can start using Bark right away; the models will be downloaded the first time you use the backend.
## Usage
Use the `tts` endpoint by specifying the `bark` backend:
```
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
"backend": "bark",
"input":"Hello!"
}' | aplay
```
To specify a voice from https://github.com/suno-ai/bark#-voice-presets ( https://suno-ai.notion.site/8b8e8749ed514b0cbf3f699013548683?v=bc67cff786b04b50b3ceb756fd05f68c ), use the `model` parameter:
```
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
"backend": "bark",
"input":"Hello!",
"model": "v2/en_speaker_4"
}' | aplay
```

View File

@ -1,208 +0,0 @@
+++
disableToc = false
title = "🧨 Diffusers"
weight = 4
+++
[Diffusers](https://huggingface.co/docs/diffusers/index) is the go-to library for state-of-the-art pretrained diffusion models for generating images, audio, and even 3D structures of molecules. LocalAI has a diffusers backend which allows image generation using the `diffusers` library.
![anime_girl](https://github.com/go-skynet/LocalAI/assets/2420543/8aaca62a-e864-4011-98ae-dcc708103928)
(Generated with [AnimagineXL](https://huggingface.co/Linaqruf/animagine-xl))
Note: currently only image generation is supported. It is experimental, so you might encounter issues with models that haven't been tested yet.
## Setup
This is an extra backend - it is already available in the container images, so there is nothing to do for the setup.
## Model setup
The models will be downloaded automatically from `huggingface` the first time you use the backend.
Create a model configuration file in the `models` directory, for instance to use `Linaqruf/animagine-xl`:
```yaml
name: animagine-xl
parameters:
  model: Linaqruf/animagine-xl
backend: diffusers
cuda: true
f16: true
diffusers:
  scheduler_type: euler_a
```
## Local models
You can also use local models, or modify some parameters like `clip_skip`, `scheduler_type`, for instance:
```yaml
name: stablediffusion
parameters:
model: toonyou_beta6.safetensors
backend: diffusers
step: 30
f16: true
cuda: true
diffusers:
pipeline_type: StableDiffusionPipeline
enable_parameters: "negative_prompt,num_inference_steps,clip_skip"
scheduler_type: "k_dpmpp_sde"
cfg_scale: 8
clip_skip: 11
```
## Configuration parameters
The following parameters are available in the configuration file:
| Parameter | Description | Default |
| --- | --- | --- |
| `f16` | Force the usage of `float16` instead of `float32` | `false` |
| `step` | Number of steps to run the model for | `30` |
| `cuda` | Enable CUDA acceleration | `false` |
| `enable_parameters` | Parameters to enable for the model | `negative_prompt,num_inference_steps,clip_skip` |
| `scheduler_type` | Scheduler type | `k_dpp_sde` |
| `cfg_scale` | Configuration scale | `8` |
| `clip_skip` | Clip skip | None |
| `pipeline_type` | Pipeline type | `AutoPipelineForText2Image` |
Several scheduler types are available:
| Scheduler | Description |
| --- | --- |
| `ddim` | DDIM |
| `pndm` | PNDM |
| `heun` | Heun |
| `unipc` | UniPC |
| `euler` | Euler |
| `euler_a` | Euler a |
| `lms` | LMS |
| `k_lms` | LMS Karras |
| `dpm_2` | DPM2 |
| `k_dpm_2` | DPM2 Karras |
| `dpm_2_a` | DPM2 a |
| `k_dpm_2_a` | DPM2 a Karras |
| `dpmpp_2m` | DPM++ 2M |
| `k_dpmpp_2m` | DPM++ 2M Karras |
| `dpmpp_sde` | DPM++ SDE |
| `k_dpmpp_sde` | DPM++ SDE Karras |
| `dpmpp_2m_sde` | DPM++ 2M SDE |
| `k_dpmpp_2m_sde` | DPM++ 2M SDE Karras |
The following pipeline types are available:
| Pipeline type | Description |
| --- | --- |
| `StableDiffusionPipeline` | Stable diffusion pipeline |
| `StableDiffusionImg2ImgPipeline` | Stable diffusion image to image pipeline |
| `StableDiffusionDepth2ImgPipeline` | Stable diffusion depth to image pipeline |
| `DiffusionPipeline` | Diffusion pipeline |
| `StableDiffusionXLPipeline` | Stable diffusion XL pipeline |
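For example, to select a specific pipeline and scheduler explicitly, set them under the `diffusers` key; a sketch reusing the model shown earlier (the config name is illustrative):
```yaml
# Sketch: explicitly selecting a pipeline and a scheduler.
# The config name is illustrative; model and scheduler reuse values shown above.
name: animagine-xl-euler
parameters:
  model: Linaqruf/animagine-xl
backend: diffusers
f16: true
cuda: true
diffusers:
  pipeline_type: StableDiffusionXLPipeline
  scheduler_type: euler_a
```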
## Usage
### Text to Image
Use the `image` generation endpoint with the `model` name from the configuration file:
```bash
curl http://localhost:8080/v1/images/generations \
-H "Content-Type: application/json" \
-d '{
"prompt": "<positive prompt>|<negative prompt>",
"model": "animagine-xl",
"step": 51,
"size": "1024x1024"
}'
```
### Image to Image
https://huggingface.co/docs/diffusers/using-diffusers/img2img
An example model (GPU):
```yaml
name: stablediffusion-edit
parameters:
model: nitrosocke/Ghibli-Diffusion
backend: diffusers
step: 25
cuda: true
f16: true
diffusers:
pipeline_type: StableDiffusionImg2ImgPipeline
enable_parameters: "negative_prompt,num_inference_steps,image"
```
```bash
IMAGE_PATH=/path/to/your/image
(echo -n '{"file": "'; base64 $IMAGE_PATH; echo '", "prompt": "a sky background","size": "512x512","model":"stablediffusion-edit"}') |
curl -H "Content-Type: application/json" -d @- http://localhost:8080/v1/images/generations
```
### Depth to Image
https://huggingface.co/docs/diffusers/using-diffusers/depth2img
```yaml
name: stablediffusion-depth
parameters:
model: stabilityai/stable-diffusion-2-depth
backend: diffusers
step: 50
# GPU acceleration (set f16 and cuda to false to force CPU usage)
f16: true
cuda: true
diffusers:
pipeline_type: StableDiffusionDepth2ImgPipeline
enable_parameters: "negative_prompt,num_inference_steps,image"
cfg_scale: 6
```
```bash
(echo -n '{"file": "'; base64 ~/path/to/image.jpeg; echo '", "prompt": "a sky background","size": "512x512","model":"stablediffusion-depth"}') |
curl -H "Content-Type: application/json" -d @- http://localhost:8080/v1/images/generations
```
### img2vid
```yaml
name: img2vid
parameters:
model: stabilityai/stable-video-diffusion-img2vid
backend: diffusers
step: 25
# GPU acceleration (set f16 and cuda to false to force CPU usage)
f16: true
cuda: true
diffusers:
pipeline_type: StableVideoDiffusionPipeline
```
```bash
(echo -n '{"file": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/svd/rocket.png?download=true","size": "512x512","model":"img2vid"}') |
curl -H "Content-Type: application/json" -X POST -d @- http://localhost:8080/v1/images/generations
```
### txt2vid
```yaml
name: txt2vid
parameters:
model: damo-vilab/text-to-video-ms-1.7b
backend: diffusers
step: 25
# GPU acceleration (set f16 and cuda to false to force CPU usage)
f16: true
cuda: true
diffusers:
pipeline_type: VideoDiffusionPipeline
cuda: true
```
```bash
(echo -n '{"prompt": "spiderman surfing","size": "512x512","model":"txt2vid"}') |
curl -H "Content-Type: application/json" -X POST -d @- http://localhost:8080/v1/images/generations
```

View File

@ -1,42 +0,0 @@
+++
disableToc = false
title = "🦙 Exllama"
weight = 2
+++
[Exllama](https://github.com/turboderp/exllama) is "a more memory-efficient rewrite of the HF transformers implementation of Llama for use with quantized weights".
## Prerequisites
This is an extra backend: it is already available in the container images, and no additional setup is needed.
If you are building LocalAI locally, you need to install [exllama manually](https://github.com/jllllll/exllama#this-is-a-python-module-version-of-exllama) first.
## Model setup
Download the model as a folder inside the `models` directory and create a YAML file specifying the `exllama` backend. For instance, with the `TheBloke/WizardLM-7B-uncensored-GPTQ` model:
```
$ git lfs install
$ cd models && git clone https://huggingface.co/TheBloke/WizardLM-7B-uncensored-GPTQ
$ ls models/
.keep WizardLM-7B-uncensored-GPTQ/ exllama.yaml
$ cat models/exllama.yaml
name: exllama
parameters:
model: WizardLM-7B-uncensored-GPTQ
backend: exllama
# ...
```
Test with:
```bash
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "exllama",
"messages": [{"role": "user", "content": "How are you?"}],
"temperature": 0.1
}'
```

View File

@ -1,81 +0,0 @@
+++
disableToc = false
title = "🦙 llama.cpp"
weight = 1
+++
[llama.cpp](https://github.com/ggerganov/llama.cpp) is a popular port of Facebook's LLaMA model in C/C++.
{{% notice note %}}
The `ggml` file format has been deprecated. If you are using `ggml` models and configuring your model with a YAML file, specify the `llama-ggml` backend instead. If you are relying on automatic detection of the model, you should be fine. For `gguf` models, use the `llama` backend. The Go backend is deprecated as well, but is still available as `go-llama`. The Go backend still supports features not available in the mainline: speculative sampling and embeddings.
{{% /notice %}}
## Features
The `llama.cpp` model supports the following features:
- [📖 Text generation (GPT)]({{%relref "features/text-generation" %}})
- [🧠 Embeddings]({{%relref "features/embeddings" %}})
- [🔥 OpenAI functions]({{%relref "features/openai-functions" %}})
- [✍️ Constrained grammars]({{%relref "features/constrained_grammars" %}})
## Setup
LocalAI supports `llama.cpp` models out of the box. You can use `llama.cpp` models in the same way as any other model.
### Manual setup
It is sufficient to copy the `ggml` or `gguf` model files into the `models` folder. You can refer to the model in the `model` parameter in the API calls.
[You can optionally create an associated YAML]({{%relref "advanced" %}}) model config file to tune the model's parameters or apply a template to the prompt.
Prompt templates are useful for models that are fine-tuned towards a specific prompt.
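As a minimal sketch, a config with a prompt template could look like the following (the model file name and template are illustrative; the template syntax follows the Go template examples used elsewhere in these docs):
```yaml
# Sketch: a gguf model with an instruction-style prompt template.
# File name and template are illustrative; adapt them to your model.
name: my-model
backend: llama
parameters:
  model: my-model.Q4_K_M.gguf
template:
  chat: |
    [INST] {{.Input}} [/INST]
  completion: |
    {{.Input}}
```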
### Automatic setup
LocalAI supports model galleries, which are indexes of models. For instance, the huggingface gallery contains a large curated index of models from the huggingface model hub for `ggml` or `gguf` models.
If you have galleries enabled, you can start chatting with models from huggingface by running:
```bash
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "TheBloke/WizardLM-13B-V1.2-GGML/wizardlm-13b-v1.2.ggmlv3.q2_K.bin",
"messages": [{"role": "user", "content": "Say this is a test!"}],
"temperature": 0.1
}'
```
LocalAI will automatically download and configure the model in the `models` directory.
Models can also be preloaded or downloaded on demand. To learn about model galleries, check out the [model gallery documentation]({{%relref "models" %}}).
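For instance, models can be preloaded at startup via the `PRELOAD_MODELS` environment variable, which accepts a JSON list of models to apply. A sketch (the gallery entry below is illustrative; see the model gallery documentation for the exact schema):
```bash
# Sketch: preload a gallery model when starting LocalAI.
# The gallery URL is illustrative; use an entry from your configured galleries.
PRELOAD_MODELS='[{"url": "github:go-skynet/model-gallery/gpt4all-j.yaml"}]' ./local-ai
```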
### YAML configuration
To use the `llama.cpp` backend, specify `llama` as the backend in the YAML file:
```yaml
name: llama
backend: llama
parameters:
# Relative to the models path
model: file.gguf.bin
```
In the example above we specify `llama` as the backend to restrict loading to `gguf` models only.
For instance, to use the `llama-ggml` backend for `ggml` models:
```yaml
name: llama
backend: llama-ggml
parameters:
# Relative to the models path
model: file.ggml.bin
```
### Reference
- [llama](https://github.com/ggerganov/llama.cpp)
- [binding](https://github.com/go-skynet/go-llama.cpp)

View File

@ -1,15 +0,0 @@
+++
disableToc = false
title = "RWKV"
weight = 1
+++
A full example of how to run a rwkv model is in the [examples](https://github.com/go-skynet/LocalAI/tree/master/examples/rwkv).
Note: rwkv models need to specify the `rwkv` backend in the YAML config file, and an associated tokenizer must be provided alongside the model:
```
36464540 -rw-r--r-- 1 mudler mudler 1.2G May 3 10:51 rwkv_small
36464543 -rw-r--r-- 1 mudler mudler 2.4M May 3 10:51 rwkv_small.tokenizer.json
```
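As a minimal sketch, a matching YAML config could look like the following (the name is illustrative; the tokenizer is picked up from the file sitting next to the model, as in the listing above):
```yaml
# Sketch of an RWKV model configuration, assuming the files listed above
# are in the models directory.
name: rwkv
backend: rwkv
parameters:
  model: rwkv_small
```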

View File

@ -1,50 +0,0 @@
+++
disableToc = false
title = "Vall-E-X"
weight = 4
+++
[VALL-E-X](https://github.com/Plachtaa/VALL-E-X) is an open source implementation of Microsoft's VALL-E X zero-shot TTS model.
## Setup
The backend will automatically download the required files in order to run the model.
This is an extra backend: it is already available in the container images, and no additional setup is needed. If you are building LocalAI manually, you need to install Vall-E-X first.
## Usage
Use the `tts` endpoint by specifying the `vall-e-x` backend:
```
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
"backend": "vall-e-x",
"input":"Hello!"
}' | aplay
```
## Voice cloning
In order to use voice-cloning capabilities, you must create a `YAML` configuration file to set up a model:
```yaml
name: cloned-voice
backend: vall-e-x
parameters:
model: "cloned-voice"
vall-e:
# The path to the audio file to be cloned
# relative to the models directory
audio_path: "path-to-wav-source.wav"
```
Then you can specify the model name in the requests:
```
curl http://localhost:8080/tts -H "Content-Type: application/json" -d '{
"backend": "vall-e-x",
"model": "cloned-voice",
"input":"Hello!"
}' | aplay
```

View File

@ -1,39 +0,0 @@
+++
disableToc = false
title = "vLLM"
weight = 4
+++
[vLLM](https://github.com/vllm-project/vllm) is a fast and easy-to-use library for LLM inference.
LocalAI has a built-in integration with vLLM, and it can be used to run models. You can check out `vllm` performance [here](https://github.com/vllm-project/vllm#performance).
## Setup
Create a YAML file for the model you want to use with `vllm`.
To set up a model, you just need to specify the model name in the YAML config file:
```yaml
name: vllm
backend: vllm
parameters:
model: "facebook/opt-125m"
# Uncomment to specify a quantization method (optional)
# quantization: "awq"
```
The backend will automatically download the required files in order to run the model.
## Usage
Use the `completions` endpoint by specifying the `vllm` backend:
```
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
"model": "vllm",
"prompt": "Hello, my name is",
"temperature": 0.1, "top_p": 0.1
}'
```

50
docs/data/landing.yaml Normal file
View File

@ -0,0 +1,50 @@
# Note: Template blocks require a 'weight' parameter so they're correctly ordered on the landing page
# Hero
hero:
enable: false
weight: 10
template: hero
# Feature Grid
featureGrid:
enable: false
weight: 20
template: feature grid
imageText:
enable: true
weight: 25
template: image text
title: LocalAI
subtitle: The Free, Open Source OpenAI Alternative
list:
- text: Optimized, fast inference
icon: speed
- text: Comprehensive support for many model architectures
icon: area_chart
- text: Easy to deploy with Docker
icon: accessibility
image:
path: "images/logos"
filename: "logo.png"
alt: "LocalAI logo" # Optional but recommended
imgOrder:
desktop: 2
mobile: 1
ctaButton:
text: Learn more
url: "/docs/"
# Image compare
imageCompare:
enable: false
weight: 30
template: image compare

View File

@ -1,3 +1,5 @@
module github.com/McShelby/hugo-theme-relearn.git
go 1.19
require github.com/gohugoio/hugo-mod-bootstrap-scss/v5 v5.20300.20200 // indirect

View File

@ -0,0 +1,4 @@
github.com/gohugoio/hugo-mod-bootstrap-scss/v5 v5.20300.20200 h1:SmpwwN3DNzJWbV+IT8gaFu07ENUFpCvKou5BHYUKuVs=
github.com/gohugoio/hugo-mod-bootstrap-scss/v5 v5.20300.20200/go.mod h1:kx8MBj9T7SFR8ZClWvKZPmmUxBaltkoXvnWlZZcSnYA=
github.com/gohugoio/hugo-mod-jslibs-dist/popperjs/v2 v2.21100.20000/go.mod h1:mFberT6ZtcchrsDtfvJM7aAH2bDKLdOnruUHl0hlapI=
github.com/twbs/bootstrap v5.3.2+incompatible/go.mod h1:fZTSrkpSf0/HkL0IIJzvVspTt1r9zuf7XlZau8kpcY0=

0
docs/layouts/index.html Normal file
View File

View File

@ -0,0 +1,47 @@
{{ $repoURL := slice .Site.Params.docs.repoURL }}
{{ $repoHostname := (urls.Parse (.Site.Params.docs.repoURL)).Hostname }}
{{ $filePath := replace .File.Path "\\" "/" }}
{{ $iconPath := "" }}
{{ if strings.Contains ($repoHostname | lower) "github" }}
{{ $repoURL = $repoURL | append "blob" (.Site.Params.docs.repoBranch | default "main") }}
{{ $iconPath = "images/social/github_icon.svg" }}
{{ else if strings.Contains ($repoHostname | lower) "gitlab" }}
{{ $repoURL = $repoURL | append "-/blob" (.Site.Params.docs.repoBranch | default "main") }}
{{ $iconPath = "images/social/gitlab_icon.svg" }}
{{ else if strings.Contains ($repoHostname | lower) "bitbucket" }}
{{ $repoURL = $repoURL | append "src" (.Site.Params.docs.repoBranch | default "master") }}
{{ $iconPath = "images/social/bitbucket_icon.svg" }}
{{ end }}
{{ $repoURL = $repoURL | append "docs/content" .Site.LanguagePrefix $filePath }}
{{ $repoURL = delimit $repoURL "/" }}
{{ $editPageURL := replaceRE "(https?://)|(/)+" "$1$2" $repoURL }}
<div class="gitinfo d-flex flex-wrap justify-content-between align-items-center opacity-85 {{ if or .Site.Params.docs.lastMod .Site.Params.docs.editPage -}}pt-3{{ else }}visually-hidden{{ end }}">
{{ if .Site.Params.docs.editPage | default false -}}
<div id="edit-this-page" class="mt-1">
<a href="{{ $editPageURL }}" alt="{{ .Title }}" rel="noopener noreferrer" target="_blank">
<!-- <span class="material-icons size-20 align-text-bottom text-primary">edit</span> -->
<span class="me-1 align-text-bottom">
{{ with resources.Get $iconPath }}
{{ .Content | safeHTML }}
{{ end }}
</span>
Edit this page
<!-- <span class="material-icons size-20 align-text-bottom text-primary">open_in_new</span> -->
</a>
</div>
{{ end }}
{{ if .Site.Params.docs.lastMod | default false -}}
<div id="last-modified" class="mt-1">
<p class="mb-0 fw-semibold">Last updated <span
{{ if .Site.Params.docs.lastModRelative | default true -}}id="relativetime"{{ else }}{{ end }}
data-authdate="{{ dateFormat "2006-01-02T15:04:05Z0700" .GitInfo.AuthorDate }}"
{{ if .Site.Params.docs.lastModRelative | default true -}}title="{{ dateFormat "02 Jan 2006, 15:04 MST" .GitInfo.AuthorDate }}"{{ else }}{{ end }}>
{{ dateFormat "02 Jan 2006, 15:04 MST" .GitInfo.AuthorDate }}
</span>. <span class="material-icons size-20 align-text-bottom opacity-75">history</span>
</p>
</div>
{{ end }}
</div>

View File

@ -0,0 +1,131 @@
<!-- sidebar-wrapper -->
<nav id="sidebar" class="sidebar-wrapper">
<div class="sidebar-brand d-md-flex justify-content-between align-items-center" style=" text-align: center; height: calc(35%);">
<ul>
<li>
<a href='{{ with .Site.Params.docs.logoLinkURL }}{{ . }}{{ else }}{{ relLangURL "" }}{{ end }}' aria-label="HomePage" alt="HomePage">
{{ with .Site.Params.docs.logo }}
<img style="width: calc(65%);height: calc(65%);" src="{{ . }}">
{{ end }}
{{ with .Site.Params.docs.logo_text }}
<p class="lead mb-3">{{ . }}</p>
{{ end }}
</a>
</li>
<li>
<a href='https://github.com/go-skynet/LocalAI/releases'>
<img src='https://img.shields.io/github/release/go-skynet/LocalAI?&label=Latest&style=for-the-badge'> </a>
</li>
<li>
<a href='https://hub.docker.com/r/localai/localai' target=_blank><img src="https://img.shields.io/badge/dockerhub-images-important.svg?logo=Docker"></a>
<a href='https://quay.io/repository/go-skynet/local-ai?tab=tags&tag=latest' target=_blank><img src="https://img.shields.io/badge/quay.io-images-important.svg?"></a>
</li>
</ul>
</div>
<div class="sidebar-content" style="height: calc(65%);">
<ul class="sidebar-menu">
{{ $currentPage := . -}}
{{ $section := $currentPage.Section -}}
{{ range (where .Site.Sections.ByWeight "Section" "in" $section) }}
{{ $child_pages := union .Sections .Pages }}
{{ range $child_pages.ByWeight }}
{{ if or (.Sections) (.Pages) }}
{{ $active := in $currentPage.RelPermalink .RelPermalink }}
<li class="sidebar-dropdown {{ if eq .Site.Params.docs.sidebarIcons true -}}{{ else }}no-icon{{ end }} {{ if $active }}current active{{ end }}">
<button class="btn">
{{ if eq .Site.Params.docs.sidebarIcons true -}}
<i class="material-icons me-2">{{- .Params.icon | default "notes" }}</i>
{{ end }}
{{- .Title }}
</button>
<div class="sidebar-submenu {{ if $active }}d-block{{ end }}">
<ul>
{{ range .Pages }}
{{ $active := in $currentPage.RelPermalink .RelPermalink }}
{{ if .IsSection }}
<li class="sidebar-dropdown nested {{ if eq .Site.Params.docs.sidebarIcons true -}}{{ else }}no-icon{{ end }} {{ if $active }}current active{{ end }}">
<button class="btn">
{{ if eq .Site.Params.docs.sidebarIcons true -}}
<!-- <span class="material-icons me-2">{{- .Params.icon }}</span> -->
{{ end }}
{{- .Title }}
</button>
<div class="sidebar-submenu {{ if $active }}d-block{{ end }}">
<ul>
{{ range .Pages }}
{{ $active := in $currentPage.RelPermalink .RelPermalink }}
{{ if .IsSection }}
<li class="sidebar-dropdown nested {{ if eq .Site.Params.docs.sidebarIcons true -}}{{ else }}no-icon{{ end }} {{ if $active }}current active{{ end }}">
<button class="btn">
{{ if eq .Site.Params.docs.sidebarIcons true -}}
<!-- <span class="material-icons me-2">{{- .Params.icon }}</span> -->
{{ end }}
{{- .Title }}
</button>
<div class="sidebar-submenu {{ if $active }}d-block{{ end }}">
<ul>
{{ range .Pages }}
{{ $active := in $currentPage.RelPermalink .RelPermalink }}
{{ if .IsSection }}
<li class="sidebar-dropdown nested {{ if eq .Site.Params.docs.sidebarIcons true -}}{{ else }}no-icon{{ end }} {{ if $active }}current active{{ end }}">
<button class="btn">
{{ if eq .Site.Params.docs.sidebarIcons true -}}
<!-- <span class="material-icons me-2">{{- .Params.icon }}</span> -->
{{ end }}
{{- .Title }}
</button>
<div class="sidebar-submenu {{ if $active }}d-block{{ end }}">
<ul>
{{ range .Pages }}
{{ $active := in $currentPage.RelPermalink .RelPermalink }}
<li class="{{ if $active }}current{{ end }} {{ if eq .Site.Params.docs.sidebarIcons true -}}{{ else }}no-icon{{ end }}"><a class="sidebar-nested-link" href="{{ .Permalink }}">{{ .Title }}</a></li>
{{ end }}
</ul>
</div>
</li>
{{ else }}
<li class="{{ if $active }}current{{ end }} {{ if eq .Site.Params.docs.sidebarIcons true -}}{{ else }}no-icon{{ end }}"><a class="sidebar-nested-link" href="{{ .Permalink }}">{{ .Title }}</a></li>
{{ end }}
{{ end }}
</ul>
</div>
</li>
{{ else }}
<li class="{{ if $active }}current{{ end }} {{ if eq .Site.Params.docs.sidebarIcons true -}}{{ else }}no-icon{{ end }}"><a class="sidebar-nested-link" href="{{ .Permalink }}">{{ .Title }}</a></li>
{{ end }}
{{ end }}
</ul>
</div>
</li>
{{ else }}
<li class="{{ if $active }}current{{ end }} {{ if eq .Site.Params.docs.sidebarIcons true -}}{{ else }}no-icon{{ end }}"><a class="sidebar-nested-link" href="{{ .Permalink }}">{{ .Title }}</a></li>
{{ end }}
{{ end }}
</ul>
</div>
</li>
{{ else }}
{{ $active := in $currentPage.RelPermalink .RelPermalink }}
<li class="{{ if $active }}current{{ end }}">
<a class="sidebar-root-link" href="{{ .Permalink }}">
{{ if eq .Site.Params.docs.sidebarIcons true -}}
<i class="material-icons me-2">{{ .Params.icon }}</i>
{{ end }}
{{ .Title }}
</a>
</li>
{{ end }}
{{ end }}
{{ end }}
</ul>
<!-- sidebar-menu -->
</div>
<!-- Sidebar Footer -->
<ul class="sidebar-footer list-unstyled mb-0">
<!-- <li class="list-inline-item mb-0">
<a href="javascript:void(0)" data-bs-toggle="modal" data-bs-target="#lang-selector-popup" class="btn btn-primary m-1">Language</a>
</li> -->
</ul>
<!-- Sidebar Footer -->
</nav>
<!-- sidebar-wrapper -->

View File

@ -0,0 +1,62 @@
<head>
<meta charset="utf-8" />
<title>{{- .Site.Title }}</title>
{{- if not hugo.IsProduction }}
<meta name="robots" content="noindex">
{{- end }}
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta name="description" content="A Lightweight, Modern Documentation Theme for Hugo" />
<meta name="keywords" content="Documentation, Hugo, Hugo Theme, Bootstrap" />
<meta name="author" content="Ettore Di Giacinto" />
<meta name="email" content="info@localai.io" />
<meta name="website" content="https://localai.io" />
<meta name="Version" content="v0.1.0" />
<!-- favicon -->
{{ block "head/favicon" . }}{{ partialCached "head/favicon.html" . }}{{ end }}
<!-- Google Fonts -->
{{- partial "google-fonts" . }}
<!-- Custom CSS -->
{{- $options := dict "enableSourceMap" true }}
{{- if hugo.IsProduction}}
{{- $options := dict "enableSourceMap" false "outputStyle" "compressed" }}
{{- end }}
{{- $style := resources.Get "/scss/style.scss" }}
{{- $style = $style | resources.ExecuteAsTemplate "/scss/style.scss" . | resources.ToCSS $options }}
{{- if hugo.IsProduction }}
{{- $style = $style | minify | fingerprint "sha384" }}
{{- end -}}
<link rel="stylesheet" href="{{ $style.RelPermalink }}" {{ if hugo.IsProduction }}integrity="{{ $style.Data.Integrity }}"{{ end -}}/>
<!-- Bootstrap JS -->
{{ $js := resources.Get "js/bootstrap.js" }}
{{ $params := dict }}
{{ $sourceMap := cond hugo.IsProduction "" "inline" }}
{{ $opts := dict "sourceMap" $sourceMap "minify" hugo.IsProduction "target" "es2018" "params" $params }}
{{ $js = $js | js.Build $opts }}
{{ if hugo.IsProduction }}
{{ $js = $js | fingerprint "sha384" }}
{{ end }}
<script src="{{ $js.RelPermalink }}" {{ if hugo.IsProduction }}integrity="{{ $js.Data.Integrity }}"{{ end -}} defer></script>
<!-- Image Compare Viewer -->
{{ if ($.Scratch.Get "image_compare_enabled") }}
{{ $imagecompare := resources.Get "js/image-compare-viewer.min.js" }}
{{- if not .Site.IsServer }}
{{- $js := (slice $imagecompare) | resources.Concat "/js/image-compare.js" | minify | fingerprint "sha384" }}
<script type="text/javascript" src="{{ $js.Permalink }}" integrity="{{ $js.Data.Integrity }}"></script>
{{- else }}
{{- $js := (slice $imagecompare) | resources.Concat "/js/image-compare.js" }}
<script type="text/javascript" src="{{ $js.Permalink }}" {{ if hugo.IsProduction }}integrity="{{ $js.Data.Integrity }}"{{ end }}></script>
{{- end }}
{{- end }}
<!-- Plausible Analytics Config -->
{{- if not .Site.IsServer }}
{{ if and (.Site.Params.plausible.scriptURL) (.Site.Params.plausible.dataDomain) -}}
{{- partialCached "head/plausible" . }}
{{- end -}}
{{- end -}}
<!-- Google Analytics v4 Config -->
{{- if not .Site.IsServer }}
{{- if .Site.GoogleAnalytics }}
{{- template "_internal/google_analytics.html" . -}}
{{- end -}}
{{- end -}}
</head>

View File

@ -1,4 +1,4 @@
[build]
[build.environment]
HUGO_VERSION = "0.104.3"
HUGO_VERSION = "0.121.2"
GO_VERSION = "1.19.2"

BIN docs/static/android-chrome-192x192.png vendored Normal file (binary, 57 KiB)
BIN docs/static/android-chrome-512x512.png vendored Normal file (binary, 359 KiB)
BIN docs/static/apple-touch-icon.png vendored Normal file (binary, 52 KiB)
BIN docs/static/favicon-16x16.png vendored Normal file (binary, 769 B)
BIN docs/static/favicon-32x32.png vendored Normal file (binary, 2.3 KiB)
BIN docs/static/favicon.ico vendored Normal file (binary, 15 KiB)

1
docs/themes/lotusdocs vendored Submodule

@ -0,0 +1 @@
Subproject commit f5785a2399ca09e7fb4e7e3d69b397f85df42a24

View File

@ -0,0 +1,27 @@
name: dolphin-mixtral-8x7b
mmap: true
parameters:
model: huggingface://TheBloke/dolphin-2.5-mixtral-8x7b-GGUF/blob/main/dolphin-2.5-mixtral-8x7b.Q2_K.gguf
temperature: 0.2
top_k: 40
top_p: 0.95
template:
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
{{if .Content}}{{.Content}}{{end}}<|im_end|>
chat: |
{{.Input}}
<|im_start|>assistant
completion: |
{{.Input}}
context_size: 4096
f16: true
stopwords:
- <|im_end|>
gpu_layers: 90
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "dolphin-mixtral-8x7b",
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
}'

View File

@ -0,0 +1,20 @@
name: mixtral-instruct
mmap: true
parameters:
model: huggingface://TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/mixtral-8x7b-instruct-v0.1.Q2_K.gguf
temperature: 0.2
top_k: 40
top_p: 0.95
template:
chat: &chat |
[INST] {{.Input}} [/INST]
completion: *chat
context_size: 4096
f16: true
gpu_layers: 90
usage: |
curl http://localhost:8080/v1/completions -H "Content-Type: application/json" -d '{
"model": "mixtral-instruct",
"prompt": "How are you doing?"
}'

View File

@ -0,0 +1,28 @@
name: tinyllama-chat
mmap: true
parameters:
model: huggingface://TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/tinyllama-1.1b-chat-v0.3.Q8_0.gguf
temperature: 0.2
top_k: 40
top_p: 0.95
template:
chat_message: |
<|im_start|>{{if eq .RoleName "assistant"}}assistant{{else if eq .RoleName "system"}}system{{else if eq .RoleName "user"}}user{{end}}
{{if .Content}}{{.Content}}{{end}}<|im_end|>
chat: |
{{.Input}}
<|im_start|>assistant
completion: |
{{.Input}}
context_size: 4096
f16: true
stopwords:
- <|im_end|>
gpu_layers: 90
usage: |
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "tinyllama-chat",
"messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
}'

View File

@ -99,7 +99,7 @@ func main() {
Usage: "A List of models to apply in JSON at start",
EnvVars: []string{"PRELOAD_MODELS"},
},
&cli.StringFlag{
&cli.StringSliceFlag{
Name: "models",
Usage: "A List of models URLs configurations.",
EnvVars: []string{"MODELS"},