add litellm
This commit is contained in:
3
active/container_litellm/compose/README.md
Normal file
3
active/container_litellm/compose/README.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# Compose
|
||||
|
||||
Put your compose.yaml here.
|
||||
37
active/container_litellm/compose/compose.yaml
Normal file
37
active/container_litellm/compose/compose.yaml
Normal file
@@ -0,0 +1,37 @@
|
||||
services:
  litellm:
    # NOTE(review): litellm's docs publish images at ghcr.io/berriai/litellm —
    # confirm docker.litellm.ai is the intended registry/mirror.
    image: docker.litellm.ai/berriai/litellm:main-latest
    ports:
      - "4000:4000"  # quoted: Compose port mappings should be strings
    # Secrets (LITELLM_MASTER_KEY, LITELLM_SALT_KEY, UI creds) live in the env file.
    env_file: /home/ai/litellm.env
    environment:
      DATABASE_URL: "postgresql://llmproxy:dbpassword9090@host.containers.internal:5432/litellm"
      STORE_MODEL_IN_DB: "True"
    restart: unless-stopped
    depends_on:
      - litellm-db # Indicates that this service depends on the 'litellm-db' service, ensuring 'litellm-db' starts first
    healthcheck: # Defines the health check configuration for the container
      test:
        - CMD-SHELL
        - python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:4000/health/liveliness')" # Command to execute for health check
      interval: 30s # Perform health check every 30 seconds
      timeout: 10s # Health check command times out after 10 seconds
      retries: 3 # Retry up to 3 times if health check fails
      start_period: 40s # Wait 40 seconds after container start before beginning health checks

  litellm-db:
    image: docker.io/postgres:16
    restart: always
    environment:
      POSTGRES_DB: litellm
      POSTGRES_USER: llmproxy
      POSTGRES_PASSWORD: dbpassword9090
    ports:
      - "5432:5432"
    volumes:
      # :z shares the SELinux label so other containers may also mount it
      - litellm_postgres_data:/var/lib/postgresql/data:z
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -d litellm -U llmproxy"]
      interval: 1s
      timeout: 5s
      retries: 10

# Named volumes must be declared at the top level, otherwise
# `compose up` fails with "service refers to undefined volume".
volumes:
  litellm_postgres_data:
|
||||
67
active/container_litellm/config.yaml
Normal file
67
active/container_litellm/config.yaml
Normal file
@@ -0,0 +1,67 @@
|
||||
# General settings
general_settings:
  request_timeout: 600  # seconds before an in-flight proxy request is aborted

# Models
#
# Every entry proxies the same upstream model (openai/qwen3.5-35b-a3b served
# at llama-cpp.reeselink.com); the variants differ only in sampling
# parameters and whether the chat template's thinking mode is enabled.
model_list:
  # Qwen3.5-35B variants
  - model_name: qwen3.5-35b-think-general
    litellm_params:
      model: openai/qwen3.5-35b-a3b
      api_base: https://llama-cpp.reeselink.com
      api_key: "none"  # quoted so it is unambiguously the literal string "none"
      temperature: 1.0
      top_p: 0.95
      presence_penalty: 1.5
      extra_body:
        top_k: 20
        min_p: 0.0
        repetition_penalty: 1.0
        chat_template_kwargs:
          enable_thinking: true

  - model_name: qwen3.5-35b-think-code
    litellm_params:
      model: openai/qwen3.5-35b-a3b
      api_base: https://llama-cpp.reeselink.com
      api_key: "none"
      temperature: 0.6
      top_p: 0.95
      presence_penalty: 0.0
      extra_body:
        top_k: 20
        min_p: 0.0
        repetition_penalty: 1.0
        chat_template_kwargs:
          enable_thinking: true

  - model_name: qwen3.5-35b-instruct-general
    litellm_params:
      model: openai/qwen3.5-35b-a3b
      api_base: https://llama-cpp.reeselink.com
      api_key: "none"
      temperature: 0.7
      top_p: 0.8
      presence_penalty: 1.5
      extra_body:
        top_k: 20
        min_p: 0.0
        repetition_penalty: 1.0
        chat_template_kwargs:
          enable_thinking: false

  # NOTE(review): enable_thinking is false here despite the "reasoning"
  # name — confirm this is intentional.
  - model_name: qwen3.5-35b-instruct-reasoning
    litellm_params:
      model: openai/qwen3.5-35b-a3b
      api_base: https://llama-cpp.reeselink.com
      api_key: "none"
      temperature: 1.0
      top_p: 0.95
      presence_penalty: 1.5
      extra_body:
        top_k: 20
        min_p: 0.0
        repetition_penalty: 1.0
        chat_template_kwargs:
          enable_thinking: false
|
||||
233
active/container_litellm/litellm.md
Normal file
233
active/container_litellm/litellm.md
Normal file
@@ -0,0 +1,233 @@
|
||||
# Podman litellm
|
||||
|
||||
- [Podman litellm](#podman-litellm)
|
||||
- [Setup litellm Project](#setup-litellm-project)
|
||||
- [Install litellm](#install-litellm)
|
||||
- [Create the ai user](#create-the-ai-user)
|
||||
- [Write the litellm compose spec](#write-the-litellm-compose-spec)
|
||||
- [A Note on Volumes](#a-note-on-volumes)
|
||||
- [Convert litellm compose spec to quadlets](#convert-litellm-compose-spec-to-quadlets)
|
||||
- [Create the litellm.env file](#create-the-litellmenv-file)
|
||||
- [Start and enable your systemd quadlet](#start-and-enable-your-systemd-quadlet)
|
||||
- [Expose litellm](#expose-litellm)
|
||||
- [Using LiteLLM](#using-litellm)
|
||||
- [Adding Models](#adding-models)
|
||||
- [Testing Models](#testing-models)
|
||||
- [Backup litellm](#backup-litellm)
|
||||
- [Upgrade litellm](#upgrade-litellm)
|
||||
- [Upgrade Quadlets](#upgrade-quadlets)
|
||||
- [Uninstall](#uninstall)
|
||||
- [Notes](#notes)
|
||||
- [SELinux](#selinux)
|
||||
|
||||
## Setup litellm Project
|
||||
|
||||
- [ ] Copy and rename this folder to active/container_litellm
|
||||
- [ ] Find and replace litellm with the name of the service.
|
||||
- [ ] Create the rootless user to run the podman containers
|
||||
- [ ] Write the compose.yaml spec for your service
|
||||
- [ ] Convert the compose.yaml spec to a quadlet
|
||||
- [ ] Install the quadlet on the podman server
|
||||
- [ ] Expose the quadlet service
|
||||
- [ ] Install a backup service and timer
|
||||
|
||||
## Install litellm
|
||||
|
||||
### Create the ai user
|
||||
|
||||
```bash
|
||||
# SSH into your podman server as root
|
||||
useradd ai
|
||||
loginctl enable-linger $(id -u ai)
|
||||
systemctl --user --machine=ai@.host enable podman-restart
|
||||
systemctl --user --machine=ai@.host enable --now podman.socket
|
||||
mkdir -p /home/ai/.config/containers/systemd
|
||||
```
|
||||
|
||||
### Write the litellm compose spec
|
||||
|
||||
See the [docker run command here](https://docs.litellm.ai/docs/proxy/docker_quick_start#32-start-proxy)
|
||||
|
||||
Edit the compose.yaml at active/container_litellm/compose/compose.yaml
|
||||
|
||||
#### A Note on Volumes
|
||||
|
||||
Named volumes are stored at `/home/ai/.local/share/containers/storage/volumes/` (the containers run as the `ai` user created above).
|
||||
|
||||
### Convert litellm compose spec to quadlets
|
||||
|
||||
Run the following to convert a compose.yaml into the various `.container` files for systemd:
|
||||
|
||||
```bash
|
||||
# Generate the systemd service
|
||||
podman run \
|
||||
--security-opt label=disable \
|
||||
--rm \
|
||||
-v $(pwd)/active/container_litellm/compose:/compose \
|
||||
-v $(pwd)/active/container_litellm/quadlets:/quadlets \
|
||||
quay.io/k9withabone/podlet \
|
||||
-f /quadlets \
|
||||
-i \
|
||||
--overwrite \
|
||||
compose /compose/compose.yaml
|
||||
|
||||
# Copy the files to the server
|
||||
export PODMAN_SERVER=ai-ai
|
||||
scp -r active/container_litellm/quadlets/. $PODMAN_SERVER:/home/ai/.config/containers/systemd/
|
||||
```
|
||||
|
||||
### Create the litellm.env file
|
||||
|
||||
Should look something like:
|
||||
|
||||
```env
|
||||
LITELLM_MASTER_KEY="random-string"
|
||||
LITELLM_SALT_KEY="random-string"
|
||||
|
||||
UI_USERNAME="admin"
|
||||
UI_PASSWORD="random-string"
|
||||
```
|
||||
|
||||
Then copy it to the server
|
||||
|
||||
```bash
|
||||
export PODMAN_SERVER=ai
scp active/container_litellm/litellm.env $PODMAN_SERVER:/home/ai/litellm.env
ssh $PODMAN_SERVER chown ai:ai /home/ai/litellm.env
|
||||
```
|
||||
|
||||
### Start and enable your systemd quadlet
|
||||
|
||||
SSH into your podman server as root:
|
||||
|
||||
```bash
|
||||
ssh ai
|
||||
machinectl shell ai@
|
||||
systemctl --user daemon-reload
|
||||
systemctl --user restart litellm
|
||||
journalctl --user -u litellm -f
|
||||
# Enable auto-update service which will pull new container images automatically every day
|
||||
systemctl --user enable --now podman-auto-update.timer
|
||||
```
|
||||
|
||||
### Expose litellm
|
||||
|
||||
1. If you need a domain, follow the [DDNS instructions](/active/container_ddns/ddns.md#install-a-new-ddns-service)
|
||||
2. For a web service, follow the [Caddy instructions](/active/container_caddy/caddy.md#adding-a-new-caddy-record)
|
||||
3. Finally, follow your OS's guide for opening ports via its firewall service.
|
||||
|
||||
## Using LiteLLM
|
||||
|
||||
### Adding Models
|
||||
|
||||
```json
|
||||
// qwen3.5-35b-a3b-thinking
|
||||
{
|
||||
"temperature": 1,
|
||||
"top_p": 0.95,
|
||||
"presence_penalty": 1.5,
|
||||
"extra_body": {
|
||||
"top_k": 20,
|
||||
"min_p": 0,
|
||||
"repetition_penalty": 1,
|
||||
"chat_template_kwargs": {
|
||||
"enable_thinking": true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// qwen3.5-35b-a3b-coding
|
||||
{
|
||||
"temperature": 0.6,
|
||||
"top_p": 0.95,
|
||||
"presence_penalty": 0,
|
||||
"extra_body": {
|
||||
"top_k": 20,
|
||||
"min_p": 0,
|
||||
"repetition_penalty": 1,
|
||||
"chat_template_kwargs": {
|
||||
"enable_thinking": true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// qwen3.5-35b-a3b-instruct
|
||||
{
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.8,
|
||||
"presence_penalty": 1.5,
|
||||
"extra_body": {
|
||||
"top_k": 20,
|
||||
"min_p": 0,
|
||||
"repetition_penalty": 1,
|
||||
"chat_template_kwargs": {
|
||||
"enable_thinking": false
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Testing Models
|
||||
|
||||
```bash
|
||||
# List models
|
||||
curl -L -X GET 'https://aipi.reeseapps.com/v1/models' \
|
||||
-H 'Content-Type: application/json' \
|
||||
-H 'Authorization: Bearer sk-1234'
|
||||
|
||||
# Replace "gpt-4o-mini" with the "public model name" of any db-model
curl -L -X POST 'https://aipi.reeseapps.com/v1/chat/completions' \
  -H 'Content-Type: application/json' \
  -H 'Authorization: Bearer sk-1234' \
  -d '{
    "model": "gpt-4o-mini",
    "messages": [
      {
        "content": "Hey, how is it going",
        "role": "user"
      }
    ]
  }'
|
||||
```
|
||||
|
||||
## Backup litellm
|
||||
|
||||
Follow the [Borg Backup instructions](/active/systemd_borg/borg.md#set-up-a-client-for-backup)
|
||||
|
||||
## Upgrade litellm
|
||||
|
||||
### Upgrade Quadlets
|
||||
|
||||
Upgrades should be a repeat of [writing the compose spec](#convert-litellm-compose-spec-to-quadlets) and [installing the quadlets](#start-and-enable-your-systemd-quadlet)
|
||||
|
||||
```bash
|
||||
export PODMAN_SERVER=ai
scp -r active/container_litellm/quadlets/. $PODMAN_SERVER:/home/ai/.config/containers/systemd/
ssh $PODMAN_SERVER systemctl --user daemon-reload
ssh $PODMAN_SERVER systemctl --user restart litellm
|
||||
```
|
||||
|
||||
## Uninstall
|
||||
|
||||
```bash
|
||||
# Stop the user's services
|
||||
systemctl --user disable podman-restart
|
||||
podman container stop --all
|
||||
systemctl --user disable --now podman.socket
|
||||
systemctl --user disable --now podman-auto-update.timer
|
||||
|
||||
# Delete the user (this won't delete their home directory)
|
||||
# userdel might spit out an error like:
# userdel: user ai is currently used by process 591255
# kill those processes and try again
userdel ai
|
||||
```
|
||||
|
||||
## Notes
|
||||
|
||||
### SELinux
|
||||
|
||||
<https://blog.christophersmart.com/2021/01/31/podman-volumes-and-selinux/>
|
||||
|
||||
:z allows a container to share a mounted volume with all other containers.
|
||||
|
||||
:Z allows a container to reserve a mounted volume and prevents any other container from accessing.
|
||||
15
active/container_litellm/quadlets/litellm-db.container
Normal file
15
active/container_litellm/quadlets/litellm-db.container
Normal file
@@ -0,0 +1,15 @@
|
||||
# Quadlet unit for the LiteLLM Postgres database.
# Generated from compose/compose.yaml via podlet; see litellm.md.
[Container]
Image=docker.io/postgres:16
Environment=POSTGRES_DB=litellm POSTGRES_USER=llmproxy POSTGRES_PASSWORD=dbpassword9090
PublishPort=5432:5432
# :z shares the volume's SELinux label with other containers
Volume=litellm_postgres_data:/var/lib/postgresql/data:z
# Health check: succeeds once Postgres accepts connections
HealthCmd='pg_isready -d litellm -U llmproxy'
HealthInterval=1s
HealthTimeout=5s
HealthRetries=10

[Service]
Restart=always

[Install]
WantedBy=default.target
|
||||
19
active/container_litellm/quadlets/litellm.container
Normal file
19
active/container_litellm/quadlets/litellm.container
Normal file
@@ -0,0 +1,19 @@
|
||||
# Quadlet unit for the LiteLLM proxy container.
# Generated from compose/compose.yaml via podlet; see litellm.md.
[Unit]
# The database container must be started before the proxy.
Requires=litellm-db.service

[Container]
Image=docker.litellm.ai/berriai/litellm:main-latest
PublishPort=4000:4000
# Non-secret settings inline; secrets (master key, salt key, UI creds)
# come from the env file.
Environment=DATABASE_URL=postgresql://llmproxy:dbpassword9090@host.containers.internal:5432/litellm STORE_MODEL_IN_DB=True
EnvironmentFile=/home/ai/litellm.env
# Liveness probe against the proxy's own health endpoint
HealthCmd="python3 -c \"import urllib.request; urllib.request.urlopen('http://localhost:4000/health/liveliness')\""
HealthInterval=30s
HealthTimeout=10s
HealthRetries=3
HealthStartPeriod=40s

[Service]
Restart=always

[Install]
WantedBy=default.target
|
||||
Reference in New Issue
Block a user