add litellm

This commit is contained in:
2026-03-16 09:53:27 -04:00
parent eb67191706
commit 25d3a7805c
6 changed files with 374 additions and 0 deletions

View File

@@ -0,0 +1,3 @@
# Compose
Put your compose.yaml here.

View File

@@ -0,0 +1,37 @@
# Compose spec for the LiteLLM proxy and its PostgreSQL database.
# This file is the input for the podlet conversion that generates the quadlets.
services:
  # LiteLLM proxy, published on host port 4000.
  litellm:
    image: docker.litellm.ai/berriai/litellm:main-latest
    ports:
      # Quoted so the HOST:CONTAINER mapping is always read as a string,
      # matching the database mapping below.
      - "4000:4000"
    # Additional environment variables are loaded from this file on the server.
    env_file: /home/ai/litellm.env
    environment:
      # NOTE(review): the user, password and database name are duplicated from
      # the litellm-db service below and must be kept in sync by hand.
      # Consider moving the credentials out of version control.
      DATABASE_URL: "postgresql://llmproxy:dbpassword9090@host.containers.internal:5432/litellm"
      STORE_MODEL_IN_DB: "True"
    restart: unless-stopped
    depends_on:
      # Short form: only expresses that litellm-db is started first. It does
      # not wait for the database health check to pass.
      - litellm-db
    healthcheck:
      test:
        - CMD-SHELL
        # Succeeds when the liveliness endpoint answers without an HTTP error.
        - python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:4000/health/liveliness')"
      # Probe every 30 seconds.
      interval: 30s
      # A single probe may take at most 10 seconds.
      timeout: 10s
      # Report unhealthy after 3 consecutive failures.
      retries: 3
      # Grace period after container start before failures are counted.
      start_period: 40s

  # PostgreSQL database for LiteLLM, published on host port 5432.
  litellm-db:
    image: docker.io/postgres:16
    restart: always
    environment:
      POSTGRES_DB: litellm
      POSTGRES_USER: llmproxy
      POSTGRES_PASSWORD: dbpassword9090
    ports:
      - "5432:5432"
    volumes:
      # Named volume; ":z" applies a shared SELinux label.
      - litellm_postgres_data:/var/lib/postgresql/data:z
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -d litellm -U llmproxy"]
      interval: 1s
      timeout: 5s
      retries: 10

View File

@@ -0,0 +1,67 @@
# Proxy-wide settings
general_settings:
  request_timeout: 600

# Model presets.
# All four entries target the same upstream model and api_base; they differ
# only in sampling parameters and in whether thinking is enabled.
model_list:
  # Thinking enabled, general-purpose sampling
  - model_name: qwen3.5-35b-think-general
    litellm_params:
      model: openai/qwen3.5-35b-a3b
      api_base: https://llama-cpp.reeselink.com
      api_key: none
      # Sampling
      temperature: 1.0
      top_p: 0.95
      presence_penalty: 1.5
      # Extra fields listed under extra_body
      extra_body:
        chat_template_kwargs:
          enable_thinking: true
        top_k: 20
        min_p: 0.0
        repetition_penalty: 1.0

  # Thinking enabled, lower temperature and no presence penalty for code
  - model_name: qwen3.5-35b-think-code
    litellm_params:
      model: openai/qwen3.5-35b-a3b
      api_base: https://llama-cpp.reeselink.com
      api_key: none
      # Sampling
      temperature: 0.6
      top_p: 0.95
      presence_penalty: 0.0
      # Extra fields listed under extra_body
      extra_body:
        chat_template_kwargs:
          enable_thinking: true
        top_k: 20
        min_p: 0.0
        repetition_penalty: 1.0

  # Thinking disabled, general-purpose sampling
  - model_name: qwen3.5-35b-instruct-general
    litellm_params:
      model: openai/qwen3.5-35b-a3b
      api_base: https://llama-cpp.reeselink.com
      api_key: none
      # Sampling
      temperature: 0.7
      top_p: 0.8
      presence_penalty: 1.5
      # Extra fields listed under extra_body
      extra_body:
        chat_template_kwargs:
          enable_thinking: false
        top_k: 20
        min_p: 0.0
        repetition_penalty: 1.0

  # Thinking disabled, same sampling values as the think-general preset
  - model_name: qwen3.5-35b-instruct-reasoning
    litellm_params:
      model: openai/qwen3.5-35b-a3b
      api_base: https://llama-cpp.reeselink.com
      api_key: none
      # Sampling
      temperature: 1.0
      top_p: 0.95
      presence_penalty: 1.5
      # Extra fields listed under extra_body
      extra_body:
        chat_template_kwargs:
          enable_thinking: false
        top_k: 20
        min_p: 0.0
        repetition_penalty: 1.0

View File

@@ -0,0 +1,233 @@
# Podman litellm
- [Podman litellm](#podman-litellm)
- [Setup litellm Project](#setup-litellm-project)
- [Install litellm](#install-litellm)
- [Create the ai user](#create-the-ai-user)
- [Write the litellm compose spec](#write-the-litellm-compose-spec)
- [A Note on Volumes](#a-note-on-volumes)
- [Convert litellm compose spec to quadlets](#convert-litellm-compose-spec-to-quadlets)
- [Create the litellm.env file](#create-the-litellmenv-file)
- [Start and enable your systemd quadlet](#start-and-enable-your-systemd-quadlet)
- [Expose litellm](#expose-litellm)
- [Using LiteLLM](#using-litellm)
- [Adding Models](#adding-models)
- [Testing Models](#testing-models)
- [Backup litellm](#backup-litellm)
- [Upgrade litellm](#upgrade-litellm)
- [Upgrade Quadlets](#upgrade-quadlets)
- [Uninstall](#uninstall)
- [Notes](#notes)
- [SELinux](#selinux)
## Setup litellm Project
- [ ] Copy and rename this folder to active/container_litellm
- [ ] Find and replace litellm with the name of the service.
- [ ] Create the rootless user to run the podman containers
- [ ] Write the compose.yaml spec for your service
- [ ] Convert the compose.yaml spec to a quadlet
- [ ] Install the quadlet on the podman server
- [ ] Expose the quadlet service
- [ ] Install a backup service and timer
## Install litellm
### Create the ai user
```bash
# SSH into your podman server as root
useradd ai
loginctl enable-linger $(id -u ai)
systemctl --user --machine=ai@.host enable podman-restart
systemctl --user --machine=ai@.host enable --now podman.socket
mkdir -p /home/ai/.config/containers/systemd
```
### Write the litellm compose spec
See the [docker run command here](https://docs.litellm.ai/docs/proxy/docker_quick_start#32-start-proxy)
Edit the compose.yaml at active/container_litellm/compose/compose.yaml
#### A Note on Volumes
Named volumes are stored in the rootless user's home directory, at `/home/ai/.local/share/containers/storage/volumes/`.
### Convert litellm compose spec to quadlets
Run the following to convert a compose.yaml into the various `.container` files for systemd:
```bash
# Generate the systemd service
podman run \
--security-opt label=disable \
--rm \
-v $(pwd)/active/container_litellm/compose:/compose \
-v $(pwd)/active/container_litellm/quadlets:/quadlets \
quay.io/k9withabone/podlet \
-f /quadlets \
-i \
--overwrite \
compose /compose/compose.yaml
# Copy the files to the server
export PODMAN_SERVER=ai
scp -r active/container_litellm/quadlets/. $PODMAN_SERVER:/home/ai/.config/containers/systemd/
```
### Create the litellm.env file
Should look something like:
```env
LITELLM_MASTER_KEY="random-string"
LITELLM_SALT_KEY="random-string"
UI_USERNAME="admin"
UI_PASSWORD="random-string"
```
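Then copy it, together with the model config, to the server. The compose spec loads the env file from `/home/ai/litellm.env`.
```bash
export PODMAN_SERVER=ai
# Adjust the local path if your litellm.env lives somewhere else
scp active/container_litellm/litellm.env $PODMAN_SERVER:/home/ai/litellm.env
scp -r active/container_litellm/config.yaml $PODMAN_SERVER:/home/ai/litellm_config.yaml
ssh $PODMAN_SERVER chown -R ai:ai /home/ai/litellm.env /home/ai/litellm_config.yaml
```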
### Start and enable your systemd quadlet
SSH into your podman server as root:
```bash
ssh ai
machinectl shell ai@
systemctl --user daemon-reload
systemctl --user restart litellm
journalctl --user -u litellm -f
# Enable auto-update service which will pull new container images automatically every day
systemctl --user enable --now podman-auto-update.timer
```
### Expose litellm
1. If you need a domain, follow the [DDNS instructions](/active/container_ddns/ddns.md#install-a-new-ddns-service)
2. For a web service, follow the [Caddy instructions](/active/container_caddy/caddy.md#adding-a-new-caddy-record)
3. Finally, follow your OS's guide for opening ports via its firewall service.
## Using LiteLLM
### Adding Models
```json
// qwen3.5-35b-a3b-thinking
{
"temperature": 1,
"top_p": 0.95,
"presence_penalty": 1.5,
"extra_body": {
"top_k": 20,
"min_p": 0,
"repetition_penalty": 1,
"chat_template_kwargs": {
"enable_thinking": true
}
}
}
// qwen3.5-35b-a3b-coding
{
"temperature": 0.6,
"top_p": 0.95,
"presence_penalty": 0,
"extra_body": {
"top_k": 20,
"min_p": 0,
"repetition_penalty": 1,
"chat_template_kwargs": {
"enable_thinking": true
}
}
}
// qwen3.5-35b-a3b-instruct
{
"temperature": 0.7,
"top_p": 0.8,
"presence_penalty": 1.5,
"extra_body": {
"top_k": 20,
"min_p": 0,
"repetition_penalty": 1,
"chat_template_kwargs": {
"enable_thinking": false
}
}
}
```
### Testing Models
```bash
# List models
curl -L -X GET 'https://aipi.reeseapps.com/v1/models' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234'
# Send a chat completion
# 👈 REPLACE the "model" value with the 'public model name' for any db-model
curl -L -X POST 'https://aipi.reeseapps.com/v1/chat/completions' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
  "model": "gpt-4o-mini",
  "messages": [
    {
      "content": "Hey, how is it going",
      "role": "user"
    }
  ]
}'
```
## Backup litellm
Follow the [Borg Backup instructions](/active/systemd_borg/borg.md#set-up-a-client-for-backup)
## Upgrade litellm
### Upgrade Quadlets
Upgrades should be a repeat of [writing the compose spec](#convert-litellm-compose-spec-to-quadlets) and [installing the quadlets](#start-and-enable-your-systemd-quadlet)
```bash
export PODMAN_SERVER=
scp -r quadlets/. $PODMAN_SERVER:/home/ai/.config/containers/systemd/
ssh litellm systemctl --user daemon-reload
ssh litellm systemctl --user restart litellm
```
## Uninstall
```bash
# Stop the user's services
systemctl --user disable podman-restart
podman container stop --all
systemctl --user disable --now podman.socket
systemctl --user disable --now podman-auto-update.timer
# Delete the user (this won't delete their home directory)
# userdel might spit out an error like:
# userdel: user ai is currently used by process 591255
# kill those processes and try again
userdel ai
```
## Notes
### SELinux
<https://blog.christophersmart.com/2021/01/31/podman-volumes-and-selinux/>
:z allows a container to share a mounted volume with all other containers.
:Z allows a container to reserve a mounted volume and prevents any other container from accessing.

View File

@@ -0,0 +1,15 @@
# Quadlet unit for the PostgreSQL database used by LiteLLM.
[Container]
Image=docker.io/postgres:16
PublishPort=5432:5432
# Named volume; ":z" applies a shared SELinux label.
Volume=litellm_postgres_data:/var/lib/postgresql/data:z
# Database name and credentials, one assignment per line.
Environment=POSTGRES_DB=litellm
Environment=POSTGRES_USER=llmproxy
Environment=POSTGRES_PASSWORD=dbpassword9090
# Health check: probe every second, 5 second timeout, 10 retries.
HealthCmd='pg_isready -d litellm -U llmproxy'
HealthInterval=1s
HealthTimeout=5s
HealthRetries=10

[Service]
Restart=always

[Install]
WantedBy=default.target

View File

@@ -0,0 +1,19 @@
# Quadlet unit for the LiteLLM proxy.
[Unit]
# Requires= pulls the database unit in but does not order the two units.
# After= makes this unit start only once litellm-db.service has been started.
# NOTE(review): this orders unit start-up only; it does not wait for the
# database health check to pass.
Requires=litellm-db.service
After=litellm-db.service

[Container]
Image=docker.litellm.ai/berriai/litellm:main-latest
PublishPort=4000:4000
# Additional environment variables are loaded from this file on the server.
EnvironmentFile=/home/ai/litellm.env
# NOTE(review): the credentials here must match those in the litellm-db unit.
Environment=DATABASE_URL=postgresql://llmproxy:dbpassword9090@host.containers.internal:5432/litellm STORE_MODEL_IN_DB=True
# Health check: succeeds when the liveliness endpoint answers without an HTTP error.
HealthCmd="python3 -c \"import urllib.request; urllib.request.urlopen('http://localhost:4000/health/liveliness')\""
HealthInterval=30s
HealthTimeout=10s
HealthRetries=3
HealthStartPeriod=40s

[Service]
Restart=always

[Install]
WantedBy=default.target