Compare commits

...

1 Commit

Author SHA1 Message Date
a56402c2cc update litellm docs
All checks were successful
Podman DDNS Image / build-and-push-ddns (push) Successful in 1m22s
2026-05-11 18:22:14 -04:00
9 changed files with 169 additions and 147 deletions

View File

@@ -1,3 +0,0 @@
# Compose
Put your compose.yaml here.

View File

@@ -1,27 +1,35 @@
services:
litellm:
image: docker.litellm.ai/berriai/litellm:main-latest
image: docker.litellm.ai/berriai/litellm:main-stable
#########################################
## Uncomment these lines to start proxy with a config.yaml file ##
# volumes:
# - ./config.yaml:/app/config.yaml
# command:
# - "--config=/app/config.yaml"
##############################################
ports:
- 4000:4000
env_file: /home/ai/litellm.env
- "4000:4000" # Map the container port to the host, change the host port if necessary
environment:
DATABASE_URL: "postgresql://llmproxy:dbpassword9090@host.containers.internal:5432/litellm"
STORE_MODEL_IN_DB: "True"
restart: unless-stopped
DATABASE_URL: "postgresql://llmproxy:dbpassword9090@db:5432/litellm"
STORE_MODEL_IN_DB: "True" # allows adding models to proxy via UI
env_file:
- ../secrets/litellm.env # Load local .env file
depends_on:
- litellm-db # Indicates that this service depends on the 'litellm-db' service, ensuring 'litellm-db' starts first
healthcheck: # Defines the health check configuration for the container
- db # Indicates that this service depends on the 'db' service, ensuring 'db' starts first
healthcheck: # Defines the health check configuration for the container
test:
- CMD-SHELL
- python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:4000/health/liveliness')" # Command to execute for health check
interval: 30s # Perform health check every 30 seconds
timeout: 10s # Health check command times out after 10 seconds
retries: 3 # Retry up to 3 times if health check fails
start_period: 40s # Wait 40 seconds after container start before beginning health checks
- python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:4000/health/liveliness')" # Command to execute for health check
interval: 30s # Perform health check every 30 seconds
timeout: 10s # Health check command times out after 10 seconds
retries: 3 # Retry up to 3 times if health check fails
start_period: 40s # Wait 40 seconds after container start before beginning health checks
litellm-db:
db:
image: docker.io/postgres:16
restart: always
container_name: litellm_db
environment:
POSTGRES_DB: litellm
POSTGRES_USER: llmproxy
@@ -29,9 +37,26 @@ services:
ports:
- "5432:5432"
volumes:
- litellm_postgres_data:/var/lib/postgresql/data:z
- postgres_data:/var/lib/postgresql/data # Persists Postgres data across container restarts
healthcheck:
test: ["CMD-SHELL", "pg_isready -d litellm -U llmproxy"]
interval: 1s
timeout: 5s
retries: 10
prometheus:
image: docker.io/prom/prometheus
volumes:
- prometheus_data:/prometheus
- ../secrets/prometheus.yml:/etc/prometheus/prometheus.yml
ports:
- "9090:9090"
command:
- "--config.file=/etc/prometheus/prometheus.yml"
- "--storage.tsdb.path=/prometheus"
- "--storage.tsdb.retention.time=15d"
restart: always
volumes:
prometheus_data:
postgres_data:

View File

@@ -1,67 +0,0 @@
# General settings
general_settings:
request_timeout: 600
# Models
model_list:
# Qwen3.5-35B variants
- model_name: qwen3.5-35b-think-general
litellm_params:
model: openai/qwen3.5-35b-a3b
api_base: https://llama-cpp.reeselink.com
api_key: none
temperature: 1.0
top_p: 0.95
presence_penalty: 1.5
extra_body:
top_k: 20
min_p: 0.0
repetition_penalty: 1.0
chat_template_kwargs:
enable_thinking: true
- model_name: qwen3.5-35b-think-code
litellm_params:
model: openai/qwen3.5-35b-a3b
api_base: https://llama-cpp.reeselink.com
api_key: none
temperature: 0.6
top_p: 0.95
presence_penalty: 0.0
extra_body:
top_k: 20
min_p: 0.0
repetition_penalty: 1.0
chat_template_kwargs:
enable_thinking: true
- model_name: qwen3.5-35b-instruct-general
litellm_params:
model: openai/qwen3.5-35b-a3b
api_base: https://llama-cpp.reeselink.com
api_key: none
temperature: 0.7
top_p: 0.8
presence_penalty: 1.5
extra_body:
top_k: 20
min_p: 0.0
repetition_penalty: 1.0
chat_template_kwargs:
enable_thinking: false
- model_name: qwen3.5-35b-instruct-reasoning
litellm_params:
model: openai/qwen3.5-35b-a3b
api_base: https://llama-cpp.reeselink.com
api_key: none
temperature: 1.0
top_p: 0.95
presence_penalty: 1.5
extra_body:
top_k: 20
min_p: 0.0
repetition_penalty: 1.0
chat_template_kwargs:
enable_thinking: false

View File

@@ -9,9 +9,8 @@
- [Convert litellm compose spec to quadlets](#convert-litellm-compose-spec-to-quadlets)
- [Create the litellm.env file](#create-the-litellmenv-file)
- [Start and enable your systemd quadlet](#start-and-enable-your-systemd-quadlet)
- [Install via Ansible](#install-via-ansible)
- [Expose litellm](#expose-litellm)
- [Using LiteLLM](#using-litellm)
- [Adding Models](#adding-models)
- [Testing Models](#testing-models)
- [Backup litellm](#backup-litellm)
- [Upgrade litellm](#upgrade-litellm)
@@ -110,63 +109,28 @@ journalctl --user -u litellm -f
systemctl --user enable --now podman-auto-update.timer
```
### Install via Ansible
Preview changes with a dry run:
```bash
ansible-playbook -i ansible/inventory.yaml active/container_litellm/playbook.yml --check --diff
```
Run the playbook from the Homelab root:
```bash
ansible-playbook -i ansible/inventory.yaml active/container_litellm/playbook.yml
```
This copies the quadlets, config, reloads the systemd user daemon, and starts both `litellm-db` and `litellm` services as the `ai` user.
### Expose litellm
1. If you need a domain, follow the [DDNS instructions](/active/container_ddns/ddns.md#install-a-new-ddns-service)
2. For a web service, follow the [Caddy instructions](/active/container_caddy/caddy.md#adding-a-new-caddy-record)
3. Finally, follow your OS's guide for opening ports via its firewall service.
## Using LiteLLM
### Adding Models
```json
// qwen3.5-35b-a3b-thinking
{
"temperature": 1,
"top_p": 0.95,
"presence_penalty": 1.5,
"extra_body": {
"top_k": 20,
"min_p": 0,
"repetition_penalty": 1,
"chat_template_kwargs": {
"enable_thinking": true
}
}
}
// qwen3.5-35b-a3b-coding
{
"temperature": 0.6,
"top_p": 0.95,
"presence_penalty": 0,
"extra_body": {
"top_k": 20,
"min_p": 0,
"repetition_penalty": 1,
"chat_template_kwargs": {
"enable_thinking": true
}
}
}
// qwen3.5-35b-a3b-instruct
{
"temperature": 0.7,
"top_p": 0.8,
"presence_penalty": 1.5,
"extra_body": {
"top_k": 20,
"min_p": 0,
"repetition_penalty": 1,
"chat_template_kwargs": {
"enable_thinking": false
}
}
}
```
### Testing Models
```bash
@@ -191,7 +155,7 @@ curl -L -X POST 'https://aipi.reeseapps.com/v1/chat/completions' \
## Backup litellm
Follow the [Borg Backup instructions](/active/systemd_borg/borg.md#set-up-a-client-for-backup)
Follow the [Borg Backup instructions](/active/software_borg/borg.md#set-up-a-client-for-backup)
## Upgrade litellm

View File

@@ -0,0 +1,86 @@
---
- name: Install and start LiteLLM quadlets for ai user
hosts: ai-ai
remote_user: ai
vars:
ai_user: ai
quadlets_dir: "/home/{{ ai_user }}/.config/containers/systemd"
tasks:
- name: Ensure ai user home directories exist
ansible.builtin.file:
path: "{{ item }}"
state: directory
owner: "{{ ai_user }}"
group: "{{ ai_user }}"
mode: "0755"
loop:
- "{{ quadlets_dir }}"
- name: Copy litellm container pod
ansible.builtin.copy:
src: quadlets/litellm.pod
dest: "{{ quadlets_dir }}/litellm.pod"
owner: "{{ ai_user }}"
group: "{{ ai_user }}"
mode: "0644"
- name: Copy litellm container quadlet
ansible.builtin.copy:
src: quadlets/litellm-web.container
dest: "{{ quadlets_dir }}/litellm-web.container"
owner: "{{ ai_user }}"
group: "{{ ai_user }}"
mode: "0644"
- name: Copy litellm-db container quadlet
ansible.builtin.copy:
src: quadlets/litellm-db.container
dest: "{{ quadlets_dir }}/litellm-db.container"
owner: "{{ ai_user }}"
group: "{{ ai_user }}"
mode: "0644"
- name: Copy prometheus container quadlet
ansible.builtin.copy:
src: quadlets/litellm-prometheus.container
dest: "{{ quadlets_dir }}/litellm-prometheus.container"
owner: "{{ ai_user }}"
group: "{{ ai_user }}"
mode: "0644"
- name: Copy prometheus config
ansible.builtin.copy:
src: secrets/litellm-prometheus.yaml
dest: "/home/{{ ai_user }}/litellm-prometheus.yaml"
owner: "{{ ai_user }}"
group: "{{ ai_user }}"
mode: "0644"
- name: Copy litellm.env file
ansible.builtin.copy:
src: secrets/litellm.env
dest: "/home/{{ ai_user }}/litellm.env"
owner: "{{ ai_user }}"
group: "{{ ai_user }}"
mode: "0600"
- name: Copy litellm-config.yaml
ansible.builtin.copy:
src: secrets/litellm-config.yaml
dest: "/home/{{ ai_user }}/litellm-config.yaml"
owner: "{{ ai_user }}"
group: "{{ ai_user }}"
mode: "0644"
- name: Reload systemd user daemon
ansible.builtin.systemd:
daemon_reload: true
scope: user
- name: Restart litellm pod
ansible.builtin.systemd:
name: litellm-pod
state: restarted
scope: user

View File

@@ -1,12 +1,13 @@
[Container]
Pod=litellm.pod
ContainerName=litellm-db
Environment=POSTGRES_DB=litellm POSTGRES_USER=llmproxy POSTGRES_PASSWORD=dbpassword9090
HealthCmd='pg_isready -d litellm -U llmproxy'
HealthInterval=1s
HealthRetries=10
HealthTimeout=5s
Image=docker.io/postgres:16
PublishPort=5432:5432
Volume=litellm_postgres_data:/var/lib/postgresql/data:z
Volume=litellm_postgres_data:/var/lib/postgresql/data
[Service]
Restart=always

View File

@@ -0,0 +1,13 @@
[Container]
Pod=litellm.pod
ContainerName=litellm-prom
Exec='--config.file=/etc/prometheus/prometheus.yml' '--storage.tsdb.path=/prometheus' '--storage.tsdb.retention.time=15d'
Image=docker.io/prom/prometheus
Volume=litellm_prometheus_data:/prometheus
Volume=/home/ai/litellm-prometheus.yaml:/etc/prometheus/prometheus.yml:z
[Service]
Restart=always
[Install]
WantedBy=default.target

View File

@@ -2,18 +2,18 @@
Requires=litellm-db.service
[Container]
Environment=DATABASE_URL=postgresql://llmproxy:dbpassword9090@host.containers.internal:5432/litellm STORE_MODEL_IN_DB=True
Pod=litellm.pod
ContainerName=litellm-web
Environment=DATABASE_URL=postgresql://llmproxy:dbpassword9090@localhost:5432/litellm STORE_MODEL_IN_DB=True
EnvironmentFile=/home/ai/litellm.env
HealthCmd="python3 -c \"import urllib.request; urllib.request.urlopen('http://localhost:4000/health/liveliness')\""
HealthInterval=30s
HealthRetries=3
HealthStartPeriod=40s
HealthTimeout=10s
Image=docker.litellm.ai/berriai/litellm:main-latest
PublishPort=4000:4000
[Service]
Restart=always
Image=ghcr.io/berriai/litellm-database:v1.83.14-stable.patch.3
Volume=/home/ai/litellm-config.yaml:/app/config.yaml:z
Exec=--config=/app/config.yaml
[Install]
WantedBy=default.target

View File

@@ -0,0 +1,3 @@
[Pod]
# litellm web interface
PublishPort=4000:4000/tcp