update litellm docs
All checks were successful
Podman DDNS Image / build-and-push-ddns (push) Successful in 1m22s
All checks were successful
Podman DDNS Image / build-and-push-ddns (push) Successful in 1m22s
This commit is contained in:
@@ -1,3 +0,0 @@
|
|||||||
# Compose
|
|
||||||
|
|
||||||
Put your compose.yaml here.
|
|
||||||
@@ -1,27 +1,35 @@
|
|||||||
services:
|
services:
|
||||||
litellm:
|
litellm:
|
||||||
image: docker.litellm.ai/berriai/litellm:main-latest
|
image: docker.litellm.ai/berriai/litellm:main-stable
|
||||||
|
#########################################
|
||||||
|
## Uncomment these lines to start proxy with a config.yaml file ##
|
||||||
|
# volumes:
|
||||||
|
# - ./config.yaml:/app/config.yaml
|
||||||
|
# command:
|
||||||
|
# - "--config=/app/config.yaml"
|
||||||
|
##############################################
|
||||||
ports:
|
ports:
|
||||||
- 4000:4000
|
- "4000:4000" # Map the container port to the host, change the host port if necessary
|
||||||
env_file: /home/ai/litellm.env
|
|
||||||
environment:
|
environment:
|
||||||
DATABASE_URL: "postgresql://llmproxy:dbpassword9090@host.containers.internal:5432/litellm"
|
DATABASE_URL: "postgresql://llmproxy:dbpassword9090@db:5432/litellm"
|
||||||
STORE_MODEL_IN_DB: "True"
|
STORE_MODEL_IN_DB: "True" # allows adding models to proxy via UI
|
||||||
restart: unless-stopped
|
env_file:
|
||||||
|
- ../secrets/litellm.env # Load local .env file
|
||||||
depends_on:
|
depends_on:
|
||||||
- litellm-db # Indicates that this service depends on the 'litellm-db' service, ensuring 'litellm-db' starts first
|
- db # Indicates that this service depends on the 'db' service, ensuring 'db' starts first
|
||||||
healthcheck: # Defines the health check configuration for the container
|
healthcheck: # Defines the health check configuration for the container
|
||||||
test:
|
test:
|
||||||
- CMD-SHELL
|
- CMD-SHELL
|
||||||
- python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:4000/health/liveliness')" # Command to execute for health check
|
- python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:4000/health/liveliness')" # Command to execute for health check
|
||||||
interval: 30s # Perform health check every 30 seconds
|
interval: 30s # Perform health check every 30 seconds
|
||||||
timeout: 10s # Health check command times out after 10 seconds
|
timeout: 10s # Health check command times out after 10 seconds
|
||||||
retries: 3 # Retry up to 3 times if health check fails
|
retries: 3 # Retry up to 3 times if health check fails
|
||||||
start_period: 40s # Wait 40 seconds after container start before beginning health checks
|
start_period: 40s # Wait 40 seconds after container start before beginning health checks
|
||||||
|
|
||||||
litellm-db:
|
db:
|
||||||
image: docker.io/postgres:16
|
image: docker.io/postgres:16
|
||||||
restart: always
|
restart: always
|
||||||
|
container_name: litellm_db
|
||||||
environment:
|
environment:
|
||||||
POSTGRES_DB: litellm
|
POSTGRES_DB: litellm
|
||||||
POSTGRES_USER: llmproxy
|
POSTGRES_USER: llmproxy
|
||||||
@@ -29,9 +37,26 @@ services:
|
|||||||
ports:
|
ports:
|
||||||
- "5432:5432"
|
- "5432:5432"
|
||||||
volumes:
|
volumes:
|
||||||
- litellm_postgres_data:/var/lib/postgresql/data:z
|
- postgres_data:/var/lib/postgresql/data # Persists Postgres data across container restarts
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD-SHELL", "pg_isready -d litellm -U llmproxy"]
|
test: ["CMD-SHELL", "pg_isready -d litellm -U llmproxy"]
|
||||||
interval: 1s
|
interval: 1s
|
||||||
timeout: 5s
|
timeout: 5s
|
||||||
retries: 10
|
retries: 10
|
||||||
|
|
||||||
|
prometheus:
|
||||||
|
image: docker.io/prom/prometheus
|
||||||
|
volumes:
|
||||||
|
- prometheus_data:/prometheus
|
||||||
|
- ../secrets/prometheus.yml:/etc/prometheus/prometheus.yml
|
||||||
|
ports:
|
||||||
|
- "9090:9090"
|
||||||
|
command:
|
||||||
|
- "--config.file=/etc/prometheus/prometheus.yml"
|
||||||
|
- "--storage.tsdb.path=/prometheus"
|
||||||
|
- "--storage.tsdb.retention.time=15d"
|
||||||
|
restart: always
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
prometheus_data:
|
||||||
|
postgres_data:
|
||||||
|
|||||||
@@ -1,67 +0,0 @@
|
|||||||
# General settings
|
|
||||||
|
|
||||||
general_settings:
|
|
||||||
request_timeout: 600
|
|
||||||
|
|
||||||
# Models
|
|
||||||
model_list:
|
|
||||||
# Qwen3.5-35B variants
|
|
||||||
- model_name: qwen3.5-35b-think-general
|
|
||||||
litellm_params:
|
|
||||||
model: openai/qwen3.5-35b-a3b
|
|
||||||
api_base: https://llama-cpp.reeselink.com
|
|
||||||
api_key: none
|
|
||||||
temperature: 1.0
|
|
||||||
top_p: 0.95
|
|
||||||
presence_penalty: 1.5
|
|
||||||
extra_body:
|
|
||||||
top_k: 20
|
|
||||||
min_p: 0.0
|
|
||||||
repetition_penalty: 1.0
|
|
||||||
chat_template_kwargs:
|
|
||||||
enable_thinking: true
|
|
||||||
|
|
||||||
- model_name: qwen3.5-35b-think-code
|
|
||||||
litellm_params:
|
|
||||||
model: openai/qwen3.5-35b-a3b
|
|
||||||
api_base: https://llama-cpp.reeselink.com
|
|
||||||
api_key: none
|
|
||||||
temperature: 0.6
|
|
||||||
top_p: 0.95
|
|
||||||
presence_penalty: 0.0
|
|
||||||
extra_body:
|
|
||||||
top_k: 20
|
|
||||||
min_p: 0.0
|
|
||||||
repetition_penalty: 1.0
|
|
||||||
chat_template_kwargs:
|
|
||||||
enable_thinking: true
|
|
||||||
|
|
||||||
- model_name: qwen3.5-35b-instruct-general
|
|
||||||
litellm_params:
|
|
||||||
model: openai/qwen3.5-35b-a3b
|
|
||||||
api_base: https://llama-cpp.reeselink.com
|
|
||||||
api_key: none
|
|
||||||
temperature: 0.7
|
|
||||||
top_p: 0.8
|
|
||||||
presence_penalty: 1.5
|
|
||||||
extra_body:
|
|
||||||
top_k: 20
|
|
||||||
min_p: 0.0
|
|
||||||
repetition_penalty: 1.0
|
|
||||||
chat_template_kwargs:
|
|
||||||
enable_thinking: false
|
|
||||||
|
|
||||||
- model_name: qwen3.5-35b-instruct-reasoning
|
|
||||||
litellm_params:
|
|
||||||
model: openai/qwen3.5-35b-a3b
|
|
||||||
api_base: https://llama-cpp.reeselink.com
|
|
||||||
api_key: none
|
|
||||||
temperature: 1.0
|
|
||||||
top_p: 0.95
|
|
||||||
presence_penalty: 1.5
|
|
||||||
extra_body:
|
|
||||||
top_k: 20
|
|
||||||
min_p: 0.0
|
|
||||||
repetition_penalty: 1.0
|
|
||||||
chat_template_kwargs:
|
|
||||||
enable_thinking: false
|
|
||||||
@@ -9,9 +9,8 @@
|
|||||||
- [Convert litellm compose spec to quadlets](#convert-litellm-compose-spec-to-quadlets)
|
- [Convert litellm compose spec to quadlets](#convert-litellm-compose-spec-to-quadlets)
|
||||||
- [Create the litellm.env file](#create-the-litellmenv-file)
|
- [Create the litellm.env file](#create-the-litellmenv-file)
|
||||||
- [Start and enable your systemd quadlet](#start-and-enable-your-systemd-quadlet)
|
- [Start and enable your systemd quadlet](#start-and-enable-your-systemd-quadlet)
|
||||||
|
- [Install via Ansible](#install-via-ansible)
|
||||||
- [Expose litellm](#expose-litellm)
|
- [Expose litellm](#expose-litellm)
|
||||||
- [Using LiteLLM](#using-litellm)
|
|
||||||
- [Adding Models](#adding-models)
|
|
||||||
- [Testing Models](#testing-models)
|
- [Testing Models](#testing-models)
|
||||||
- [Backup litellm](#backup-litellm)
|
- [Backup litellm](#backup-litellm)
|
||||||
- [Upgrade litellm](#upgrade-litellm)
|
- [Upgrade litellm](#upgrade-litellm)
|
||||||
@@ -110,63 +109,28 @@ journalctl --user -u litellm -f
|
|||||||
systemctl --user enable --now podman-auto-update.timer
|
systemctl --user enable --now podman-auto-update.timer
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Install via Ansible
|
||||||
|
|
||||||
|
Preview changes with a dry run:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ansible-playbook -i ansible/inventory.yaml active/container_litellm/playbook.yml --check --diff
|
||||||
|
```
|
||||||
|
|
||||||
|
Run the playbook from the Homelab root:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ansible-playbook -i ansible/inventory.yaml active/container_litellm/playbook.yml
|
||||||
|
```
|
||||||
|
|
||||||
|
This copies the quadlets and config files, reloads the systemd user daemon, and starts both the `litellm-db` and `litellm` services as the `ai` user.
|
||||||
|
|
||||||
### Expose litellm
|
### Expose litellm
|
||||||
|
|
||||||
1. If you need a domain, follow the [DDNS instructions](/active/container_ddns/ddns.md#install-a-new-ddns-service)
|
1. If you need a domain, follow the [DDNS instructions](/active/container_ddns/ddns.md#install-a-new-ddns-service)
|
||||||
2. For a web service, follow the [Caddy instructions](/active/container_caddy/caddy.md#adding-a-new-caddy-record)
|
2. For a web service, follow the [Caddy instructions](/active/container_caddy/caddy.md#adding-a-new-caddy-record)
|
||||||
3. Finally, follow your OS's guide for opening ports via its firewall service.
|
3. Finally, follow your OS's guide for opening ports via its firewall service.
|
||||||
|
|
||||||
## Using LiteLLM
|
|
||||||
|
|
||||||
### Adding Models
|
|
||||||
|
|
||||||
```json
|
|
||||||
// qwen3.5-35b-a3b-thinking
|
|
||||||
{
|
|
||||||
"temperature": 1,
|
|
||||||
"top_p": 0.95,
|
|
||||||
"presence_penalty": 1.5,
|
|
||||||
"extra_body": {
|
|
||||||
"top_k": 20,
|
|
||||||
"min_p": 0,
|
|
||||||
"repetition_penalty": 1,
|
|
||||||
"chat_template_kwargs": {
|
|
||||||
"enable_thinking": true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// qwen3.5-35b-a3b-coding
|
|
||||||
{
|
|
||||||
"temperature": 0.6,
|
|
||||||
"top_p": 0.95,
|
|
||||||
"presence_penalty": 0,
|
|
||||||
"extra_body": {
|
|
||||||
"top_k": 20,
|
|
||||||
"min_p": 0,
|
|
||||||
"repetition_penalty": 1,
|
|
||||||
"chat_template_kwargs": {
|
|
||||||
"enable_thinking": true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// qwen3.5-35b-a3b-instruct
|
|
||||||
{
|
|
||||||
"temperature": 0.7,
|
|
||||||
"top_p": 0.8,
|
|
||||||
"presence_penalty": 1.5,
|
|
||||||
"extra_body": {
|
|
||||||
"top_k": 20,
|
|
||||||
"min_p": 0,
|
|
||||||
"repetition_penalty": 1,
|
|
||||||
"chat_template_kwargs": {
|
|
||||||
"enable_thinking": false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### Testing Models
|
### Testing Models
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
@@ -191,7 +155,7 @@ curl -L -X POST 'https://aipi.reeseapps.com/v1/chat/completions' \
|
|||||||
|
|
||||||
## Backup litellm
|
## Backup litellm
|
||||||
|
|
||||||
Follow the [Borg Backup instructions](/active/systemd_borg/borg.md#set-up-a-client-for-backup)
|
Follow the [Borg Backup instructions](/active/software_borg/borg.md#set-up-a-client-for-backup)
|
||||||
|
|
||||||
## Upgrade litellm
|
## Upgrade litellm
|
||||||
|
|
||||||
|
|||||||
86
active/container_litellm/playbook.yml
Normal file
86
active/container_litellm/playbook.yml
Normal file
@@ -0,0 +1,86 @@
|
|||||||
|
---
|
||||||
|
- name: Install and start LiteLLM quadlets for ai user
|
||||||
|
hosts: ai-ai
|
||||||
|
remote_user: ai
|
||||||
|
|
||||||
|
vars:
|
||||||
|
ai_user: ai
|
||||||
|
quadlets_dir: "/home/{{ ai_user }}/.config/containers/systemd"
|
||||||
|
|
||||||
|
tasks:
|
||||||
|
- name: Ensure ai user home directories exist
|
||||||
|
ansible.builtin.file:
|
||||||
|
path: "{{ item }}"
|
||||||
|
state: directory
|
||||||
|
owner: "{{ ai_user }}"
|
||||||
|
group: "{{ ai_user }}"
|
||||||
|
mode: "0755"
|
||||||
|
loop:
|
||||||
|
- "{{ quadlets_dir }}"
|
||||||
|
|
||||||
|
- name: Copy litellm container pod
|
||||||
|
ansible.builtin.copy:
|
||||||
|
src: quadlets/litellm.pod
|
||||||
|
dest: "{{ quadlets_dir }}/litellm.pod"
|
||||||
|
owner: "{{ ai_user }}"
|
||||||
|
group: "{{ ai_user }}"
|
||||||
|
mode: "0644"
|
||||||
|
|
||||||
|
- name: Copy litellm container quadlet
|
||||||
|
ansible.builtin.copy:
|
||||||
|
src: quadlets/litellm-web.container
|
||||||
|
dest: "{{ quadlets_dir }}/litellm-web.container"
|
||||||
|
owner: "{{ ai_user }}"
|
||||||
|
group: "{{ ai_user }}"
|
||||||
|
mode: "0644"
|
||||||
|
|
||||||
|
- name: Copy litellm-db container quadlet
|
||||||
|
ansible.builtin.copy:
|
||||||
|
src: quadlets/litellm-db.container
|
||||||
|
dest: "{{ quadlets_dir }}/litellm-db.container"
|
||||||
|
owner: "{{ ai_user }}"
|
||||||
|
group: "{{ ai_user }}"
|
||||||
|
mode: "0644"
|
||||||
|
|
||||||
|
- name: Copy prometheus container quadlet
|
||||||
|
ansible.builtin.copy:
|
||||||
|
src: quadlets/litellm-prometheus.container
|
||||||
|
dest: "{{ quadlets_dir }}/litellm-prometheus.container"
|
||||||
|
owner: "{{ ai_user }}"
|
||||||
|
group: "{{ ai_user }}"
|
||||||
|
mode: "0644"
|
||||||
|
|
||||||
|
- name: Copy prometheus config
|
||||||
|
ansible.builtin.copy:
|
||||||
|
src: secrets/litellm-prometheus.yaml
|
||||||
|
dest: "/home/{{ ai_user }}/litellm-prometheus.yaml"
|
||||||
|
owner: "{{ ai_user }}"
|
||||||
|
group: "{{ ai_user }}"
|
||||||
|
mode: "0644"
|
||||||
|
|
||||||
|
- name: Copy litellm.env file
|
||||||
|
ansible.builtin.copy:
|
||||||
|
src: secrets/litellm.env
|
||||||
|
dest: "/home/{{ ai_user }}/litellm.env"
|
||||||
|
owner: "{{ ai_user }}"
|
||||||
|
group: "{{ ai_user }}"
|
||||||
|
mode: "0600"
|
||||||
|
|
||||||
|
- name: Copy litellm-config.yaml
|
||||||
|
ansible.builtin.copy:
|
||||||
|
src: secrets/litellm-config.yaml
|
||||||
|
dest: "/home/{{ ai_user }}/litellm-config.yaml"
|
||||||
|
owner: "{{ ai_user }}"
|
||||||
|
group: "{{ ai_user }}"
|
||||||
|
mode: "0644"
|
||||||
|
|
||||||
|
- name: Reload systemd user daemon
|
||||||
|
ansible.builtin.systemd:
|
||||||
|
daemon_reload: true
|
||||||
|
scope: user
|
||||||
|
|
||||||
|
- name: Restart litellm pod
|
||||||
|
ansible.builtin.systemd:
|
||||||
|
name: litellm-pod
|
||||||
|
state: restarted
|
||||||
|
scope: user
|
||||||
@@ -1,12 +1,13 @@
|
|||||||
[Container]
|
[Container]
|
||||||
|
Pod=litellm.pod
|
||||||
|
ContainerName=litellm-db
|
||||||
Environment=POSTGRES_DB=litellm POSTGRES_USER=llmproxy POSTGRES_PASSWORD=dbpassword9090
|
Environment=POSTGRES_DB=litellm POSTGRES_USER=llmproxy POSTGRES_PASSWORD=dbpassword9090
|
||||||
HealthCmd='pg_isready -d litellm -U llmproxy'
|
HealthCmd='pg_isready -d litellm -U llmproxy'
|
||||||
HealthInterval=1s
|
HealthInterval=1s
|
||||||
HealthRetries=10
|
HealthRetries=10
|
||||||
HealthTimeout=5s
|
HealthTimeout=5s
|
||||||
Image=docker.io/postgres:16
|
Image=docker.io/postgres:16
|
||||||
PublishPort=5432:5432
|
Volume=litellm_postgres_data:/var/lib/postgresql/data
|
||||||
Volume=litellm_postgres_data:/var/lib/postgresql/data:z
|
|
||||||
|
|
||||||
[Service]
|
[Service]
|
||||||
Restart=always
|
Restart=always
|
||||||
|
|||||||
@@ -0,0 +1,13 @@
|
|||||||
|
[Container]
|
||||||
|
Pod=litellm.pod
|
||||||
|
ContainerName=litellm-prom
|
||||||
|
Exec='--config.file=/etc/prometheus/prometheus.yml' '--storage.tsdb.path=/prometheus' '--storage.tsdb.retention.time=15d'
|
||||||
|
Image=docker.io/prom/prometheus
|
||||||
|
Volume=litellm_prometheus_data:/prometheus
|
||||||
|
Volume=/home/ai/litellm-prometheus.yaml:/etc/prometheus/prometheus.yml:z
|
||||||
|
|
||||||
|
[Service]
|
||||||
|
Restart=always
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=default.target
|
||||||
@@ -2,18 +2,18 @@
|
|||||||
Requires=litellm-db.service
|
Requires=litellm-db.service
|
||||||
|
|
||||||
[Container]
|
[Container]
|
||||||
Environment=DATABASE_URL=postgresql://llmproxy:dbpassword9090@host.containers.internal:5432/litellm STORE_MODEL_IN_DB=True
|
Pod=litellm.pod
|
||||||
|
ContainerName=litellm-web
|
||||||
|
Environment=DATABASE_URL=postgresql://llmproxy:dbpassword9090@localhost:5432/litellm STORE_MODEL_IN_DB=True
|
||||||
EnvironmentFile=/home/ai/litellm.env
|
EnvironmentFile=/home/ai/litellm.env
|
||||||
HealthCmd="python3 -c \"import urllib.request; urllib.request.urlopen('http://localhost:4000/health/liveliness')\""
|
HealthCmd="python3 -c \"import urllib.request; urllib.request.urlopen('http://localhost:4000/health/liveliness')\""
|
||||||
HealthInterval=30s
|
HealthInterval=30s
|
||||||
HealthRetries=3
|
HealthRetries=3
|
||||||
HealthStartPeriod=40s
|
HealthStartPeriod=40s
|
||||||
HealthTimeout=10s
|
HealthTimeout=10s
|
||||||
Image=docker.litellm.ai/berriai/litellm:main-latest
|
Image=ghcr.io/berriai/litellm-database:v1.83.14-stable.patch.3
|
||||||
PublishPort=4000:4000
|
Volume=/home/ai/litellm-config.yaml:/app/config.yaml:z
|
||||||
|
Exec=--config=/app/config.yaml
|
||||||
[Service]
|
|
||||||
Restart=always
|
|
||||||
|
|
||||||
[Install]
|
[Install]
|
||||||
WantedBy=default.target
|
WantedBy=default.target
|
||||||
3
active/container_litellm/quadlets/litellm.pod
Normal file
3
active/container_litellm/quadlets/litellm.pod
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
[Pod]
|
||||||
|
# litellm web interface
|
||||||
|
PublishPort=4000:4000/tcp
|
||||||
Reference in New Issue
Block a user