Compare commits
1 Commits
f2015e2c71
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
a56402c2cc
|
@@ -1,3 +0,0 @@
|
||||
# Compose
|
||||
|
||||
Put your compose.yaml here.
|
||||
@@ -1,15 +1,22 @@
|
||||
services:
|
||||
litellm:
|
||||
image: docker.litellm.ai/berriai/litellm:main-latest
|
||||
image: docker.litellm.ai/berriai/litellm:main-stable
|
||||
#########################################
|
||||
## Uncomment these lines to start proxy with a config.yaml file ##
|
||||
# volumes:
|
||||
# - ./config.yaml:/app/config.yaml
|
||||
# command:
|
||||
# - "--config=/app/config.yaml"
|
||||
##############################################
|
||||
ports:
|
||||
- 4000:4000
|
||||
env_file: /home/ai/litellm.env
|
||||
- "4000:4000" # Map the container port to the host; change the host port if necessary
|
||||
environment:
|
||||
DATABASE_URL: "postgresql://llmproxy:dbpassword9090@host.containers.internal:5432/litellm"
|
||||
STORE_MODEL_IN_DB: "True"
|
||||
restart: unless-stopped
|
||||
DATABASE_URL: "postgresql://llmproxy:dbpassword9090@db:5432/litellm"
|
||||
STORE_MODEL_IN_DB: "True" # allows adding models to proxy via UI
|
||||
env_file:
|
||||
- ../secrets/litellm.env # Load local .env file
|
||||
depends_on:
|
||||
- litellm-db # Indicates that this service depends on the 'litellm-db' service, ensuring 'litellm-db' starts first
|
||||
- db # Indicates that this service depends on the 'db' service, ensuring 'db' starts first
|
||||
healthcheck: # Defines the health check configuration for the container
|
||||
test:
|
||||
- CMD-SHELL
|
||||
@@ -19,9 +26,10 @@ services:
|
||||
retries: 3 # Retry up to 3 times if health check fails
|
||||
start_period: 40s # Wait 40 seconds after container start before beginning health checks
|
||||
|
||||
litellm-db:
|
||||
db:
|
||||
image: docker.io/postgres:16
|
||||
restart: always
|
||||
container_name: litellm_db
|
||||
environment:
|
||||
POSTGRES_DB: litellm
|
||||
POSTGRES_USER: llmproxy
|
||||
@@ -29,9 +37,26 @@ services:
|
||||
ports:
|
||||
- "5432:5432"
|
||||
volumes:
|
||||
- litellm_postgres_data:/var/lib/postgresql/data:z
|
||||
- postgres_data:/var/lib/postgresql/data # Persists Postgres data across container restarts
|
||||
healthcheck:
|
||||
test: ["CMD-SHELL", "pg_isready -d litellm -U llmproxy"]
|
||||
interval: 1s
|
||||
timeout: 5s
|
||||
retries: 10
|
||||
|
||||
prometheus:
|
||||
image: docker.io/prom/prometheus
|
||||
volumes:
|
||||
- prometheus_data:/prometheus
|
||||
- ../secrets/prometheus.yml:/etc/prometheus/prometheus.yml
|
||||
ports:
|
||||
- "9090:9090"
|
||||
command:
|
||||
- "--config.file=/etc/prometheus/prometheus.yml"
|
||||
- "--storage.tsdb.path=/prometheus"
|
||||
- "--storage.tsdb.retention.time=15d"
|
||||
restart: always
|
||||
|
||||
volumes:
|
||||
prometheus_data:
|
||||
postgres_data:
|
||||
|
||||
@@ -1,67 +0,0 @@
|
||||
# General settings
|
||||
|
||||
general_settings:
|
||||
request_timeout: 600
|
||||
|
||||
# Models
|
||||
model_list:
|
||||
# Qwen3.5-35B variants
|
||||
- model_name: qwen3.5-35b-think-general
|
||||
litellm_params:
|
||||
model: openai/qwen3.5-35b-a3b
|
||||
api_base: https://llama-cpp.reeselink.com
|
||||
api_key: none
|
||||
temperature: 1.0
|
||||
top_p: 0.95
|
||||
presence_penalty: 1.5
|
||||
extra_body:
|
||||
top_k: 20
|
||||
min_p: 0.0
|
||||
repetition_penalty: 1.0
|
||||
chat_template_kwargs:
|
||||
enable_thinking: true
|
||||
|
||||
- model_name: qwen3.5-35b-think-code
|
||||
litellm_params:
|
||||
model: openai/qwen3.5-35b-a3b
|
||||
api_base: https://llama-cpp.reeselink.com
|
||||
api_key: none
|
||||
temperature: 0.6
|
||||
top_p: 0.95
|
||||
presence_penalty: 0.0
|
||||
extra_body:
|
||||
top_k: 20
|
||||
min_p: 0.0
|
||||
repetition_penalty: 1.0
|
||||
chat_template_kwargs:
|
||||
enable_thinking: true
|
||||
|
||||
- model_name: qwen3.5-35b-instruct-general
|
||||
litellm_params:
|
||||
model: openai/qwen3.5-35b-a3b
|
||||
api_base: https://llama-cpp.reeselink.com
|
||||
api_key: none
|
||||
temperature: 0.7
|
||||
top_p: 0.8
|
||||
presence_penalty: 1.5
|
||||
extra_body:
|
||||
top_k: 20
|
||||
min_p: 0.0
|
||||
repetition_penalty: 1.0
|
||||
chat_template_kwargs:
|
||||
enable_thinking: false
|
||||
|
||||
- model_name: qwen3.5-35b-instruct-reasoning
|
||||
litellm_params:
|
||||
model: openai/qwen3.5-35b-a3b
|
||||
api_base: https://llama-cpp.reeselink.com
|
||||
api_key: none
|
||||
temperature: 1.0
|
||||
top_p: 0.95
|
||||
presence_penalty: 1.5
|
||||
extra_body:
|
||||
top_k: 20
|
||||
min_p: 0.0
|
||||
repetition_penalty: 1.0
|
||||
chat_template_kwargs:
|
||||
enable_thinking: false
|
||||
@@ -9,9 +9,8 @@
|
||||
- [Convert litellm compose spec to quadlets](#convert-litellm-compose-spec-to-quadlets)
|
||||
- [Create the litellm.env file](#create-the-litellmenv-file)
|
||||
- [Start and enable your systemd quadlet](#start-and-enable-your-systemd-quadlet)
|
||||
- [Install via Ansible](#install-via-ansible)
|
||||
- [Expose litellm](#expose-litellm)
|
||||
- [Using LiteLLM](#using-litellm)
|
||||
- [Adding Models](#adding-models)
|
||||
- [Testing Models](#testing-models)
|
||||
- [Backup litellm](#backup-litellm)
|
||||
- [Upgrade litellm](#upgrade-litellm)
|
||||
@@ -110,63 +109,28 @@ journalctl --user -u litellm -f
|
||||
systemctl --user enable --now podman-auto-update.timer
|
||||
```
|
||||
|
||||
### Install via Ansible
|
||||
|
||||
Preview changes with a dry run:
|
||||
|
||||
```bash
|
||||
ansible-playbook -i ansible/inventory.yaml active/container_litellm/playbook.yml --check --diff
|
||||
```
|
||||
|
||||
Run the playbook from the Homelab root:
|
||||
|
||||
```bash
|
||||
ansible-playbook -i ansible/inventory.yaml active/container_litellm/playbook.yml
|
||||
```
|
||||
|
||||
This copies the quadlets and config, reloads the systemd user daemon, and starts both the `litellm-db` and `litellm` services as the `ai` user.
|
||||
|
||||
### Expose litellm
|
||||
|
||||
1. If you need a domain, follow the [DDNS instructions](/active/container_ddns/ddns.md#install-a-new-ddns-service)
|
||||
2. For a web service, follow the [Caddy instructions](/active/container_caddy/caddy.md#adding-a-new-caddy-record)
|
||||
3. Finally, follow your OS's guide for opening ports via its firewall service.
|
||||
|
||||
## Using LiteLLM
|
||||
|
||||
### Adding Models
|
||||
|
||||
```json
|
||||
// qwen3.5-35b-a3b-thinking
|
||||
{
|
||||
"temperature": 1,
|
||||
"top_p": 0.95,
|
||||
"presence_penalty": 1.5,
|
||||
"extra_body": {
|
||||
"top_k": 20,
|
||||
"min_p": 0,
|
||||
"repetition_penalty": 1,
|
||||
"chat_template_kwargs": {
|
||||
"enable_thinking": true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// qwen3.5-35b-a3b-coding
|
||||
{
|
||||
"temperature": 0.6,
|
||||
"top_p": 0.95,
|
||||
"presence_penalty": 0,
|
||||
"extra_body": {
|
||||
"top_k": 20,
|
||||
"min_p": 0,
|
||||
"repetition_penalty": 1,
|
||||
"chat_template_kwargs": {
|
||||
"enable_thinking": true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// qwen3.5-35b-a3b-instruct
|
||||
{
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.8,
|
||||
"presence_penalty": 1.5,
|
||||
"extra_body": {
|
||||
"top_k": 20,
|
||||
"min_p": 0,
|
||||
"repetition_penalty": 1,
|
||||
"chat_template_kwargs": {
|
||||
"enable_thinking": false
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Testing Models
|
||||
|
||||
```bash
|
||||
@@ -191,7 +155,7 @@ curl -L -X POST 'https://aipi.reeseapps.com/v1/chat/completions' \
|
||||
|
||||
## Backup litellm
|
||||
|
||||
Follow the [Borg Backup instructions](/active/systemd_borg/borg.md#set-up-a-client-for-backup)
|
||||
Follow the [Borg Backup instructions](/active/software_borg/borg.md#set-up-a-client-for-backup)
|
||||
|
||||
## Upgrade litellm
|
||||
|
||||
|
||||
86
active/container_litellm/playbook.yml
Normal file
86
active/container_litellm/playbook.yml
Normal file
@@ -0,0 +1,86 @@
|
||||
---
|
||||
- name: Install and start LiteLLM quadlets for ai user
|
||||
hosts: ai-ai
|
||||
remote_user: ai
|
||||
|
||||
vars:
|
||||
ai_user: ai
|
||||
quadlets_dir: "/home/{{ ai_user }}/.config/containers/systemd"
|
||||
|
||||
tasks:
|
||||
- name: Ensure ai user home directories exist
|
||||
ansible.builtin.file:
|
||||
path: "{{ item }}"
|
||||
state: directory
|
||||
owner: "{{ ai_user }}"
|
||||
group: "{{ ai_user }}"
|
||||
mode: "0755"
|
||||
loop:
|
||||
- "{{ quadlets_dir }}"
|
||||
|
||||
- name: Copy litellm container pod
|
||||
ansible.builtin.copy:
|
||||
src: quadlets/litellm.pod
|
||||
dest: "{{ quadlets_dir }}/litellm.pod"
|
||||
owner: "{{ ai_user }}"
|
||||
group: "{{ ai_user }}"
|
||||
mode: "0644"
|
||||
|
||||
- name: Copy litellm container quadlet
|
||||
ansible.builtin.copy:
|
||||
src: quadlets/litellm-web.container
|
||||
dest: "{{ quadlets_dir }}/litellm-web.container"
|
||||
owner: "{{ ai_user }}"
|
||||
group: "{{ ai_user }}"
|
||||
mode: "0644"
|
||||
|
||||
- name: Copy litellm-db container quadlet
|
||||
ansible.builtin.copy:
|
||||
src: quadlets/litellm-db.container
|
||||
dest: "{{ quadlets_dir }}/litellm-db.container"
|
||||
owner: "{{ ai_user }}"
|
||||
group: "{{ ai_user }}"
|
||||
mode: "0644"
|
||||
|
||||
- name: Copy prometheus container quadlet
|
||||
ansible.builtin.copy:
|
||||
src: quadlets/litellm-prometheus.container
|
||||
dest: "{{ quadlets_dir }}/litellm-prometheus.container"
|
||||
owner: "{{ ai_user }}"
|
||||
group: "{{ ai_user }}"
|
||||
mode: "0644"
|
||||
|
||||
- name: Copy prometheus config
|
||||
ansible.builtin.copy:
|
||||
src: secrets/litellm-prometheus.yaml
|
||||
dest: "/home/{{ ai_user }}/litellm-prometheus.yaml"
|
||||
owner: "{{ ai_user }}"
|
||||
group: "{{ ai_user }}"
|
||||
mode: "0644"
|
||||
|
||||
- name: Copy litellm.env file
|
||||
ansible.builtin.copy:
|
||||
src: secrets/litellm.env
|
||||
dest: "/home/{{ ai_user }}/litellm.env"
|
||||
owner: "{{ ai_user }}"
|
||||
group: "{{ ai_user }}"
|
||||
mode: "0600"
|
||||
|
||||
- name: Copy litellm-config.yaml
|
||||
ansible.builtin.copy:
|
||||
src: secrets/litellm-config.yaml
|
||||
dest: "/home/{{ ai_user }}/litellm-config.yaml"
|
||||
owner: "{{ ai_user }}"
|
||||
group: "{{ ai_user }}"
|
||||
mode: "0644"
|
||||
|
||||
- name: Reload systemd user daemon
|
||||
ansible.builtin.systemd:
|
||||
daemon_reload: true
|
||||
scope: user
|
||||
|
||||
- name: Restart litellm pod
|
||||
ansible.builtin.systemd:
|
||||
name: litellm-pod
|
||||
state: restarted
|
||||
scope: user
|
||||
@@ -1,12 +1,13 @@
|
||||
[Container]
|
||||
Pod=litellm.pod
|
||||
ContainerName=litellm-db
|
||||
Environment=POSTGRES_DB=litellm POSTGRES_USER=llmproxy POSTGRES_PASSWORD=dbpassword9090
|
||||
HealthCmd='pg_isready -d litellm -U llmproxy'
|
||||
HealthInterval=1s
|
||||
HealthRetries=10
|
||||
HealthTimeout=5s
|
||||
Image=docker.io/postgres:16
|
||||
PublishPort=5432:5432
|
||||
Volume=litellm_postgres_data:/var/lib/postgresql/data:z
|
||||
Volume=litellm_postgres_data:/var/lib/postgresql/data
|
||||
|
||||
[Service]
|
||||
Restart=always
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
[Container]
|
||||
Pod=litellm.pod
|
||||
ContainerName=litellm-prom
|
||||
Exec='--config.file=/etc/prometheus/prometheus.yml' '--storage.tsdb.path=/prometheus' '--storage.tsdb.retention.time=15d'
|
||||
Image=docker.io/prom/prometheus
|
||||
Volume=litellm_prometheus_data:/prometheus
|
||||
Volume=/home/ai/litellm-prometheus.yaml:/etc/prometheus/prometheus.yml:z
|
||||
|
||||
[Service]
|
||||
Restart=always
|
||||
|
||||
[Install]
|
||||
WantedBy=default.target
|
||||
@@ -2,18 +2,18 @@
|
||||
Requires=litellm-db.service
|
||||
|
||||
[Container]
|
||||
Environment=DATABASE_URL=postgresql://llmproxy:dbpassword9090@host.containers.internal:5432/litellm STORE_MODEL_IN_DB=True
|
||||
Pod=litellm.pod
|
||||
ContainerName=litellm-web
|
||||
Environment=DATABASE_URL=postgresql://llmproxy:dbpassword9090@localhost:5432/litellm STORE_MODEL_IN_DB=True
|
||||
EnvironmentFile=/home/ai/litellm.env
|
||||
HealthCmd="python3 -c \"import urllib.request; urllib.request.urlopen('http://localhost:4000/health/liveliness')\""
|
||||
HealthInterval=30s
|
||||
HealthRetries=3
|
||||
HealthStartPeriod=40s
|
||||
HealthTimeout=10s
|
||||
Image=docker.litellm.ai/berriai/litellm:main-latest
|
||||
PublishPort=4000:4000
|
||||
|
||||
[Service]
|
||||
Restart=always
|
||||
Image=ghcr.io/berriai/litellm-database:v1.83.14-stable.patch.3
|
||||
Volume=/home/ai/litellm-config.yaml:/app/config.yaml:z
|
||||
Exec=--config=/app/config.yaml
|
||||
|
||||
[Install]
|
||||
WantedBy=default.target
|
||||
3
active/container_litellm/quadlets/litellm.pod
Normal file
3
active/container_litellm/quadlets/litellm.pod
Normal file
@@ -0,0 +1,3 @@
|
||||
[Pod]
|
||||
# litellm web interface
|
||||
PublishPort=4000:4000/tcp
|
||||
Reference in New Issue
Block a user