Compare commits

...

1 Commit

Author SHA1 Message Date
a56402c2cc update litellm docs
All checks were successful
Podman DDNS Image / build-and-push-ddns (push) Successful in 1m22s
2026-05-11 18:22:14 -04:00
9 changed files with 169 additions and 147 deletions

View File

@@ -1,3 +0,0 @@
# Compose
Put your compose.yaml here.

View File

@@ -1,27 +1,35 @@
services:
litellm:
image: docker.litellm.ai/berriai/litellm:main-latest
image: docker.litellm.ai/berriai/litellm:main-stable
#########################################
## Uncomment these lines to start proxy with a config.yaml file ##
# volumes:
# - ./config.yaml:/app/config.yaml
# command:
# - "--config=/app/config.yaml"
##############################################
ports:
- 4000:4000
env_file: /home/ai/litellm.env
- "4000:4000" # Map the container port to the host, change the host port if necessary
environment:
DATABASE_URL: "postgresql://llmproxy:dbpassword9090@host.containers.internal:5432/litellm"
STORE_MODEL_IN_DB: "True"
restart: unless-stopped
DATABASE_URL: "postgresql://llmproxy:dbpassword9090@db:5432/litellm"
STORE_MODEL_IN_DB: "True" # allows adding models to proxy via UI
env_file:
- ../secrets/litellm.env # Load local .env file
depends_on:
- litellm-db # Indicates that this service depends on the 'litellm-db' service, ensuring 'litellm-db' starts first
healthcheck: # Defines the health check configuration for the container
- db # Indicates that this service depends on the 'db' service, ensuring 'db' starts first
healthcheck: # Defines the health check configuration for the container
test:
- CMD-SHELL
- python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:4000/health/liveliness')" # Command to execute for health check
interval: 30s # Perform health check every 30 seconds
timeout: 10s # Health check command times out after 10 seconds
retries: 3 # Retry up to 3 times if health check fails
start_period: 40s # Wait 40 seconds after container start before beginning health checks
- python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:4000/health/liveliness')" # Command to execute for health check
interval: 30s # Perform health check every 30 seconds
timeout: 10s # Health check command times out after 10 seconds
retries: 3 # Retry up to 3 times if health check fails
start_period: 40s # Wait 40 seconds after container start before beginning health checks
litellm-db:
db:
image: docker.io/postgres:16
restart: always
container_name: litellm_db
environment:
POSTGRES_DB: litellm
POSTGRES_USER: llmproxy
@@ -29,9 +37,26 @@ services:
ports:
- "5432:5432"
volumes:
- litellm_postgres_data:/var/lib/postgresql/data:z
- postgres_data:/var/lib/postgresql/data # Persists Postgres data across container restarts
healthcheck:
test: ["CMD-SHELL", "pg_isready -d litellm -U llmproxy"]
interval: 1s
timeout: 5s
retries: 10
prometheus:
image: docker.io/prom/prometheus
volumes:
- prometheus_data:/prometheus
- ../secrets/prometheus.yml:/etc/prometheus/prometheus.yml
ports:
- "9090:9090"
command:
- "--config.file=/etc/prometheus/prometheus.yml"
- "--storage.tsdb.path=/prometheus"
- "--storage.tsdb.retention.time=15d"
restart: always
volumes:
prometheus_data:
postgres_data:

View File

@@ -1,67 +0,0 @@
# General settings
general_settings:
request_timeout: 600
# Models
model_list:
# Qwen3.5-35B variants
- model_name: qwen3.5-35b-think-general
litellm_params:
model: openai/qwen3.5-35b-a3b
api_base: https://llama-cpp.reeselink.com
api_key: none
temperature: 1.0
top_p: 0.95
presence_penalty: 1.5
extra_body:
top_k: 20
min_p: 0.0
repetition_penalty: 1.0
chat_template_kwargs:
enable_thinking: true
- model_name: qwen3.5-35b-think-code
litellm_params:
model: openai/qwen3.5-35b-a3b
api_base: https://llama-cpp.reeselink.com
api_key: none
temperature: 0.6
top_p: 0.95
presence_penalty: 0.0
extra_body:
top_k: 20
min_p: 0.0
repetition_penalty: 1.0
chat_template_kwargs:
enable_thinking: true
- model_name: qwen3.5-35b-instruct-general
litellm_params:
model: openai/qwen3.5-35b-a3b
api_base: https://llama-cpp.reeselink.com
api_key: none
temperature: 0.7
top_p: 0.8
presence_penalty: 1.5
extra_body:
top_k: 20
min_p: 0.0
repetition_penalty: 1.0
chat_template_kwargs:
enable_thinking: false
- model_name: qwen3.5-35b-instruct-reasoning
litellm_params:
model: openai/qwen3.5-35b-a3b
api_base: https://llama-cpp.reeselink.com
api_key: none
temperature: 1.0
top_p: 0.95
presence_penalty: 1.5
extra_body:
top_k: 20
min_p: 0.0
repetition_penalty: 1.0
chat_template_kwargs:
enable_thinking: false

View File

@@ -9,9 +9,8 @@
- [Convert litellm compose spec to quadlets](#convert-litellm-compose-spec-to-quadlets)
- [Create the litellm.env file](#create-the-litellmenv-file)
- [Start and enable your systemd quadlet](#start-and-enable-your-systemd-quadlet)
- [Install via Ansible](#install-via-ansible)
- [Expose litellm](#expose-litellm)
- [Using LiteLLM](#using-litellm)
- [Adding Models](#adding-models)
- [Testing Models](#testing-models)
- [Backup litellm](#backup-litellm)
- [Upgrade litellm](#upgrade-litellm)
@@ -110,63 +109,28 @@ journalctl --user -u litellm -f
systemctl --user enable --now podman-auto-update.timer
```
### Install via Ansible
Preview changes with a dry run:
```bash
ansible-playbook -i ansible/inventory.yaml active/container_litellm/playbook.yml --check --diff
```
Run the playbook from the Homelab root:
```bash
ansible-playbook -i ansible/inventory.yaml active/container_litellm/playbook.yml
```
This copies the quadlets, config, reloads the systemd user daemon, and starts both `litellm-db` and `litellm` services as the `ai` user.
### Expose litellm
1. If you need a domain, follow the [DDNS instructions](/active/container_ddns/ddns.md#install-a-new-ddns-service)
2. For a web service, follow the [Caddy instructions](/active/container_caddy/caddy.md#adding-a-new-caddy-record)
3. Finally, follow your OS's guide for opening ports via its firewall service.
## Using LiteLLM
### Adding Models
```json
// qwen3.5-35b-a3b-thinking
{
"temperature": 1,
"top_p": 0.95,
"presence_penalty": 1.5,
"extra_body": {
"top_k": 20,
"min_p": 0,
"repetition_penalty": 1,
"chat_template_kwargs": {
"enable_thinking": true
}
}
}
// qwen3.5-35b-a3b-coding
{
"temperature": 0.6,
"top_p": 0.95,
"presence_penalty": 0,
"extra_body": {
"top_k": 20,
"min_p": 0,
"repetition_penalty": 1,
"chat_template_kwargs": {
"enable_thinking": true
}
}
}
// qwen3.5-35b-a3b-instruct
{
"temperature": 0.7,
"top_p": 0.8,
"presence_penalty": 1.5,
"extra_body": {
"top_k": 20,
"min_p": 0,
"repetition_penalty": 1,
"chat_template_kwargs": {
"enable_thinking": false
}
}
}
```
### Testing Models
```bash
@@ -191,7 +155,7 @@ curl -L -X POST 'https://aipi.reeseapps.com/v1/chat/completions' \
## Backup litellm
Follow the [Borg Backup instructions](/active/systemd_borg/borg.md#set-up-a-client-for-backup)
Follow the [Borg Backup instructions](/active/software_borg/borg.md#set-up-a-client-for-backup)
## Upgrade litellm

View File

@@ -0,0 +1,86 @@
---
- name: Install and start LiteLLM quadlets for ai user
hosts: ai-ai
remote_user: ai
vars:
ai_user: ai
quadlets_dir: "/home/{{ ai_user }}/.config/containers/systemd"
tasks:
- name: Ensure ai user home directories exist
ansible.builtin.file:
path: "{{ item }}"
state: directory
owner: "{{ ai_user }}"
group: "{{ ai_user }}"
mode: "0755"
loop:
- "{{ quadlets_dir }}"
- name: Copy litellm container pod
ansible.builtin.copy:
src: quadlets/litellm.pod
dest: "{{ quadlets_dir }}/litellm.pod"
owner: "{{ ai_user }}"
group: "{{ ai_user }}"
mode: "0644"
- name: Copy litellm container quadlet
ansible.builtin.copy:
src: quadlets/litellm-web.container
dest: "{{ quadlets_dir }}/litellm-web.container"
owner: "{{ ai_user }}"
group: "{{ ai_user }}"
mode: "0644"
- name: Copy litellm-db container quadlet
ansible.builtin.copy:
src: quadlets/litellm-db.container
dest: "{{ quadlets_dir }}/litellm-db.container"
owner: "{{ ai_user }}"
group: "{{ ai_user }}"
mode: "0644"
- name: Copy prometheus container quadlet
ansible.builtin.copy:
src: quadlets/litellm-prometheus.container
dest: "{{ quadlets_dir }}/litellm-prometheus.container"
owner: "{{ ai_user }}"
group: "{{ ai_user }}"
mode: "0644"
- name: Copy prometheus config
ansible.builtin.copy:
src: secrets/litellm-prometheus.yaml
dest: "/home/{{ ai_user }}/litellm-prometheus.yaml"
owner: "{{ ai_user }}"
group: "{{ ai_user }}"
mode: "0644"
- name: Copy litellm.env file
ansible.builtin.copy:
src: secrets/litellm.env
dest: "/home/{{ ai_user }}/litellm.env"
owner: "{{ ai_user }}"
group: "{{ ai_user }}"
mode: "0600"
- name: Copy litellm-config.yaml
ansible.builtin.copy:
src: secrets/litellm-config.yaml
dest: "/home/{{ ai_user }}/litellm-config.yaml"
owner: "{{ ai_user }}"
group: "{{ ai_user }}"
mode: "0644"
- name: Reload systemd user daemon
ansible.builtin.systemd:
daemon_reload: true
scope: user
- name: Restart litellm pod
ansible.builtin.systemd:
name: litellm-pod
state: restarted
scope: user

View File

@@ -1,12 +1,13 @@
[Container]
Pod=litellm.pod
ContainerName=litellm-db
Environment=POSTGRES_DB=litellm POSTGRES_USER=llmproxy POSTGRES_PASSWORD=dbpassword9090
HealthCmd='pg_isready -d litellm -U llmproxy'
HealthInterval=1s
HealthRetries=10
HealthTimeout=5s
Image=docker.io/postgres:16
PublishPort=5432:5432
Volume=litellm_postgres_data:/var/lib/postgresql/data:z
Volume=litellm_postgres_data:/var/lib/postgresql/data
[Service]
Restart=always

View File

@@ -0,0 +1,13 @@
[Container]
Pod=litellm.pod
ContainerName=litellm-prom
Exec='--config.file=/etc/prometheus/prometheus.yml' '--storage.tsdb.path=/prometheus' '--storage.tsdb.retention.time=15d'
Image=docker.io/prom/prometheus
Volume=litellm_prometheus_data:/prometheus
Volume=/home/ai/litellm-prometheus.yaml:/etc/prometheus/prometheus.yml:z
[Service]
Restart=always
[Install]
WantedBy=default.target

View File

@@ -2,18 +2,18 @@
Requires=litellm-db.service
[Container]
Environment=DATABASE_URL=postgresql://llmproxy:dbpassword9090@host.containers.internal:5432/litellm STORE_MODEL_IN_DB=True
Pod=litellm.pod
ContainerName=litellm-web
Environment=DATABASE_URL=postgresql://llmproxy:dbpassword9090@localhost:5432/litellm STORE_MODEL_IN_DB=True
EnvironmentFile=/home/ai/litellm.env
HealthCmd="python3 -c \"import urllib.request; urllib.request.urlopen('http://localhost:4000/health/liveliness')\""
HealthInterval=30s
HealthRetries=3
HealthStartPeriod=40s
HealthTimeout=10s
Image=docker.litellm.ai/berriai/litellm:main-latest
PublishPort=4000:4000
[Service]
Restart=always
Image=ghcr.io/berriai/litellm-database:v1.83.14-stable.patch.3
Volume=/home/ai/litellm-config.yaml:/app/config.yaml:z
Exec=--config=/app/config.yaml
[Install]
WantedBy=default.target

View File

@@ -0,0 +1,3 @@
[Pod]
# litellm web interface
PublishPort=4000:4000/tcp