add litellm
This commit is contained in:
3
active/container_litellm/compose/README.md
Normal file
3
active/container_litellm/compose/README.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# Compose
|
||||
|
||||
Put your compose.yaml here.
|
||||
37
active/container_litellm/compose/compose.yaml
Normal file
37
active/container_litellm/compose/compose.yaml
Normal file
@@ -0,0 +1,37 @@
|
||||
services:
  litellm:
    # NOTE(review): litellm's docs publish images at ghcr.io/berriai/litellm —
    # confirm docker.litellm.ai is the intended registry/mirror.
    image: docker.litellm.ai/berriai/litellm:main-latest
    ports:
      - "4000:4000"  # quoted: Compose port mappings should be strings
    # Secrets (LITELLM_MASTER_KEY, LITELLM_SALT_KEY, UI creds) live in the env file.
    env_file: /home/ai/litellm.env
    environment:
      DATABASE_URL: "postgresql://llmproxy:dbpassword9090@host.containers.internal:5432/litellm"
      STORE_MODEL_IN_DB: "True"
    restart: unless-stopped
    depends_on:
      - litellm-db # Indicates that this service depends on the 'litellm-db' service, ensuring 'litellm-db' starts first
    healthcheck: # Defines the health check configuration for the container
      test:
        - CMD-SHELL
        - python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:4000/health/liveliness')" # Command to execute for health check
      interval: 30s # Perform health check every 30 seconds
      timeout: 10s # Health check command times out after 10 seconds
      retries: 3 # Retry up to 3 times if health check fails
      start_period: 40s # Wait 40 seconds after container start before beginning health checks

  litellm-db:
    image: docker.io/postgres:16
    restart: always
    environment:
      POSTGRES_DB: litellm
      POSTGRES_USER: llmproxy
      POSTGRES_PASSWORD: dbpassword9090
    ports:
      - "5432:5432"
    volumes:
      # :z shares the SELinux label so other containers may also mount it
      - litellm_postgres_data:/var/lib/postgresql/data:z
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -d litellm -U llmproxy"]
      interval: 1s
      timeout: 5s
      retries: 10

# Named volumes must be declared at the top level, otherwise
# `compose up` fails with "service refers to undefined volume".
volumes:
  litellm_postgres_data:
|
||||
67
active/container_litellm/config.yaml
Normal file
67
active/container_litellm/config.yaml
Normal file
@@ -0,0 +1,67 @@
|
||||
# General settings
general_settings:
  request_timeout: 600  # seconds before an in-flight proxy request is aborted

# Models
#
# Every entry proxies the same upstream model (openai/qwen3.5-35b-a3b served
# at llama-cpp.reeselink.com); the variants differ only in sampling
# parameters and whether the chat template's thinking mode is enabled.
model_list:
  # Qwen3.5-35B variants
  - model_name: qwen3.5-35b-think-general
    litellm_params:
      model: openai/qwen3.5-35b-a3b
      api_base: https://llama-cpp.reeselink.com
      api_key: "none"  # quoted so it is unambiguously the literal string "none"
      temperature: 1.0
      top_p: 0.95
      presence_penalty: 1.5
      extra_body:
        top_k: 20
        min_p: 0.0
        repetition_penalty: 1.0
        chat_template_kwargs:
          enable_thinking: true

  - model_name: qwen3.5-35b-think-code
    litellm_params:
      model: openai/qwen3.5-35b-a3b
      api_base: https://llama-cpp.reeselink.com
      api_key: "none"
      temperature: 0.6
      top_p: 0.95
      presence_penalty: 0.0
      extra_body:
        top_k: 20
        min_p: 0.0
        repetition_penalty: 1.0
        chat_template_kwargs:
          enable_thinking: true

  - model_name: qwen3.5-35b-instruct-general
    litellm_params:
      model: openai/qwen3.5-35b-a3b
      api_base: https://llama-cpp.reeselink.com
      api_key: "none"
      temperature: 0.7
      top_p: 0.8
      presence_penalty: 1.5
      extra_body:
        top_k: 20
        min_p: 0.0
        repetition_penalty: 1.0
        chat_template_kwargs:
          enable_thinking: false

  # NOTE(review): enable_thinking is false here despite the "reasoning"
  # name — confirm this is intentional.
  - model_name: qwen3.5-35b-instruct-reasoning
    litellm_params:
      model: openai/qwen3.5-35b-a3b
      api_base: https://llama-cpp.reeselink.com
      api_key: "none"
      temperature: 1.0
      top_p: 0.95
      presence_penalty: 1.5
      extra_body:
        top_k: 20
        min_p: 0.0
        repetition_penalty: 1.0
        chat_template_kwargs:
          enable_thinking: false
|
||||
233
active/container_litellm/litellm.md
Normal file
233
active/container_litellm/litellm.md
Normal file
@@ -0,0 +1,233 @@
|
||||
# Podman litellm
|
||||
|
||||
- [Podman litellm](#podman-litellm)
|
||||
- [Setup litellm Project](#setup-litellm-project)
|
||||
- [Install litellm](#install-litellm)
|
||||
- [Create the ai user](#create-the-ai-user)
|
||||
- [Write the litellm compose spec](#write-the-litellm-compose-spec)
|
||||
- [A Note on Volumes](#a-note-on-volumes)
|
||||
- [Convert litellm compose spec to quadlets](#convert-litellm-compose-spec-to-quadlets)
|
||||
- [Create the litellm.env file](#create-the-litellmenv-file)
|
||||
- [Start and enable your systemd quadlet](#start-and-enable-your-systemd-quadlet)
|
||||
- [Expose litellm](#expose-litellm)
|
||||
- [Using LiteLLM](#using-litellm)
|
||||
- [Adding Models](#adding-models)
|
||||
- [Testing Models](#testing-models)
|
||||
- [Backup litellm](#backup-litellm)
|
||||
- [Upgrade litellm](#upgrade-litellm)
|
||||
- [Upgrade Quadlets](#upgrade-quadlets)
|
||||
- [Uninstall](#uninstall)
|
||||
- [Notes](#notes)
|
||||
- [SELinux](#selinux)
|
||||
|
||||
## Setup litellm Project
|
||||
|
||||
- [ ] Copy and rename this folder to active/container_litellm
|
||||
- [ ] Find and replace litellm with the name of the service.
|
||||
- [ ] Create the rootless user to run the podman containers
|
||||
- [ ] Write the compose.yaml spec for your service
|
||||
- [ ] Convert the compose.yaml spec to a quadlet
|
||||
- [ ] Install the quadlet on the podman server
|
||||
- [ ] Expose the quadlet service
|
||||
- [ ] Install a backup service and timer
|
||||
|
||||
## Install litellm
|
||||
|
||||
### Create the ai user
|
||||
|
||||
```bash
|
||||
# SSH into your podman server as root
|
||||
useradd ai
|
||||
loginctl enable-linger $(id -u ai)
|
||||
systemctl --user --machine=ai@.host enable podman-restart
|
||||
systemctl --user --machine=ai@.host enable --now podman.socket
|
||||
mkdir -p /home/ai/.config/containers/systemd
|
||||
```
|
||||
|
||||
### Write the litellm compose spec
|
||||
|
||||
See the [docker run command here](https://docs.litellm.ai/docs/proxy/docker_quick_start#32-start-proxy)
|
||||
|
||||
Edit the compose.yaml at active/container_litellm/compose/compose.yaml
|
||||
|
||||
#### A Note on Volumes
|
||||
|
||||
Named volumes are stored at `/home/ai/.local/share/containers/storage/volumes/` (the containers run as the `ai` user created above).
|
||||
|
||||
### Convert litellm compose spec to quadlets
|
||||
|
||||
Run the following to convert a compose.yaml into the various `.container` files for systemd:
|
||||
|
||||
```bash
|
||||
# Generate the systemd service
|
||||
podman run \
|
||||
--security-opt label=disable \
|
||||
--rm \
|
||||
-v $(pwd)/active/container_litellm/compose:/compose \
|
||||
-v $(pwd)/active/container_litellm/quadlets:/quadlets \
|
||||
quay.io/k9withabone/podlet \
|
||||
-f /quadlets \
|
||||
-i \
|
||||
--overwrite \
|
||||
compose /compose/compose.yaml
|
||||
|
||||
# Copy the files to the server
|
||||
export PODMAN_SERVER=ai-ai
|
||||
scp -r active/container_litellm/quadlets/. $PODMAN_SERVER:/home/ai/.config/containers/systemd/
|
||||
```
|
||||
|
||||
### Create the litellm.env file
|
||||
|
||||
Should look something like:
|
||||
|
||||
```env
|
||||
LITELLM_MASTER_KEY="random-string"
|
||||
LITELLM_SALT_KEY="random-string"
|
||||
|
||||
UI_USERNAME="admin"
|
||||
UI_PASSWORD="random-string"
|
||||
```
|
||||
|
||||
Then copy it to the server
|
||||
|
||||
```bash
|
||||
export PODMAN_SERVER=ai
scp active/container_litellm/litellm.env $PODMAN_SERVER:/home/ai/litellm.env
ssh $PODMAN_SERVER chown ai:ai /home/ai/litellm.env
|
||||
```
|
||||
|
||||
### Start and enable your systemd quadlet
|
||||
|
||||
SSH into your podman server as root:
|
||||
|
||||
```bash
|
||||
ssh ai
|
||||
machinectl shell ai@
|
||||
systemctl --user daemon-reload
|
||||
systemctl --user restart litellm
|
||||
journalctl --user -u litellm -f
|
||||
# Enable auto-update service which will pull new container images automatically every day
|
||||
systemctl --user enable --now podman-auto-update.timer
|
||||
```
|
||||
|
||||
### Expose litellm
|
||||
|
||||
1. If you need a domain, follow the [DDNS instructions](/active/container_ddns/ddns.md#install-a-new-ddns-service)
|
||||
2. For a web service, follow the [Caddy instructions](/active/container_caddy/caddy.md#adding-a-new-caddy-record)
|
||||
3. Finally, follow your OS's guide for opening ports via its firewall service.
|
||||
|
||||
## Using LiteLLM
|
||||
|
||||
### Adding Models
|
||||
|
||||
```json
|
||||
// qwen3.5-35b-a3b-thinking
|
||||
{
|
||||
"temperature": 1,
|
||||
"top_p": 0.95,
|
||||
"presence_penalty": 1.5,
|
||||
"extra_body": {
|
||||
"top_k": 20,
|
||||
"min_p": 0,
|
||||
"repetition_penalty": 1,
|
||||
"chat_template_kwargs": {
|
||||
"enable_thinking": true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// qwen3.5-35b-a3b-coding
|
||||
{
|
||||
"temperature": 0.6,
|
||||
"top_p": 0.95,
|
||||
"presence_penalty": 0,
|
||||
"extra_body": {
|
||||
"top_k": 20,
|
||||
"min_p": 0,
|
||||
"repetition_penalty": 1,
|
||||
"chat_template_kwargs": {
|
||||
"enable_thinking": true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// qwen3.5-35b-a3b-instruct
|
||||
{
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.8,
|
||||
"presence_penalty": 1.5,
|
||||
"extra_body": {
|
||||
"top_k": 20,
|
||||
"min_p": 0,
|
||||
"repetition_penalty": 1,
|
||||
"chat_template_kwargs": {
|
||||
"enable_thinking": false
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Testing Models
|
||||
|
||||
```bash
|
||||
# List models
|
||||
curl -L -X GET 'https://aipi.reeseapps.com/v1/models' \
|
||||
-H 'Content-Type: application/json' \
|
||||
-H 'Authorization: Bearer sk-1234'
|
||||
|
||||
# Replace "gpt-4o-mini" with the "public model name" of any db-model
curl -L -X POST 'https://aipi.reeseapps.com/v1/chat/completions' \
  -H 'Content-Type: application/json' \
  -H 'Authorization: Bearer sk-1234' \
  -d '{
    "model": "gpt-4o-mini",
    "messages": [
      {
        "content": "Hey, how is it going",
        "role": "user"
      }
    ]
  }'
|
||||
```
|
||||
|
||||
## Backup litellm
|
||||
|
||||
Follow the [Borg Backup instructions](/active/systemd_borg/borg.md#set-up-a-client-for-backup)
|
||||
|
||||
## Upgrade litellm
|
||||
|
||||
### Upgrade Quadlets
|
||||
|
||||
Upgrades should be a repeat of [writing the compose spec](#convert-litellm-compose-spec-to-quadlets) and [installing the quadlets](#start-and-enable-your-systemd-quadlet)
|
||||
|
||||
```bash
|
||||
export PODMAN_SERVER=ai
scp -r active/container_litellm/quadlets/. $PODMAN_SERVER:/home/ai/.config/containers/systemd/
ssh $PODMAN_SERVER systemctl --user daemon-reload
ssh $PODMAN_SERVER systemctl --user restart litellm
|
||||
```
|
||||
|
||||
## Uninstall
|
||||
|
||||
```bash
|
||||
# Stop the user's services
|
||||
systemctl --user disable podman-restart
|
||||
podman container stop --all
|
||||
systemctl --user disable --now podman.socket
|
||||
systemctl --user disable --now podman-auto-update.timer
|
||||
|
||||
# Delete the user (this won't delete their home directory)
|
||||
# userdel might spit out an error like:
# userdel: user ai is currently used by process 591255
# kill those processes and try again
userdel ai
|
||||
```
|
||||
|
||||
## Notes
|
||||
|
||||
### SELinux
|
||||
|
||||
<https://blog.christophersmart.com/2021/01/31/podman-volumes-and-selinux/>
|
||||
|
||||
:z allows a container to share a mounted volume with all other containers.
|
||||
|
||||
:Z allows a container to reserve a mounted volume and prevents any other container from accessing.
|
||||
15
active/container_litellm/quadlets/litellm-db.container
Normal file
15
active/container_litellm/quadlets/litellm-db.container
Normal file
@@ -0,0 +1,15 @@
|
||||
# Quadlet unit for the LiteLLM Postgres database.
# Generated from compose/compose.yaml via podlet; see litellm.md.
[Container]
Image=docker.io/postgres:16
Environment=POSTGRES_DB=litellm POSTGRES_USER=llmproxy POSTGRES_PASSWORD=dbpassword9090
PublishPort=5432:5432
# :z shares the volume's SELinux label with other containers
Volume=litellm_postgres_data:/var/lib/postgresql/data:z
# Health check: succeeds once Postgres accepts connections
HealthCmd='pg_isready -d litellm -U llmproxy'
HealthInterval=1s
HealthTimeout=5s
HealthRetries=10

[Service]
Restart=always

[Install]
WantedBy=default.target
|
||||
19
active/container_litellm/quadlets/litellm.container
Normal file
19
active/container_litellm/quadlets/litellm.container
Normal file
@@ -0,0 +1,19 @@
|
||||
# Quadlet unit for the LiteLLM proxy container.
# Generated from compose/compose.yaml via podlet; see litellm.md.
[Unit]
# The database container must be started before the proxy.
Requires=litellm-db.service

[Container]
Image=docker.litellm.ai/berriai/litellm:main-latest
PublishPort=4000:4000
# Non-secret settings inline; secrets (master key, salt key, UI creds)
# come from the env file.
Environment=DATABASE_URL=postgresql://llmproxy:dbpassword9090@host.containers.internal:5432/litellm STORE_MODEL_IN_DB=True
EnvironmentFile=/home/ai/litellm.env
# Liveness probe against the proxy's own health endpoint
HealthCmd="python3 -c \"import urllib.request; urllib.request.urlopen('http://localhost:4000/health/liveliness')\""
HealthInterval=30s
HealthTimeout=10s
HealthRetries=3
HealthStartPeriod=40s

[Service]
Restart=always

[Install]
WantedBy=default.target
|
||||
Reference in New Issue
Block a user