add litellm

2026-03-16 09:53:27 -04:00
parent eb67191706
commit 25d3a7805c
6 changed files with 374 additions and 0 deletions
--- a/active/container_litellm/compose/README.md
+++ b/active/container_litellm/compose/README.md
@@ -0,0 +1,3 @@
 # Compose
 Put your compose.yaml here.
--- a/active/container_litellm/compose/compose.yaml
+++ b/active/container_litellm/compose/compose.yaml
@@ -0,0 +1,37 @@
 services:
  # LiteLLM proxy, published on port 4000.
  # NOTE(review): no volume mounts a LiteLLM config file into this container,
  # so models presumably come from the database/UI only - confirm that is intended.
  litellm:
    image: docker.litellm.ai/berriai/litellm:main-latest
    ports:
      # Quoted, like the litellm-db mapping, so it is always read as a string.
      - "4000:4000"
    env_file: /home/ai/litellm.env
    environment:
      # NOTE(review): the database password is committed in plain text here;
      # it must match POSTGRES_PASSWORD on the litellm-db service.
      DATABASE_URL: "postgresql://llmproxy:dbpassword9090@host.containers.internal:5432/litellm"
      STORE_MODEL_IN_DB: "True"
    restart: unless-stopped
    depends_on:
      # Start litellm-db before this service.
      - litellm-db
    healthcheck:
      test:
        - CMD-SHELL
        # Request the liveliness endpoint from inside the container.
        - python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:4000/health/liveliness')"
      # Probe every 30 seconds.
      interval: 30s
      # Each probe may take up to 10 seconds.
      timeout: 10s
      # Mark unhealthy after 3 consecutive failures.
      retries: 3
      # Grace period after container start before failures count.
      start_period: 40s
  # PostgreSQL 16 database (database "litellm", role "llmproxy").
  litellm-db:
    image: docker.io/postgres:16
    restart: always
    environment:
      # NOTE(review): plain-text credential committed to the repository;
      # it must match the password embedded in DATABASE_URL on the litellm service.
      POSTGRES_DB: litellm
      POSTGRES_USER: llmproxy
      POSTGRES_PASSWORD: dbpassword9090
    ports:
      - "5432:5432"
    volumes:
      # Named volume for the data directory; ":z" applies a shared SELinux label.
      - litellm_postgres_data:/var/lib/postgresql/data:z
    healthcheck:
      # Probe with pg_isready against the litellm database as llmproxy.
      test:
        - CMD-SHELL
        - pg_isready -d litellm -U llmproxy
      interval: 1s
      timeout: 5s
      retries: 10
--- a/active/container_litellm/config.yaml
+++ b/active/container_litellm/config.yaml
@@ -0,0 +1,67 @@
 # LiteLLM module settings
 litellm_settings:
  # Timeout for requests, in seconds. LiteLLM documents request_timeout
  # under litellm_settings rather than general_settings.
  request_timeout: 600
 # Models
 # Every entry targets the same upstream model and endpoint; the entries differ
 # only in sampling parameters and in whether thinking is enabled.
 model_list:
  # Qwen3.5-35B variants

  # Thinking enabled, general use: temperature 1.0, presence_penalty 1.5.
  - model_name: 'qwen3.5-35b-think-general'
    litellm_params:
      model: 'openai/qwen3.5-35b-a3b'
      api_base: 'https://llama-cpp.reeselink.com'
      api_key: 'none'
      temperature: 1.0
      top_p: 0.95
      presence_penalty: 1.5
      # Parameters passed through in the request body.
      extra_body:
        top_k: 20
        min_p: 0.0
        repetition_penalty: 1.0
        chat_template_kwargs:
          enable_thinking: true

  # Thinking enabled, coding: lower temperature (0.6), no presence penalty.
  - model_name: 'qwen3.5-35b-think-code'
    litellm_params:
      model: 'openai/qwen3.5-35b-a3b'
      api_base: 'https://llama-cpp.reeselink.com'
      api_key: 'none'
      temperature: 0.6
      top_p: 0.95
      presence_penalty: 0.0
      extra_body:
        top_k: 20
        min_p: 0.0
        repetition_penalty: 1.0
        chat_template_kwargs:
          enable_thinking: true

  # Thinking disabled, general use: temperature 0.7, top_p 0.8.
  - model_name: 'qwen3.5-35b-instruct-general'
    litellm_params:
      model: 'openai/qwen3.5-35b-a3b'
      api_base: 'https://llama-cpp.reeselink.com'
      api_key: 'none'
      temperature: 0.7
      top_p: 0.8
      presence_penalty: 1.5
      extra_body:
        top_k: 20
        min_p: 0.0
        repetition_penalty: 1.0
        chat_template_kwargs:
          enable_thinking: false

  # Thinking disabled, reasoning tasks: temperature 1.0, top_p 0.95.
  - model_name: 'qwen3.5-35b-instruct-reasoning'
    litellm_params:
      model: 'openai/qwen3.5-35b-a3b'
      api_base: 'https://llama-cpp.reeselink.com'
      api_key: 'none'
      temperature: 1.0
      top_p: 0.95
      presence_penalty: 1.5
      extra_body:
        top_k: 20
        min_p: 0.0
        repetition_penalty: 1.0
        chat_template_kwargs:
          enable_thinking: false
--- a/active/container_litellm/litellm.md
+++ b/active/container_litellm/litellm.md
@@ -0,0 +1,233 @@
 # Podman litellm
 - [Podman litellm](#podman-litellm)
  - [Setup litellm Project](#setup-litellm-project)
  - [Install litellm](#install-litellm)
    - [Create the ai user](#create-the-ai-user)
    - [Write the litellm compose spec](#write-the-litellm-compose-spec)
      - [A Note on Volumes](#a-note-on-volumes)
    - [Convert litellm compose spec to quadlets](#convert-litellm-compose-spec-to-quadlets)
    - [Create the litellm.env file](#create-the-litellmenv-file)
    - [Start and enable your systemd quadlet](#start-and-enable-your-systemd-quadlet)
    - [Expose litellm](#expose-litellm)
  - [Using LiteLLM](#using-litellm)
    - [Adding Models](#adding-models)
    - [Testing Models](#testing-models)
  - [Backup litellm](#backup-litellm)
  - [Upgrade litellm](#upgrade-litellm)
    - [Upgrade Quadlets](#upgrade-quadlets)
  - [Uninstall](#uninstall)
  - [Notes](#notes)
    - [SELinux](#selinux)
 ## Setup litellm Project
 - [ ] Copy and rename this folder to active/container_litellm
 - [ ] Find and replace litellm with the name of the service.
 - [ ] Create the rootless user to run the podman containers
 - [ ] Write the compose.yaml spec for your service
 - [ ] Convert the compose.yaml spec to a quadlet
 - [ ] Install the quadlet on the podman server
 - [ ] Expose the quadlet service
 - [ ] Install a backup service and timer
 ## Install litellm
 ### Create the ai user
 ```bash
 # SSH into your podman server as root
 useradd ai
 # Keep the ai user's services running without an active login session
 loginctl enable-linger $(id -u ai)
 systemctl --user --machine=ai@.host enable podman-restart
 systemctl --user --machine=ai@.host enable --now podman.socket
 # Directory that holds the ai user's quadlet files
 mkdir -p /home/ai/.config/containers/systemd
 # mkdir ran as root, so hand the new directories back to the ai user
 chown -R ai:ai /home/ai/.config
 ```
 ### Write the litellm compose spec
 See the [docker run command here](https://docs.litellm.ai/docs/proxy/docker_quick_start#32-start-proxy)
 Edit the compose.yaml at active/container_litellm/compose/compose.yaml
 #### A Note on Volumes
 Named volumes are stored at `/home/ai/.local/share/containers/storage/volumes/`.
 ### Convert litellm compose spec to quadlets
 Run the following to convert a compose.yaml into the various `.container` files for systemd:
 ```bash
 # Generate the systemd service
 podman run \
 --security-opt label=disable \
 --rm \
 -v $(pwd)/active/container_litellm/compose:/compose \
 -v $(pwd)/active/container_litellm/quadlets:/quadlets \
 quay.io/k9withabone/podlet \
 -f /quadlets \
 -i \
 --overwrite \
 compose /compose/compose.yaml
 # Copy the files to the server
 export PODMAN_SERVER=ai
 scp -r active/container_litellm/quadlets/. $PODMAN_SERVER:/home/ai/.config/containers/systemd/
 # The copy runs as the SSH login user; give the files to the ai user
 ssh $PODMAN_SERVER chown -R ai:ai /home/ai/.config/containers/systemd/
 ```
 ### Create the litellm.env file
 Should look something like:
 ```env
 LITELLM_MASTER_KEY="random-string"
 LITELLM_SALT_KEY="random-string"
 UI_USERNAME="admin"
 UI_PASSWORD="random-string"
 ```
 Then copy it to the server
 ```bash
 export PODMAN_SERVER=ai
 # Copy the env file to the path referenced by env_file in compose.yaml.
 # Adjust the local path if your litellm.env lives elsewhere.
 scp active/container_litellm/litellm.env $PODMAN_SERVER:/home/ai/litellm.env
 scp active/container_litellm/config.yaml $PODMAN_SERVER:/home/ai/litellm_config.yaml
 ssh $PODMAN_SERVER chown ai:ai /home/ai/litellm.env /home/ai/litellm_config.yaml
 # The env file holds secrets; restrict it to the ai user
 ssh $PODMAN_SERVER chmod 600 /home/ai/litellm.env
 ```
 ### Start and enable your systemd quadlet
 SSH into your podman server as root:
 ```bash
 ssh ai
 machinectl shell ai@
 systemctl --user daemon-reload
 systemctl --user restart litellm
 # Enable the auto-update timer, which checks daily for new container images.
 # Note: podman auto-update only acts on containers that set AutoUpdate=registry.
 systemctl --user enable --now podman-auto-update.timer
 # Follow the logs last: -f blocks until interrupted with Ctrl-C
 journalctl --user -u litellm -f
 ```
 ### Expose litellm
 1. If you need a domain, follow the [DDNS instructions](/active/container_ddns/ddns.md#install-a-new-ddns-service)
 2. For a web service, follow the [Caddy instructions](/active/container_caddy/caddy.md#adding-a-new-caddy-record)
 3. Finally, follow your OS's guide for opening ports via its firewall service.
 ## Using LiteLLM
 ### Adding Models
 ```json
 // qwen3.5-35b-a3b-thinking
 {
    "temperature": 1,
    "top_p": 0.95,
    "presence_penalty": 1.5,
    "extra_body": {
        "top_k": 20,
        "min_p": 0,
        "repetition_penalty": 1,
        "chat_template_kwargs": {
            "enable_thinking": true
        }
    }
 }
 // qwen3.5-35b-a3b-coding
 {
    "temperature": 0.6,
    "top_p": 0.95,
    "presence_penalty": 0,
    "extra_body": {
        "top_k": 20,
        "min_p": 0,
        "repetition_penalty": 1,
        "chat_template_kwargs": {
            "enable_thinking": true
        }
    }
 }
 // qwen3.5-35b-a3b-instruct
 {
    "temperature": 0.7,
    "top_p": 0.8,
    "presence_penalty": 1.5,
    "extra_body": {
        "top_k": 20,
        "min_p": 0,
        "repetition_penalty": 1,
        "chat_template_kwargs": {
            "enable_thinking": false
        }
    }
 }
 ```
 ### Testing Models
 ```bash
 # List models
 curl -L -X GET 'https://aipi.reeseapps.com/v1/models' \
 -H 'Content-Type: application/json' \
 -H 'Authorization: Bearer sk-1234'
 # Send a chat completion. Replace "model" with the public model name of any
 # configured model, and sk-1234 with your own key.
 # The body is single-quoted, so it must not contain an apostrophe, and it must
 # be strict JSON (no comments, no trailing commas).
 curl -L -X POST 'https://aipi.reeseapps.com/v1/chat/completions' \
 -H 'Content-Type: application/json' \
 -H 'Authorization: Bearer sk-1234' \
 -d '{
    "model": "qwen3.5-35b-think-general",
    "messages": [
        {
            "content": "Hey, how is it going?",
            "role": "user"
        }
    ]
 }'
 ```
 ## Backup litellm
 Follow the [Borg Backup instructions](/active/systemd_borg/borg.md#set-up-a-client-for-backup)
 ## Upgrade litellm
 ### Upgrade Quadlets
 Upgrades should be a repeat of [writing the compose spec](#convert-litellm-compose-spec-to-quadlets) and [installing the quadlets](#start-and-enable-your-systemd-quadlet)
 ```bash
 export PODMAN_SERVER=ai
 # Copy the regenerated quadlets into the ai user's quadlet directory
 scp -r active/container_litellm/quadlets/. $PODMAN_SERVER:/home/ai/.config/containers/systemd/
 # Reload and restart inside the ai user's systemd instance
 ssh $PODMAN_SERVER systemctl --user --machine=ai@.host daemon-reload
 ssh $PODMAN_SERVER systemctl --user --machine=ai@.host restart litellm
 ```
 ## Uninstall
 ```bash
 # Stop the user's services (run these as the ai user, e.g. via `machinectl shell ai@`)
 systemctl --user disable podman-restart
 podman container stop --all
 systemctl --user disable --now podman.socket
 systemctl --user disable --now podman-auto-update.timer
 # Delete the user as root (this won't delete their home directory)
 # userdel might spit out an error like:
 # userdel: user ai is currently used by process 591255
 # kill those processes and try again
 userdel ai
 ```
 ## Notes
 ### SELinux
 <https://blog.christophersmart.com/2021/01/31/podman-volumes-and-selinux/>
 :z allows a container to share a mounted volume with all other containers.
 :Z allows a container to reserve a mounted volume and prevents any other container from accessing it.
--- a/active/container_litellm/quadlets/litellm-db.container
+++ b/active/container_litellm/quadlets/litellm-db.container
@@ -0,0 +1,15 @@
 # Quadlet unit for the LiteLLM PostgreSQL database.
 [Container]
 # NOTE(review): plain-text credential committed to the repository.
 Environment=POSTGRES_DB=litellm
 Environment=POSTGRES_USER=llmproxy
 Environment=POSTGRES_PASSWORD=dbpassword9090
 # Probe with pg_isready against the litellm database as llmproxy.
 HealthCmd='pg_isready -d litellm -U llmproxy'
 HealthInterval=1s
 HealthRetries=10
 HealthTimeout=5s
 Image=docker.io/postgres:16
 PublishPort=5432:5432
 # Named volume for the data directory; ":z" applies a shared SELinux label.
 Volume=litellm_postgres_data:/var/lib/postgresql/data:z
 [Service]
 Restart=always
 [Install]
 WantedBy=default.target
--- a/active/container_litellm/quadlets/litellm.container
+++ b/active/container_litellm/quadlets/litellm.container
@@ -0,0 +1,19 @@
 # Quadlet unit for the LiteLLM proxy.
 [Unit]
 Requires=litellm-db.service
 # Requires= alone does not order startup; After= makes systemd start the
 # database unit before this one.
 After=litellm-db.service
 [Container]
 # NOTE(review): the database password is committed in plain text here.
 Environment=DATABASE_URL=postgresql://llmproxy:dbpassword9090@host.containers.internal:5432/litellm
 Environment=STORE_MODEL_IN_DB=True
 EnvironmentFile=/home/ai/litellm.env
 # Request the liveliness endpoint from inside the container.
 HealthCmd="python3 -c \"import urllib.request; urllib.request.urlopen('http://localhost:4000/health/liveliness')\""
 HealthInterval=30s
 HealthRetries=3
 HealthStartPeriod=40s
 HealthTimeout=10s
 Image=docker.litellm.ai/berriai/litellm:main-latest
 PublishPort=4000:4000
 [Service]
 Restart=always
 [Install]
 WantedBy=default.target