diff --git a/active/container_litellm/compose/README.md b/active/container_litellm/compose/README.md
new file mode 100644
index 0000000..54893f3
--- /dev/null
+++ b/active/container_litellm/compose/README.md
@@ -0,0 +1,3 @@
+# Compose
+
+Put your compose.yaml here.
diff --git a/active/container_litellm/compose/compose.yaml b/active/container_litellm/compose/compose.yaml
new file mode 100644
index 0000000..7eb5bf6
--- /dev/null
+++ b/active/container_litellm/compose/compose.yaml
@@ -0,0 +1,37 @@
+services:
+  litellm:
+    image: docker.litellm.ai/berriai/litellm:main-latest
+    ports:
+      - 4000:4000
+    env_file: /home/ai/litellm.env
+    environment:
+      DATABASE_URL: "postgresql://llmproxy:dbpassword9090@host.containers.internal:5432/litellm"
+      STORE_MODEL_IN_DB: "True"
+    restart: unless-stopped
+    depends_on:
+      - litellm-db # Indicates that this service depends on the 'litellm-db' service, ensuring 'litellm-db' starts first
+    healthcheck: # Defines the health check configuration for the container
+      test:
+        - CMD-SHELL
+        - python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:4000/health/liveliness')" # Command to execute for health check
+      interval: 30s # Perform health check every 30 seconds
+      timeout: 10s # Health check command times out after 10 seconds
+      retries: 3 # Retry up to 3 times if health check fails
+      start_period: 40s # Wait 40 seconds after container start before beginning health checks
+
+  litellm-db:
+    image: docker.io/postgres:16
+    restart: always
+    environment:
+      POSTGRES_DB: litellm
+      POSTGRES_USER: llmproxy
+      POSTGRES_PASSWORD: dbpassword9090
+    ports:
+      - "5432:5432"
+    volumes:
+      - litellm_postgres_data:/var/lib/postgresql/data:z
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -d litellm -U llmproxy"]
+      interval: 1s
+      timeout: 5s
+      retries: 10
diff --git a/active/container_litellm/config.yaml b/active/container_litellm/config.yaml
new file mode 100644
index 0000000..e1da826
--- /dev/null
+++ b/active/container_litellm/config.yaml
@@ -0,0 +1,67 @@
+# General settings
+
+general_settings:
+  request_timeout: 600
+
+# Models
+model_list:
+  # Qwen3.5-35B variants
+  - model_name: qwen3.5-35b-think-general
+    litellm_params:
+      model: openai/qwen3.5-35b-a3b
+      api_base: https://llama-cpp.reeselink.com
+      api_key: none
+      temperature: 1.0
+      top_p: 0.95
+      presence_penalty: 1.5
+      extra_body:
+        top_k: 20
+        min_p: 0.0
+        repetition_penalty: 1.0
+        chat_template_kwargs:
+          enable_thinking: true
+
+  - model_name: qwen3.5-35b-think-code
+    litellm_params:
+      model: openai/qwen3.5-35b-a3b
+      api_base: https://llama-cpp.reeselink.com
+      api_key: none
+      temperature: 0.6
+      top_p: 0.95
+      presence_penalty: 0.0
+      extra_body:
+        top_k: 20
+        min_p: 0.0
+        repetition_penalty: 1.0
+        chat_template_kwargs:
+          enable_thinking: true
+
+  - model_name: qwen3.5-35b-instruct-general
+    litellm_params:
+      model: openai/qwen3.5-35b-a3b
+      api_base: https://llama-cpp.reeselink.com
+      api_key: none
+      temperature: 0.7
+      top_p: 0.8
+      presence_penalty: 1.5
+      extra_body:
+        top_k: 20
+        min_p: 0.0
+        repetition_penalty: 1.0
+        chat_template_kwargs:
+          enable_thinking: false
+
+  - model_name: qwen3.5-35b-instruct-reasoning
+    litellm_params:
+      model: openai/qwen3.5-35b-a3b
+      api_base: https://llama-cpp.reeselink.com
+      api_key: none
+      temperature: 1.0
+      top_p: 0.95
+      presence_penalty: 1.5
+      extra_body:
+        top_k: 20
+        min_p: 0.0
+        repetition_penalty: 1.0
+        chat_template_kwargs:
+          enable_thinking: false
diff --git a/active/container_litellm/litellm.md b/active/container_litellm/litellm.md
new file mode 100644
index 0000000..e8096d8
--- /dev/null
+++ b/active/container_litellm/litellm.md
@@ -0,0 +1,233 @@
+# Podman litellm
+
+- [Podman litellm](#podman-litellm)
+  - [Setup litellm Project](#setup-litellm-project)
+  - [Install litellm](#install-litellm)
+    - [Create the ai user](#create-the-ai-user)
+    - [Write the litellm compose spec](#write-the-litellm-compose-spec)
+      - [A Note on Volumes](#a-note-on-volumes)
+    - [Convert litellm compose spec to quadlets](#convert-litellm-compose-spec-to-quadlets)
+    - [Create the litellm.env file](#create-the-litellmenv-file)
+    - [Start and enable your systemd quadlet](#start-and-enable-your-systemd-quadlet)
+    - [Expose litellm](#expose-litellm)
+  - [Using LiteLLM](#using-litellm)
+    - [Adding Models](#adding-models)
+    - [Testing Models](#testing-models)
+  - [Backup litellm](#backup-litellm)
+  - [Upgrade litellm](#upgrade-litellm)
+    - [Upgrade Quadlets](#upgrade-quadlets)
+  - [Uninstall](#uninstall)
+  - [Notes](#notes)
+    - [SELinux](#selinux)
+
+## Setup litellm Project
+
+- [ ] Copy and rename this folder to active/container_litellm
+- [ ] Find and replace litellm with the name of the service
+- [ ] Create the rootless user to run the podman containers
+- [ ] Write the compose.yaml spec for your service
+- [ ] Convert the compose.yaml spec to a quadlet
+- [ ] Install the quadlet on the podman server
+- [ ] Expose the quadlet service
+- [ ] Install a backup service and timer
+
+## Install litellm
+
+### Create the ai user
+
+```bash
+# SSH into your podman server as root
+useradd ai
+loginctl enable-linger $(id -u ai)
+systemctl --user --machine=ai@.host enable podman-restart
+systemctl --user --machine=ai@.host enable --now podman.socket
+mkdir -p /home/ai/.config/containers/systemd
+```
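+
+If you want to sanity-check the rootless setup before moving on, a quick check (assuming the user is named `ai`, as above) might look like:
+
+```bash
+# Lingering must be enabled so the ai user's services keep running after logout
+loginctl show-user ai --property=Linger
+
+# The rootless podman socket should report "active"
+systemctl --user --machine=ai@.host is-active podman.socket
+```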
+
+### Write the litellm compose spec
+
+See the [docker run command here](https://docs.litellm.ai/docs/proxy/docker_quick_start#32-start-proxy)
+
+Edit the compose.yaml at active/container_litellm/compose/compose.yaml
+
+#### A Note on Volumes
+
+Named volumes are stored at `/home/ai/.local/share/containers/storage/volumes/`.
+
+### Convert litellm compose spec to quadlets
+
+Run the following to convert a compose.yaml into the various `.container` files for systemd:
+
+```bash
+# Generate the systemd service
+podman run \
+--security-opt label=disable \
+--rm \
+-v $(pwd)/active/container_litellm/compose:/compose \
+-v $(pwd)/active/container_litellm/quadlets:/quadlets \
+quay.io/k9withabone/podlet \
+-f /quadlets \
+-i \
+--overwrite \
+compose /compose/compose.yaml
+
+# Copy the files to the server
+export PODMAN_SERVER=ai-ai
+scp -r active/container_litellm/quadlets/. $PODMAN_SERVER:/home/ai/.config/containers/systemd/
+```
+
+### Create the litellm.env file
+
+It should look something like this:
+
+```env
+LITELLM_MASTER_KEY="random-string"
+LITELLM_SALT_KEY="random-string"
+
+UI_USERNAME="admin"
+UI_PASSWORD="random-string"
+```
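+
+Any sufficiently long random strings will do. One way to generate them (this assumes `openssl` is installed; the `sk-` prefix on the master key follows LiteLLM's examples):
+
+```bash
+# Print candidate secrets to paste into litellm.env
+echo "LITELLM_MASTER_KEY=\"sk-$(openssl rand -hex 16)\""
+echo "LITELLM_SALT_KEY=\"sk-$(openssl rand -hex 16)\""
+echo "UI_PASSWORD=\"$(openssl rand -hex 16)\""
+```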
+
+Then copy it to the server, along with the config.yaml from this folder:
+
+```bash
+export PODMAN_SERVER=ai
+scp litellm.env $PODMAN_SERVER:/home/ai/litellm.env
+scp active/container_litellm/config.yaml $PODMAN_SERVER:/home/ai/litellm_config.yaml
+ssh $PODMAN_SERVER chown ai:ai /home/ai/litellm.env /home/ai/litellm_config.yaml
+```
+
+### Start and enable your systemd quadlet
+
+SSH into your podman server as root:
+
+```bash
+ssh ai
+machinectl shell ai@
+systemctl --user daemon-reload
+systemctl --user restart litellm
+journalctl --user -u litellm -f
+# Enable the auto-update service, which will pull new container images automatically every day
+systemctl --user enable --now podman-auto-update.timer
+```
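+
+Once the unit is running, a quick smoke test from the podman server (still in the `ai` user's session) might look like this; port 4000 and the liveliness path come from the compose spec above:
+
+```bash
+# Same endpoint the container health check polls
+curl -sf http://localhost:4000/health/liveliness
+
+# Container state and health status
+podman ps --filter name=litellm
+```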
+
+### Expose litellm
+
+1. If you need a domain, follow the [DDNS instructions](/active/container_ddns/ddns.md#install-a-new-ddns-service)
+2. For a web service, follow the [Caddy instructions](/active/container_caddy/caddy.md#adding-a-new-caddy-record)
+3. Finally, follow your OS's guide for opening ports via its firewall service.
+
+## Using LiteLLM
+
+### Adding Models
+
+With `STORE_MODEL_IN_DB: "True"`, models can be added from the LiteLLM Admin UI (log in with the UI credentials from litellm.env). The blocks below are the `litellm_params` for each Qwen variant, matching config.yaml:
+
+```json
+// qwen3.5-35b-a3b-thinking
+{
+  "temperature": 1,
+  "top_p": 0.95,
+  "presence_penalty": 1.5,
+  "extra_body": {
+    "top_k": 20,
+    "min_p": 0,
+    "repetition_penalty": 1,
+    "chat_template_kwargs": {
+      "enable_thinking": true
+    }
+  }
+}
+
+// qwen3.5-35b-a3b-coding
+{
+  "temperature": 0.6,
+  "top_p": 0.95,
+  "presence_penalty": 0,
+  "extra_body": {
+    "top_k": 20,
+    "min_p": 0,
+    "repetition_penalty": 1,
+    "chat_template_kwargs": {
+      "enable_thinking": true
+    }
+  }
+}
+
+// qwen3.5-35b-a3b-instruct
+{
+  "temperature": 0.7,
+  "top_p": 0.8,
+  "presence_penalty": 1.5,
+  "extra_body": {
+    "top_k": 20,
+    "min_p": 0,
+    "repetition_penalty": 1,
+    "chat_template_kwargs": {
+      "enable_thinking": false
+    }
+  }
+}
+```
+
+### Testing Models
+
+```bash
+# List models
+curl -L -X GET 'https://aipi.reeseapps.com/v1/models' \
+-H 'Content-Type: application/json' \
+-H 'Authorization: Bearer sk-1234'
+
+# Chat completion - set "model" to the public model name of any db-model
+curl -L -X POST 'https://aipi.reeseapps.com/v1/chat/completions' \
+-H 'Content-Type: application/json' \
+-H 'Authorization: Bearer sk-1234' \
+-d '{
+  "model": "qwen3.5-35b-instruct-general",
+  "messages": [
+    {
+      "content": "Hey, how is it going?",
+      "role": "user"
+    }
+  ]
+}'
+```
+
+## Backup litellm
+
+Follow the [Borg Backup instructions](/active/systemd_borg/borg.md#set-up-a-client-for-backup)
+
+## Upgrade litellm
+
+### Upgrade Quadlets
+
+Upgrades are a repeat of [converting the compose spec to quadlets](#convert-litellm-compose-spec-to-quadlets) and [starting the quadlets](#start-and-enable-your-systemd-quadlet):
+
+```bash
+export PODMAN_SERVER=ai
+scp -r active/container_litellm/quadlets/. $PODMAN_SERVER:/home/ai/.config/containers/systemd/
+ssh $PODMAN_SERVER systemctl --user --machine=ai@.host daemon-reload
+ssh $PODMAN_SERVER systemctl --user --machine=ai@.host restart litellm
+```
+
+## Uninstall
+
+```bash
+# As the ai user (e.g. via machinectl shell ai@), stop the user's services
+systemctl --user disable podman-restart
+podman container stop --all
+systemctl --user disable --now podman.socket
+systemctl --user disable --now podman-auto-update.timer
+
+# Then, as root, delete the user (this won't delete their home directory)
+# userdel might spit out an error like:
+#   userdel: user ai is currently used by process 591255
+# kill those processes and try again
+userdel ai
+```
+
+## Notes
+
+### SELinux
+
+`:z` allows a container to share a mounted volume with all other containers.
+
+`:Z` allows a container to reserve a mounted volume and prevents any other container from accessing it.
diff --git a/active/container_litellm/quadlets/litellm-db.container b/active/container_litellm/quadlets/litellm-db.container
new file mode 100644
index 0000000..9c77d9f
--- /dev/null
+++ b/active/container_litellm/quadlets/litellm-db.container
@@ -0,0 +1,15 @@
+[Container]
+Environment=POSTGRES_DB=litellm POSTGRES_USER=llmproxy POSTGRES_PASSWORD=dbpassword9090
+HealthCmd='pg_isready -d litellm -U llmproxy'
+HealthInterval=1s
+HealthRetries=10
+HealthTimeout=5s
+Image=docker.io/postgres:16
+PublishPort=5432:5432
+Volume=litellm_postgres_data:/var/lib/postgresql/data:z
+
+[Service]
+Restart=always
+
+[Install]
+WantedBy=default.target
diff --git a/active/container_litellm/quadlets/litellm.container b/active/container_litellm/quadlets/litellm.container
new file mode 100644
index 0000000..42301de
--- /dev/null
+++ b/active/container_litellm/quadlets/litellm.container
@@ -0,0 +1,19 @@
+[Unit]
+Requires=litellm-db.service
+
+[Container]
+Environment=DATABASE_URL=postgresql://llmproxy:dbpassword9090@host.containers.internal:5432/litellm STORE_MODEL_IN_DB=True
+EnvironmentFile=/home/ai/litellm.env
+HealthCmd="python3 -c \"import urllib.request; urllib.request.urlopen('http://localhost:4000/health/liveliness')\""
+HealthInterval=30s
+HealthRetries=3
+HealthStartPeriod=40s
+HealthTimeout=10s
+Image=docker.litellm.ai/berriai/litellm:main-latest
+PublishPort=4000:4000
+
+[Service]
+Restart=always
+
+[Install]
+WantedBy=default.target
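
The `.container` files above are what systemd's quadlet generator turns into regular service units. If you want to inspect the generated units on the server before starting anything, one way (run as the `ai` user, e.g. via `machinectl shell ai@`) is:

```bash
# Reload so the quadlet generator picks up the .container files
systemctl --user daemon-reload

# Print the full service units generated from litellm.container and litellm-db.container
systemctl --user cat litellm.service litellm-db.service
```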