diff --git a/active/container_litellm/compose/README.md b/active/container_litellm/compose/README.md deleted file mode 100644 index 54893f3..0000000 --- a/active/container_litellm/compose/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# Compose - -Put your compose.yaml here. diff --git a/active/container_litellm/compose/compose.yaml b/active/container_litellm/compose/compose.yaml index 7eb5bf6..a859067 100644 --- a/active/container_litellm/compose/compose.yaml +++ b/active/container_litellm/compose/compose.yaml @@ -1,27 +1,35 @@ services: litellm: - image: docker.litellm.ai/berriai/litellm:main-latest + image: docker.litellm.ai/berriai/litellm:main-stable + ######################################### + ## Uncomment these lines to start proxy with a config.yaml file ## + # volumes: + # - ./config.yaml:/app/config.yaml + # command: + # - "--config=/app/config.yaml" + ############################################## ports: - - 4000:4000 - env_file: /home/ai/litellm.env + - "4000:4000" # Map the container port to the host, change the host port if necessary environment: - DATABASE_URL: "postgresql://llmproxy:dbpassword9090@host.containers.internal:5432/litellm" - STORE_MODEL_IN_DB: "True" - restart: unless-stopped + DATABASE_URL: "postgresql://llmproxy:dbpassword9090@db:5432/litellm" + STORE_MODEL_IN_DB: "True" # allows adding models to proxy via UI + env_file: + - ../secrets/litellm.env # Load local .env file depends_on: - - litellm-db # Indicates that this service depends on the 'litellm-db' service, ensuring 'litellm-db' starts first - healthcheck: # Defines the health check configuration for the container + - db # Indicates that this service depends on the 'db' service, ensuring 'db' starts first + healthcheck: # Defines the health check configuration for the container test: - CMD-SHELL - - python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:4000/health/liveliness')" # Command to execute for health check - interval: 30s # Perform health check every 30 
seconds - timeout: 10s # Health check command times out after 10 seconds - retries: 3 # Retry up to 3 times if health check fails - start_period: 40s # Wait 40 seconds after container start before beginning health checks + - python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:4000/health/liveliness')" # Command to execute for health check + interval: 30s # Perform health check every 30 seconds + timeout: 10s # Health check command times out after 10 seconds + retries: 3 # Retry up to 3 times if health check fails + start_period: 40s # Wait 40 seconds after container start before beginning health checks - litellm-db: + db: image: docker.io/postgres:16 restart: always + container_name: litellm_db environment: POSTGRES_DB: litellm POSTGRES_USER: llmproxy @@ -29,9 +37,26 @@ services: ports: - "5432:5432" volumes: - - litellm_postgres_data:/var/lib/postgresql/data:z + - postgres_data:/var/lib/postgresql/data # Persists Postgres data across container restarts healthcheck: test: ["CMD-SHELL", "pg_isready -d litellm -U llmproxy"] interval: 1s timeout: 5s retries: 10 + + prometheus: + image: docker.io/prom/prometheus + volumes: + - prometheus_data:/prometheus + - ../secrets/prometheus.yml:/etc/prometheus/prometheus.yml + ports: + - "9090:9090" + command: + - "--config.file=/etc/prometheus/prometheus.yml" + - "--storage.tsdb.path=/prometheus" + - "--storage.tsdb.retention.time=15d" + restart: always + +volumes: + prometheus_data: + postgres_data: diff --git a/active/container_litellm/config.yaml b/active/container_litellm/config.yaml deleted file mode 100644 index e1da826..0000000 --- a/active/container_litellm/config.yaml +++ /dev/null @@ -1,67 +0,0 @@ -# General settings - -general_settings: - request_timeout: 600 - -# Models -model_list: - # Qwen3.5-35B variants - - model_name: qwen3.5-35b-think-general - litellm_params: - model: openai/qwen3.5-35b-a3b - api_base: https://llama-cpp.reeselink.com - api_key: none - temperature: 1.0 - top_p: 0.95 - 
presence_penalty: 1.5 - extra_body: - top_k: 20 - min_p: 0.0 - repetition_penalty: 1.0 - chat_template_kwargs: - enable_thinking: true - - - model_name: qwen3.5-35b-think-code - litellm_params: - model: openai/qwen3.5-35b-a3b - api_base: https://llama-cpp.reeselink.com - api_key: none - temperature: 0.6 - top_p: 0.95 - presence_penalty: 0.0 - extra_body: - top_k: 20 - min_p: 0.0 - repetition_penalty: 1.0 - chat_template_kwargs: - enable_thinking: true - - - model_name: qwen3.5-35b-instruct-general - litellm_params: - model: openai/qwen3.5-35b-a3b - api_base: https://llama-cpp.reeselink.com - api_key: none - temperature: 0.7 - top_p: 0.8 - presence_penalty: 1.5 - extra_body: - top_k: 20 - min_p: 0.0 - repetition_penalty: 1.0 - chat_template_kwargs: - enable_thinking: false - - - model_name: qwen3.5-35b-instruct-reasoning - litellm_params: - model: openai/qwen3.5-35b-a3b - api_base: https://llama-cpp.reeselink.com - api_key: none - temperature: 1.0 - top_p: 0.95 - presence_penalty: 1.5 - extra_body: - top_k: 20 - min_p: 0.0 - repetition_penalty: 1.0 - chat_template_kwargs: - enable_thinking: false diff --git a/active/container_litellm/litellm.md b/active/container_litellm/litellm.md index e8096d8..8954fe7 100644 --- a/active/container_litellm/litellm.md +++ b/active/container_litellm/litellm.md @@ -9,9 +9,8 @@ - [Convert litellm compose spec to quadlets](#convert-litellm-compose-spec-to-quadlets) - [Create the litellm.env file](#create-the-litellmenv-file) - [Start and enable your systemd quadlet](#start-and-enable-your-systemd-quadlet) + - [Install via Ansible](#install-via-ansible) - [Expose litellm](#expose-litellm) - - [Using LiteLLM](#using-litellm) - - [Adding Models](#adding-models) - [Testing Models](#testing-models) - [Backup litellm](#backup-litellm) - [Upgrade litellm](#upgrade-litellm) @@ -110,63 +109,28 @@ journalctl --user -u litellm -f systemctl --user enable --now podman-auto-update.timer ``` +### Install via Ansible + +Preview changes with a dry run: 
+ +```bash +ansible-playbook -i ansible/inventory.yaml active/container_litellm/playbook.yml --check --diff +``` + +Run the playbook from the Homelab root: + +```bash +ansible-playbook -i ansible/inventory.yaml active/container_litellm/playbook.yml +``` + +This copies the quadlets, config, reloads the systemd user daemon, and starts both `litellm-db` and `litellm` services as the `ai` user. + ### Expose litellm 1. If you need a domain, follow the [DDNS instructions](/active/container_ddns/ddns.md#install-a-new-ddns-service) 2. For a web service, follow the [Caddy instructions](/active/container_caddy/caddy.md#adding-a-new-caddy-record) 3. Finally, follow your OS's guide for opening ports via its firewall service. -## Using LiteLLM - -### Adding Models - -```json -// qwen3.5-35b-a3b-thinking -{ - "temperature": 1, - "top_p": 0.95, - "presence_penalty": 1.5, - "extra_body": { - "top_k": 20, - "min_p": 0, - "repetition_penalty": 1, - "chat_template_kwargs": { - "enable_thinking": true - } - } -} - -// qwen3.5-35b-a3b-coding -{ - "temperature": 0.6, - "top_p": 0.95, - "presence_penalty": 0, - "extra_body": { - "top_k": 20, - "min_p": 0, - "repetition_penalty": 1, - "chat_template_kwargs": { - "enable_thinking": true - } - } -} - -// qwen3.5-35b-a3b-instruct -{ - "temperature": 0.7, - "top_p": 0.8, - "presence_penalty": 1.5, - "extra_body": { - "top_k": 20, - "min_p": 0, - "repetition_penalty": 1, - "chat_template_kwargs": { - "enable_thinking": false - } - } -} -``` - ### Testing Models ```bash @@ -191,7 +155,7 @@ curl -L -X POST 'https://aipi.reeseapps.com/v1/chat/completions' \ ## Backup litellm -Follow the [Borg Backup instructions](/active/systemd_borg/borg.md#set-up-a-client-for-backup) +Follow the [Borg Backup instructions](/active/software_borg/borg.md#set-up-a-client-for-backup) ## Upgrade litellm diff --git a/active/container_litellm/playbook.yml b/active/container_litellm/playbook.yml new file mode 100644 index 0000000..d539548 --- /dev/null +++ 
b/active/container_litellm/playbook.yml @@ -0,0 +1,86 @@ +--- +- name: Install and start LiteLLM quadlets for ai user + hosts: ai-ai + remote_user: ai + + vars: + ai_user: ai + quadlets_dir: "/home/{{ ai_user }}/.config/containers/systemd" + + tasks: + - name: Ensure ai user home directories exist + ansible.builtin.file: + path: "{{ item }}" + state: directory + owner: "{{ ai_user }}" + group: "{{ ai_user }}" + mode: "0755" + loop: + - "{{ quadlets_dir }}" + + - name: Copy litellm container pod + ansible.builtin.copy: + src: quadlets/litellm.pod + dest: "{{ quadlets_dir }}/litellm.pod" + owner: "{{ ai_user }}" + group: "{{ ai_user }}" + mode: "0644" + + - name: Copy litellm container quadlet + ansible.builtin.copy: + src: quadlets/litellm-web.container + dest: "{{ quadlets_dir }}/litellm-web.container" + owner: "{{ ai_user }}" + group: "{{ ai_user }}" + mode: "0644" + + - name: Copy litellm-db container quadlet + ansible.builtin.copy: + src: quadlets/litellm-db.container + dest: "{{ quadlets_dir }}/litellm-db.container" + owner: "{{ ai_user }}" + group: "{{ ai_user }}" + mode: "0644" + + - name: Copy prometheus container quadlet + ansible.builtin.copy: + src: quadlets/litellm-prometheus.container + dest: "{{ quadlets_dir }}/litellm-prometheus.container" + owner: "{{ ai_user }}" + group: "{{ ai_user }}" + mode: "0644" + + - name: Copy prometheus config + ansible.builtin.copy: + src: secrets/litellm-prometheus.yaml + dest: "/home/{{ ai_user }}/litellm-prometheus.yaml" + owner: "{{ ai_user }}" + group: "{{ ai_user }}" + mode: "0644" + + - name: Copy litellm.env file + ansible.builtin.copy: + src: secrets/litellm.env + dest: "/home/{{ ai_user }}/litellm.env" + owner: "{{ ai_user }}" + group: "{{ ai_user }}" + mode: "0600" + + - name: Copy litellm-config.yaml + ansible.builtin.copy: + src: secrets/litellm-config.yaml + dest: "/home/{{ ai_user }}/litellm-config.yaml" + owner: "{{ ai_user }}" + group: "{{ ai_user }}" + mode: "0644" + + - name: Reload systemd user daemon + 
ansible.builtin.systemd: + daemon_reload: true + scope: user + + - name: Restart litellm pod + ansible.builtin.systemd: + name: litellm-pod + state: restarted + scope: user diff --git a/active/container_litellm/quadlets/litellm-db.container b/active/container_litellm/quadlets/litellm-db.container index 9c77d9f..10c0fd8 100644 --- a/active/container_litellm/quadlets/litellm-db.container +++ b/active/container_litellm/quadlets/litellm-db.container @@ -1,12 +1,13 @@ [Container] +Pod=litellm.pod +ContainerName=litellm-db Environment=POSTGRES_DB=litellm POSTGRES_USER=llmproxy POSTGRES_PASSWORD=dbpassword9090 HealthCmd='pg_isready -d litellm -U llmproxy' HealthInterval=1s HealthRetries=10 HealthTimeout=5s Image=docker.io/postgres:16 -PublishPort=5432:5432 -Volume=litellm_postgres_data:/var/lib/postgresql/data:z +Volume=litellm_postgres_data:/var/lib/postgresql/data [Service] Restart=always diff --git a/active/container_litellm/quadlets/litellm-prometheus.container b/active/container_litellm/quadlets/litellm-prometheus.container new file mode 100644 index 0000000..271cb6a --- /dev/null +++ b/active/container_litellm/quadlets/litellm-prometheus.container @@ -0,0 +1,13 @@ +[Container] +Pod=litellm.pod +ContainerName=litellm-prom +Exec='--config.file=/etc/prometheus/prometheus.yml' '--storage.tsdb.path=/prometheus' '--storage.tsdb.retention.time=15d' +Image=docker.io/prom/prometheus +Volume=litellm_prometheus_data:/prometheus +Volume=/home/ai/litellm-prometheus.yaml:/etc/prometheus/prometheus.yml:z + +[Service] +Restart=always + +[Install] +WantedBy=default.target diff --git a/active/container_litellm/quadlets/litellm.container b/active/container_litellm/quadlets/litellm-web.container similarity index 50% rename from active/container_litellm/quadlets/litellm.container rename to active/container_litellm/quadlets/litellm-web.container index 42301de..4fa82b2 100644 --- a/active/container_litellm/quadlets/litellm.container +++ 
b/active/container_litellm/quadlets/litellm-web.container @@ -2,18 +2,18 @@ Requires=litellm-db.service [Container] -Environment=DATABASE_URL=postgresql://llmproxy:dbpassword9090@host.containers.internal:5432/litellm STORE_MODEL_IN_DB=True +Pod=litellm.pod +ContainerName=litellm-web +Environment=DATABASE_URL=postgresql://llmproxy:dbpassword9090@localhost:5432/litellm STORE_MODEL_IN_DB=True EnvironmentFile=/home/ai/litellm.env HealthCmd="python3 -c \"import urllib.request; urllib.request.urlopen('http://localhost:4000/health/liveliness')\"" HealthInterval=30s HealthRetries=3 HealthStartPeriod=40s HealthTimeout=10s -Image=docker.litellm.ai/berriai/litellm:main-latest -PublishPort=4000:4000 - -[Service] -Restart=always +Image=ghcr.io/berriai/litellm-database:v1.83.14-stable.patch.3 +Volume=/home/ai/litellm-config.yaml:/app/config.yaml:z +Exec=--config=/app/config.yaml [Install] WantedBy=default.target diff --git a/active/container_litellm/quadlets/litellm.pod b/active/container_litellm/quadlets/litellm.pod new file mode 100644 index 0000000..6b0aac7 --- /dev/null +++ b/active/container_litellm/quadlets/litellm.pod @@ -0,0 +1,3 @@ +[Pod] +# litellm web interface +PublishPort=4000:4000/tcp