From 4f3102a2ffa295c453fa4a78f3158f283e212d4f Mon Sep 17 00:00:00 2001 From: ducoterra Date: Mon, 19 Jan 2026 20:50:05 -0500 Subject: [PATCH] local ai checkpoint --- .../framework_desktop.md | 363 +++++++----------- .../quadlets/ai.network | 2 + .../device_framework_desktop/quadlets/ai.pod | 7 +- .../quadlets/anything-llm.container | 21 - .../quadlets/llama-server.container | 30 +- .../quadlets/ollama-server.container | 19 - .../quadlets/open-webui.container | 5 + .../stable-diffusion-edit-server.container | 41 ++ .../stable-diffusion-gen-server.container | 41 ++ 9 files changed, 253 insertions(+), 276 deletions(-) create mode 100644 active/device_framework_desktop/quadlets/ai.network delete mode 100644 active/device_framework_desktop/quadlets/anything-llm.container delete mode 100644 active/device_framework_desktop/quadlets/ollama-server.container create mode 100644 active/device_framework_desktop/quadlets/stable-diffusion-edit-server.container create mode 100644 active/device_framework_desktop/quadlets/stable-diffusion-gen-server.container diff --git a/active/device_framework_desktop/framework_desktop.md b/active/device_framework_desktop/framework_desktop.md index 1250e36..5644f59 100644 --- a/active/device_framework_desktop/framework_desktop.md +++ b/active/device_framework_desktop/framework_desktop.md @@ -1,5 +1,23 @@ # Framework Desktop +- [Framework Desktop](#framework-desktop) + - [BIOS](#bios) + - [References](#references) + - [Notes](#notes) + - [Volume Locations](#volume-locations) + - [Setup](#setup) + - [Create the AI user](#create-the-ai-user) + - [Create the models dir](#create-the-models-dir) + - [Install the Hugging Face CLI](#install-the-hugging-face-cli) + - [Download models](#download-models) + - [Text models](#text-models) + - [Image models](#image-models) + - [Create the systemd-ai pod](#create-the-systemd-ai-pod) + - [llama.cpp](#llamacpp) + - [stable-diffusion.cpp](#stable-diffusioncpp) + - [open-webui](#open-webui) + - [Install the whole thing with quadlets (TM)](#install-the-whole-thing-with-quadlets-tm) + ## BIOS @@ -16,18 +34,13 @@ ## Notes -### Update quadlets - -```bash -scp -r active/device_framework_desktop/quadlets/* deskwork-ai:quadlets/ -podman quadlet install --replace quadlets/* -``` - ### Volume Locations `~/.local/share/containers/storage/volumes/` -## User +## Setup + +### Create the AI user ```bash # Create your local ai user. This will be the user you launch podman processes from. @@ -37,260 +50,168 @@ su -l ai mkdir -p ~/.config/containers/systemd/ ``` -## Llama.cpp +Models are big. You'll want some tools to help find large files quickly when space runs out. 
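+Before kicking off a multi-hundred-gigabyte download, check how much headroom
+the models filesystem has:
+
+```bash
+# Plain coreutils; /home is where the models dir lives in this setup
+df -h /home
+```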
+ +Add this to your .bashrc: + +```bash +# Calculate all folder sizes in current dir +alias {dudir,dud}='du -h --max-depth 1 | sort -h' + +# Calculate all file sizes in current dir +alias {dufile,duf}='ls -lhSr' +``` + +### Create the models dir + +```bash +mkdir -p /home/ai/models/{text,image,video} +``` + +### Install the Hugging Face CLI + + + +```bash +# Install +curl -LsSf https://hf.co/cli/install.sh | bash + +# Login +hf auth login +``` + +### Download models + +#### Text models + + + +```bash +# gpt-oss-120b +mkdir /home/ai/models/text/gpt-oss-120b +hf download --local-dir /home/ai/models/text/gpt-oss-120b ggml-org/gpt-oss-120b-GGUF + +# devstral-2-123b +mkdir /home/ai/models/text/devstral-2-123b +hf download --local-dir /home/ai/models/text/devstral-2-123b unsloth/Devstral-2-123B-Instruct-2512-GGUF Q4_K_M/Devstral-2-123B-Instruct-2512-Q4_K_M-00001-of-00002.gguf +hf download --local-dir /home/ai/models/text/devstral-2-123b unsloth/Devstral-2-123B-Instruct-2512-GGUF Q4_K_M/Devstral-2-123B-Instruct-2512-Q4_K_M-00002-of-00002.gguf + +# devstral-small-2-24b +mkdir /home/ai/models/text/devstral-small-2-24b +hf download --local-dir /home/ai/models/text/devstral-small-2-24b unsloth/Devstral-Small-2-24B-Instruct-2512-GGUF Devstral-Small-2-24B-Instruct-2512-Q4_K_M.gguf +``` + +#### Image models + +### Create the systemd-ai pod + +You'll at least want the ai pod and network. Copy `ai.pod` and `ai.network` out +of `quadlets` into `~/.config/containers/systemd`. + +Then run `systemctl --user daemon-reload && systemctl --user start ai-pod` + +## llama.cpp + + ```bash # Build the llama.cpp container image git clone https://github.com/ggml-org/llama.cpp.git +cd llama.cpp export BUILD_TAG=$(date +"%Y-%m-%d-%H-%M-%S") -podman build -t llama-cpp-vulkan:${BUILD_TAG} -f .devops/vulkan.Dockerfile . +podman build -t llama-cpp-vulkan:${BUILD_TAG} -t llama-cpp-vulkan:latest -f .devops/vulkan.Dockerfile . 
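+
+# Sanity check (optional): confirm both tags landed in local storage
+podman images localhost/llama-cpp-vulkan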
-# Run llama server with gpt-oss-120b
+# Run llama server (available on port 8000)
+# Add `--n-cpu-moe 32` for gpt-oss-120b to keep expert layers on the CPU and only a minimal number in GPU memory
 podman run \
--d \
---replace \
---restart always \
---name=llama-server \
--p 8000:8000 \
+--rm \
+--name llama-server-demo \
+--pod systemd-ai \
 --device=/dev/kfd \
 --device=/dev/dri \
--v llama-server-cache:/root/.cache \
-localhost/llama-cpp-vulkan:2026-01-12-10-13-30 \
--hf ggml-org/gpt-oss-120b-GGUF --ctx-size 32000 --jinja -ub 2048 -b 2048 \
---port 8000 --host 0.0.0.0 -n -1 --n-gpu-layers 999
-
-# To enable autostart, you'll need to create a quadlet
-# Quadlets are documented in podman manual pages
-# Search for "EXAMPLES" when you run the below command
-# Put your quadlet at ~/.config/containers/systemd/
-man "podman-systemd.unit(5)"
-
-# Run llama server with devstral-small-2 24b
-podman run \
--d \
---name=llama-server-devstral \
---network=host \
---device=/dev/kfd \
---device=/dev/dri \
--v llama-server-cache:/root/.cache \
-llama-cpp-vulkan:${BUILD_TAG} \
--hf bartowski/mistralai_Devstral-Small-2-24B-Instruct-2512-GGUF \
---ctx-size 0 --jinja -ub 2048 -b 2048 \
---port 8001 --host 0.0.0.0 -n -1 --n-gpu-layers 999
-
-# Firewall
-firewall-cmd --add-port=8000/tcp --permanent
-firewall-cmd --reload
+-v /home/ai/models/text:/models:z \
+localhost/llama-cpp-vulkan:latest \
+--port 8000 \
+-c 0 \
+-b 2048 \
+-ub 2048 \
+--perf \
+--n-gpu-layers all \
+--jinja \
+--models-max 1 \
+--models-dir /models
 ```
 
-## Ollama
+## stable-diffusion.cpp
 
 ```bash
-# Install CLI
-curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | tar xz -C ~/.local
-
-# Add export OLLAMA_HOST=127.0.0.1
-vim ~/.bashrc.d/ollama.sh
+git clone https://github.com/leejet/stable-diffusion.cpp.git
+cd stable-diffusion.cpp
+git submodule update --init --recursive
+export BUILD_TAG=$(date +"%Y-%m-%d-%H-%M-%S")
+podman build -f Dockerfile.vulkan -t stable-diffusion-cpp:${BUILD_TAG} -t stable-diffusion-cpp:latest .
 ```
 
-```bash
-# Run ollama
-# Will be available on port 11434
-podman run \
--d \
---restart always \
---device /dev/kfd \
---device /dev/dri \
--v ollama:/root/.ollama \
--e OLLAMA_VULKAN=1 \
---name ollama \
---network host \
-docker.io/ollama/ollama:0.13.5
-
-# Run an image
-podman exec -it ollama ollama run gpt-oss:20b
-
-# Firewall
-firewall-cmd --add-port=11434/tcp --permanent
-firewall-cmd --reload
-```
-
-## Anything LLM
-
-Per [the docs](https://docs.anythingllm.com/installation-docker/cloud-docker):
-
-> Note --cap-add SYS_ADMIN is a required command if you want to scrape webpages.
-> We use PuppeeteerJS to scrape websites links and --cap-add SYS_ADMIN lets us
-> use sandboxed Chromium across all runtimes for best security practices
-
-```bash
-mkdir /etc/anything-llm
-touch /etc/anything-llm/.env
-chown 1000:1000 /etc/anything-llm/.env
-chmod 600 /etc/anything-llm/.env
-
-# Add JWT_SECRET= to this file
-vim /etc/anything-llm/.env
-
-# Server will be accessible on port 3001
-# Connect llama.cpp as a generic OpenAI LLM provider and use host
-# http://172.17.0.1:3001/v1
-# Chat model name doesn't matter.
-podman run \ --d \ ---restart always \ ---network host \ ---name anythingllm \ ---cap-add SYS_ADMIN \ --v anythingllm:/app/server/storage \ --v /etc/anything-llm/.env:/app/server/.env \ --e STORAGE_DIR="/app/server/storage" \ -docker.io/mintplexlabs/anythingllm - -# Firewall -firewall-cmd --add-port=3001/tcp --permanent -firewall-cmd --reload -``` - -## Stable Diffusion CPP - ```bash # z-turbo podman run --rm \ --v /home/ai/stable-diffusion.cpp/models:/models:z \ --v /home/ai/stable-diffusion.cpp/output:/output:z \ +-v /home/ai/models:/models:z \ +-v /home/ai/output:/output:z \ --device /dev/kfd \ --device /dev/dri \ -ghcr.io/leejet/stable-diffusion.cpp:master-vulkan \ ---diffusion-model /models/z_turbo/z_image_turbo_bf16.safetensors \ ---vae /models/z_turbo/ae.safetensors \ ---llm /models/z_turbo/qwen_3_4b.safetensors \ +localhost/stable-diffusion-cpp:latest \ +--diffusion-model /models/image/z-turbo/z_image_turbo-Q4_K.gguf \ +--vae /models/image/z-turbo/ae.safetensors \ +--llm /models/image/z-turbo/Qwen3-4B-Instruct-2507-Q4_K_M.gguf \ --cfg-scale 1.0 \ -v \ --H 1024 \ --W 512 \ ---seed -1 \ --o /output/output.png \ --p "Framework Laptop 13" - -# Flux -podman run --rm \ --v /srv/stable-diffusion.cpp/models:/models:z \ --v ./output:/output:z \ ---device /dev/kfd \ ---device /dev/dri \ -ghcr.io/leejet/stable-diffusion.cpp:master-vulkan \ ---diffusion-model /models/flux/flux1-dev-q4_k.gguf \ ---vae /models/flux/ae.safetensors \ ---clip_l /models/flux/clip_l.safetensors \ ---t5xxl /models/flux/t5xxl_fp16.safetensors \ ---cfg-scale 1.0 \ ---sampling-method euler \ --v \ --H 512 \ --W 512 \ ---seed -1 \ ---steps 20 \ --o /output/output.png \ --p "An Everquest video game poster but with ribeye steaks for heads with the words 'EverSteak'" - -# Flux2 -podman run --rm \ --v /home/ai/stable-diffusion.cpp/models:/models:z \ --v /home/ai/stable-diffusion.cpp/output:/output:z \ ---device /dev/kfd \ ---device /dev/dri \ -ghcr.io/leejet/stable-diffusion.cpp:master-vulkan \ ---diffusion-model /models/flux2/flux2-dev-Q8_0.gguf \ ---vae /models/flux2/ae.safetensors \ ---llm /models/flux2/Mistral-Small-3.2-24B-Instruct-2506-Q8_0.gguf \ ---cfg-scale 1.0 \ ---sampling-method euler \ --v \ -H 512 \ -W 1024 \ --seed -1 \ ---steps 10 \ +--steps 8 \ -o /output/output.png \ --p "A picture of sign that says 'framework'" +-p "A watercolor dragon with flowing ink lines, pastel palette, white paper background, soft brush strokes, high-resolution" -# Qwen +# Edit with flux kontext podman run --rm \ --v /home/ai/stable-diffusion.cpp/models:/models:z \ --v /home/ai/stable-diffusion.cpp/output:/output:z \ +-v /home/ai/models:/models:z \ +-v /home/ai/output:/output:z \ --device /dev/kfd \ --device /dev/dri \ -ghcr.io/leejet/stable-diffusion.cpp:master-vulkan \ ---diffusion-model /models/qwen_image/Qwen_Image-Q4_K_M.gguf \ ---vae /models/qwen_image/qwen_image_vae.safetensors \ ---llm /models/qwen_image/Qwen2.5-VL-7B-Instruct.Q4_K_M.gguf \ ---cfg-scale 2.5 \ ---sampling-method euler \ --v \ ---offload-to-cpu \ --H 512 -W 512 \ ---flow-shift 3 \ ---seed -1 \ --o /output/output.png \ --p 'Everquest DND mash up poster that says "ever dungeons and dragons"' - -# SD3 -podman run --rm \ --v /home/ai/stable-diffusion.cpp/models:/models:z \ --v /home/ai/stable-diffusion.cpp/output:/output:z \ ---device /dev/kfd \ ---device /dev/dri \ -ghcr.io/leejet/stable-diffusion.cpp:master-vulkan \ --m /models/sd3/sd3.5_large.safetensors \ ---clip_l /models/sd3/clip_l.safetensors \ ---clip_g /models/sd3/clip_g.safetensors \ ---t5xxl 
/models/sd3/t5xxl_fp16.safetensors \
--H 512 -W 512 \
---cfg-scale 4.5 \
---sampling-method euler \
--v \
---seed -1 \
--o /output/output.png \
--p 'Everquest DND mash up poster that says "ever dungeons and dragons"'
-```
-
-### Stable Diffusion CPP Server
-
-Uses OpenAI Compatible Endpoints
-
-```bash
-# z-turbo server
-podman run \
--d \
---name stable-diffusion-cpp-server \
--v /srv/stable-diffusion.cpp/models:/models \
--v /srv/stable-diffusion.cpp/build:/output \
---device /dev/kfd \
---device /dev/dri \
---entrypoint "/sd-server" \
---network host \
-ghcr.io/leejet/stable-diffusion.cpp:master-vulkan \
---diffusion-model /models/z_turbo/z_image_turbo_bf16.safetensors \
---vae /models/z_turbo/ae.safetensors \
---llm /models/z_turbo/qwen_3_4b.safetensors \
+localhost/stable-diffusion-cpp:latest \
+--diffusion-model /models/image/flux-1-kontext/flux1-kontext-dev-Q4_K_M.gguf \
+--vae /models/image/flux-1-kontext/ae.safetensors \
+--clip_l /models/image/flux-1-kontext/clip_l.safetensors \
+--t5xxl /models/image/flux-1-kontext/t5xxl_fp16.safetensors \
 --cfg-scale 1.0 \
--v \
---diffusion-fa \
--H 1024 \
--W 512 \
+--sampling-method euler \
 --seed -1 \
--l 0.0.0.0
+--steps 20 \
+-H 512 \
+-W 1024 \
+-r /output/everquest_logo.png \
+-p "change 'EverQuest' to 'EverSteak'" \
+-o /output/output.png
 ```
 
-## Openai API Web UI
+## open-webui
 
 ```bash
 # Will be available on port 8080
 podman run \
 -d \
---network host \
+--pod systemd-ai \
 -v open-webui:/app/backend/data \
 --name open-webui \
 --restart always \
 ghcr.io/open-webui/open-webui:main
 ```
+
+## Install the whole thing with quadlets (TM)
+
+```bash
+scp -r active/device_framework_desktop/quadlets/* deskwork-ai:.config/containers/systemd/
+ssh deskwork-ai
+systemctl --user daemon-reload
+systemctl --user restart ai-pod.service
+```
diff --git a/active/device_framework_desktop/quadlets/ai.network b/active/device_framework_desktop/quadlets/ai.network
new file mode 100644
index 0000000..ce02935
--- /dev/null
+++ b/active/device_framework_desktop/quadlets/ai.network
@@ -0,0 +1,2 @@
+[Network]
+IPv6=true
\ No newline at end of file
diff --git a/active/device_framework_desktop/quadlets/ai.pod b/active/device_framework_desktop/quadlets/ai.pod
index d0f7f01..a646415 100644
--- a/active/device_framework_desktop/quadlets/ai.pod
+++ b/active/device_framework_desktop/quadlets/ai.pod
@@ -1,9 +1,10 @@
 [Pod]
+Network=ai.network
 # llama.cpp
 PublishPort=8000:8000/tcp
 # open-webui
 PublishPort=8080:8080/tcp
-# anything-llm
-PublishPort=3001:3001/tcp
-# ollama
-PublishPort=11434:11434/tcp
\ No newline at end of file
+# stable-diffusion.cpp gen server
+PublishPort=1234:1234/tcp
+# stable-diffusion.cpp edit server
+PublishPort=1235:1235/tcp
\ No newline at end of file
diff --git a/active/device_framework_desktop/quadlets/anything-llm.container b/active/device_framework_desktop/quadlets/anything-llm.container
deleted file mode 100644
index e3e7f1e..0000000
--- a/active/device_framework_desktop/quadlets/anything-llm.container
+++ /dev/null
@@ -1,21 +0,0 @@
-[Unit]
-Description=An Anything LLM Frontend for Local AI Services
-
-[Container]
-Pod=ai.pod
-Image=docker.io/mintplexlabs/anythingllm
-Volume=anythingllm:/app/server/storage
-Volume=/home/ai/anything-llm/.env:/app/server/.env:z
-Environment=STORAGE_DIR=/app/server/storage
-AddCapability=SYS_ADMIN
-User=1000
-Group=1000
-
-[Service]
-Restart=always
-# Extend Timeout to allow time to pull the image
-TimeoutStartSec=900
-
-[Install]
-# Start by default on boot
-WantedBy=multi-user.target default.target
\ No newline at end of file
diff --git 
a/active/device_framework_desktop/quadlets/llama-server.container b/active/device_framework_desktop/quadlets/llama-server.container index fdaf762..b9fe840 100644 --- a/active/device_framework_desktop/quadlets/llama-server.container +++ b/active/device_framework_desktop/quadlets/llama-server.container @@ -2,21 +2,29 @@ Description=A Llama CPP Server Running GPT OSS 120b [Container] +# Shared AI pod Pod=ai.pod -Image=localhost/llama-cpp-vulkan:2026-01-12-10-13-30 -Volume=llama-server-cache:/root/.cache + +# Image is built locally via podman build +Image=localhost/llama-cpp-vulkan:latest + +# Downloaded models volume +Volume=/home/ai/models/text:/models:z + +# GPU Device AddDevice=/dev/kfd AddDevice=/dev/dri -Exec=-hf ggml-org/gpt-oss-120b-GGUF \ ---ctx-size 32000 \ ---jinja \ --ub 2048 \ --b 2048 \ ---port 8000 \ ---host 0.0.0.0 \ --n -1 \ ---n-gpu-layers 999 +# Server command +Exec=--port 8000 \ + -c 0 \ + -b 2048 \ + -ub 2048 \ + --perf \ + --n-gpu-layers all \ + --jinja \ + --models-max 1 \ + --models-dir /models [Service] Restart=always diff --git a/active/device_framework_desktop/quadlets/ollama-server.container b/active/device_framework_desktop/quadlets/ollama-server.container deleted file mode 100644 index 84d376f..0000000 --- a/active/device_framework_desktop/quadlets/ollama-server.container +++ /dev/null @@ -1,19 +0,0 @@ -[Unit] -Description=An Ollama Server - -[Container] -Pod=ai.pod -Image=docker.io/ollama/ollama:0.13.5 -Volume=ollama:/root/.ollama -AddDevice=/dev/kfd -AddDevice=/dev/dri -Environment=OLLAMA_VULKAN=1 - -[Service] -Restart=always -# Extend Timeout to allow time to pull the image -TimeoutStartSec=900 - -[Install] -# Start by default on boot -WantedBy=multi-user.target default.target diff --git a/active/device_framework_desktop/quadlets/open-webui.container b/active/device_framework_desktop/quadlets/open-webui.container index ca667bc..ea08e1d 100644 --- a/active/device_framework_desktop/quadlets/open-webui.container +++ b/active/device_framework_desktop/quadlets/open-webui.container @@ -2,8 +2,13 @@ Description=An Open Webui Frontend for Local AI Services [Container] +# Shared AI pod Pod=ai.pod + +# Open Webui base image Image=ghcr.io/open-webui/open-webui:main + +# Nothing too complicated here. Open Webui will basically configure itself. 
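+# Optional (assumption, not required): containers in the ai pod share localhost,
+# so Open WebUI can reach llama.cpp at http://127.0.0.1:8000/v1 through its
+# OpenAI API connection settings, or preseeded here:
+# Environment=OPENAI_API_BASE_URL=http://127.0.0.1:8000/v1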
 Volume=open-webui-data:/app/backend/data
 
 [Service]
diff --git a/active/device_framework_desktop/quadlets/stable-diffusion-edit-server.container b/active/device_framework_desktop/quadlets/stable-diffusion-edit-server.container
new file mode 100644
index 0000000..b69a7f4
--- /dev/null
+++ b/active/device_framework_desktop/quadlets/stable-diffusion-edit-server.container
@@ -0,0 +1,41 @@
+[Unit]
+Description=A Stable Diffusion CPP Server for Editing Images
+
+[Container]
+# Shared AI pod
+Pod=ai.pod
+
+# Vulkan image for AMD GPU
+Image=localhost/stable-diffusion-cpp:latest
+
+# Shared models directory
+Volume=/home/ai/models:/models:z
+
+# GPU Device
+AddDevice=/dev/kfd
+AddDevice=/dev/dri
+
+# Override entrypoint to use server
+Entrypoint=/sd-server
+
+# Server args
+Exec=-l 0.0.0.0 \
+    --listen-port 1235 \
+    --diffusion-model /models/image/flux-1-kontext/flux1-kontext-dev-Q4_K_M.gguf \
+    --vae /models/image/flux-1-kontext/ae.safetensors \
+    --clip_l /models/image/flux-1-kontext/clip_l.safetensors \
+    --t5xxl /models/image/flux-1-kontext/t5xxl_fp16.safetensors \
+    --cfg-scale 1.0 \
+    --sampling-method euler \
+    -v \
+    --seed -1 \
+    --steps 28
+
+[Service]
+Restart=always
+# Extend timeout to allow time to load the model on first start
+TimeoutStartSec=900
+
+[Install]
+# Start by default on boot
+WantedBy=multi-user.target default.target
diff --git a/active/device_framework_desktop/quadlets/stable-diffusion-gen-server.container b/active/device_framework_desktop/quadlets/stable-diffusion-gen-server.container
new file mode 100644
index 0000000..740d0b8
--- /dev/null
+++ b/active/device_framework_desktop/quadlets/stable-diffusion-gen-server.container
@@ -0,0 +1,39 @@
+[Unit]
+Description=A Stable Diffusion CPP Server for Generating Images
+
+[Container]
+# Shared AI pod
+Pod=ai.pod
+
+# Vulkan image for AMD GPU
+Image=localhost/stable-diffusion-cpp:latest
+
+# Shared models directory
+Volume=/home/ai/models:/models:z
+
+# GPU Device
+AddDevice=/dev/kfd
+AddDevice=/dev/dri
+
+# Override entrypoint to use server
+Entrypoint=/sd-server
+
+# Server args
+Exec=-l 0.0.0.0 \
+    --listen-port 1234 \
+    --diffusion-model /models/image/z-turbo/z_image_turbo-Q4_K.gguf \
+    --vae /models/image/z-turbo/ae.safetensors \
+    --llm /models/image/z-turbo/qwen_3_4b.safetensors \
+    --cfg-scale 1.0 \
+    -v \
+    --seed -1 \
+    --steps 8
+
+[Service]
+Restart=always
+# Extend timeout to allow time to load the model on first start
+TimeoutStartSec=900
+
+[Install]
+# Start by default on boot
+WantedBy=multi-user.target default.target