From ed65f8924ded251cfa4e1c1c496b0a930ce80e95 Mon Sep 17 00:00:00 2001
From: ducoterra
Date: Tue, 13 Jan 2026 13:51:42 -0500
Subject: [PATCH] add initial framework desktop config

---
 .../framework_desktop.md                      | 192 +++++++++++++++---
 .../device_framework_desktop/quadlets/ai.pod  |   9 +
 .../quadlets/anything-llm.container           |  21 ++
 .../quadlets/llama-server.container           |  28 +++
 .../quadlets/ollama-server.container          |  19 ++
 .../quadlets/open-webui.container             |  16 ++
 6 files changed, 254 insertions(+), 31 deletions(-)
 create mode 100644 active/device_framework_desktop/quadlets/ai.pod
 create mode 100644 active/device_framework_desktop/quadlets/anything-llm.container
 create mode 100644 active/device_framework_desktop/quadlets/llama-server.container
 create mode 100644 active/device_framework_desktop/quadlets/ollama-server.container
 create mode 100644 active/device_framework_desktop/quadlets/open-webui.container

diff --git a/active/device_framework_desktop/framework_desktop.md b/active/device_framework_desktop/framework_desktop.md
index d0e61d9..1250e36 100644
--- a/active/device_framework_desktop/framework_desktop.md
+++ b/active/device_framework_desktop/framework_desktop.md
@@ -10,45 +10,64 @@
 3. Update grub `grub2-mkconfig -o /boot/grub2/grub.cfg`
 4. Reboot
 
-## Docker
+## References
+
+
+## Notes
+
+### Update quadlets
 
 ```bash
-dnf config-manager addrepo \
---from-repofile https://download.docker.com/linux/fedora/docker-ce.repo
+scp -r active/device_framework_desktop/quadlets/* deskwork-ai:quadlets/
+podman quadlet install --replace quadlets/*
+```
 
-dnf install \
-docker-ce \
-docker-ce-cli \
-containerd.io \
-docker-buildx-plugin \
-docker-compose-plugin
+### Volume Locations
+
+`~/.local/share/containers/storage/volumes/`
+
+## User
+
+```bash
+# Create your local ai user. This will be the user you launch podman processes from.
+useradd -m ai
+loginctl enable-linger ai
+su -l ai
+mkdir -p ~/.config/containers/systemd/
 ```
 
 ## Llama.cpp
 
 ```bash
-# Build the llama.cpp docker image
+# Build the llama.cpp container image
 git clone https://github.com/ggml-org/llama.cpp.git
+cd llama.cpp
 export BUILD_TAG=$(date +"%Y-%m-%d-%H-%M-%S")
-docker build -t llama-cpp-vulkan:${BUILD_TAG} -f .devops/vulkan.Dockerfile .
+podman build -t llama-cpp-vulkan:${BUILD_TAG} -f .devops/vulkan.Dockerfile .
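+
+# Sketch (addition, not in the original notes): also tag the dated build as
+# :latest so run commands and the llama-server quadlet can point at a stable
+# name instead of a hard-coded timestamp tag
+podman tag llama-cpp-vulkan:${BUILD_TAG} llama-cpp-vulkan:latest
+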
 # Run llama server with gpt-oss-120b
-docker run \
+podman run \
 -d \
+--replace \
 --restart always \
 --name=llama-server \
---network=host \
+-p 8000:8000 \
 --device=/dev/kfd \
 --device=/dev/dri \
 -v llama-server-cache:/root/.cache \
-llama-cpp-vulkan:${BUILD_TAG} \
--hf ggml-org/gpt-oss-120b-GGUF --ctx-size 0 --jinja -ub 2048 -b 2048 \
+localhost/llama-cpp-vulkan:2026-01-12-10-13-30 \
+-hf ggml-org/gpt-oss-120b-GGUF --ctx-size 32000 --jinja -ub 2048 -b 2048 \
 --port 8000 --host 0.0.0.0 -n -1 --n-gpu-layers 999
+
+# To enable autostart, you'll need to create a quadlet
+# Quadlets are documented in the podman manual pages
+# Search for "EXAMPLES" in the output of the command below
+# Put your quadlet at ~/.config/containers/systemd/
+man "podman-systemd.unit(5)"
 
 # Run llama server with devstral-small-2 24b
-docker run \
+podman run \
 -d \
---restart always \
 --name=llama-server-devstral \
 --network=host \
 --device=/dev/kfd \
 --device=/dev/dri \
@@ -66,10 +85,18 @@ firewall-cmd --reload
 
 ## Ollama
 
+```bash
+# Install CLI
+curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | tar xz -C ~/.local
+
+# Add "export OLLAMA_HOST=127.0.0.1" so the ollama CLI talks to the local server
+vim ~/.bashrc.d/ollama.sh
+```
+
 ```bash
 # Run ollama
 # Will be available on port 11434
-docker run \
+podman run \
 -d \
 --restart always \
 --device /dev/kfd \
@@ -81,7 +108,7 @@ docker run \
 docker.io/ollama/ollama:0.13.5
 
-# Run an image
+# Run a model
-docker exec -it ollama ollama run gpt-oss:20b
+podman exec -it ollama ollama run gpt-oss:20b
 
 # Firewall
 firewall-cmd --add-port=11434/tcp --permanent
@@ -107,9 +134,9 @@ vim /etc/anything-llm/.env
 
 # Server will be accessible on port 3001
 # Connect llama.cpp as a generic OpenAI LLM provider and use host
 # http://172.17.0.1:8000/v1
 # Chat model name doesn't matter.
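+# Sketch (addition, not in the original notes): confirm the llama.cpp endpoint
+# responds before wiring it into AnythingLLM
+curl -s http://172.17.0.1:8000/v1/models
+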
-docker run \
+podman run \
 -d \
 --restart always \
 --network host \
@@ -129,9 +156,9 @@ firewall-cmd --reload
 ```bash
 # z-turbo
-docker run --rm \
--v ./models:/models \
--v ./build:/output \
+podman run --rm \
+-v /home/ai/stable-diffusion.cpp/models:/models:z \
+-v /home/ai/stable-diffusion.cpp/output:/output:z \
 --device /dev/kfd \
 --device /dev/dri \
 ghcr.io/leejet/stable-diffusion.cpp:master-vulkan \
@@ -140,17 +167,37 @@ ghcr.io/leejet/stable-diffusion.cpp:master-vulkan \
 --llm /models/z_turbo/qwen_3_4b.safetensors \
 --cfg-scale 1.0 \
 -v \
---diffusion-fa \
 -H 1024 \
 -W 512 \
--o /output/output.png \
 --seed -1 \
+-o /output/output.png \
 -p "Framework Laptop 13"
+
+# Flux
+podman run --rm \
+-v /srv/stable-diffusion.cpp/models:/models:z \
+-v ./output:/output:z \
+--device /dev/kfd \
+--device /dev/dri \
+ghcr.io/leejet/stable-diffusion.cpp:master-vulkan \
+--diffusion-model /models/flux/flux1-dev-q4_k.gguf \
+--vae /models/flux/ae.safetensors \
+--clip_l /models/flux/clip_l.safetensors \
+--t5xxl /models/flux/t5xxl_fp16.safetensors \
+--cfg-scale 1.0 \
+--sampling-method euler \
+-v \
+-H 512 \
+-W 512 \
+--seed -1 \
+--steps 20 \
+-o /output/output.png \
+-p "An Everquest video game poster but with ribeye steaks for heads with the words 'EverSteak'"
+
 # Flux2
-docker run --rm \
--v ./models:/models \
--v ./build:/output \
+podman run --rm \
+-v /home/ai/stable-diffusion.cpp/models:/models:z \
+-v /home/ai/stable-diffusion.cpp/output:/output:z \
 --device /dev/kfd \
 --device /dev/dri \
 ghcr.io/leejet/stable-diffusion.cpp:master-vulkan \
@@ -160,7 +207,90 @@ ghcr.io/leejet/stable-diffusion.cpp:master-vulkan \
 --cfg-scale 1.0 \
 --sampling-method euler \
 -v \
---diffusion-fa \
+-H 512 \
+-W 1024 \
+--seed -1 \
+--steps 10 \
 -o /output/output.png \
 -p "A picture of sign that says 'framework'"
-```
\ No newline at end of file
+
+# Qwen
+podman run --rm \
+-v /home/ai/stable-diffusion.cpp/models:/models:z \
+-v /home/ai/stable-diffusion.cpp/output:/output:z \
+--device /dev/kfd \
+--device /dev/dri \
+ghcr.io/leejet/stable-diffusion.cpp:master-vulkan \
+--diffusion-model /models/qwen_image/Qwen_Image-Q4_K_M.gguf \
+--vae /models/qwen_image/qwen_image_vae.safetensors \
+--llm /models/qwen_image/Qwen2.5-VL-7B-Instruct.Q4_K_M.gguf \
+--cfg-scale 2.5 \
+--sampling-method euler \
+-v \
+--offload-to-cpu \
+-H 512 -W 512 \
+--flow-shift 3 \
+--seed -1 \
+-o /output/output.png \
+-p 'Everquest DND mash up poster that says "ever dungeons and dragons"'
+
+# SD3
+podman run --rm \
+-v /home/ai/stable-diffusion.cpp/models:/models:z \
+-v /home/ai/stable-diffusion.cpp/output:/output:z \
+--device /dev/kfd \
+--device /dev/dri \
+ghcr.io/leejet/stable-diffusion.cpp:master-vulkan \
+-m /models/sd3/sd3.5_large.safetensors \
+--clip_l /models/sd3/clip_l.safetensors \
+--clip_g /models/sd3/clip_g.safetensors \
+--t5xxl /models/sd3/t5xxl_fp16.safetensors \
+-H 512 -W 512 \
+--cfg-scale 4.5 \
+--sampling-method euler \
+-v \
+--seed -1 \
+-o /output/output.png \
+-p 'Everquest DND mash up poster that says "ever dungeons and dragons"'
+```
+
+### Stable Diffusion CPP Server
+
+Uses OpenAI-compatible endpoints.
+
+```bash
+# z-turbo server
+podman run \
+-d \
+--name stable-diffusion-cpp-server \
+-v /srv/stable-diffusion.cpp/models:/models \
+-v /srv/stable-diffusion.cpp/build:/output \
+--device /dev/kfd \
+--device /dev/dri \
+--entrypoint "/sd-server" \
+--network host \
+ghcr.io/leejet/stable-diffusion.cpp:master-vulkan \
+--diffusion-model /models/z_turbo/z_image_turbo_bf16.safetensors \
+--vae /models/z_turbo/ae.safetensors \
+--llm /models/z_turbo/qwen_3_4b.safetensors \
+--cfg-scale 1.0 \
+-v \
+--diffusion-fa \
+-H 1024 \
+-W 512 \
+--seed -1 \
+-l 0.0.0.0
+```
+
+## OpenAI API Web UI
+
+```bash
+# Will be available on port 8080
+podman run \
+-d \
+--network host \
+-v open-webui:/app/backend/data \
+--name open-webui \
+--restart always \
+ghcr.io/open-webui/open-webui:main
+```
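+
+## Managing the pod
+
+A sketch (addition, not in the original notes): once the quadlets are
+installed for the `ai` user (see "Update quadlets" above), quadlet generates
+`ai-pod.service` from `ai.pod` and a `<name>.service` unit per `.container`
+file, all manageable with systemctl.
+
+```bash
+# Pick up newly installed or changed quadlets
+systemctl --user daemon-reload
+
+# Starting a member container also brings up the pod it is bound to
+systemctl --user start llama-server.service
+systemctl --user status ai-pod.service
+
+# Tail the logs of a single container
+journalctl --user -u llama-server.service -f
+```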
diff --git a/active/device_framework_desktop/quadlets/ai.pod b/active/device_framework_desktop/quadlets/ai.pod
new file mode 100644
index 0000000..d0f7f01
--- /dev/null
+++ b/active/device_framework_desktop/quadlets/ai.pod
@@ -0,0 +1,9 @@
+[Pod]
+# llama.cpp
+PublishPort=8000:8000/tcp
+# open-webui
+PublishPort=8080:8080/tcp
+# anything-llm
+PublishPort=3001:3001/tcp
+# ollama
+PublishPort=11434:11434/tcp
\ No newline at end of file
diff --git a/active/device_framework_desktop/quadlets/anything-llm.container b/active/device_framework_desktop/quadlets/anything-llm.container
new file mode 100644
index 0000000..e3e7f1e
--- /dev/null
+++ b/active/device_framework_desktop/quadlets/anything-llm.container
@@ -0,0 +1,21 @@
+[Unit]
+Description=An Anything LLM Frontend for Local AI Services
+
+[Container]
+Pod=ai.pod
+Image=docker.io/mintplexlabs/anythingllm
+Volume=anythingllm:/app/server/storage
+Volume=/home/ai/anything-llm/.env:/app/server/.env:z
+Environment=STORAGE_DIR=/app/server/storage
+AddCapability=SYS_ADMIN
+User=1000
+Group=1000
+
+[Service]
+Restart=always
+# Extend Timeout to allow time to pull the image
+TimeoutStartSec=900
+
+[Install]
+# Start by default on boot
+WantedBy=multi-user.target default.target
\ No newline at end of file
diff --git a/active/device_framework_desktop/quadlets/llama-server.container b/active/device_framework_desktop/quadlets/llama-server.container
new file mode 100644
index 0000000..fdaf762
--- /dev/null
+++ b/active/device_framework_desktop/quadlets/llama-server.container
@@ -0,0 +1,28 @@
+[Unit]
+Description=A Llama CPP Server Running GPT OSS 120b
+
+[Container]
+Pod=ai.pod
+Image=localhost/llama-cpp-vulkan:2026-01-12-10-13-30
+Volume=llama-server-cache:/root/.cache
+AddDevice=/dev/kfd
+AddDevice=/dev/dri
+
+Exec=-hf ggml-org/gpt-oss-120b-GGUF \
+--ctx-size 32000 \
+--jinja \
+-ub 2048 \
+-b 2048 \
+--port 8000 \
+--host 0.0.0.0 \
+-n -1 \
+--n-gpu-layers 999
+
+[Service]
+Restart=always
+# Extend Timeout to allow time to pull the image
+TimeoutStartSec=900
+
+[Install]
+# Start by default on boot
+WantedBy=multi-user.target default.target
diff --git a/active/device_framework_desktop/quadlets/ollama-server.container b/active/device_framework_desktop/quadlets/ollama-server.container
new file mode 100644
index 0000000..84d376f
--- /dev/null
+++ b/active/device_framework_desktop/quadlets/ollama-server.container
@@ -0,0 +1,19 @@
+[Unit]
+Description=An Ollama Server
+
+[Container]
+Pod=ai.pod
+Image=docker.io/ollama/ollama:0.13.5
+Volume=ollama:/root/.ollama
+AddDevice=/dev/kfd
+AddDevice=/dev/dri
+Environment=OLLAMA_VULKAN=1
+
+[Service]
+Restart=always
+# Extend Timeout to allow time to pull the image
+TimeoutStartSec=900
+
+[Install]
+# Start by default on boot
+WantedBy=multi-user.target default.target
diff --git a/active/device_framework_desktop/quadlets/open-webui.container b/active/device_framework_desktop/quadlets/open-webui.container
new file mode 100644
index 0000000..ca667bc
--- /dev/null
+++ b/active/device_framework_desktop/quadlets/open-webui.container
@@ -0,0 +1,16 @@
+[Unit]
+Description=An Open Webui Frontend for Local AI Services
+
+[Container]
+Pod=ai.pod
+Image=ghcr.io/open-webui/open-webui:main
+Volume=open-webui-data:/app/backend/data
+
+[Service]
+Restart=always
+# Extend Timeout to allow time to pull the image
+TimeoutStartSec=900
+
+[Install]
+# Start by default on boot
+WantedBy=multi-user.target default.target
\ No newline at end of file