# Framework Desktop Offline AI

- [BIOS](#bios)
- [References](#references)
- [Notes](#notes)
  - [Firmware and Kernel](#firmware-and-kernel)
  - [Kernel args](#kernel-args)
  - [Volume Locations](#volume-locations)
- [Setup](#setup)
  - [Create the AI user](#create-the-ai-user)
  - [Helper aliases](#helper-aliases)
  - [Create the models dir](#create-the-models-dir)
  - [Install the Hugging Face CLI](#install-the-hugging-face-cli)
  - [Samba Model Storage](#samba-model-storage)
  - [Download models](#download-models)
    - [Text models](#text-models)
      - [GPT-OSS](#gpt-oss)
      - [Mistral](#mistral)
      - [Nemotron](#nemotron)
      - [Qwen](#qwen)
      - [GLM](#glm)
      - [Llama](#llama)
      - [Gemma](#gemma)
      - [Dolphin (Abliterated)](#dolphin-abliterated)
    - [Image models](#image-models)
      - [Z-Image](#z-image)
      - [Flux](#flux)
      - [Qwen Image 2512](#qwen-image-2512)
    - [Embedding Models](#embedding-models)
      - [Nomic](#nomic)
- [llama.cpp](#llamacpp)
- [stable-diffusion.cpp](#stable-diffusioncpp)
- [open-webui](#open-webui)
- [VLLM](#vllm)
- [Install the whole thing with quadlets (TM)](#install-the-whole-thing-with-quadlets-tm)
- [Install the update script](#install-the-update-script)
- [Voice Cloning](#voice-cloning)

## BIOS

## Notes

### Firmware and Kernel

See: <https://github.com/kyuz0/amd-strix-halo-toolboxes?tab=readme-ov-file#-stable-configuration>

Current stable is kernel 6.18.3-200 with linux-firmware 20251111.

### Kernel args

Edit `/etc/default/grub` and append the following to the `GRUB_CMDLINE_LINUX` line:

```conf
amd_iommu=off amdgpu.gttsize=126976 ttm.pages_limit=32505856
```

Then run `grub2-mkconfig -o /boot/grub2/grub.cfg` and `reboot`.

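For reference, the resulting line might look like this (a sketch; `rhgb quiet` stands in for whatever arguments your install already carries):

```conf
GRUB_CMDLINE_LINUX="rhgb quiet amd_iommu=off amdgpu.gttsize=126976 ttm.pages_limit=32505856"
```
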
### Volume Locations

Rootless podman stores named volumes under:

`~/.local/share/containers/storage/volumes/`

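Both of the following are stock podman subcommands and are handy for figuring out which volume is eating the disk:

```bash
podman volume ls
podman system df -v
```
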
## Setup

### Create the AI user

```bash
useradd -m ai
loginctl enable-linger ai
su -l ai
mkdir -p /home/ai/.config/containers/systemd/
mkdir -p /home/ai/.ssh
```

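To confirm that lingering took (stock `loginctl`, nothing project-specific):

```bash
loginctl show-user ai --property=Linger
```
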
### Helper aliases

Models are big. You'll want some tools to help find large files quickly when space runs out, plus shortcuts for tailing service logs (e.g. `alias sd-edit-logs='journalctl --user -xeu stable-diffusion-edit-server'`).

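A helper in the same spirit, sketched with plain `du`/`sort`:

```bash
# Top 20 largest entries under the models tree
alias big-models='du -h --max-depth=2 /home/ai/models | sort -rh | head -20'
```
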
### Create the models dir

```bash
mkdir -p /home/ai/models/{text,image,video,embedding,tts,stt}
```

### Install the Hugging Face CLI

```bash
curl -LsSf https://hf.co/cli/install.sh | bash
hf auth login
```

### Samba Model Storage

I recommend adding network storage for keeping models offloaded. This mounts a samba share at `/srv/models`:

```bash
# Add this to /etc/fstab
//driveripper.reeselink.com/smb_models /srv/models cifs _netdev,nofail,uid=1001,gid=1001,credentials=/etc/samba/credentials 0 0

# Then mount
systemctl daemon-reload
mount -a --mkdir
```

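The referenced credentials file uses the standard cifs format (values below are placeholders):

```conf
# /etc/samba/credentials (chmod 600)
username=share-user
password=share-pass
```
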
Here are some sync commands that I use to keep the samba share in sync with the home directory:

```bash
# Sync models from home dir to the samba share
rsync -av --progress /home/ai/models/ /srv/models/
```

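The reverse direction is the same command with source and destination swapped (`--delete` intentionally left out so nothing gets removed by accident):

```bash
# Pull models back from the samba share
rsync -av --progress /srv/models/ /home/ai/models/
```
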
### Download models

#### Text models

Quantized GGUF builds for most of these live under <https://huggingface.co/ggml-org/collections>.

##### GPT-OSS

```bash
# gpt-oss-120b
mkdir /home/ai/models/text/gpt-oss-120b
hf download --local-dir /home/ai/models/text/gpt-oss-120b ggml-org/gpt-oss-120b-GGUF

# gpt-oss-20b
mkdir /home/ai/models/text/gpt-oss-20b
hf download --local-dir /home/ai/models/text/gpt-oss-20b ggml-org/gpt-oss-20b-GGUF
```

##### Mistral

```bash
# devstral-2-123b (the Q4_K_M quant is split into two shards; grab both)
mkdir /home/ai/models/text/devstral-2-123b
hf download --local-dir /home/ai/models/text/devstral-2-123b unsloth/Devstral-2-123B-Instruct-2512-GGUF Q4_K_M/Devstral-2-123B-Instruct-2512-Q4_K_M-00001-of-00002.gguf
hf download --local-dir /home/ai/models/text/devstral-2-123b unsloth/Devstral-2-123B-Instruct-2512-GGUF Q4_K_M/Devstral-2-123B-Instruct-2512-Q4_K_M-00002-of-00002.gguf

# ministral-3-3b-instruct
mkdir /home/ai/models/text/ministral-3-3b-instruct
hf download --local-dir /home/ai/models/text/ministral-3-3b-instruct ggml-org/Ministral-3-3B-Instruct-2512-GGUF
```

##### Nemotron

```bash
# nemotron-nano-30b
mkdir /home/ai/models/text/nemotron-nano-30b
hf download --local-dir /home/ai/models/text/nemotron-nano-30b ggml-org/Nemotron-Nano-3-30B-A3B-GGUF Nemotron-Nano-3-30B-A3B-Q4_K_M.gguf
```

##### Qwen

```bash
# qwen3-30b-a3b-thinking
mkdir /home/ai/models/text/qwen3-30b-a3b-thinking
hf download --local-dir /home/ai/models/text/qwen3-30b-a3b-thinking ggml-org/Qwen3-30B-A3B-GGUF Qwen3-30B-A3B-Q4_K_M.gguf

# qwen3-30b-a3b-instruct
mkdir /home/ai/models/text/qwen3-30b-a3b-instruct
hf download --local-dir /home/ai/models/text/qwen3-30b-a3b-instruct ggml-org/Qwe… # repo name truncated in the source

# qwen3-coder-30b-a3b-instruct
mkdir /home/ai/models/text/qwen3-coder-30b-a3b-instruct
hf download --local-dir /home/ai/models/text/qwen3-coder-30b-a3b-instruct ggml-org/Qwen3-Coder-30B-A3B-Instruct-Q8_0-GGUF

# qwen3-coder-next
mkdir /home/ai/models/text/qwen3-coder-next
hf download --local-dir /home/ai/models/text/qwen3-coder-next unsloth/Qwen3-Coder-Next-GGUF --include "Q5_K_M/*.gguf"

# qwen3-vl-30b-thinking
mkdir /home/ai/models/text/qwen3-vl-30b-thinking
hf download --local-dir /home/ai/models/text/qwen3-vl-30b-thinking unsloth/Qwen3-VL-30B-A3B-Thinking-1M-GGUF Qwen3-VL-30B-A3B-Thinking-1M-Q4_K_M.gguf
hf download --local-dir /home/ai/models/text/qwen3-vl-30b-thinking unsloth/Qwen3-VL-30B-A3B-Thinking-1M-GGUF mmproj-F16.gguf

# qwen3-vl-8b-instruct
mkdir /home/ai/models/text/qwen3-vl-8b-instruct
hf download --local-dir /home/ai/models/text/qwen3-vl-8b-instruct Qwen/Qwen3-VL-8B-Instruct-GGUF Qwen3VL-8B-Instruct-Q4_K_M.gguf
hf download --local-dir /home/ai/models/text/qwen3-vl-8b-instruct Qwen/Qwen3-VL-8B-Instruct-GGUF mmproj-Qwen3VL-8B-Instruct-Q8_0.gguf

# qwen3-4b-2507-abliterated
mkdir /home/ai/models/text/qwen3-4b-2507-abliterated
hf download --local-dir /home/ai/models/text/qwen3-4b-2507-abliterated prithivMLmods/Qwen3-4B-2507-abliterated-GGUF Qwen3-4B-Thinking-2507-abliterated-GGUF/Qwen3-4B-Thinking-2507-abliterated.Q4_K_M.gguf
```

##### GLM

```bash
# glm-4.7-flash-30b
mkdir /home/ai/models/text/glm-4.7-flash-30b
hf download --local-dir /home/ai/models/text/glm-4.7-flash-30b unsloth/GLM-4.7-Flash-GGUF GLM-4.7-Flash-Q4_K_M.gguf
```

##### Llama

```bash
# llama4-scout
mkdir /home/ai/models/text/llama4-scout
# The sharded ggufs land in a Q4_K_M/ subfolder; remember to move them up into llama4-scout, otherwise they won't be picked up
hf download --local-dir /home/ai/models/text/llama4-scout unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF --include "Q4_K_M/*.gguf"
hf download --local-dir /home/ai/models/text/llama4-scout unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF mmproj-F16.gguf
```

##### Gemma

```bash
# Note the "it" vs "pt" suffixes: "it" is the instruction-tuned model, "pt" is the base model (not as good for out-of-the-box use)
# gemma-3-27b-it
mkdir /home/ai/models/text/gemma-3-27b-it
hf download --local-dir /home/ai/models/text/gemma-3-27b-it unsloth/gemma-3-27b-it-GGUF gemma-3-27b-it-Q4_K_M.gguf
hf download --local-dir /home/ai/models/text/gemma-3-27b-it unsloth/gemma-3-27b-it-GGUF mmproj-F16.gguf
```

##### Dolphin (Abliterated)

```bash
# dolphin-x1-8b
mkdir /home/ai/models/text/dolphin-x1-8b
hf download --local-dir /home/ai/models/text/dolphin-x1-8b dphn/Dolphin-X1-8B-GGUF Dolphin-X1-8B-Q4_K_M.gguf

# dolphin-mistral-24b-venice
mkdir /home/ai/models/text/dolphin-mistral-24b-venice
hf download --local-dir /home/ai/models/text/dolphin-mistral-24b-venice bartowski/cognitivecomputations_Dolphin-Mistral-24B-Venice-Edition-GGUF cognitivecomputations_Dolphin-Mistral-24B-Venice-Edition-Q4_K_M.gguf
```

#### Image models

##### Z-Image

```bash
# z-turbo
# Fastest image generation, in 8 steps. Great at text and prompt following.
# Lacks variety.
mkdir /home/ai/models/image/z-turbo
hf download --local-dir /home/ai/models/image/z-turbo leejet/Z-Image-Turbo-GGUF z_image_turbo-Q4_K.gguf
hf download --local-dir /home/ai/models/image/z-turbo black-forest-labs/FLUX.1-schnell ae.safetensors
hf download --local-dir /home/ai/models/image/z-turbo unsloth/Qwen3-4B-Instruct-2507-GGUF Qwen3-4B-Instruct-2507-Q4_K_M.gguf

# z-image
# Full version of z-turbo. Needs 28-50 steps.
# Note: image quality is not as good as z-turbo.
mkdir /home/ai/models/image/z-image
hf download --local-dir /home/ai/models/image/z-image unsloth/Z-Image-GGUF z-image-Q4_K_M.gguf
hf download --local-dir /home/ai/models/image/z-image black-forest-labs/FLUX.1-schnell ae.safetensors
hf download --local-dir /home/ai/models/image/z-image unsloth/Qwen3-4B-Instruct-2507-GGUF Qwen3-4B-Instruct-2507-Q4_K_M.gguf
```

##### Flux

```bash
# flux2-klein
# Capable of generating images in 4 steps
mkdir /home/ai/models/image/flux2-klein
hf download --local-dir /home/ai/models/image/flux2-klein leejet/FLUX.2-klein-9B-GGUF flux-2-klein-9b-Q4_0.gguf
hf download --local-dir /home/ai/models/image/flux2-klein black-forest-labs/FLUX.2-dev ae.safetensors
hf download --local-dir /home/ai/models/image/flux2-klein unsloth/Qwen3-8B-GGUF Qwen3-8B-Q4_K_M.gguf

# flux-1-kontext
mkdir /home/ai/models/image/flux-1-kontext
hf download --local-dir /home/ai/models/image/flux-1-kontext QuantStack/FLUX.1-Kontext-dev-GGUF flux1-kontext-dev-Q4_K_M.gguf
hf download --local-dir /home/ai/models/image/flux-1-kontext black-forest-labs/FLUX.1-schnell ae.safetensors # (assumed: same FLUX.1 ae as above)
hf download --local-dir /home/ai/models/image/flux-1-kontext comfyanonymous/flux_text_encoders clip_l.safetensors
hf download --local-dir /home/ai/models/image/flux-1-kontext comfyanonymous/flux_text_encoders t5xxl_fp16.safetensors
```

##### Qwen Image 2512

```bash
```

#### Embedding Models

##### Nomic

```bash
# nomic-embed-text-v2
mkdir /home/ai/models/embedding/nomic-embed-text-v2
hf download --local-dir /home/ai/models/embedding/nomic-embed-text-v2 ggml-org/Nomic-Embed-Text-V2-GGUF
```

## llama.cpp

```bash
export BUILD_TAG=$(date +"%Y-%m-%d-%H-%M-%S")

# Vulkan
podman build -f .devops/vulkan.Dockerfile -t llama-cpp-vulkan:${BUILD_TAG} -t llama-cpp-vulkan:latest .

# ROCM
podman build -f .devops/rocm.Dockerfile -t llama-cpp-rocm:${BUILD_TAG} -t llama-cpp-rocm:latest .

# Run llama server (available on port 8000)
# Add `--n-cpu-moe 32` to gpt-oss-120b to keep a minimal number of experts on the GPU
podman run \
  --rm \
  --name llama-server-demo \
  --device=/dev/kfd \
  --device=/dev/dri \
  --pod systemd-ai-internal \
  -v /home/ai/models/text:/models:z \
  localhost/llama-cpp-vulkan:latest \
  --port 8000 \
  -c 32000 \
  --perf \
  --n-gpu-layers all \
  --jinja \
  --models-max 1 \
  --models-dir /models

# ROCM
podman run \
  --rm \
  --name llama-server-demo \
  --device=/dev/kfd \
  --device=/dev/dri \
  --pod systemd-ai-internal \
  -v /home/ai/models/text:/models:z \
  localhost/llama-cpp-rocm:latest \
  --port 8000 \
  -c 0 \
  --perf \
  --n-gpu-layers all \
  --jinja \
  --models-max 1 \
  --models-dir /models
```

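A quick way to poke the server once it's up; the model name is whatever the router loaded from `/models` (`gpt-oss-20b` here is an assumption):

```bash
curl http://127.0.0.1:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "gpt-oss-20b", "messages": [{"role": "user", "content": "Hello"}]}'
```
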
## stable-diffusion.cpp

```bash
# Build the image (see update-script.sh for the full clone/build sequence)
podman build -f Dockerfile.vulkan -t stable-diffusion-cpp:${BUILD_TAG} -t stable-diffusion-cpp:latest .

# z-turbo
podman run --rm \
  -v /home/ai/models:/models:z \
  -v /home/ai/output:/output:z \
  --device /dev/kfd \
  --device /dev/dri \
  localhost/stable-diffusion-cpp:latest \
  --diffusion-model /models/image/z-turbo/z_image_turbo-Q4_K.gguf \
  --vae /models/image/z-turbo/ae.safetensors \
  --llm /models/image/z-turbo/Qwen3-4B-Instruct-2507-Q4_K_M.gguf \
  --cfg-scale 1.0 \
  -v \
  --seed -1 \
  --steps 8 \
  --vae-conv-direct \
  -H 1024 \
  -W 1024 \
  -o /output/output.png \
  -p "A photorealistic dragon"
# z-image
podman run --rm \
  -v /home/ai/models:/models:z \
  -v /home/ai/output:/output:z \
  --device /dev/kfd \
  --device /dev/dri \
  localhost/stable-diffusion-cpp:latest \
  --diffusion-model /models/image/z-image/z-image-Q4_K_M.gguf \
  --vae /models/image/z-image/ae.safetensors \
  --llm /models/image/z-image/Qwen3-4B-Instruct-2507-Q4_K_M.gguf \
  --cfg-scale 1.0 \
  -v \
  --seed -1 \
  --steps 28 \
  --vae-conv-direct \
  -H 1024 \
  -W 1024 \
  -o /output/output.png \
  -p "A photorealistic dragon"

# flux2-klein
podman run --rm \
  -v /home/ai/models:/models:z \
  -v /home/ai/output:/output:z \
  --device /dev/kfd \
  --device /dev/dri \
  localhost/stable-diffusion-cpp:latest \
  --diffusion-model /models/image/flux2-klein/flux-2-klein-9b-Q4_0.gguf \
  --vae /models/image/flux2-klein/ae.safetensors \
  --llm /models/image/flux2-klein/Qwen3-8B-Q4_K_M.gguf \
  --cfg-scale 1.0 \
  --steps 4 \
  -v \
  --seed -1 \
  --vae-conv-direct \
  -H 1024 \
  -W 1024 \
  -o /output/output.png \
  -p "A photorealistic dragon"

# Edit with flux2 klein
# (converted from a local sd-cli invocation; the reference image is assumed to sit in the mounted output dir)
podman run --rm \
  -v /home/ai/models:/models:z \
  -v /home/ai/output:/output:z \
  --device /dev/kfd \
  --device /dev/dri \
  localhost/stable-diffusion-cpp:latest \
  --diffusion-model /models/image/flux2-klein/flux-2-klein-9b-Q4_0.gguf \
  --vae /models/image/flux2-klein/ae.safetensors \
  --llm /models/image/flux2-klein/Qwen3-8B-Q4_K_M.gguf \
  --cfg-scale 1.0 \
  --sampling-method euler \
  -v \
  --vae-conv-direct \
  --steps 4 \
  -r /output/kontext_input.png \
  -o /output/output.png \
  -p "change 'flux.cpp' to 'klein.cpp'"
# Edit with flux kontext
# (tail flags reconstructed from the edit-server quadlet below; treat as a sketch)
podman run --rm \
  -v /home/ai/models:/models:z \
  -v /home/ai/output:/output:z \
  --device /dev/kfd \
  --device /dev/dri \
  localhost/stable-diffusion-cpp:latest \
  --diffusion-model /models/image/flux-1-kontext/flux1-kontext-dev-Q4_K_M.gguf \
  --vae /models/image/flux-1-kontext/ae.safetensors \
  --clip_l /models/image/flux-1-kontext/clip_l.safetensors \
  --t5xxl /models/image/flux-1-kontext/t5xxl_fp16.safetensors \
  --cfg-scale 1.0 \
  --sampling-method euler \
  --vae-conv-direct \
  --seed -1 \
  --steps 28 \
  -r /output/kontext_input.png \
  -o /output/output.png \
  -p "change 'flux.cpp' to 'klein.cpp'"
```

## open-webui

```bash
# (most flags elided in the source; the quadlet below is the canonical config)
podman run \
  ghcr.io/open-webui/open-webui:main
```

Use the following connections:

| Service              | Endpoint                                  |
| -------------------- | ----------------------------------------- |
| llama.cpp            | <http://host.containers.internal:8000>    |
| stable-diffusion.cpp | <http://host.containers.internal:1234/v1> |

## VLLM

```bash
mkdir -p /home/ai/vllm/.cache/huggingface

podman run --rm \
  --device /dev/kfd \
  --device /dev/dri \
  --group-add=video \
  --cap-add=SYS_PTRACE \
  --security-opt seccomp=unconfined \
  --ipc=host \
  --env "HF_TOKEN=$HF_TOKEN" \
  -v /home/ai/vllm/.cache/huggingface:/root/.cache/huggingface:z \
  -p 8002:8000 \
  docker.io/vllm/vllm-openai-rocm:latest \
  --model Qwen/Qwen3-0.6B
```

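Once it's up, the OpenAI-compatible API answers on the published port:

```bash
curl http://127.0.0.1:8002/v1/models
```
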
## Install the whole thing with quadlets (TM)

```bash
scp -r active/device_framework_desktop/quadlets/* deskwork-ai:.config/containers/systemd/
ssh deskwork-ai
systemctl --user daemon-reload
systemctl --user restart ai-internal-pod.service
```

Note: all services will be available at `host.containers.internal`. So llama.cpp
will be up at `http://host.containers.internal:8000`.

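For a quick smoke test from the host itself (the ports are published, and `/props` is the same endpoint the quadlet health check hits):

```bash
curl http://127.0.0.1:8000/props
```
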
## Install the update script

```bash
# 1. Builds the latest llama.cpp and stable-diffusion.cpp
# 2. Pulls the latest open-webui
# 3. Restarts all services
scp active/device_framework_desktop/update-script.sh deskwork-ai:
ssh deskwork-ai
chmod +x update-script.sh
./update-script.sh
```

## Voice Cloning

## Quadlet files

These are the quadlets from `active/device_framework_desktop/quadlets/` referenced above.

`ai-external.pod`:

```ini
[Pod]
# ai-external is the primary network
Network=ai-external.network
Network=ai-internal.network
# open-webui
PublishPort=8080:8080/tcp
```

`ai-internal.network` (file name assumed from the `Network=ai-internal.network` references; the diff shows only its contents):

```ini
[Network]
IPv6=true
Internal=true
```

`ai-internal.pod`:

```ini
[Pod]
Network=ai-internal.network
# llama.cpp
PublishPort=8000:8000/tcp
# stable-diffusion.cpp gen
PublishPort=1234:1234/tcp
# stable-diffusion.cpp edit
PublishPort=1235:1235/tcp
```

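Once the quadlets are loaded and started, both pods should show up for the `ai` user (stock podman, nothing project-specific):

```bash
podman pod ps
```
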
The llama.cpp server container quadlet (excerpt; `# ...` marks lines elided in the diff):

```ini
Description=A Llama CPP Server Running GPT OSS 120b

[Container]
# Shared AI internal pod
Pod=ai-internal.pod

# Image is built locally via podman build
Image=localhost/llama-cpp-vulkan:latest

# ...
AddDevice=/dev/dri

# Server command
Exec=--port 8000 \
  -c 48000 \
  --perf \
  --n-gpu-layers all \
  --jinja \
  --models-max 1 \
  --models-dir /models

# Health Check
HealthCmd=CMD-SHELL curl --fail http://127.0.0.1:8000/props || exit 1
HealthInterval=10s
HealthRetries=3
HealthStartPeriod=10s
```

The open-webui quadlet (excerpt):

```ini
Description=An Open Webui Frontend for Local AI Services

[Container]
# Shared AI external pod
Pod=ai-external.pod

# Open Webui base image
Image=ghcr.io/open-webui/open-webui:main
```

`stable-diffusion-edit-server.container` (the unit name matches the `sd-edit-logs` alias; shown merged with this commit's changes):

```ini
[Unit]
Description=A Stable Diffusion CPP Server for Editing Images

[Container]
# Shared AI internal pod
Pod=ai-internal.pod

# Vulkan image for AMD GPU
Image=localhost/stable-diffusion-cpp:latest

# Shared models directory
Volume=/home/ai/models:/models:z

# GPU Device
AddDevice=/dev/kfd
AddDevice=/dev/dri

# Override entrypoint to use server
Entrypoint=/sd-server

# Server args
Exec=-l 0.0.0.0 \
  --listen-port 1235 \
  --diffusion-model /models/image/flux2-klein/flux-2-klein-9b-Q4_0.gguf \
  --vae /models/image/flux2-klein/ae.safetensors \
  --llm /models/image/flux2-klein/Qwen3-8B-Q4_K_M.gguf \
  --cfg-scale 1.0 \
  --sampling-method euler \
  -v \
  --steps 4 \
  --vae-conv-direct

[Service]
Restart=always
# Extend Timeout to allow time to pull the image
TimeoutStartSec=900

[Install]
# Start by default on boot
WantedBy=multi-user.target default.target
```

The image-generation server quadlet (excerpt; `# ...` marks lines elided in the diff):

```ini
Description=A Stable Diffusion CPP Server for Generating Images

[Container]
# Shared AI internal pod
Pod=ai-internal.pod

# Vulkan image for AMD GPU
Image=localhost/stable-diffusion-cpp:latest

# ...
Exec=-l 0.0.0.0 \
  --listen-port 1234 \
  --diffusion-model /models/image/z-turbo/z_image_turbo-Q4_K.gguf \
  --vae /models/image/z-turbo/ae.safetensors \
  --llm /models/image/z-turbo/qwen_3_4b.safetensors \
  --cfg-scale 1.0 \
  --vae-conv-direct \
  -v \
  # ...
```

Finally, the relevant stretch of `update-script.sh` (excerpt; it follows a `git pull` of stable-diffusion.cpp):

```bash
git submodule update --init --recursive
podman build -f Dockerfile.vulkan -t stable-diffusion-cpp:${BUILD_TAG} -t stable-diffusion-cpp:latest .

systemctl --user restart ai-internal-pod

podman image pull ghcr.io/open-webui/open-webui:main

systemctl --user restart ai-external-pod
```