From 525e14965d8df159b5667abd30bdf0daaafe92eb Mon Sep 17 00:00:00 2001
From: ducoterra
Date: Fri, 6 Feb 2026 20:11:19 -0500
Subject: [PATCH] framework desktop offline ai updates

---
 .../framework_desktop.md                      | 292 +++++++++++++++++-
 .../{ai.network => ai-external.network}       |   0
 .../quadlets/ai-external.pod                  |   6 +
 .../quadlets/ai-internal.network              |   3 +
 .../quadlets/ai-internal.pod                  |   8 +
 .../device_framework_desktop/quadlets/ai.pod  |   8 -
 .../quadlets/llama-server.container           |   8 +-
 .../quadlets/open-webui.container             |   4 +-
 .../stable-diffusion-edit-server.container    |  18 +-
 .../stable-diffusion-gen-server.container     |   6 +-
 .../stable-diffusion-edit-server.container    |  42 +++
 .../device_framework_desktop/update-script.sh |   4 +-
 12 files changed, 354 insertions(+), 45 deletions(-)
 rename active/device_framework_desktop/quadlets/{ai.network => ai-external.network} (100%)
 create mode 100644 active/device_framework_desktop/quadlets/ai-external.pod
 create mode 100644 active/device_framework_desktop/quadlets/ai-internal.network
 create mode 100644 active/device_framework_desktop/quadlets/ai-internal.pod
 delete mode 100644 active/device_framework_desktop/quadlets/ai.pod
 create mode 100644 active/device_framework_desktop/quadlets_beta/stable-diffusion-edit-server.container

diff --git a/active/device_framework_desktop/framework_desktop.md b/active/device_framework_desktop/framework_desktop.md
index 3ddc005..a8473ea 100644
--- a/active/device_framework_desktop/framework_desktop.md
+++ b/active/device_framework_desktop/framework_desktop.md
@@ -4,21 +4,38 @@
 - [BIOS](#bios)
   - [References](#references)
 - [Notes](#notes)
+  - [Firmware and Kernel](#firmware-and-kernel)
+  - [Kernel args](#kernel-args)
   - [Volume Locations](#volume-locations)
 - [Setup](#setup)
   - [Create the AI user](#create-the-ai-user)
   - [Helper aliases](#helper-aliases)
   - [Create the models dir](#create-the-models-dir)
   - [Install the Hugging Face CLI](#install-the-hugging-face-cli)
+  - [Samba Model Storage](#samba-model-storage)
   - [Download models](#download-models)
     - [Text models](#text-models)
+      - [GPT-OSS](#gpt-oss)
+      - [Mistral](#mistral)
+      - [Nemotron](#nemotron)
+      - [Qwen](#qwen)
+      - [GLM](#glm)
+      - [Llama](#llama)
+      - [Gemma](#gemma)
+      - [Dolphin (Abliterated)](#dolphin-abliterated)
     - [Image models](#image-models)
-  - [Create the systemd-ai pod](#create-the-systemd-ai-pod)
+      - [Z-Image](#z-image)
+      - [Flux](#flux)
+      - [Qwen Image 2512](#qwen-image-2512)
+    - [Embedding Models](#embedding-models)
+      - [Nomic](#nomic)
 - [llama.cpp](#llamacpp)
 - [stable-diffusion.cpp](#stable-diffusioncpp)
 - [open-webui](#open-webui)
+- [vLLM](#vllm)
 - [Install the whole thing with quadlets (TM)](#install-the-whole-thing-with-quadlets-tm)
 - [Install the update script](#install-the-update-script)
+- [Voice Cloning](#voice-cloning)
 
 ## BIOS
 
@@ -32,6 +49,22 @@
 
 ## Notes
 
+### Firmware and Kernel
+
+See: 
+
+Current stable is kernel 6.18.3-200 with linux-firmware 20251111
+
+### Kernel args
+
+Edit `/etc/default/grub` and append the following to the `GRUB_CMDLINE_LINUX` line:
+
+```conf
+amd_iommu=off amdgpu.gttsize=126976 ttm.pages_limit=32505856
+```
+
+Then `grub2-mkconfig -o /boot/grub2/grub.cfg` and `reboot`.
+
 ### Volume Locations
 
 `~/.local/share/containers/storage/volumes/`
@@ -45,7 +78,8 @@
 useradd -m ai
 loginctl enable-linger ai
 su -l ai
-mkdir -p ~/.config/containers/systemd/
+mkdir -p /home/ai/.config/containers/systemd/
+mkdir -p /home/ai/.ssh
 ```
 
 Models are big. You'll want some tools to help find large files quickly when space runs out.
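+
+For example, a minimal sketch (assumes `ncdu` is installed separately, e.g. via `dnf install ncdu`):
+
+```bash
+# Largest subdirectories under the models tree, biggest first
+du -h --max-depth=2 /home/ai/models | sort -rh | head -n 20
+
+# Interactive disk-usage browser for hunting down large files
+ncdu /home/ai/models
+```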
@@ -75,7 +109,7 @@ alias sd-edit-logs='journalctl --user -xeu stable-diffusion-edit-server'
 ### Create the models dir
 
 ```bash
-mkdir -p /home/ai/models/{text,image,video}
+mkdir -p /home/ai/models/{text,image,video,embedding,tts,stt}
 ```
 
 ### Install the Hugging Face CLI
@@ -90,12 +124,34 @@ curl -LsSf https://hf.co/cli/install.sh | bash
 hf auth login
 ```
 
+### Samba Model Storage
+
+I recommend adding network storage for offloading models you aren't actively serving. This mounts a Samba share at `/srv/models`.
+
+```bash
+# Add this to /etc/fstab
+//driveripper.reeselink.com/smb_models /srv/models cifs _netdev,nofail,uid=1001,gid=1001,credentials=/etc/samba/credentials 0 0
+
+# Then mount
+systemctl daemon-reload
+mount -a --mkdir
+```
+
+Here is a sync command I use to keep the Samba share in sync with the home directory:
+
+```bash
+# Sync models from home dir to the samba share
+rsync -av --progress /home/ai/models/ /srv/models/
+```
+
 ### Download models
 
 #### Text models
 
+##### GPT-OSS
+
 ```bash
 # gpt-oss-120b
 mkdir /home/ai/models/text/gpt-oss-120b
 hf download --local-dir /home/ai/models/text/gpt-oss-120b ggml-org/gpt-oss-120b-GGUF
@@ -104,7 +160,11 @@ hf download --local-dir /home/ai/models/text/gpt-oss-120b ggml-org/gpt-oss-120b-
 # gpt-oss-20b
 mkdir /home/ai/models/text/gpt-oss-20b
 hf download --local-dir /home/ai/models/text/gpt-oss-20b ggml-org/gpt-oss-20b-GGUF
+```
+
+##### Mistral
+
+```bash
 # devstral-2-123b
 mkdir /home/ai/models/text/devstral-2-123b
 hf download --local-dir /home/ai/models/text/devstral-2-123b unsloth/Devstral-2-123B-Instruct-2512-GGUF Q4_K_M/Devstral-2-123B-Instruct-2512-Q4_K_M-00001-of-00002.gguf
@@ -121,10 +181,22 @@ hf download --local-dir /home/ai/models/text/ministral-3-14b ggml-org/Ministral-
 # ministral-3-14b
 mkdir /home/ai/models/text/ministral-3-14b
 hf download --local-dir /home/ai/models/text/ministral-3-14b ggml-org/Ministral-3-14B-Instruct-2512-GGUF
 
 # ministral-3-3b-instruct
 mkdir /home/ai/models/text/ministral-3-3b-instruct
 hf download --local-dir /home/ai/models/text/ministral-3-3b-instruct ggml-org/Ministral-3-3B-Instruct-2512-GGUF
+```
+
+##### Nemotron
+
+```bash
 # nemotron-nano-30b
 mkdir /home/ai/models/text/nemotron-nano-30b
 hf download --local-dir /home/ai/models/text/nemotron-nano-30b ggml-org/Nemotron-Nano-3-30B-A3B-GGUF Nemotron-Nano-3-30B-A3B-Q4_K_M.gguf
+```
+
+##### Qwen
+
+```bash
+# qwen3-30b-a3b-thinking
+mkdir /home/ai/models/text/qwen3-30b-a3b-thinking
+hf download --local-dir /home/ai/models/text/qwen3-30b-a3b-thinking ggml-org/Qwen3-30B-A3B-GGUF Qwen3-30B-A3B-Q4_K_M.gguf
 
 # qwen3-30b-a3b-instruct
 mkdir /home/ai/models/text/qwen3-30b-a3b-instruct
@@ -133,17 +205,98 @@ hf download --local-dir /home/ai/models/text/qwen3-30b-a3b-instruct ggml-org/Qwe
 # qwen3-coder-30b-a3b-instruct
 mkdir /home/ai/models/text/qwen3-coder-30b-a3b-instruct
 hf download --local-dir /home/ai/models/text/qwen3-coder-30b-a3b-instruct ggml-org/Qwen3-Coder-30B-A3B-Instruct-Q8_0-GGUF
+
+# qwen3-coder-next
+mkdir /home/ai/models/text/qwen3-coder-next
+hf download --local-dir /home/ai/models/text/qwen3-coder-next unsloth/Qwen3-Coder-Next-GGUF --include "Q5_K_M/*.gguf"
+
+# qwen3-vl-30b-thinking
+mkdir /home/ai/models/text/qwen3-vl-30b-thinking
+hf download --local-dir /home/ai/models/text/qwen3-vl-30b-thinking unsloth/Qwen3-VL-30B-A3B-Thinking-1M-GGUF Qwen3-VL-30B-A3B-Thinking-1M-Q4_K_M.gguf
+hf download --local-dir /home/ai/models/text/qwen3-vl-30b-thinking unsloth/Qwen3-VL-30B-A3B-Thinking-1M-GGUF mmproj-F16.gguf
+
+# qwen3-vl-8b-instruct
+mkdir /home/ai/models/text/qwen3-vl-8b-instruct
+hf download --local-dir /home/ai/models/text/qwen3-vl-8b-instruct Qwen/Qwen3-VL-8B-Instruct-GGUF Qwen3VL-8B-Instruct-Q4_K_M.gguf
+hf download --local-dir /home/ai/models/text/qwen3-vl-8b-instruct Qwen/Qwen3-VL-8B-Instruct-GGUF mmproj-Qwen3VL-8B-Instruct-Q8_0.gguf
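+
+# Note: the mmproj-*.gguf files above are multimodal projector weights; keep each
+# one next to its main model gguf so llama.cpp can pair them for image input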
+
+# qwen3-4b-2507-abliterated
+mkdir /home/ai/models/text/qwen3-4b-2507-abliterated
+hf download --local-dir /home/ai/models/text/qwen3-4b-2507-abliterated prithivMLmods/Qwen3-4B-2507-abliterated-GGUF Qwen3-4B-Thinking-2507-abliterated-GGUF/Qwen3-4B-Thinking-2507-abliterated.Q4_K_M.gguf
+```
+
+##### GLM
+
+```bash
+# glm-4.7-flash-30b
+mkdir /home/ai/models/text/glm-4.7-flash-30b
+hf download --local-dir /home/ai/models/text/glm-4.7-flash-30b unsloth/GLM-4.7-Flash-GGUF GLM-4.7-Flash-Q4_K_M.gguf
+```
+
+##### Llama
+
+```bash
+# llama4-scout
+mkdir /home/ai/models/text/llama4-scout
+# The --include download lands in a Q4_K_M/ subfolder; move the .gguf files up into llama4-scout, otherwise they won't be picked up
+hf download --local-dir /home/ai/models/text/llama4-scout unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF --include "Q4_K_M/*.gguf"
+hf download --local-dir /home/ai/models/text/llama4-scout unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF mmproj-F16.gguf
+```
+
+##### Gemma
+
+```bash
+# Note the "it" vs "pt" suffixes: "it" is instruction-tuned, "pt" is the pretrained base model (not as good for out-of-the-box use)
+# gemma-3-27b-it
+mkdir /home/ai/models/text/gemma-3-27b-it
+hf download --local-dir /home/ai/models/text/gemma-3-27b-it unsloth/gemma-3-27b-it-GGUF gemma-3-27b-it-Q4_K_M.gguf
+hf download --local-dir /home/ai/models/text/gemma-3-27b-it unsloth/gemma-3-27b-it-GGUF mmproj-F16.gguf
+```
+
+##### Dolphin (Abliterated)
+
+```bash
+# dolphin-x1-8b
+mkdir /home/ai/models/text/dolphin-x1-8b
+hf download --local-dir /home/ai/models/text/dolphin-x1-8b dphn/Dolphin-X1-8B-GGUF Dolphin-X1-8B-Q4_K_M.gguf
+
+# dolphin-mistral-24b-venice
+mkdir /home/ai/models/text/dolphin-mistral-24b-venice
+hf download --local-dir /home/ai/models/text/dolphin-mistral-24b-venice bartowski/cognitivecomputations_Dolphin-Mistral-24B-Venice-Edition-GGUF cognitivecomputations_Dolphin-Mistral-24B-Venice-Edition-Q4_K_M.gguf
 ```
 
 #### Image models
 
+##### Z-Image
+
 ```bash
 # z-turbo
+# Fastest image generation in 8 steps. Great at text and prompt following.
+# Lacks variety.
 mkdir /home/ai/models/image/z-turbo
-hf download --local-dir /home/ai/models/image/z-turbo QuantStack/FLUX.1-Kontext-dev-GGUF flux1-kontext-dev-Q4_K_M.gguf
+hf download --local-dir /home/ai/models/image/z-turbo leejet/Z-Image-Turbo-GGUF z_image_turbo-Q4_K.gguf
 hf download --local-dir /home/ai/models/image/z-turbo black-forest-labs/FLUX.1-schnell ae.safetensors
 hf download --local-dir /home/ai/models/image/z-turbo unsloth/Qwen3-4B-Instruct-2507-GGUF Qwen3-4B-Instruct-2507-Q4_K_M.gguf
 
+# z-image
+# Full version of z-turbo. Needs 28-50 steps.
+# Note: image quality is not as good as z-turbo's.
+mkdir /home/ai/models/image/z-image
+hf download --local-dir /home/ai/models/image/z-image unsloth/Z-Image-GGUF z-image-Q4_K_M.gguf
+hf download --local-dir /home/ai/models/image/z-image black-forest-labs/FLUX.1-schnell ae.safetensors
+hf download --local-dir /home/ai/models/image/z-image unsloth/Qwen3-4B-Instruct-2507-GGUF Qwen3-4B-Instruct-2507-Q4_K_M.gguf
+```
+
+##### Flux
+
+```bash
+# flux2-klein
+# Capable of generating images in 4 steps
+mkdir /home/ai/models/image/flux2-klein
+hf download --local-dir /home/ai/models/image/flux2-klein leejet/FLUX.2-klein-9B-GGUF flux-2-klein-9b-Q4_0.gguf
+hf download --local-dir /home/ai/models/image/flux2-klein black-forest-labs/FLUX.2-dev ae.safetensors
+hf download --local-dir /home/ai/models/image/flux2-klein unsloth/Qwen3-8B-GGUF Qwen3-8B-Q4_K_M.gguf
+
 # flux-1-kontext
 mkdir /home/ai/models/image/flux-1-kontext
-hf download --local-dir /home/ai/models/image/flux-1-kontext leejet/Z-Image-Turbo-GGUF z_image_turbo-Q4_K.gguf
+hf download --local-dir /home/ai/models/image/flux-1-kontext QuantStack/FLUX.1-Kontext-dev-GGUF flux1-kontext-dev-Q4_K_M.gguf
@@ -152,12 +305,21 @@ hf download --local-dir /home/ai/models/image/flux-1-kontext comfyanonymous/flux
 hf download --local-dir /home/ai/models/image/flux-1-kontext comfyanonymous/flux_text_encoders t5xxl_fp16.safetensors
 ```
 
-### Create the systemd-ai pod
+##### Qwen Image 2512
 
-You'll at least want the ai pod and network. Copy `ai.pod` and `ai.network` out
-of `quadlets` into `~/.config/containers/systemd`.
+```bash
 
-Then run `systemctl --user daemon-reload && systemctl --user start ai-pod`
+```
+
+#### Embedding Models
+
+##### Nomic
+
+```bash
+# nomic-embed-text-v2
+mkdir /home/ai/models/embedding/nomic-embed-text-v2
+hf download --local-dir /home/ai/models/embedding/nomic-embed-text-v2 ggml-org/Nomic-Embed-Text-V2-GGUF
+```
 
 ## llama.cpp
 
@@ -172,20 +334,38 @@ export BUILD_TAG=$(date +"%Y-%m-%d-%H-%M-%S")
 
 # Vulkan
 podman build -f .devops/vulkan.Dockerfile -t llama-cpp-vulkan:${BUILD_TAG} -t llama-cpp-vulkan:latest .
 
+# ROCm
+podman build -f .devops/rocm.Dockerfile -t llama-cpp-rocm:${BUILD_TAG} -t llama-cpp-rocm:latest .
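+
+# Optional sanity check (not in the original doc): confirm both locally
+# built image tags exist before the quadlets reference them
+podman images | grep llama-cpp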
+
 # Run llama server (Available on port 8000)
 # Add `--n-cpu-moe 32` to gpt-oss-120b to keep a minimal number of experts on the GPU
 podman run \
 --rm \
 --name llama-server-demo \
---pod systemd-ai \
 --device=/dev/kfd \
 --device=/dev/dri \
+--pod systemd-ai-internal \
 -v /home/ai/models/text:/models:z \
 localhost/llama-cpp-vulkan:latest \
 --port 8000 \
--c 64000 \
--b 64000 \
--ub 500 \
+-c 32000 \
+--perf \
+--n-gpu-layers all \
+--jinja \
+--models-max 1 \
+--models-dir /models
+
+# ROCm
+podman run \
+--rm \
+--name llama-server-demo \
+--device=/dev/kfd \
+--device=/dev/dri \
+--pod systemd-ai-internal \
+-v /home/ai/models/text:/models:z \
+localhost/llama-cpp-rocm:latest \
+--port 8000 \
+-c 0 \
 --perf \
 --n-gpu-layers all \
 --jinja \
@@ -222,14 +402,67 @@ localhost/stable-diffusion-cpp:latest \
 --llm /models/image/z-turbo/Qwen3-4B-Instruct-2507-Q4_K_M.gguf \
 --cfg-scale 1.0 \
 -v \
--H 1024 \
--W 1024 \
 --seed -1 \
 --steps 8 \
 --vae-conv-direct \
+-H 1024 \
+-W 1024 \
 -o /output/output.png \
 -p "A photorealistic dragon"
 
+# z-image
+podman run --rm \
+-v /home/ai/models:/models:z \
+-v /home/ai/output:/output:z \
+--device /dev/kfd \
+--device /dev/dri \
+localhost/stable-diffusion-cpp:latest \
+--diffusion-model /models/image/z-image/z-image-Q4_K_M.gguf \
+--vae /models/image/z-image/ae.safetensors \
+--llm /models/image/z-image/Qwen3-4B-Instruct-2507-Q4_K_M.gguf \
+--cfg-scale 1.0 \
+-v \
+--seed -1 \
+--steps 28 \
+--vae-conv-direct \
+-H 1024 \
+-W 1024 \
+-o /output/output.png \
+-p "A photorealistic dragon"
+
+# flux2-klein
+podman run --rm \
+-v /home/ai/models:/models:z \
+-v /home/ai/output:/output:z \
+--device /dev/kfd \
+--device /dev/dri \
+localhost/stable-diffusion-cpp:latest \
+--diffusion-model /models/image/flux2-klein/flux-2-klein-9b-Q4_0.gguf \
+--vae /models/image/flux2-klein/ae.safetensors \
+--llm /models/image/flux2-klein/Qwen3-8B-Q4_K_M.gguf \
+--cfg-scale 1.0 \
+--steps 4 \
+-v \
+--seed -1 \
+--vae-conv-direct \
+-H 1024 \
+-W 1024 \
+-o /output/output.png \
+-p "A photorealistic dragon"
+
+# Edit with flux2-klein (put kontext_input.png in /home/ai/output first)
+podman run --rm \
+-v /home/ai/models:/models:z \
+-v /home/ai/output:/output:z \
+--device /dev/kfd \
+--device /dev/dri \
+localhost/stable-diffusion-cpp:latest \
+--diffusion-model /models/image/flux2-klein/flux-2-klein-9b-Q4_0.gguf \
+--vae /models/image/flux2-klein/ae.safetensors \
+--llm /models/image/flux2-klein/Qwen3-8B-Q4_K_M.gguf \
+--cfg-scale 1.0 \
+--sampling-method euler \
+-v \
+--vae-conv-direct \
+--steps 4 \
+-r /output/kontext_input.png \
+-p "change 'flux.cpp' to 'klein.cpp'"
+
 # Edit with flux kontext
 podman run --rm \
 -v /home/ai/models:/models:z \
@@ -271,6 +504,33 @@ podman run \
 ghcr.io/open-webui/open-webui:main
 ```
 
+Use the following connections:
+
+| Service              | Endpoint                               |
+| -------------------- | -------------------------------------- |
+| llama.cpp            | `http://host.containers.internal:8000` |
+| stable-diffusion.cpp | `http://host.containers.internal:1234` |
+
+## vLLM
+
+```bash
+mkdir -p /home/ai/vllm/.cache/huggingface
+
+# ROCm-recommended flags: video group for GPU access, SYS_PTRACE/seccomp for
+# debugging, host IPC for shared memory; set HF_TOKEN first for gated models
+podman run --rm \
+--device /dev/kfd \
+--device /dev/dri \
+--group-add=video \
+--cap-add=SYS_PTRACE \
+--security-opt seccomp=unconfined \
+--ipc=host \
+--env "HF_TOKEN=$HF_TOKEN" \
+-v /home/ai/vllm/.cache/huggingface:/root/.cache/huggingface:z \
+-p 8002:8000 \
+docker.io/vllm/vllm-openai-rocm:latest \
+--model Qwen/Qwen3-0.6B
+```
+
 ## Install the whole thing with quadlets (TM)
 
 ```bash
@@ -278,7 +538,7 @@
 scp -r active/device_framework_desktop/quadlets/* deskwork-ai:.config/containers/systemd/
 ssh deskwork-ai
 systemctl --user daemon-reload
-systemctl --user restart ai-pod.service
+systemctl --user restart ai-internal-pod.service ai-external-pod.service
 ```
 
 Note: all services will be available at
 `host.containers.internal`. So llama.cpp
 will be up at `http://host.containers.internal:8000`.
 
@@ -290,8 +550,10 @@
 ## Install the update script
 
 ```bash
 # 1. Builds the latest llama.cpp and stable-diffusion.cpp
 # 2. Pulls the latest open-webui
 # 3. Restarts all services
-scp active/device_framework_desktop/update-script.sh deskwork:
+scp active/device_framework_desktop/update-script.sh deskwork-ai:
 ssh deskwork-ai
 chmod +x update-script.sh
 ./update-script.sh
 ```
+
+## Voice Cloning
\ No newline at end of file
diff --git a/active/device_framework_desktop/quadlets/ai.network b/active/device_framework_desktop/quadlets/ai-external.network
similarity index 100%
rename from active/device_framework_desktop/quadlets/ai.network
rename to active/device_framework_desktop/quadlets/ai-external.network
diff --git a/active/device_framework_desktop/quadlets/ai-external.pod b/active/device_framework_desktop/quadlets/ai-external.pod
new file mode 100644
index 0000000..2dd4bbc
--- /dev/null
+++ b/active/device_framework_desktop/quadlets/ai-external.pod
@@ -0,0 +1,6 @@
+[Pod]
+# ai-external is the primary network
+Network=ai-external.network
+Network=ai-internal.network
+# open-webui
+PublishPort=8080:8080/tcp
\ No newline at end of file
diff --git a/active/device_framework_desktop/quadlets/ai-internal.network b/active/device_framework_desktop/quadlets/ai-internal.network
new file mode 100644
index 0000000..cf2c43f
--- /dev/null
+++ b/active/device_framework_desktop/quadlets/ai-internal.network
@@ -0,0 +1,3 @@
+[Network]
+IPv6=true
+Internal=true
\ No newline at end of file
diff --git a/active/device_framework_desktop/quadlets/ai-internal.pod b/active/device_framework_desktop/quadlets/ai-internal.pod
new file mode 100644
index 0000000..a5b22bb
--- /dev/null
+++ b/active/device_framework_desktop/quadlets/ai-internal.pod
@@ -0,0 +1,8 @@
+[Pod]
+Network=ai-internal.network
+# llama.cpp
+PublishPort=8000:8000/tcp
+# stable-diffusion.cpp gen
+PublishPort=1234:1234/tcp
+# stable-diffusion.cpp edit
+PublishPort=1235:1235/tcp
\ No newline at end of file
diff --git a/active/device_framework_desktop/quadlets/ai.pod b/active/device_framework_desktop/quadlets/ai.pod
deleted file mode 100644
index a646415..0000000
--- a/active/device_framework_desktop/quadlets/ai.pod
+++ /dev/null
@@ -1,8 +0,0 @@
-[Pod]
-Network=ai.network
-# llama.cpp
-PublishPort=8000:8000/tcp
-# open-webui
-PublishPort=8080:8080/tcp
-# stable-diffusion.cpp
-PublishPort=1234:1234/tcp
\ No newline at end of file
diff --git a/active/device_framework_desktop/quadlets/llama-server.container b/active/device_framework_desktop/quadlets/llama-server.container
index 2507aa7..8d0f43f 100644
--- a/active/device_framework_desktop/quadlets/llama-server.container
+++ b/active/device_framework_desktop/quadlets/llama-server.container
@@ -2,8 +2,8 @@
 Description=A Llama CPP Server Running GPT OSS 120b
 
 [Container]
-# Shared AI pod
-Pod=ai.pod
+# Shared AI internal pod
+Pod=ai-internal.pod
 
 # Image is built locally via podman build
 Image=localhost/llama-cpp-vulkan:latest
@@ -18,8 +18,6 @@ AddDevice=/dev/dri
 # Server command
 Exec=--port 8000 \
     -c 48000 \
-    -b 48000 \
-    -ub 500 \
     --perf \
     --n-gpu-layers all \
     --jinja \
@@ -27,7 +25,7 @@ Exec=--port 8000 \
     --models-dir /models
 
 # Health Check
-HealthCmd=CMD-SHELL curl --fail http://127.0.0.1:8000/props?model=gpt-oss-120b || exit 1
+HealthCmd=CMD-SHELL curl --fail http://127.0.0.1:8000/props || exit 1
 HealthInterval=10s
 HealthRetries=3
 HealthStartPeriod=10s
diff --git a/active/device_framework_desktop/quadlets/open-webui.container b/active/device_framework_desktop/quadlets/open-webui.container
index ed1e27b..8babd56 100644
--- a/active/device_framework_desktop/quadlets/open-webui.container
+++ b/active/device_framework_desktop/quadlets/open-webui.container
@@ -2,8 +2,8 @@
 Description=An Open Webui Frontend for Local AI Services
 
 [Container]
-# Shared AI pod
-Pod=ai.pod
+# Shared AI external pod
+Pod=ai-external.pod
 
 # Open Webui base image
 Image=ghcr.io/open-webui/open-webui:main
diff --git a/active/device_framework_desktop/quadlets/stable-diffusion-edit-server.container b/active/device_framework_desktop/quadlets/stable-diffusion-edit-server.container
index 6b5d6a1..e559074 100644
--- a/active/device_framework_desktop/quadlets/stable-diffusion-edit-server.container
+++ b/active/device_framework_desktop/quadlets/stable-diffusion-edit-server.container
@@ -2,8 +2,8 @@
 Description=A Stable Diffusion CPP Server for Editing Images
 
 [Container]
-# Shared AI pod
-Pod=ai.pod
+# Shared AI internal pod
+Pod=ai-internal.pod
 
 # Vulkan image for AMD GPU
 Image=localhost/stable-diffusion-cpp:latest
@@ -21,16 +21,14 @@ Entrypoint=/sd-server
 # Server args
 Exec=-l 0.0.0.0 \
     --listen-port 1235 \
-    --diffusion-model /models/image/flux-1-kontext/flux1-kontext-dev-Q4_K_M.gguf \
-    --vae /models/image/flux-1-kontext/ae.safetensors \
-    --clip_l /models/image/flux-1-kontext/clip_l.safetensors \
-    --t5xxl /models/image/flux-1-kontext/t5xxl_fp16.safetensors \
+    --diffusion-model /models/image/flux2-klein/flux-2-klein-9b-Q4_0.gguf \
+    --vae /models/image/flux2-klein/ae.safetensors \
+    --llm /models/image/flux2-klein/Qwen3-8B-Q4_K_M.gguf \
     --cfg-scale 1.0 \
     --sampling-method euler \
-    --vae-conv-direct \
-    --seed -1 \
-    --steps 28 \
-    -v
+    -v \
+    --steps 4 \
+    --vae-conv-direct
 
 [Service]
 Restart=always
diff --git a/active/device_framework_desktop/quadlets/stable-diffusion-gen-server.container b/active/device_framework_desktop/quadlets/stable-diffusion-gen-server.container
index 0ad1751..f82c174 100644
--- a/active/device_framework_desktop/quadlets/stable-diffusion-gen-server.container
+++ b/active/device_framework_desktop/quadlets/stable-diffusion-gen-server.container
@@ -2,8 +2,8 @@
 Description=A Stable Diffusion CPP Server for Generating Images
 
 [Container]
-# Shared AI pod
-Pod=ai.pod
+# Shared AI internal pod
+Pod=ai-internal.pod
 
 # Vulkan image for AMD GPU
 Image=localhost/stable-diffusion-cpp:latest
@@ -24,8 +24,6 @@ Exec=-l 0.0.0.0 \
     --diffusion-model /models/image/z-turbo/z_image_turbo-Q4_K.gguf \
     --vae /models/image/z-turbo/ae.safetensors \
     --llm /models/image/z-turbo/qwen_3_4b.safetensors \
-    -l 0.0.0.0 \
-    --listen-port 1234 \
     --cfg-scale 1.0 \
     --vae-conv-direct \
     -v \
diff --git a/active/device_framework_desktop/quadlets_beta/stable-diffusion-edit-server.container b/active/device_framework_desktop/quadlets_beta/stable-diffusion-edit-server.container
new file mode 100644
index 0000000..6b5d6a1
--- /dev/null
+++ b/active/device_framework_desktop/quadlets_beta/stable-diffusion-edit-server.container
@@ -0,0 +1,42 @@
+[Unit]
+Description=A Stable Diffusion CPP Server for Editing Images
+
+[Container]
+# Shared AI pod
+Pod=ai.pod
+
+# Vulkan image for AMD GPU
+Image=localhost/stable-diffusion-cpp:latest
+
+# Shared models directory
+Volume=/home/ai/models:/models:z
+
+# GPU Device
+AddDevice=/dev/kfd
+AddDevice=/dev/dri
+
+# Override entrypoint to use server
+Entrypoint=/sd-server
+
+# Server args
+Exec=-l 0.0.0.0 \
+    --listen-port 1235 \
+    --diffusion-model /models/image/flux-1-kontext/flux1-kontext-dev-Q4_K_M.gguf \
+    --vae /models/image/flux-1-kontext/ae.safetensors \
+    --clip_l /models/image/flux-1-kontext/clip_l.safetensors \
+    --t5xxl /models/image/flux-1-kontext/t5xxl_fp16.safetensors \
+    --cfg-scale 1.0 \
+    --sampling-method euler \
+    --vae-conv-direct \
+    --seed -1 \
+    --steps 28 \
+    -v
+
+[Service]
+Restart=always
+# Extend Timeout to allow time to pull the image
+TimeoutStartSec=900
+
+[Install]
+# Start by default on boot
+WantedBy=multi-user.target default.target
diff --git a/active/device_framework_desktop/update-script.sh b/active/device_framework_desktop/update-script.sh
index 6a15d70..cfc8a32 100644
--- a/active/device_framework_desktop/update-script.sh
+++ b/active/device_framework_desktop/update-script.sh
@@ -15,6 +15,8 @@ git pull
 git submodule update --init --recursive
 podman build -f Dockerfile.vulkan -t stable-diffusion-cpp:${BUILD_TAG} -t stable-diffusion-cpp:latest .
 
+systemctl --user restart ai-internal-pod
+
 podman image pull ghcr.io/open-webui/open-webui:main
 
-systemctl --user restart ai-pod
\ No newline at end of file
+systemctl --user restart ai-external-pod
\ No newline at end of file