From 4c0a263d509c08fb4a2ec463c0d79099f46a6be5 Mon Sep 17 00:00:00 2001
From: ducoterra
Date: Wed, 21 Jan 2026 13:33:33 -0500
Subject: [PATCH] fix vulkan 4gb limit

---
 .../framework_desktop.md                     | 58 +++++++++++++++----
 .../quadlets/llama-server.container          | 14 ++++-
 .../stable-diffusion-edit-server.container   |  4 +-
 3 files changed, 61 insertions(+), 15 deletions(-)

diff --git a/active/device_framework_desktop/framework_desktop.md b/active/device_framework_desktop/framework_desktop.md
index 059d114..0bcd23f 100644
--- a/active/device_framework_desktop/framework_desktop.md
+++ b/active/device_framework_desktop/framework_desktop.md
@@ -7,6 +7,7 @@
   - [Volume Locations](#volume-locations)
 - [Setup](#setup)
   - [Create the AI user](#create-the-ai-user)
+  - [Helper aliases](#helper-aliases)
   - [Create the models dir](#create-the-models-dir)
   - [Install the Hugging Face CLI](#install-the-hugging-face-cli)
   - [Download models](#download-models)
@@ -53,7 +54,9 @@ mkdir -p ~/.config/containers/systemd/
 Models are big. You'll want some tools to help find large files quickly when
 space runs out.
 
-Add this to your .bashrc:
+### Helper aliases
+
+Add these to your .bashrc:
 
 ```bash
 # Calculate all folder sizes in current dir
@@ -61,6 +64,16 @@ alias {dudir,dud}='du -h --max-depth 1 | sort -h'
 
 # Calculate all file sizes in current dir
 alias {dufile,duf}='ls -lhSr'
+
+# Restart llama-server / follow logs
+alias llama-reload="systemctl --user daemon-reload && systemctl --user restart llama-server.service"
+alias llama-logs="journalctl --user -fu llama-server"
+
+# Restart the stable-diffusion gen/edit servers / follow logs
+alias sd-gen-reload='systemctl --user daemon-reload && systemctl --user restart stable-diffusion-gen-server'
+alias sd-gen-logs='journalctl --user -xeu stable-diffusion-gen-server'
+alias sd-edit-reload='systemctl --user daemon-reload && systemctl --user restart stable-diffusion-edit-server'
+alias sd-edit-logs='journalctl --user -xeu stable-diffusion-edit-server'
 ```
 
 ### Create the models dir
@@ -109,6 +122,10 @@ hf download --local-dir /home/ai/models/text/devstral-small-2-24b unsloth/Devstr
 mkdir /home/ai/models/text/ministral-3-14b
 hf download --local-dir /home/ai/models/text/ministral-3-14b ggml-org/Ministral-3-14B-Reasoning-2512-GGUF
 
+# ministral-3-3b-instruct
+mkdir /home/ai/models/text/ministral-3-3b-instruct
+hf download --local-dir /home/ai/models/text/ministral-3-3b-instruct ggml-org/Ministral-3-3B-Instruct-2512-GGUF
+
 # nemotron-nano-30b
 mkdir /home/ai/models/text/nemotron-nano-30b
 hf download --local-dir /home/ai/models/text/nemotron-nano-30b ggml-org/Nemotron-Nano-3-30B-A3B-GGUF Nemotron-Nano-3-30B-A3B-Q4_K_M.gguf
@@ -116,6 +133,21 @@ hf download --local-dir /home/ai/models/text/nemotron-nano-30b ggml-org/Nemotron
 
 #### Image models
 
+```bash
+# z-turbo
+mkdir /home/ai/models/image/z-turbo
+hf download --local-dir /home/ai/models/image/z-turbo leejet/Z-Image-Turbo-GGUF z_image_turbo-Q4_K.gguf
+hf download --local-dir /home/ai/models/image/z-turbo black-forest-labs/FLUX.1-schnell ae.safetensors
+hf download --local-dir /home/ai/models/image/z-turbo unsloth/Qwen3-4B-Instruct-2507-GGUF Qwen3-4B-Instruct-2507-Q4_K_M.gguf
+
+# flux-1-kontext
+mkdir /home/ai/models/image/flux-1-kontext
+hf download --local-dir /home/ai/models/image/flux-1-kontext QuantStack/FLUX.1-Kontext-dev-GGUF flux1-kontext-dev-Q4_K_M.gguf
+hf download --local-dir /home/ai/models/image/flux-1-kontext black-forest-labs/FLUX.1-dev ae.safetensors
+hf download --local-dir /home/ai/models/image/flux-1-kontext comfyanonymous/flux_text_encoders clip_l.safetensors
+hf download --local-dir /home/ai/models/image/flux-1-kontext comfyanonymous/flux_text_encoders t5xxl_fp16.safetensors
+```
+
 ### Create the systemd-ai pod
 
 You'll at least want the ai pod and network. Copy `ai.pod` and `ai.network` out
@@ -132,7 +164,9 @@ Then run `systemctl --user daemon-reload && systemctl --user start ai-pod`
 git clone https://github.com/ggml-org/llama.cpp.git
 cd llama.cpp
 export BUILD_TAG=$(date +"%Y-%m-%d-%H-%M-%S")
-podman build -t llama-cpp-vulkan:${BUILD_TAG} -t llama-cpp-vulkan:latest -f .devops/vulkan.Dockerfile .
+
+# Vulkan
+podman build -f .devops/vulkan.Dockerfile -t llama-cpp-vulkan:${BUILD_TAG} -t llama-cpp-vulkan:latest .
 
 # Run llama server (available on port 8000)
 # Add `--n-cpu-moe 32` for gpt-oss-120b to keep a minimal number of experts on the GPU
@@ -145,9 +179,9 @@ podman run \
 -v /home/ai/models/text:/models:z \
 localhost/llama-cpp-vulkan:2026-01-19-18-00-02 \
 --port 8000 \
--c 0 \
--b 2048 \
--ub 2048 \
+-c 64000 \
+-b 64000 \
+-ub 500 \
 --perf \
 --n-gpu-layers all \
 --jinja \
@@ -166,6 +200,8 @@ git clone https://github.com/leejet/stable-diffusion.cpp.git
 cd stable-diffusion.cpp
 git submodule update --init --recursive
 export BUILD_TAG=$(date +"%Y-%m-%d-%H-%M-%S")
+
+# Vulkan
 podman build -f Dockerfile.vulkan -t stable-diffusion-cpp:${BUILD_TAG} -t stable-diffusion-cpp:latest .
 ```
 
@@ -204,12 +240,14 @@ localhost/stable-diffusion-cpp:latest \
 --cfg-scale 1.0 \
 --sampling-method euler \
 --seed -1 \
---steps 20 \
--H 1024 \
--W 1024 \
+--steps 28 \
+--vae-conv-direct \
+-v \
+-H 512 \
+-W 512 \
+-o /output/output.png \
 -r /output/everquest_logo.png \
--p "change 'EverQuest' to 'EverSteak'" \
--o /output/output.png
+-p "Add the text 'EverQuest'"
 ```
 
 ## open-webui
diff --git a/active/device_framework_desktop/quadlets/llama-server.container b/active/device_framework_desktop/quadlets/llama-server.container
index b9fe840..2507aa7 100644
--- a/active/device_framework_desktop/quadlets/llama-server.container
+++ b/active/device_framework_desktop/quadlets/llama-server.container
@@ -17,15 +17,23 @@ AddDevice=/dev/dri
 
 # Server command
 Exec=--port 8000 \
-    -c 0 \
-    -b 2048 \
-    -ub 2048 \
+    -c 48000 \
+    -b 48000 \
+    -ub 500 \
     --perf \
     --n-gpu-layers all \
     --jinja \
     --models-max 1 \
     --models-dir /models
 
+# Health Check
+HealthCmd=CMD-SHELL curl --fail http://127.0.0.1:8000/props?model=gpt-oss-120b || exit 1
+HealthInterval=10s
+HealthRetries=3
+HealthStartPeriod=10s
+HealthTimeout=30s
+HealthOnFailure=kill
+
 [Service]
 Restart=always
 # Extend Timeout to allow time to pull the image
diff --git a/active/device_framework_desktop/quadlets/stable-diffusion-edit-server.container b/active/device_framework_desktop/quadlets/stable-diffusion-edit-server.container
index f7fa255..6b5d6a1 100644
--- a/active/device_framework_desktop/quadlets/stable-diffusion-edit-server.container
+++ b/active/device_framework_desktop/quadlets/stable-diffusion-edit-server.container
@@ -28,9 +28,9 @@ Exec=-l 0.0.0.0 \
     --cfg-scale 1.0 \
     --sampling-method euler \
     --vae-conv-direct \
-    -v \
     --seed -1 \
-    --steps 28
+    --steps 28 \
+    -v
 
 [Service]
 Restart=always
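
---

Note (editor's sketch, not part of the commit): the `-c`/`-b`/`-ub` changes above appear to work around Vulkan's ~4 GiB per-buffer/per-allocation ceiling by capping the physical batch (`-ub 500`) while raising the context and logical batch sizes. A quick way to sanity-check this on your own GPU, assuming `vulkaninfo` (from your distro's vulkan-tools package) is installed and the `llama-reload` alias the patch adds has been sourced:

```bash
# Print the Vulkan limits this patch works around
# (assumes vulkaninfo from vulkan-tools is installed)
vulkaninfo 2>/dev/null | grep -E 'maxStorageBufferRange|maxMemoryAllocationSize'

# Restart llama-server with the new flags and confirm it answers;
# /props is the same endpoint the new HealthCmd polls
llama-reload
curl --fail http://127.0.0.1:8000/props
```

If generation still fails with buffer-allocation errors at large contexts, lowering `-ub` further is the knob to try before reducing `-c`.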