fix vulkan 4gb limit
2026-01-21 13:33:33 -05:00
parent 70259d9542
commit 4c0a263d50
3 changed files with 61 additions and 15 deletions


@@ -7,6 +7,7 @@
 - [Volume Locations](#volume-locations)
 - [Setup](#setup)
 - [Create the AI user](#create-the-ai-user)
+- [Helper aliases](#helper-aliases)
 - [Create the models dir](#create-the-models-dir)
 - [Install the Hugging Face CLI](#install-the-hugging-face-cli)
 - [Download models](#download-models)
@@ -53,7 +54,9 @@ mkdir -p ~/.config/containers/systemd/
 Models are big. You'll want some tools to help find large files quickly when space runs out.
-Add this to your .bashrc:
+### Helper aliases
+
+Add these to your .bashrc:
 ```bash
 # Calculate all folder sizes in current dir
@@ -61,6 +64,16 @@ alias {dudir,dud}='du -h --max-depth 1 | sort -h'
 # Calculate all file sizes in current dir
 alias {dufile,duf}='ls -lhSr'
+
+# Restart llama-server / follow logs
+alias llama-reload="systemctl --user daemon-reload && systemctl --user restart llama-server.service"
+alias llama-logs="journalctl --user -fu llama-server"
+
+# Restart stable diffusion gen and edit servers / follow logs
+alias sd-gen-reload='systemctl --user daemon-reload && systemctl --user restart stable-diffusion-gen-server'
+alias sd-gen-logs='journalctl --user -xeu stable-diffusion-gen-server'
+alias sd-edit-reload='systemctl --user daemon-reload && systemctl --user restart stable-diffusion-edit-server'
+alias sd-edit-logs='journalctl --user -xeu stable-diffusion-edit-server'
 ```
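A quick usage sketch for the size aliases above (the model paths are only examples matching the layout created below):

```bash
# Biggest subfolders under the models tree
cd /home/ai/models && dud
# Biggest files inside one model's folder (example path)
cd /home/ai/models/text/gpt-oss-120b && duf
```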
 ### Create the models dir
@@ -109,6 +122,10 @@ hf download --local-dir /home/ai/models/text/devstral-small-2-24b unsloth/Devstr
 mkdir /home/ai/models/text/ministral-3-14b
 hf download --local-dir /home/ai/models/text/ministral-3-14b ggml-org/Ministral-3-14B-Reasoning-2512-GGUF
+
+# ministral-3-3b-instruct
+mkdir /home/ai/models/text/ministral-3-3b-instruct
+hf download --local-dir /home/ai/models/text/ministral-3-3b-instruct ggml-org/Ministral-3-3B-Instruct-2512-GGUF
 # nemotron-nano-30b
 mkdir /home/ai/models/text/nemotron-nano-30b
 hf download --local-dir /home/ai/models/text/nemotron-nano-30b ggml-org/Nemotron-Nano-3-30B-A3B-GGUF Nemotron-Nano-3-30B-A3B-Q4_K_M.gguf
@@ -116,6 +133,21 @@ hf download --local-dir /home/ai/models/text/nemotron-nano-30b ggml-org/Nemotron
 #### Image models
+```bash
+# z-turbo
+mkdir /home/ai/models/image/z-turbo
+hf download --local-dir /home/ai/models/image/z-turbo leejet/Z-Image-Turbo-GGUF z_image_turbo-Q4_K.gguf
+hf download --local-dir /home/ai/models/image/z-turbo black-forest-labs/FLUX.1-schnell ae.safetensors
+hf download --local-dir /home/ai/models/image/z-turbo unsloth/Qwen3-4B-Instruct-2507-GGUF Qwen3-4B-Instruct-2507-Q4_K_M.gguf
+
+# flux-1-kontext
+mkdir /home/ai/models/image/flux-1-kontext
+hf download --local-dir /home/ai/models/image/flux-1-kontext QuantStack/FLUX.1-Kontext-dev-GGUF flux1-kontext-dev-Q4_K_M.gguf
+hf download --local-dir /home/ai/models/image/flux-1-kontext black-forest-labs/FLUX.1-dev ae.safetensors
+hf download --local-dir /home/ai/models/image/flux-1-kontext comfyanonymous/flux_text_encoders clip_l.safetensors
+hf download --local-dir /home/ai/models/image/flux-1-kontext comfyanonymous/flux_text_encoders t5xxl_fp16.safetensors
+```
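For orientation, the flux-1-kontext files above map onto stable-diffusion.cpp's component flags roughly as follows. This is only a sketch, not the exact server command used later in this README; the prompt and image paths are placeholders, and the flag set should be checked against `sd --help` for your build:

```bash
sd \
  --diffusion-model /home/ai/models/image/flux-1-kontext/flux1-kontext-dev-Q4_K_M.gguf \
  --vae /home/ai/models/image/flux-1-kontext/ae.safetensors \
  --clip_l /home/ai/models/image/flux-1-kontext/clip_l.safetensors \
  --t5xxl /home/ai/models/image/flux-1-kontext/t5xxl_fp16.safetensors \
  -r input.png \
  -p "example edit prompt" \
  -o output.png
```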
 ### Create the systemd-ai pod
 You'll at least want the ai pod and network. Copy `ai.pod` and `ai.network` out
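Copying them into the quadlet directory from earlier might look like this (a sketch; the source path is wherever you keep these files):

```bash
cp ai.pod ai.network ~/.config/containers/systemd/
```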
@@ -132,7 +164,9 @@ Then run `systemctl --user daemon-reload && systemctl --user start ai-pod`
 git clone https://github.com/ggml-org/llama.cpp.git
 cd llama.cpp
 export BUILD_TAG=$(date +"%Y-%m-%d-%H-%M-%S")
-podman build -t llama-cpp-vulkan:${BUILD_TAG} -t llama-cpp-vulkan:latest -f .devops/vulkan.Dockerfile .
+# Vulkan
+podman build -f .devops/vulkan.Dockerfile -t llama-cpp-vulkan:${BUILD_TAG} -t llama-cpp-vulkan:latest .

 # Run llama server (Available on port 8000)
 # Add `--n-cpu-moe 32` to gpt-oss-120b to keep a minimal number of experts on the GPU
@@ -145,9 +179,9 @@ podman run \
   -v /home/ai/models/text:/models:z \
   localhost/llama-cpp-vulkan:2026-01-19-18-00-02 \
   --port 8000 \
-  -c 0 \
-  -b 2048 \
-  -ub 2048 \
+  -c 64000 \
+  -b 64000 \
+  -ub 500 \
   --perf \
   --n-gpu-layers all \
   --jinja \
@@ -166,6 +200,8 @@ git clone https://github.com/leejet/stable-diffusion.cpp.git
 cd stable-diffusion.cpp
 git submodule update --init --recursive
 export BUILD_TAG=$(date +"%Y-%m-%d-%H-%M-%S")
+# Vulkan
 podman build -f Dockerfile.vulkan -t stable-diffusion-cpp:${BUILD_TAG} -t stable-diffusion-cpp:latest .
 ```
@@ -204,12 +240,14 @@ localhost/stable-diffusion-cpp:latest \
   --cfg-scale 1.0 \
   --sampling-method euler \
   --seed -1 \
-  --steps 20 \
-  -H 1024 \
-  -W 1024 \
+  --steps 28 \
+  --vae-conv-direct \
+  -v \
+  -H 512 \
+  -W 512 \
+  -o /output/output.png \
   -r /output/everquest_logo.png \
-  -p "change 'EverQuest' to 'EverSteak'" \
-  -o /output/output.png
+  -p "Add the text 'EverQuest'"
 ```

 ## open-webui


@@ -17,15 +17,23 @@ AddDevice=/dev/dri
 # Server command
 Exec=--port 8000 \
-  -c 0 \
-  -b 2048 \
-  -ub 2048 \
+  -c 48000 \
+  -b 48000 \
+  -ub 500 \
   --perf \
   --n-gpu-layers all \
   --jinja \
   --models-max 1 \
   --models-dir /models

+# Health Check
+HealthCmd=CMD-SHELL curl --fail http://127.0.0.1:8000/props?model=gpt-oss-120b || exit 1
+HealthInterval=10s
+HealthRetries=3
+HealthStartPeriod=10s
+HealthTimeout=30s
+HealthOnFailure=kill

 [Service]
 Restart=always
 # Extend Timeout to allow time to pull the image


@@ -28,9 +28,9 @@ Exec=-l 0.0.0.0 \
   --cfg-scale 1.0 \
   --sampling-method euler \
   --vae-conv-direct \
-  -v \
   --seed -1 \
-  --steps 28
+  --steps 28 \
+  -v

 [Service]
 Restart=always