All checks were successful
Podman DDNS Image / build-and-push-ddns (push) Successful in 1m9s
4.0 KiB
4.0 KiB
Framework Desktop
BIOS
- Set GPU memory to 96GB
- Add the following kernel args to /etc/default/grub (sets allocatable VRAM to
112GB): ttm.pages_limit=28027343750 ttm.page_pool_size=28027343750
- Update grub: grub2-mkconfig -o /boot/grub2/grub.cfg
- Reboot
Docker
# Add the upstream Docker CE repository for Fedora
dnf config-manager addrepo --from-repofile https://download.docker.com/linux/fedora/docker-ce.repo
# Install the engine, CLI, containerd, and the buildx/compose plugins
dnf install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
Llama.cpp
# Build the llama.cpp docker image (Vulkan backend)
git clone https://github.com/ggml-org/llama.cpp.git
# Timestamped tag so repeated builds don't clobber each other
export BUILD_TAG=$(date +"%Y-%m-%d-%H-%M-%S")
# Dockerfile path and build context are relative to the cloned repo,
# so point both at the llama.cpp/ directory (no cd required)
docker build -t "llama-cpp-vulkan:${BUILD_TAG}" \
  -f llama.cpp/.devops/vulkan.Dockerfile llama.cpp
# Run llama server with gpt-oss-120b (detached, auto-restarting, host networking)
docker run -d \
  --name llama-server \
  --restart always \
  --network host \
  --device /dev/kfd --device /dev/dri \
  -v llama-server-cache:/root/.cache \
  "llama-cpp-vulkan:${BUILD_TAG}" \
  -hf ggml-org/gpt-oss-120b-GGUF --ctx-size 0 --jinja -ub 2048 -b 2048 \
  --port 8000 --host 0.0.0.0 -n -1 --n-gpu-layers 999
# Run a second llama server with devstral-small-2 24b (port 8001)
docker run -d \
  --name llama-server-devstral \
  --restart always \
  --network host \
  --device /dev/kfd --device /dev/dri \
  -v llama-server-cache:/root/.cache \
  "llama-cpp-vulkan:${BUILD_TAG}" \
  -hf bartowski/mistralai_Devstral-Small-2-24B-Instruct-2512-GGUF \
  --ctx-size 0 --jinja -ub 2048 -b 2048 \
  --port 8001 --host 0.0.0.0 -n -1 --n-gpu-layers 999
# Firewall
# Open both llama.cpp server ports: 8000 (gpt-oss) and 8001 (devstral) —
# the devstral server above listens on 8001, so it must be opened too
firewall-cmd --add-port=8000/tcp --permanent
firewall-cmd --add-port=8001/tcp --permanent
firewall-cmd --reload
Ollama
# Run the ollama server (API available on port 11434)
docker run -d \
  --name ollama \
  --restart always \
  --network host \
  --device /dev/kfd --device /dev/dri \
  -e OLLAMA_VULKAN=1 \
  -v ollama:/root/.ollama \
  docker.io/ollama/ollama:0.13.5
# Pull and chat with a model interactively
docker exec -it ollama ollama run gpt-oss:20b
# Firewall: expose the ollama API port
firewall-cmd --permanent --add-port=11434/tcp
firewall-cmd --reload
Anything LLM
Per the docs:
Note: --cap-add SYS_ADMIN is a required flag if you want to scrape webpages. We use PuppeteerJS to scrape website links, and --cap-add SYS_ADMIN lets us use sandboxed Chromium across all runtimes for best security practices.
# Create the env file for AnythingLLM (-p: idempotent if the dir already exists)
mkdir -p /etc/anything-llm
touch /etc/anything-llm/.env
# UID 1000 is what the container runs as; the file holds secrets, so lock it down
chown 1000:1000 /etc/anything-llm/.env
chmod 600 /etc/anything-llm/.env
# Add JWT_SECRET=<random string> to this file
vim /etc/anything-llm/.env
# Server will be accessible on port 3001
# Connect llama.cpp as a generic OpenAI LLM provider and use host
# http://172.17.0.1:8000/v1
# Chat model name doesn't matter.
docker run \
  -d \
  --restart always \
  --network host \
  --name anythingllm \
  --cap-add SYS_ADMIN \
  -v anythingllm:/app/server/storage \
  -v /etc/anything-llm/.env:/app/server/.env \
  -e STORAGE_DIR="/app/server/storage" \
  docker.io/mintplexlabs/anythingllm
# Firewall: expose the AnythingLLM web UI
firewall-cmd --permanent --add-port=3001/tcp
firewall-cmd --reload
Stable Diffusion CPP
# z-turbo: generate a 512x1024 image with the Z-Image Turbo model
docker run --rm \
  --device /dev/kfd --device /dev/dri \
  -v ./models:/models \
  -v ./build:/output \
  ghcr.io/leejet/stable-diffusion.cpp:master-vulkan \
  --diffusion-model /models/z_turbo/z_image_turbo_bf16.safetensors \
  --vae /models/z_turbo/ae.safetensors \
  --llm /models/z_turbo/qwen_3_4b.safetensors \
  --cfg-scale 1.0 -v --diffusion-fa \
  -H 1024 -W 512 \
  --seed -1 \
  -o /output/output.png \
  -p "Framework Laptop 13"
# Flux2: generate an image with the FLUX.2 dev model (euler sampler)
docker run --rm \
  --device /dev/kfd --device /dev/dri \
  -v ./models:/models \
  -v ./build:/output \
  ghcr.io/leejet/stable-diffusion.cpp:master-vulkan \
  --diffusion-model /models/flux2/flux2-dev-Q8_0.gguf \
  --vae /models/flux2/ae.safetensors \
  --llm /models/flux2/Mistral-Small-3.2-24B-Instruct-2506-Q8_0.gguf \
  --cfg-scale 1.0 \
  --sampling-method euler \
  -v --diffusion-fa \
  -o /output/output.png \
  -p "A picture of sign that says 'framework'"