# Framework Desktop

## BIOS

1. Set GPU memory to 96GB.
2. Add the following kernel args to `/etc/default/grub` to raise allocatable VRAM to 112GB: `ttm.pages_limit=28027343750 ttm.page_pool_size=28027343750`
3. Update grub: `grub2-mkconfig -o /boot/grub2/grub.cfg`
4. Reboot.

## Notes

### Update quadlets

```bash
scp -r active/device_framework_desktop/quadlets/* deskwork-ai:quadlets/
podman quadlet install --replace quadlets/*
```

### Volume Locations

`~/.local/share/containers/storage/volumes/`

## User

```bash
# Create your local ai user. This will be the user you launch podman processes from.
useradd -m ai
loginctl enable-linger ai
su -l ai
mkdir -p ~/.config/containers/systemd/
```

## Llama.cpp

```bash
# Build the llama.cpp container image
git clone https://github.com/ggml-org/llama.cpp.git
cd llama.cpp
export BUILD_TAG=$(date +"%Y-%m-%d-%H-%M-%S")
podman build -t llama-cpp-vulkan:${BUILD_TAG} -f .devops/vulkan.Dockerfile .

# Run llama server with gpt-oss-120b
podman run \
  -d \
  --replace \
  --restart always \
  --name=llama-server \
  -p 8000:8000 \
  --device=/dev/kfd \
  --device=/dev/dri \
  -v llama-server-cache:/root/.cache \
  localhost/llama-cpp-vulkan:${BUILD_TAG} \
  -hf ggml-org/gpt-oss-120b-GGUF --ctx-size 32000 --jinja -ub 2048 -b 2048 \
  --port 8000 --host 0.0.0.0 -n -1 --n-gpu-layers 999

# To enable autostart, you'll need to create a quadlet.
# Quadlets are documented in the podman manual pages;
# search for "EXAMPLES" when you run the command below.
# Put your quadlet at ~/.config/containers/systemd/
# (a sketch follows this code block).
man "podman-systemd.unit(5)"

# Run llama server with devstral-small-2 24b
podman run \
  -d \
  --name=llama-server-devstral \
  --network=host \
  --device=/dev/kfd \
  --device=/dev/dri \
  -v llama-server-cache:/root/.cache \
  llama-cpp-vulkan:${BUILD_TAG} \
  -hf bartowski/mistralai_Devstral-Small-2-24B-Instruct-2512-GGUF \
  --ctx-size 0 --jinja -ub 2048 -b 2048 \
  --port 8001 --host 0.0.0.0 -n -1 --n-gpu-layers 999

# Firewall
firewall-cmd --add-port=8000/tcp --permanent
# The devstral server listens on 8001; open it too if accessing it remotely.
firewall-cmd --add-port=8001/tcp --permanent
firewall-cmd --reload
```
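As referenced above, a quadlet lets systemd start the server automatically. A minimal sketch for the gpt-oss server, using key names from `podman-systemd.unit(5)`; the file name and image tag are assumptions, so adjust them to match what you built:

```bash
# Hypothetical quadlet sketch; keys are from podman-systemd.unit(5).
cat > ~/.config/containers/systemd/llama-server.container <<'EOF'
[Unit]
Description=llama.cpp server (gpt-oss-120b)

[Container]
# Assumption: substitute the timestamped tag you built above
Image=localhost/llama-cpp-vulkan:latest
ContainerName=llama-server
PublishPort=8000:8000
AddDevice=/dev/kfd
AddDevice=/dev/dri
Volume=llama-server-cache:/root/.cache
Exec=-hf ggml-org/gpt-oss-120b-GGUF --ctx-size 32000 --jinja -ub 2048 -b 2048 --port 8000 --host 0.0.0.0 -n -1 --n-gpu-layers 999

[Service]
Restart=always

[Install]
WantedBy=default.target
EOF

# Generate the unit and start it
systemctl --user daemon-reload
systemctl --user start llama-server.service
```

Because the User section runs `loginctl enable-linger ai`, the generated unit starts at boot without an active login session.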
## Ollama

```bash
# Install CLI
curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | tar xz -C ~/.local

# Add `export OLLAMA_HOST=127.0.0.1` to this file
vim ~/.bashrc.d/ollama.sh
```

```bash
# Run ollama
# Will be available on port 11434
podman run \
  -d \
  --restart always \
  --device /dev/kfd \
  --device /dev/dri \
  -v ollama:/root/.ollama \
  -e OLLAMA_VULKAN=1 \
  --name ollama \
  --network host \
  docker.io/ollama/ollama:0.13.5

# Run a model
podman exec -it ollama ollama run gpt-oss:20b

# Firewall
firewall-cmd --add-port=11434/tcp --permanent
firewall-cmd --reload
```

## Anything LLM

Per [the docs](https://docs.anythingllm.com/installation-docker/cloud-docker):

> Note --cap-add SYS_ADMIN is a required command if you want to scrape webpages.
> We use PuppeteerJS to scrape websites links and --cap-add SYS_ADMIN lets us
> use sandboxed Chromium across all runtimes for best security practices

```bash
mkdir -p /etc/anything-llm
touch /etc/anything-llm/.env
chown 1000:1000 /etc/anything-llm/.env
chmod 600 /etc/anything-llm/.env

# Add JWT_SECRET=<random string> to this file,
# e.g. generated with: openssl rand -hex 32
vim /etc/anything-llm/.env

# Server will be accessible on port 3001.
# Connect llama.cpp as a generic OpenAI LLM provider; with host networking
# the base URL is http://127.0.0.1:8000/v1 (llama-server's port, not 3001).
# Chat model name doesn't matter.
podman run \
  -d \
  --restart always \
  --network host \
  --name anythingllm \
  --cap-add SYS_ADMIN \
  -v anythingllm:/app/server/storage \
  -v /etc/anything-llm/.env:/app/server/.env \
  -e STORAGE_DIR="/app/server/storage" \
  docker.io/mintplexlabs/anythingllm

# Firewall
firewall-cmd --add-port=3001/tcp --permanent
firewall-cmd --reload
```

## Stable Diffusion CPP

```bash
# z-turbo
podman run --rm \
  -v /home/ai/stable-diffusion.cpp/models:/models:z \
  -v /home/ai/stable-diffusion.cpp/output:/output:z \
  --device /dev/kfd \
  --device /dev/dri \
  ghcr.io/leejet/stable-diffusion.cpp:master-vulkan \
  --diffusion-model /models/z_turbo/z_image_turbo_bf16.safetensors \
  --vae /models/z_turbo/ae.safetensors \
  --llm /models/z_turbo/qwen_3_4b.safetensors \
  --cfg-scale 1.0 \
  -v \
  -H 1024 \
  -W 512 \
  --seed -1 \
  -o /output/output.png \
  -p "Framework Laptop 13"

# Flux
podman run --rm \
  -v /srv/stable-diffusion.cpp/models:/models:z \
  -v ./output:/output:z \
  --device /dev/kfd \
  --device /dev/dri \
  ghcr.io/leejet/stable-diffusion.cpp:master-vulkan \
  --diffusion-model /models/flux/flux1-dev-q4_k.gguf \
  --vae /models/flux/ae.safetensors \
  --clip_l /models/flux/clip_l.safetensors \
  --t5xxl /models/flux/t5xxl_fp16.safetensors \
  --cfg-scale 1.0 \
  --sampling-method euler \
  -v \
  -H 512 \
  -W 512 \
  --seed -1 \
  --steps 20 \
  -o /output/output.png \
  -p "An Everquest video game poster but with ribeye steaks for heads with the words 'EverSteak'"

# Flux2
podman run --rm \
  -v /home/ai/stable-diffusion.cpp/models:/models:z \
  -v /home/ai/stable-diffusion.cpp/output:/output:z \
  --device /dev/kfd \
  --device /dev/dri \
  ghcr.io/leejet/stable-diffusion.cpp:master-vulkan \
  --diffusion-model /models/flux2/flux2-dev-Q8_0.gguf \
  --vae /models/flux2/ae.safetensors \
  --llm /models/flux2/Mistral-Small-3.2-24B-Instruct-2506-Q8_0.gguf \
  --cfg-scale 1.0 \
  --sampling-method euler \
  -v \
  -H 512 \
  -W 1024 \
  --seed -1 \
  --steps 10 \
  -o /output/output.png \
  -p "A picture of sign that says 'framework'"

# Qwen
podman run --rm \
  -v /home/ai/stable-diffusion.cpp/models:/models:z \
  -v /home/ai/stable-diffusion.cpp/output:/output:z \
  --device /dev/kfd \
  --device /dev/dri \
  ghcr.io/leejet/stable-diffusion.cpp:master-vulkan \
  --diffusion-model /models/qwen_image/Qwen_Image-Q4_K_M.gguf \
  --vae /models/qwen_image/qwen_image_vae.safetensors \
  --llm /models/qwen_image/Qwen2.5-VL-7B-Instruct.Q4_K_M.gguf \
  --cfg-scale 2.5 \
  --sampling-method euler \
  -v \
  --offload-to-cpu \
  -H 512 -W 512 \
  --flow-shift 3 \
  --seed -1 \
  -o /output/output.png \
  -p 'Everquest DND mash up poster that says "ever dungeons and dragons"'

# SD3
podman run --rm \
  -v /home/ai/stable-diffusion.cpp/models:/models:z \
  -v /home/ai/stable-diffusion.cpp/output:/output:z \
  --device /dev/kfd \
  --device /dev/dri \
  ghcr.io/leejet/stable-diffusion.cpp:master-vulkan \
  -m /models/sd3/sd3.5_large.safetensors \
  --clip_l /models/sd3/clip_l.safetensors \
  --clip_g /models/sd3/clip_g.safetensors \
  --t5xxl /models/sd3/t5xxl_fp16.safetensors \
  -H 512 -W 512 \
  --cfg-scale 4.5 \
  --sampling-method euler \
  -v \
  --seed -1 \
  -o /output/output.png \
  -p 'Everquest DND mash up poster that says "ever dungeons and dragons"'
```

### Stable Diffusion CPP Server

Uses OpenAI-compatible endpoints.

```bash
# z-turbo server
podman run \
  -d \
  --name stable-diffusion-cpp-server \
  -v /srv/stable-diffusion.cpp/models:/models \
  -v /srv/stable-diffusion.cpp/build:/output \
  --device /dev/kfd \
  --device /dev/dri \
  --entrypoint "/sd-server" \
  --network host \
  ghcr.io/leejet/stable-diffusion.cpp:master-vulkan \
  --diffusion-model /models/z_turbo/z_image_turbo_bf16.safetensors \
  --vae /models/z_turbo/ae.safetensors \
  --llm /models/z_turbo/qwen_3_4b.safetensors \
  --cfg-scale 1.0 \
  -v \
  --diffusion-fa \
  -H 1024 \
  -W 512 \
  --seed -1 \
  -l 0.0.0.0
```
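A quick way to exercise the server is to POST to the OpenAI-style image endpoint. A minimal sketch under assumptions: the `/v1/images/generations` path follows the OpenAI images API convention the server advertises compatibility with, and the port is a guess, so substitute whatever `sd-server` reports on startup:

```bash
# Assumptions: endpoint path per the OpenAI images API; port 8080 is a guess,
# use the one sd-server prints when it starts.
curl http://127.0.0.1:8080/v1/images/generations \
  -H "Content-Type: application/json" \
  -d '{"prompt": "Framework Laptop 13", "n": 1, "size": "512x1024"}' \
  -o response.json
```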
## Open WebUI

```bash
# Will be available on port 8080
podman run \
  -d \
  --network host \
  -v open-webui:/app/backend/data \
  --name open-webui \
  --restart always \
  ghcr.io/open-webui/open-webui:main
```
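If the UI should be reachable from other machines, the same firewalld pattern used in the sections above applies:

```bash
# Open WebUI's port, mirroring the firewall steps in the sections above
firewall-cmd --add-port=8080/tcp --permanent
firewall-cmd --reload
```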