# Framework Desktop

## BIOS

1. Set GPU memory to 96GB.
2. Add the following kernel args to `GRUB_CMDLINE_LINUX` in `/etc/default/grub` (sets allocatable VRAM to 112GB: 27343750 pages × 4096 bytes = 112GB): `ttm.pages_limit=27343750 ttm.page_pool_size=27343750`
3. Update grub: `grub2-mkconfig -o /boot/grub2/grub.cfg`
4. Reboot.

## Docker

```bash
dnf config-manager addrepo \
  --from-repofile https://download.docker.com/linux/fedora/docker-ce.repo
dnf install \
  docker-ce \
  docker-ce-cli \
  containerd.io \
  docker-buildx-plugin \
  docker-compose-plugin

# Start and enable the Docker daemon
systemctl enable --now docker
```

## Llama.cpp

```bash
# Build the llama.cpp docker image
git clone https://github.com/ggml-org/llama.cpp.git
cd llama.cpp
export BUILD_TAG=$(date +"%Y-%m-%d-%H-%M-%S")
docker build -t llama-cpp-vulkan:${BUILD_TAG} -f .devops/vulkan.Dockerfile .

# Run llama server with gpt-oss-120b
docker run \
  -d \
  --restart always \
  --name=llama-server \
  --network=host \
  --device=/dev/kfd \
  --device=/dev/dri \
  -v llama-server-cache:/root/.cache \
  llama-cpp-vulkan:${BUILD_TAG} \
  -hf ggml-org/gpt-oss-120b-GGUF --ctx-size 0 --jinja -ub 2048 -b 2048 \
  --port 8000 --host 0.0.0.0 -n -1 --n-gpu-layers 999

# Run llama server with devstral-small-2 24b
docker run \
  -d \
  --restart always \
  --name=llama-server-devstral \
  --network=host \
  --device=/dev/kfd \
  --device=/dev/dri \
  -v llama-server-cache:/root/.cache \
  llama-cpp-vulkan:${BUILD_TAG} \
  -hf bartowski/mistralai_Devstral-Small-2-24B-Instruct-2512-GGUF \
  --ctx-size 0 --jinja -ub 2048 -b 2048 \
  --port 8001 --host 0.0.0.0 -n -1 --n-gpu-layers 999

# Firewall (8000 for gpt-oss, 8001 for devstral)
firewall-cmd --add-port=8000/tcp --permanent
firewall-cmd --add-port=8001/tcp --permanent
firewall-cmd --reload
```

## Ollama

```bash
# Run ollama
# Will be available on port 11434
docker run \
  -d \
  --restart always \
  --device /dev/kfd \
  --device /dev/dri \
  -v ollama:/root/.ollama \
  -e OLLAMA_VULKAN=1 \
  --name ollama \
  --network host \
  docker.io/ollama/ollama:0.13.5

# Run a model
docker exec -it ollama ollama run gpt-oss:20b

# Firewall
firewall-cmd --add-port=11434/tcp --permanent
firewall-cmd --reload
```

## AnythingLLM

Per [the docs](https://docs.anythingllm.com/installation-docker/cloud-docker):

> Note `--cap-add SYS_ADMIN` is a required command if you want to scrape webpages.
> We use PuppeteerJS to scrape website links and `--cap-add SYS_ADMIN` lets us
> use sandboxed Chromium across all runtimes for best security practices.

```bash
mkdir /etc/anything-llm
touch /etc/anything-llm/.env
chown 1000:1000 /etc/anything-llm/.env
chmod 600 /etc/anything-llm/.env

# Add JWT_SECRET=<long random string> to this file
# (or generate one non-interactively, as shown below)
vim /etc/anything-llm/.env
```
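Rather than editing the file by hand, the secret can be generated in one shot. A minimal sketch (the 32-byte length is an arbitrary choice; AnythingLLM only needs a long random string):

```bash
# Append a randomly generated JWT secret to the env file
# (32 hex-encoded random bytes; the length is an arbitrary choice)
echo "JWT_SECRET=$(openssl rand -hex 32)" >> /etc/anything-llm/.env
```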
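Before wiring AnythingLLM up to llama.cpp, it's worth confirming that llama-server answers on the Docker bridge address used in the provider settings below. A quick check against llama-server's health probe and OpenAI-style model listing (assuming `172.17.0.1` is your bridge gateway):

```bash
# Both endpoints should return JSON if llama-server is up and reachable
curl http://172.17.0.1:8000/health
curl http://172.17.0.1:8000/v1/models
```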
AnythingLLM will be accessible on port 3001. Connect llama.cpp as a generic OpenAI LLM provider using the base URL `http://172.17.0.1:8000/v1` (the Docker bridge gateway address); the chat model name doesn't matter.

```bash
docker run \
  -d \
  --restart always \
  --network host \
  --name anythingllm \
  --cap-add SYS_ADMIN \
  -v anythingllm:/app/server/storage \
  -v /etc/anything-llm/.env:/app/server/.env \
  -e STORAGE_DIR="/app/server/storage" \
  docker.io/mintplexlabs/anythingllm

# Firewall
firewall-cmd --add-port=3001/tcp --permanent
firewall-cmd --reload
```

## Stable Diffusion CPP

```bash
# z-turbo
docker run --rm \
  -v ./models:/models \
  -v ./build:/output \
  --device /dev/kfd \
  --device /dev/dri \
  ghcr.io/leejet/stable-diffusion.cpp:master-vulkan \
  --diffusion-model /models/z_turbo/z_image_turbo_bf16.safetensors \
  --vae /models/z_turbo/ae.safetensors \
  --llm /models/z_turbo/qwen_3_4b.safetensors \
  --cfg-scale 1.0 \
  -v \
  --diffusion-fa \
  -H 1024 \
  -W 512 \
  -o /output/output.png \
  --seed -1 \
  -p "Framework Laptop 13"

# Flux2
docker run --rm \
  -v ./models:/models \
  -v ./build:/output \
  --device /dev/kfd \
  --device /dev/dri \
  ghcr.io/leejet/stable-diffusion.cpp:master-vulkan \
  --diffusion-model /models/flux2/flux2-dev-Q8_0.gguf \
  --vae /models/flux2/ae.safetensors \
  --llm /models/flux2/Mistral-Small-3.2-24B-Instruct-2506-Q8_0.gguf \
  --cfg-scale 1.0 \
  --sampling-method euler \
  -v \
  --diffusion-fa \
  -o /output/output.png \
  -p "A picture of a sign that says 'framework'"
```
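Note that these runs assume the model weights were downloaded beforehand into `./models` on the host. The layout implied by the flags above is:

```bash
# Layout assumed by the commands above (file names taken from the flags)
mkdir -p models/z_turbo models/flux2 build
# models/z_turbo/: z_image_turbo_bf16.safetensors, ae.safetensors, qwen_3_4b.safetensors
# models/flux2/:   flux2-dev-Q8_0.gguf, ae.safetensors, Mistral-Small-3.2-24B-Instruct-2506-Q8_0.gguf
```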
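Since `--seed -1` picks a random seed on every run, a small wrapper loop is a convenient way to generate several candidates from one prompt while keeping each seed reproducible. A sketch that reuses only the z-turbo flags shown above:

```bash
# Generate four z-turbo candidates with distinct, fixed seeds and output files
for seed in 1 2 3 4; do
  docker run --rm \
    -v ./models:/models \
    -v ./build:/output \
    --device /dev/kfd \
    --device /dev/dri \
    ghcr.io/leejet/stable-diffusion.cpp:master-vulkan \
    --diffusion-model /models/z_turbo/z_image_turbo_bf16.safetensors \
    --vae /models/z_turbo/ae.safetensors \
    --llm /models/z_turbo/qwen_3_4b.safetensors \
    --cfg-scale 1.0 \
    --diffusion-fa \
    -H 1024 -W 512 \
    --seed "${seed}" \
    -o "/output/output-${seed}.png" \
    -p "Framework Laptop 13"
done
```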