From a2cef18efe0d1ff832916394f8b0ba1adf54a59a Mon Sep 17 00:00:00 2001
From: ducoterra
Date: Wed, 7 Jan 2026 09:28:29 -0500
Subject: [PATCH] Add framework desktop LLM instructions

---
 .../framework_desktop.md | 103 ++++++++++++++++++
 1 file changed, 103 insertions(+)
 create mode 100644 active/device_framework_desktop/framework_desktop.md

diff --git a/active/device_framework_desktop/framework_desktop.md b/active/device_framework_desktop/framework_desktop.md
new file mode 100644
index 0000000..617f560
--- /dev/null
+++ b/active/device_framework_desktop/framework_desktop.md
@@ -0,0 +1,103 @@
# Framework Desktop

## BIOS

1. In the BIOS, set the GPU memory allocation to 96GB.
2. Add the following kernel args to /etc/default/grub to raise the allocatable
   VRAM to 112GB (see the example `/etc/default/grub` at the end of this
   document): `ttm.pages_limit=28027343750 ttm.page_pool_size=28027343750`
3. Regenerate the GRUB config: `grub2-mkconfig -o /boot/grub2/grub.cfg`
4. Reboot.

## Docker

```bash
# Add the Docker CE repository
dnf config-manager addrepo \
  --from-repofile https://download.docker.com/linux/fedora/docker-ce.repo

# Install the engine, CLI, and plugins
dnf install \
  docker-ce \
  docker-ce-cli \
  containerd.io \
  docker-buildx-plugin \
  docker-compose-plugin

# Start the daemon now and on every boot
systemctl enable --now docker
```

## Llama.cpp

```bash
# Build the llama.cpp docker image
git clone https://github.com/ggml-org/llama.cpp.git
cd llama.cpp
export BUILD_TAG=$(date +"%Y-%m-%d-%H-%M-%S")
docker build -t llama-cpp-vulkan:${BUILD_TAG} -f .devops/vulkan.Dockerfile .

# Run llama-server with gpt-oss-120b, listening on port 8000
docker run \
  -d \
  --restart always \
  --name=llama-server \
  --network=host \
  --device=/dev/kfd \
  --device=/dev/dri \
  -v llama-server-cache:/root/.cache \
  llama-cpp-vulkan:${BUILD_TAG} \
  -hf ggml-org/gpt-oss-120b-GGUF --ctx-size 0 --jinja -ub 2048 -b 2048 \
  --port 8000 --host 0.0.0.0 -n -1 --n-gpu-layers 999

# Open the firewall
firewall-cmd --add-port=8000/tcp --permanent
firewall-cmd --reload
```

## Ollama

```bash
# Run ollama; it will be available on port 11434
docker run \
  -d \
  --restart always \
  --device /dev/kfd \
  --device /dev/dri \
  -v ollama:/root/.ollama \
  -e OLLAMA_VULKAN=1 \
  --name ollama \
  --network host \
  docker.io/ollama/ollama:0.13.5

# Pull and run a model
docker exec -it ollama ollama run gpt-oss:20b

# Open the firewall
firewall-cmd --add-port=11434/tcp --permanent
firewall-cmd --reload
```

## AnythingLLM

Per [the docs](https://docs.anythingllm.com/installation-docker/cloud-docker):

> Note --cap-add SYS_ADMIN is a required command if you want to scrape webpages.
> We use PuppeteerJS to scrape websites links and --cap-add SYS_ADMIN lets us
> use sandboxed Chromium across all runtimes for best security practices

```bash
# The server will be accessible on port 3001.
# Connect llama.cpp as a generic OpenAI LLM provider with base URL
# http://172.17.0.1:8000/v1; the chat model name doesn't matter because
# llama-server serves the single loaded model.
docker run \
  -d \
  --restart always \
  --network host \
  --name anythingllm \
  --cap-add SYS_ADMIN \
  -v anythingllm:/app/server/storage \
  -e STORAGE_DIR="/app/server/storage" \
  docker.io/mintplexlabs/anythingllm

# Open the firewall
firewall-cmd --add-port=3001/tcp --permanent
firewall-cmd --reload
```
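
## Example /etc/default/grub

For reference, the kernel args from step 2 of the BIOS section belong on the
`GRUB_CMDLINE_LINUX` line. A minimal sketch of the file after the edit,
assuming Fedora's stock defaults (everything except the two `ttm.*` arguments
is illustrative and may differ on your system):

```bash
# /etc/default/grub: illustrative Fedora defaults; only the two ttm.*
# arguments appended to GRUB_CMDLINE_LINUX are the additions from the
# BIOS section above
GRUB_TIMEOUT=5
GRUB_DISTRIBUTOR="$(sed 's, release .*$,,g' /etc/system-release)"
GRUB_DEFAULT=saved
GRUB_DISABLE_SUBMENU=true
GRUB_TERMINAL_OUTPUT="console"
GRUB_CMDLINE_LINUX="rhgb quiet ttm.pages_limit=28027343750 ttm.page_pool_size=28027343750"
GRUB_ENABLE_BLSCFG=true
```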
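
## Smoke tests

Once the containers are up, a quick way to confirm each API is answering. This
is a sketch that assumes you are running it on the Framework Desktop itself;
substitute the machine's address for `localhost` to test through the firewall
from another host. The llama.cpp model name is a placeholder, since
llama-server ignores it and answers with the single loaded model.

```bash
# llama.cpp exposes an OpenAI-compatible API on port 8000
curl http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "gpt-oss-120b", "messages": [{"role": "user", "content": "Say hello"}]}'

# Ollama answers on port 11434 with its own API
curl http://localhost:11434/api/generate \
  -d '{"model": "gpt-oss:20b", "prompt": "Say hello", "stream": false}'
```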