Add framework desktop LLM instructions
# Framework Desktop

## BIOS

<https://knowledgebase.frame.work/en_us/changing-memory-allocation-amd-ryzen-ai-max-300-series-By1LG5Yrll>

1. Set GPU memory to 96GB
2. Add the following kernel args to /etc/default/grub to make up to 112GB of
   VRAM allocatable (see the sketch after this list):
   `ttm.pages_limit=28027343750 ttm.page_pool_size=28027343750`
3. Regenerate the grub config: `grub2-mkconfig -o /boot/grub2/grub.cfg`
4. Reboot
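
The ttm args from step 2 just get appended to the kernel command line. A
minimal sketch of the relevant /etc/default/grub line, assuming the two args
are added to `GRUB_CMDLINE_LINUX` (the other arguments on that line will vary
per install; `rhgb quiet` is only a placeholder):

```bash
# /etc/default/grub (excerpt) -- ttm values are the ones from step 2
GRUB_CMDLINE_LINUX="rhgb quiet ttm.pages_limit=28027343750 ttm.page_pool_size=28027343750"
```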

## Docker

```bash
# Add the Docker CE repo and install the engine plus CLI plugins (Fedora)
dnf config-manager addrepo \
  --from-repofile https://download.docker.com/linux/fedora/docker-ce.repo

dnf install \
  docker-ce \
  docker-ce-cli \
  containerd.io \
  docker-buildx-plugin \
  docker-compose-plugin
```
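
Installing the packages does not start the daemon. A minimal follow-up,
assuming systemd manages the `docker` unit as on a stock Fedora install:

```bash
# Start the daemon now and on every boot, then confirm it responds
systemctl enable --now docker
docker info
```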

## Llama.cpp

```bash
# Build the llama.cpp docker image
git clone https://github.com/ggml-org/llama.cpp.git
cd llama.cpp
export BUILD_TAG=$(date +"%Y-%m-%d-%H-%M-%S")
docker build -t llama-cpp-vulkan:${BUILD_TAG} -f .devops/vulkan.Dockerfile .

# Run llama server
docker run \
  -d \
  --restart always \
  --name=llama-server \
  --network=host \
  --device=/dev/kfd \
  --device=/dev/dri \
  -v llama-server-cache:/root/.cache \
  llama-cpp-vulkan:${BUILD_TAG} \
  -hf ggml-org/gpt-oss-120b-GGUF --ctx-size 0 --jinja -ub 2048 -b 2048 \
  --port 8000 --host 0.0.0.0 -n -1 --n-gpu-layers 999

# Firewall
firewall-cmd --add-port=8000/tcp --permanent
firewall-cmd --reload
```
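
Once the container is up, llama-server exposes an OpenAI-compatible HTTP API
on port 8000. A quick smoke test (replace `<host>` with the desktop's address;
per the Anything LLM notes below, the model name is not significant for a
single-model server):

```bash
# Ask the llama.cpp server for a short completion via the OpenAI-style API
curl http://<host>:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "gpt-oss-120b", "messages": [{"role": "user", "content": "Say hello"}]}'
```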

## Ollama

```bash
# Run ollama
# Will be available on port 11434
docker run \
  -d \
  --restart always \
  --device /dev/kfd \
  --device /dev/dri \
  -v ollama:/root/.ollama \
  -e OLLAMA_VULKAN=1 \
  --name ollama \
  --network host \
  docker.io/ollama/ollama:0.13.5

# Run a model
docker exec -it ollama ollama run gpt-oss:20b

# Firewall
firewall-cmd --add-port=11434/tcp --permanent
firewall-cmd --reload
```
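
With the container running and a model pulled, the Ollama REST API answers on
port 11434. A minimal check, assuming default settings (again, `<host>` stands
in for the desktop's address):

```bash
# One-shot, non-streaming generation against the Ollama API
curl http://<host>:11434/api/generate \
  -d '{"model": "gpt-oss:20b", "prompt": "Say hello", "stream": false}'
```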

## Anything LLM

Per [the docs](https://docs.anythingllm.com/installation-docker/cloud-docker):

> Note --cap-add SYS_ADMIN is a required command if you want to scrape webpages.
> We use PuppeteerJS to scrape websites links and --cap-add SYS_ADMIN lets us
> use sandboxed Chromium across all runtimes for best security practices

```bash
# Server will be accessible on port 3001
# Connect llama.cpp as a generic OpenAI LLM provider and use host
# http://172.17.0.1:8000/v1
# Chat model name doesn't matter.
docker run \
  -d \
  --restart always \
  --network host \
  --name anythingllm \
  --cap-add SYS_ADMIN \
  -v anythingllm:/app/server/storage \
  -e STORAGE_DIR="/app/server/storage" \
  docker.io/mintplexlabs/anythingllm

# Firewall
firewall-cmd --add-port=3001/tcp --permanent
firewall-cmd --reload
```
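
Before adding the provider in the Anything LLM UI, it can be worth confirming
that the llama.cpp endpoint referenced in the comments above is reachable from
the host (172.17.0.1 is assumed to be the default docker0 bridge address):

```bash
# Should list the model loaded by llama-server
curl http://172.17.0.1:8000/v1/models
```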