diff --git a/active/device_framework_desktop/framework_desktop.md b/active/device_framework_desktop/framework_desktop.md
index 0bcd23f..3ddc005 100644
--- a/active/device_framework_desktop/framework_desktop.md
+++ b/active/device_framework_desktop/framework_desktop.md
@@ -24,11 +24,7 @@
-1. Set GPU memory to 96GB
-2. Add the following kernel args to /etc/default/grub (set allocatable VRAM to
-   112GB): `ttm.pages_limit=28027343750 ttm.page_pool_size=28027343750`
-3. Update grub `grub2-mkconfig -o /boot/grub2/grub.cfg`
-4. Reboot
+1. Set GPU memory to 512MB
 
 ## References
 
@@ -129,6 +125,14 @@ hf download --local-dir /home/ai/models/text/ministral-3-3b-instruct ggml-org/Mi
 # nemotron-nano-30b
 mkdir /home/ai/models/text/nemotron-nano-30b
 hf download --local-dir /home/ai/models/text/nemotron-nano-30b ggml-org/Nemotron-Nano-3-30B-A3B-GGUF Nemotron-Nano-3-30B-A3B-Q4_K_M.gguf
+
+# qwen3-30b-a3b-instruct
+mkdir /home/ai/models/text/qwen3-30b-a3b-instruct
+hf download --local-dir /home/ai/models/text/qwen3-30b-a3b-instruct ggml-org/Qwen3-30B-A3B-Instruct-2507-Q8_0-GGUF
+
+# qwen3-coder-30b-a3b-instruct
+mkdir /home/ai/models/text/qwen3-coder-30b-a3b-instruct
+hf download --local-dir /home/ai/models/text/qwen3-coder-30b-a3b-instruct ggml-org/Qwen3-Coder-30B-A3B-Instruct-Q8_0-GGUF
 ```
 
 #### Image models
@@ -177,7 +181,7 @@ podman run \
 --device=/dev/kfd \
 --device=/dev/dri \
 -v /home/ai/models/text:/models:z \
-localhost/llama-cpp-vulkan:2026-01-19-18-00-02 \
+localhost/llama-cpp-vulkan:latest \
 --port 8000 \
 -c 64000 \
 -b 64000 \
@@ -270,16 +274,22 @@ ghcr.io/open-webui/open-webui:main
 ## Install the whole thing with quadlets (TM)
 
 ```bash
+# Installs and runs all services in `quadlets/`
 scp -r active/device_framework_desktop/quadlets/* deskwork-ai:.config/containers/systemd/
 ssh deskwork-ai
 systemctl --user daemon-reload
 systemctl --user restart ai-pod.service
 ```
 
+Note, all services will be available at `host.containers.internal`. So llama.cpp
+will be up at `http://host.containers.internal:8000`.
+
 ### Install the update script
 
 ```bash
-# Copy update script and run it (assumes you have llama.cpp and stable-diffusion.cpp)
+# 1. Builds the latest llama.cpp and stable-diffusion.cpp
+# 2. Pulls the latest open-webui
+# 3. Restarts all services
 scp active/device_framework_desktop/update-script.sh deskwork:
 ssh deskwork-ai
 chmod +x update-script.sh