add qwen3 base and coder

2026-01-27 19:32:24 -05:00
parent 4c0a263d50
commit d4571c9b70


@@ -24,11 +24,7 @@
<https://knowledgebase.frame.work/en_us/changing-memory-allocation-amd-ryzen-ai-max-300-series-By1LG5Yrll>
1. Set GPU memory to 96GB
2. Add the following kernel args to /etc/default/grub (set allocatable VRAM to
112GB): `ttm.pages_limit=28027343750 ttm.page_pool_size=28027343750`
3. Update grub `grub2-mkconfig -o /boot/grub2/grub.cfg`
4. Reboot
1. Set GPU memory to 512MB
## References
@@ -129,6 +125,14 @@ hf download --local-dir /home/ai/models/text/ministral-3-3b-instruct ggml-org/Mi
# nemotron-nano-30b
mkdir /home/ai/models/text/nemotron-nano-30b
hf download --local-dir /home/ai/models/text/nemotron-nano-30b ggml-org/Nemotron-Nano-3-30B-A3B-GGUF Nemotron-Nano-3-30B-A3B-Q4_K_M.gguf
# qwen3-30b-a3b-instruct
mkdir /home/ai/models/text/qwen3-30b-a3b-instruct
hf download --local-dir /home/ai/models/text/qwen3-30b-a3b-instruct ggml-org/Qwen3-30B-A3B-Instruct-2507-Q8_0-GGUF
# qwen3-coder-30b-a3b-instruct
mkdir /home/ai/models/text/qwen3-coder-30b-a3b-instruct
hf download --local-dir /home/ai/models/text/qwen3-coder-30b-a3b-instruct ggml-org/Qwen3-Coder-30B-A3B-Instruct-Q8_0-GGUF
```
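The mkdir-then-download pattern above repeats for every model and could be factored into a small helper. A minimal dry-run sketch (it only echoes the commands instead of running them; the `gen_model_cmds` name and the two-entry model list are illustrative, not part of the repo):

```bash
#!/bin/sh
# Dry-run sketch of the per-model pattern above: prints the mkdir and
# hf download commands instead of executing them.
MODELS_DIR=/home/ai/models/text

gen_model_cmds() {
  # name|repo pairs, taken from the commands above (illustrative subset)
  while IFS='|' read -r name repo; do
    echo "mkdir $MODELS_DIR/$name"
    echo "hf download --local-dir $MODELS_DIR/$name $repo"
  done <<EOF
qwen3-30b-a3b-instruct|ggml-org/Qwen3-30B-A3B-Instruct-2507-Q8_0-GGUF
qwen3-coder-30b-a3b-instruct|ggml-org/Qwen3-Coder-30B-A3B-Instruct-Q8_0-GGUF
EOF
}

gen_model_cmds
```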
#### Image models
@@ -177,7 +181,7 @@ podman run \
--device=/dev/kfd \
--device=/dev/dri \
-v /home/ai/models/text:/models:z \
localhost/llama-cpp-vulkan:2026-01-19-18-00-02 \
localhost/llama-cpp-vulkan:latest \
--port 8000 \
-c 64000 \
-b 64000 \
@@ -270,16 +274,22 @@ ghcr.io/open-webui/open-webui:main
## Install the whole thing with quadlets (TM)
```bash
# Installs and runs all services in `quadlets/`
scp -r active/device_framework_desktop/quadlets/* deskwork-ai:.config/containers/systemd/
ssh deskwork-ai
systemctl --user daemon-reload
systemctl --user restart ai-pod.service
```
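The quadlet files themselves live in `quadlets/` and aren't reproduced here; for orientation, a llama.cpp unit might look roughly like this (the file name, pod reference, and `Exec` args are assumptions based on the `podman run` command above, not the actual files):

```ini
# ~/.config/containers/systemd/llama-cpp.container (hypothetical sketch)
[Unit]
Description=llama.cpp server

[Container]
Image=localhost/llama-cpp-vulkan:latest
Pod=ai.pod
AddDevice=/dev/kfd
AddDevice=/dev/dri
Volume=/home/ai/models/text:/models:z
Exec=--port 8000 -c 64000 -b 64000

[Install]
WantedBy=default.target
```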
Note: all services will be available at `host.containers.internal`, so llama.cpp
will be up at `http://host.containers.internal:8000`.
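A request against that address from any container in the pod would go through llama.cpp's OpenAI-compatible API. The sketch below only prints an example `curl` invocation (the payload is a placeholder), so nothing is actually contacted:

```bash
#!/bin/sh
# Prints (does not run) an example request against llama.cpp at the
# in-pod address from the note above. Payload contents are placeholders.
LLAMA_URL=http://host.containers.internal:8000

print_example_request() {
  cat <<EOF
curl -s $LLAMA_URL/v1/chat/completions \\
  -H 'Content-Type: application/json' \\
  -d '{"messages": [{"role": "user", "content": "hello"}]}'
EOF
}

print_example_request
```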
### Install the update script
```bash
# Copy update script and run it (assumes you have llama.cpp and stable-diffusion.cpp)
# 1. Builds the latest llama.cpp and stable-diffusion.cpp
# 2. Pulls the latest open-webui
# 3. Restarts all services
scp active/device_framework_desktop/update-script.sh deskwork-ai:
ssh deskwork-ai
chmod +x update-script.sh
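`update-script.sh` itself isn't reproduced here; going by the three numbered comments above, its shape might be roughly the following. This is a dry-run sketch that echoes each command instead of executing it, and the build contexts and image tags are assumptions:

```bash
#!/bin/sh
# Dry-run sketch of the update flow described above; echoes each step
# instead of executing it. Paths and image tags are assumptions.
set -eu

update_all() {
  # 1. Rebuild the llama.cpp and stable-diffusion.cpp images
  echo "podman build -t localhost/llama-cpp-vulkan:latest llama.cpp/"
  echo "podman build -t localhost/stable-diffusion-vulkan:latest stable-diffusion.cpp/"
  # 2. Pull the latest open-webui
  echo "podman pull ghcr.io/open-webui/open-webui:main"
  # 3. Restart all services
  echo "systemctl --user daemon-reload"
  echo "systemctl --user restart ai-pod.service"
}

update_all
```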