framework 16 ai updates

2026-02-06 20:10:45 -05:00
parent 40f221376f
commit 7626cdf998
4 changed files with 198 additions and 0 deletions
@@ -1,5 +1,102 @@
 # Framework Laptop 16
 ## Local AI
 ### Setup
 #### Create the AI user
 ```bash
 # Create your local ai user. This will be the user you launch podman processes from.
 useradd -m ai
 loginctl enable-linger ai
 su -l ai
 mkdir -p /home/ai/.config/containers/systemd/
 ```
 #### Create the models dir
 ```bash
 mkdir -p /home/ai/models/{text,image,video,embedding,tts,stt}
 ```
 #### Install the Hugging Face CLI
 <https://huggingface.co/docs/huggingface_hub/en/guides/cli#getting-started>
 ```bash
 # Install
 curl -LsSf https://hf.co/cli/install.sh | bash
 # Login
 hf auth login
 ```
 ### Models
 ```bash
 # nomic-embed-text-v2 (embed)
 mkdir /home/ai/models/embedding/nomic-embed-text-v2
 hf download --local-dir /home/ai/models/embedding/nomic-embed-text-v2 ggml-org/Nomic-Embed-Text-V2-GGUF
 # qwen2.5-coder-3b-fim (completion)
 mkdir /home/ai/models/text/qwen2.5-coder-3b-fim
 hf download --local-dir /home/ai/models/text/qwen2.5-coder-3b-fim ggml-org/Qwen2.5-Coder-3B-Q8_0-GGUF
 ```
 ### Testing
 ```bash
 # Embedding Server
 # Runs llama-server in embeddings mode on port 8010 for a quick manual test.
 # The whole models tree is mounted at /models (not just models/text) so the
 # -m path under /models/embedding/ resolves inside the container, matching
 # the Volume= used by the embed quadlet.
 podman run \
 --rm \
 --name llama-server-embed \
 --device=/dev/kfd \
 --device=/dev/dri \
 -p 8010:8010 \
 -v /home/ai/models:/models:z \
 localhost/llama-cpp-vulkan:latest \
 --port 8010 \
 -ngl all \
 -ub 2048 \
 -b 2048 \
 --ctx-size 2048 \
 --embeddings \
 --models-dir /models \
 -m /models/embedding/nomic-embed-text-v2/nomic-embed-text-v2-moe-q8_0.gguf
 # Completion Server
 # Runs llama-server on port 8011 for a quick manual test of the FIM model.
 # The -m path uses the qwen2.5-coder-3b-fim directory created in the Models
 # step, which is also the path the completion quadlet points at.
 podman run \
 --rm \
 --name llama-server-completion \
 -p 8011:8011 \
 --device=/dev/kfd \
 --device=/dev/dri \
 -v /home/ai/models:/models:z \
 localhost/llama-cpp-vulkan:latest \
 --port 8011 \
 -c 0 \
 --perf \
 --n-gpu-layers all \
 --models-dir /models \
 -m /models/text/qwen2.5-coder-3b-fim/qwen2.5-coder-3b-q8_0.gguf
 ```
 ### Quadlets
 ```bash
 sudo install -C -o ai -g ai active/device_framework_16/quadlets/* /home/ai/.config/containers/systemd/
 sudo machinectl shell ai@
 systemctl --user daemon-reload
 systemctl --user restart llama-completion.service
 systemctl --user restart llama-embed.service
 ```
 ## Keyboard VIA
 Access keyboard configuration at <https://keyboard.frame.work/>
 ## Keyboard VIA
 Access keyboard configuration at <https://keyboard.frame.work/>
@@ -0,0 +1,33 @@
 [Unit]
 Description=A Llama CPP Server Running Qwen2.5 Coder 1.5B Instruct
 [Container]
 # Image is built locally via podman build
 Image=localhost/llama-cpp-vulkan:latest
 # Downloaded models volume
 Volume=/home/ai/models:/models:z
 # Ports
 PublishPort=8012:8012
 # GPU Device
 AddDevice=/dev/kfd
 AddDevice=/dev/dri
 # Server command
 Exec=--port 8012 \
    -c 0 \
    --perf \
    --n-gpu-layers all \
    --models-dir /models \
    -m /models/text/qwen2.5-coder-1.5b-instruct/qwen2.5-coder-1.5b-instruct-q8_0.gguf
 [Service]
 Restart=always
 # Extend Timeout to allow time to pull the image
 TimeoutStartSec=900
 [Install]
 # Start by default on boot
 WantedBy=multi-user.target default.target
@@ -0,0 +1,33 @@
 [Unit]
 Description=A Llama CPP Server Running Qwen2.5 Coder 3B (FIM completion)
 [Container]
 # Image is built locally via podman build
 Image=localhost/llama-cpp-vulkan:latest
 # Downloaded models volume
 Volume=/home/ai/models:/models:z
 # Ports
 PublishPort=8011:8011
 # GPU Device
 AddDevice=/dev/kfd
 AddDevice=/dev/dri
 # Server command
 Exec=--port 8011 \
    -c 0 \
    --perf \
    --n-gpu-layers all \
    --models-dir /models \
    -m /models/text/qwen2.5-coder-3b-fim/qwen2.5-coder-3b-q8_0.gguf
 [Service]
 Restart=always
 # Extend Timeout to allow time to pull the image
 TimeoutStartSec=900
 [Install]
 # Start by default on boot
 WantedBy=multi-user.target default.target
@@ -0,0 +1,35 @@
 [Unit]
 Description=A Llama CPP Server Running Nomic Embed Text v2 (embeddings)
 [Container]
 # Image is built locally via podman build
 Image=localhost/llama-cpp-vulkan:latest
 # Downloaded models volume
 Volume=/home/ai/models:/models:z
 # Ports
 PublishPort=8010:8010
 # GPU Device
 AddDevice=/dev/kfd
 AddDevice=/dev/dri
 # Server command
 Exec=--port 8010 \
    -m /models/embedding/nomic-embed-text-v2/nomic-embed-text-v2-moe-q8_0.gguf \
    -ngl all \
    -ub 2048 \
    -b 2048 \
    --ctx-size 2048 \
    --embeddings \
    --models-dir /models
 [Service]
 Restart=always
 # Extend Timeout to allow time to pull the image
 TimeoutStartSec=900
 [Install]
 # Start by default on boot
 WantedBy=multi-user.target default.target