diff --git a/active/device_framework_16/framework_16.md b/active/device_framework_16/framework_16.md index 83fe50c..397e88f 100644 --- a/active/device_framework_16/framework_16.md +++ b/active/device_framework_16/framework_16.md @@ -1,5 +1,102 @@ # Framework Laptop 16 +## Local AI + +### Setup + +#### Create the AI user + +```bash +# Create your local ai user. This will be the user you launch podman processes from. +useradd -m ai +loginctl enable-linger ai +su -l ai +mkdir -p /home/ai/.config/containers/systemd/ +``` + +#### Create the models dir + +```bash +mkdir -p /home/ai/models/{text,image,video,embedding,tts,stt} +``` + +#### Install the Hugging Face CLI + + + +```bash +# Install +curl -LsSf https://hf.co/cli/install.sh | bash + +# Login +hf auth login +``` + +### Models + +```bash +# nomic-embed-text-v2 (embed) +mkdir /home/ai/models/embedding/nomic-embed-text-v2 +hf download --local-dir /home/ai/models/embedding/nomic-embed-text-v2 ggml-org/Nomic-Embed-Text-V2-GGUF + +# qwen2.5-coder-3b-fim (completion) +mkdir /home/ai/models/text/qwen2.5-coder-3b-fim +hf download --local-dir /home/ai/models/text/qwen2.5-coder-3b-fim ggml-org/Qwen2.5-Coder-3B-Q8_0-GGUF +``` + +### Testing + +```bash +# Embedding Server +podman run \ +--rm \ +--name llama-server-embed \ +--device=/dev/kfd \ +--device=/dev/dri \ +-p 8010:8010 \ +-v /home/ai/models:/models:z \ +localhost/llama-cpp-vulkan:latest \ +--port 8010 \ +-ngl all \ +-ub 2048 \ +-b 2048 \ +--ctx-size 2048 \ +--embeddings \ +--models-dir /models \ +-m /models/embedding/nomic-embed-text-v2/nomic-embed-text-v2-moe-q8_0.gguf + +# Completion Server +podman run \ +--rm \ +--name llama-server-completion \ +-p 8011:8011 \ +--device=/dev/kfd \ +--device=/dev/dri \ +-v /home/ai/models:/models:z \ +localhost/llama-cpp-vulkan:latest \ +--port 8011 \ +-c 0 \ +--perf \ +--n-gpu-layers all \ +--models-dir /models \ +-m /models/text/qwen2.5-coder-3b-fim/qwen2.5-coder-3b-q8_0.gguf +``` + +### Quadlets + +```bash +sudo install -C -o ai 
-g ai active/device_framework_16/quadlets/* /home/ai/.config/containers/systemd/ +sudo machinectl shell ai@ + +systemctl --user daemon-reload +systemctl --user restart llama-completion.service +systemctl --user restart llama-embed.service +``` + +## Keyboard VIA + +Access keyboard configuration at + ## Keyboard VIA Access keyboard configuration at diff --git a/active/device_framework_16/quadlets/llama-chat.container b/active/device_framework_16/quadlets/llama-chat.container new file mode 100644 index 0000000..df07eac --- /dev/null +++ b/active/device_framework_16/quadlets/llama-chat.container @@ -0,0 +1,33 @@ +[Unit] +Description=A Llama CPP Server Running Qwen2.5 Coder 1.5B Instruct + +[Container] +# Image is built locally via podman build +Image=localhost/llama-cpp-vulkan:latest + +# Downloaded models volume +Volume=/home/ai/models:/models:z + +# Ports +PublishPort=8012:8012 + +# GPU Device +AddDevice=/dev/kfd +AddDevice=/dev/dri + +# Server command +Exec=--port 8012 \ + -c 0 \ + --perf \ + --n-gpu-layers all \ + --models-dir /models \ + -m /models/text/qwen2.5-coder-1.5b-instruct/qwen2.5-coder-1.5b-instruct-q8_0.gguf + +[Service] +Restart=always +# Extend Timeout to allow time to pull the image +TimeoutStartSec=900 + +[Install] +# Start by default on boot +WantedBy=multi-user.target default.target diff --git a/active/device_framework_16/quadlets/llama-completion.container b/active/device_framework_16/quadlets/llama-completion.container new file mode 100644 index 0000000..ed79f63 --- /dev/null +++ b/active/device_framework_16/quadlets/llama-completion.container @@ -0,0 +1,33 @@ +[Unit] +Description=A Llama CPP Server Running Qwen2.5 Coder 3B FIM + +[Container] +# Image is built locally via podman build +Image=localhost/llama-cpp-vulkan:latest + +# Downloaded models volume +Volume=/home/ai/models:/models:z + +# Ports +PublishPort=8011:8011 + +# GPU Device +AddDevice=/dev/kfd +AddDevice=/dev/dri + +# Server command +Exec=--port 8011 \ + -c 0 \ + --perf \ + --n-gpu-layers all \ + 
--models-dir /models \ + -m /models/text/qwen2.5-coder-3b-fim/qwen2.5-coder-3b-q8_0.gguf + +[Service] +Restart=always +# Extend Timeout to allow time to pull the image +TimeoutStartSec=900 + +[Install] +# Start by default on boot +WantedBy=multi-user.target default.target diff --git a/active/device_framework_16/quadlets/llama-embed.container b/active/device_framework_16/quadlets/llama-embed.container new file mode 100644 index 0000000..d9d7297 --- /dev/null +++ b/active/device_framework_16/quadlets/llama-embed.container @@ -0,0 +1,35 @@ +[Unit] +Description=A Llama CPP Server Running Nomic Embed Text V2 + +[Container] +# Image is built locally via podman build +Image=localhost/llama-cpp-vulkan:latest + +# Downloaded models volume +Volume=/home/ai/models:/models:z + +# Ports +PublishPort=8010:8010 + +# GPU Device +AddDevice=/dev/kfd +AddDevice=/dev/dri + +# Server command +Exec=--port 8010 \ + -m /models/embedding/nomic-embed-text-v2/nomic-embed-text-v2-moe-q8_0.gguf \ + -ngl all \ + -ub 2048 \ + -b 2048 \ + --ctx-size 2048 \ + --embeddings \ + --models-dir /models + +[Service] +Restart=always +# Extend Timeout to allow time to pull the image +TimeoutStartSec=900 + +[Install] +# Start by default on boot +WantedBy=multi-user.target default.target