add initial framework desktop config
@@ -10,45 +10,64 @@
3. Update grub `grub2-mkconfig -o /boot/grub2/grub.cfg`
4. Reboot

## Docker
## References

<https://docs.podman.io/en/latest/markdown/podman-systemd.unit.5.html>

## Notes

### Update quadlets

```bash
dnf config-manager addrepo \
--from-repofile https://download.docker.com/linux/fedora/docker-ce.repo
scp -r active/device_framework_desktop/quadlets/* deskwork-ai:quadlets/
podman quadlet install --replace quadlets/*
```
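A quick sanity check (not covered above), assuming the podman version that provides `podman quadlet install` also provides `podman quadlet list`:

```bash
# List installed quadlets, then regenerate their systemd units
podman quadlet list
systemctl --user daemon-reload
```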
dnf install \
docker-ce \
docker-ce-cli \
containerd.io \
docker-buildx-plugin \
docker-compose-plugin

### Volume Locations

`~/.local/share/containers/storage/volumes/`
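Named volumes referenced later (e.g. `llama-server-cache`) can be resolved to a concrete path with `podman volume inspect`; the volume name below is just an example:

```bash
# Print the on-disk location of a named volume
podman volume inspect llama-server-cache --format '{{ .Mountpoint }}'
```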
## User

```bash
# Create your local ai user. This will be the user you launch podman processes from.
useradd -m ai
loginctl enable-linger ai
su -l ai
mkdir -p ~/.config/containers/systemd/
```
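A quick check that lingering actually took effect, so the user services survive logout (not part of the original notes):

```bash
# Should report Linger=yes
loginctl show-user ai --property=Linger
```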
## Llama.cpp

```bash
# Build the llama.cpp docker image
# Build the llama.cpp container image
git clone https://github.com/ggml-org/llama.cpp.git
export BUILD_TAG=$(date +"%Y-%m-%d-%H-%M-%S")
docker build -t llama-cpp-vulkan:${BUILD_TAG} -f .devops/vulkan.Dockerfile .
podman build -t llama-cpp-vulkan:${BUILD_TAG} -f .devops/vulkan.Dockerfile .

# Run llama server with gpt-oss-120b
docker run \
podman run \
-d \
--replace \
--restart always \
--name=llama-server \
--network=host \
-p 8000:8000 \
--device=/dev/kfd \
--device=/dev/dri \
-v llama-server-cache:/root/.cache \
llama-cpp-vulkan:${BUILD_TAG} \
-hf ggml-org/gpt-oss-120b-GGUF --ctx-size 0 --jinja -ub 2048 -b 2048 \
localhost/llama-cpp-vulkan:2026-01-12-10-13-30 \
-hf ggml-org/gpt-oss-120b-GGUF --ctx-size 32000 --jinja -ub 2048 -b 2048 \
--port 8000 --host 0.0.0.0 -n -1 --n-gpu-layers 999

# To enable autostart, you'll need to create a quadlet
# Quadlets are documented in podman manual pages
# Search for "EXAMPLES" when you run the below command
# Put your quadlet at ~/.config/containers/systemd/
man "podman-systemd.unit(5)"
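# Optional smoke test (not from the original notes): llama-server exposes a
# health route and an OpenAI-compatible API, so against the port used above:
curl -s http://localhost:8000/health
curl -s http://localhost:8000/v1/models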

# Run llama server with devstral-small-2 24b
docker run \
podman run \
-d \
--restart always \
--name=llama-server-devstral \
--network=host \
--device=/dev/kfd \
@@ -66,10 +85,18 @@ firewall-cmd --reload

## Ollama

```bash
# Install CLI
curl -fsSL https://ollama.com/download/ollama-linux-amd64.tgz | tar xz -C ~/.local

# Add export OLLAMA_HOST=127.0.0.1
vim ~/.bashrc.d/ollama.sh
```

```bash
# Run ollama
# Will be available on port 11434
docker run \
podman run \
-d \
--restart always \
--device /dev/kfd \
@@ -81,7 +108,7 @@ docker run \
docker.io/ollama/ollama:0.13.5

# Run an image
docker exec -it ollama ollama run gpt-oss:20b
podman exec -it ollama ollama run gpt-oss:20b

# Firewall
firewall-cmd --add-port=11434/tcp --permanent
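# Optional check (not in the original notes): confirm the Ollama API answers
# on its default port
curl -s http://localhost:11434/api/tags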
@@ -107,9 +134,9 @@ vim /etc/anything-llm/.env

# Server will be accessible on port 3001
# Connect llama.cpp as a generic OpenAI LLM provider and use host
# http://172.17.0.1:8000/v1
# http://172.17.0.1:3001/v1
# Chat model name doesn't matter.
docker run \
podman run \
-d \
--restart always \
--network host \
@@ -129,9 +156,9 @@ firewall-cmd --reload

```bash
# z-turbo
docker run --rm \
-v ./models:/models \
-v ./build:/output \
podman run --rm \
-v /home/ai/stable-diffusion.cpp/models:/models:z \
-v /home/ai/stable-diffusion.cpp/output:/output:z \
--device /dev/kfd \
--device /dev/dri \
ghcr.io/leejet/stable-diffusion.cpp:master-vulkan \
@@ -140,17 +167,37 @@ ghcr.io/leejet/stable-diffusion.cpp:master-vulkan \
--llm /models/z_turbo/qwen_3_4b.safetensors \
--cfg-scale 1.0 \
-v \
--diffusion-fa \
-H 1024 \
-W 512 \
-o /output/output.png \
--seed -1 \
-o /output/output.png \
-p "Framework Laptop 13"

# Flux
podman run --rm \
-v /srv/stable-diffusion.cpp/models:/models:z \
-v ./output:/output:z \
--device /dev/kfd \
--device /dev/dri \
ghcr.io/leejet/stable-diffusion.cpp:master-vulkan \
--diffusion-model /models/flux/flux1-dev-q4_k.gguf \
--vae /models/flux/ae.safetensors \
--clip_l /models/flux/clip_l.safetensors \
--t5xxl /models/flux/t5xxl_fp16.safetensors \
--cfg-scale 1.0 \
--sampling-method euler \
-v \
-H 512 \
-W 512 \
--seed -1 \
--steps 20 \
-o /output/output.png \
-p "An Everquest video game poster but with ribeye steaks for heads with the words 'EverSteak'"

# Flux2
docker run --rm \
-v ./models:/models \
-v ./build:/output \
podman run --rm \
-v /home/ai/stable-diffusion.cpp/models:/models:z \
-v /home/ai/stable-diffusion.cpp/output:/output:z \
--device /dev/kfd \
--device /dev/dri \
ghcr.io/leejet/stable-diffusion.cpp:master-vulkan \
@@ -160,7 +207,90 @@ ghcr.io/leejet/stable-diffusion.cpp:master-vulkan \
--cfg-scale 1.0 \
--sampling-method euler \
-v \
--diffusion-fa \
-H 512 \
-W 1024 \
--seed -1 \
--steps 10 \
-o /output/output.png \
-p "A picture of sign that says 'framework'"
```

# Qwen
podman run --rm \
-v /home/ai/stable-diffusion.cpp/models:/models:z \
-v /home/ai/stable-diffusion.cpp/output:/output:z \
--device /dev/kfd \
--device /dev/dri \
ghcr.io/leejet/stable-diffusion.cpp:master-vulkan \
--diffusion-model /models/qwen_image/Qwen_Image-Q4_K_M.gguf \
--vae /models/qwen_image/qwen_image_vae.safetensors \
--llm /models/qwen_image/Qwen2.5-VL-7B-Instruct.Q4_K_M.gguf \
--cfg-scale 2.5 \
--sampling-method euler \
-v \
--offload-to-cpu \
-H 512 -W 512 \
--flow-shift 3 \
--seed -1 \
-o /output/output.png \
-p 'Everquest DND mash up poster that says "ever dungeons and dragons"'

# SD3
podman run --rm \
-v /home/ai/stable-diffusion.cpp/models:/models:z \
-v /home/ai/stable-diffusion.cpp/output:/output:z \
--device /dev/kfd \
--device /dev/dri \
ghcr.io/leejet/stable-diffusion.cpp:master-vulkan \
-m /models/sd3/sd3.5_large.safetensors \
--clip_l /models/sd3/clip_l.safetensors \
--clip_g /models/sd3/clip_g.safetensors \
--t5xxl /models/sd3/t5xxl_fp16.safetensors \
-H 512 -W 512 \
--cfg-scale 4.5 \
--sampling-method euler \
-v \
--seed -1 \
-o /output/output.png \
-p 'Everquest DND mash up poster that says "ever dungeons and dragons"'
```

### Stable Diffusion CPP Server

Uses OpenAI Compatible Endpoints

```bash
# z-turbo server
podman run \
-d \
--name stable-diffusion-cpp-server \
-v /srv/stable-diffusion.cpp/models:/models \
-v /srv/stable-diffusion.cpp/build:/output \
--device /dev/kfd \
--device /dev/dri \
--entrypoint "/sd-server" \
--network host \
ghcr.io/leejet/stable-diffusion.cpp:master-vulkan \
--diffusion-model /models/z_turbo/z_image_turbo_bf16.safetensors \
--vae /models/z_turbo/ae.safetensors \
--llm /models/z_turbo/qwen_3_4b.safetensors \
--cfg-scale 1.0 \
-v \
--diffusion-fa \
-H 1024 \
-W 512 \
--seed -1 \
-l 0.0.0.0
```
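Since the heading above says the server speaks OpenAI-compatible endpoints, a request would look roughly like the sketch below. This is an assumption, not from the original notes: `PORT` is a placeholder, so check `podman logs stable-diffusion-cpp-server` for the port the server actually binds.

```bash
# Assumes the standard OpenAI images route; PORT is a placeholder
curl -s http://localhost:PORT/v1/images/generations \
-H "Content-Type: application/json" \
-d '{"prompt": "Framework Laptop 13", "n": 1, "size": "512x1024"}'
```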

## OpenAI API Web UI

```bash
# Will be available on port 8080
podman run \
-d \
--network host \
-v open-webui:/app/backend/data \
--name open-webui \
--restart always \
ghcr.io/open-webui/open-webui:main
```
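If the UI needs to be reachable from other hosts, the same firewalld pattern used for Ollama above applies (port taken from the comment in the block); this step is not in the original notes:

```bash
firewall-cmd --add-port=8080/tcp --permanent
firewall-cmd --reload
```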

active/device_framework_desktop/quadlets/ai.pod (new file)

@@ -0,0 +1,9 @@
[Pod]
# llama.cpp
PublishPort=8000:8000/tcp
# open-webui
PublishPort=8080:8080/tcp
# anything-llm
PublishPort=3001:3001/tcp
# ollama
PublishPort=11434:11434/tcp
@@ -0,0 +1,21 @@
[Unit]
Description=An Anything LLM Frontend for Local AI Services

[Container]
Pod=ai.pod
Image=docker.io/mintplexlabs/anythingllm
Volume=anythingllm:/app/server/storage
Volume=/home/ai/anything-llm/.env:/app/server/.env:z
Environment=STORAGE_DIR=/app/server/storage
AddCapability=SYS_ADMIN
User=1000
Group=1000

[Service]
Restart=always
# Extend Timeout to allow time to pull the image
TimeoutStartSec=900

[Install]
# Start by default on boot
WantedBy=multi-user.target default.target
@@ -0,0 +1,28 @@
[Unit]
Description=A Llama CPP Server Running GPT OSS 120b

[Container]
Pod=ai.pod
Image=localhost/llama-cpp-vulkan:2026-01-12-10-13-30
Volume=llama-server-cache:/root/.cache
AddDevice=/dev/kfd
AddDevice=/dev/dri

Exec=-hf ggml-org/gpt-oss-120b-GGUF \
    --ctx-size 32000 \
    --jinja \
    -ub 2048 \
    -b 2048 \
    --port 8000 \
    --host 0.0.0.0 \
    -n -1 \
    --n-gpu-layers 999

[Service]
Restart=always
# Extend Timeout to allow time to pull the image
TimeoutStartSec=900

[Install]
# Start by default on boot
WantedBy=multi-user.target default.target
@@ -0,0 +1,19 @@
[Unit]
Description=An Ollama Server

[Container]
Pod=ai.pod
Image=docker.io/ollama/ollama:0.13.5
Volume=ollama:/root/.ollama
AddDevice=/dev/kfd
AddDevice=/dev/dri
Environment=OLLAMA_VULKAN=1

[Service]
Restart=always
# Extend Timeout to allow time to pull the image
TimeoutStartSec=900

[Install]
# Start by default on boot
WantedBy=multi-user.target default.target
@@ -0,0 +1,16 @@
[Unit]
Description=An Open Webui Frontend for Local AI Services

[Container]
Pod=ai.pod
Image=ghcr.io/open-webui/open-webui:main
Volume=open-webui-data:/app/backend/data

[Service]
Restart=always
# Extend Timeout to allow time to pull the image
TimeoutStartSec=900

[Install]
# Start by default on boot
WantedBy=multi-user.target default.target
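A rough activation sequence once these quadlet files are installed for the `ai` user; this is a sketch, not part of the commit. The `ai-pod.service` name assumes quadlet's convention of appending `-pod` for pod units, so verify with `systemctl --user list-unit-files` if in doubt.

```bash
# Regenerate units from the quadlet files, start the pod, and confirm membership
systemctl --user daemon-reload
systemctl --user start ai-pod.service
podman ps --pod
```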