# Podman Quadlet container unit: runs a llama.cpp server for GPT OSS 120b.
# Systemd unit syntax: one directive per line, full-line "#" comments only.

[Unit]
Description=A Llama CPP Server Running GPT OSS 120b

[Container]
# Run inside the pod declared by the ai.pod Quadlet unit (not part of this file).
Pod=ai.pod
Image=localhost/llama-cpp-vulkan:2026-01-12-10-13-30
# Named volume mounted at /root/.cache inside the container
# (presumably where the model fetched via -hf is cached; confirm).
Volume=llama-server-cache:/root/.cache
# Pass the host's /dev/kfd and /dev/dri device nodes into the container.
AddDevice=/dev/kfd
AddDevice=/dev/dri
# Arguments appended after the image name (assumes the image's entrypoint is
# the llama.cpp server binary; confirm). The server listens on 0.0.0.0:8000.
# Each continued line ends with a backslash; the last argument must not.
Exec=-hf ggml-org/gpt-oss-120b-GGUF \
    --ctx-size 32000 \
    --jinja \
    -ub 2048 \
    -b 2048 \
    --port 8000 \
    --host 0.0.0.0 \
    -n -1 \
    --n-gpu-layers 999

[Service]
Restart=always
# Extend the start timeout (seconds) to allow time to pull the image
TimeoutStartSec=900

[Install]
# Start by default on boot
WantedBy=multi-user.target default.target