local ai checkpoint
@@ -2,21 +2,29 @@
 Description=A Llama CPP Server Running GPT OSS 120b
 
 [Container]
+# Shared AI pod
 Pod=ai.pod
-Image=localhost/llama-cpp-vulkan:2026-01-12-10-13-30
-Volume=llama-server-cache:/root/.cache
+
+# Image is built locally via podman build
+Image=localhost/llama-cpp-vulkan:latest
+
+# Downloaded models volume
+Volume=/home/ai/models/text:/models:z
 
 # GPU Device
 AddDevice=/dev/kfd
 AddDevice=/dev/dri
 
-Exec=-hf ggml-org/gpt-oss-120b-GGUF \
-     --ctx-size 32000 \
-     --jinja \
-     -ub 2048 \
-     -b 2048 \
-     --port 8000 \
-     --host 0.0.0.0 \
-     -n -1 \
-     --n-gpu-layers 999
+# Server command
+Exec=--port 8000 \
+     -c 0 \
+     -b 2048 \
+     -ub 2048 \
+     --perf \
+     --n-gpu-layers all \
+     --jinja \
+     --models-max 1 \
+     --models-dir /models
 
 [Service]
 Restart=always
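For context, Pod=ai.pod refers to a Quadlet pod unit that this container joins; the commit does not show that file. A minimal sketch of what ai.pod might contain (assuming Podman 5.0+, where Quadlet supports .pod units, and assuming the pod is what publishes the server port) would be:

# ai.pod -- hypothetical sketch, not part of this commit
[Pod]
PodName=ai
# Publish llama-server's port from the pod; 8000 matches the Exec above
PublishPort=8000:8000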
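The image tag moves from a build timestamp to :latest, matching the new comment that the image is built locally via podman build. The exact build invocation isn't in this commit; something along these lines would produce the expected tag (the Containerfile location is an assumption):

# Hypothetical local build; podman stores unqualified tags under localhost/
podman build -t llama-cpp-vulkan:latest .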
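Once the .container file is updated, Quadlet regenerates the service on a daemon reload. Assuming a rootless user unit (the generated unit name comes from the .container file name, which this view doesn't show), a smoke test might look like:

# Reload generated units and restart the server -- unit name is assumed
systemctl --user daemon-reload
systemctl --user restart llama-server.service
# llama-server exposes an OpenAI-compatible API; this lists the models
# it can serve from --models-dir
curl http://localhost:8000/v1/models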