diff --git a/active/software_ai_stack/llama-instruct.container b/active/software_ai_stack/llama-instruct.container
index 2bc3116..ce4f225 100644
--- a/active/software_ai_stack/llama-instruct.container
+++ b/active/software_ai_stack/llama-instruct.container
@@ -17,7 +17,7 @@ AddDevice=/dev/dri
 
 # Server command
 Exec=--port 8002 \
-    -c 16000 \
+    -c 64000 \
     --perf \
     -v \
     --top-k 20 \
diff --git a/active/software_ai_stack/llama-think.container b/active/software_ai_stack/llama-think.container
index b1ce4f2..0d2d472 100644
--- a/active/software_ai_stack/llama-think.container
+++ b/active/software_ai_stack/llama-think.container
@@ -17,12 +17,17 @@ AddDevice=/dev/dri
 
 # Server command
 Exec=--port 8000 \
-    -c 64000 \
+    -c 128000 \
+    --top-k 64 \
+    --top-p 0.95 \
+    --temp 1.0 \
     --perf \
+    -v \
     --n-gpu-layers all \
     --jinja \
-    --models-max 1 \
-    --models-dir /models
+    -m /models/gemma-4-26b-a4b/gemma-4-26B-A4B-it-UD-Q8_K_XL.gguf \
+    --mmproj /models/gemma-4-26b-a4b/mmproj-BF16.gguf \
+    --alias think
 
 # Health Check
 HealthCmd=CMD-SHELL curl --fail http://127.0.0.1:8000/health || exit 1