Clarify LocalAI deploy instructions

This commit is contained in:
2025-03-04 09:32:57 -05:00
parent d53d95df1a
commit 3c7c1ce60a

View File

@@ -47,6 +47,7 @@ apt install -y cuda-toolkit
apt install -y nvidia-cuda-toolkit
# https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/cdi-support.html#generating-a-cdi-specification
# You'll need to run this after every apt update
nvidia-ctk cdi generate --output=/etc/cdi/nvidia.yaml
# monitor nvidia card
@@ -55,7 +56,7 @@ nvidia-smi
# Create IPv6 Network
# Use the below to generate a quadlet for /etc/containers/systemd/localai.network
# podman run --rm ghcr.io/containers/podlet --install --description "Local AI" \
podman network create --ipv6 --label local-ai local-ai
podman network create --ipv6 --label local-ai systemd-localai
# You might want to mount an external drive here.
mkdir /models
@@ -69,6 +70,11 @@ mkdir ~/.localai
echo $(pwgen --capitalize --numerals --secure 64 1) > ~/.localai/token
export MODEL_DIR=/models
export GPU_CONTAINER_IMAGE=quay.io/go-skynet/local-ai:master-cublas-cuda12-ffmpeg
export CPU_CONTAINER_IMAGE=quay.io/go-skynet/local-ai:master-ffmpeg
podman image pull $GPU_CONTAINER_IMAGE
podman image pull $CPU_CONTAINER_IMAGE
# LOCALAI_SINGLE_ACTIVE_BACKEND will unload the previous model before loading the next one
# LOCALAI_API_KEY will set an API key, omit to run unprotected.
@@ -88,7 +94,7 @@ podman run \
--restart always \
-v $MODEL_DIR:/build/models \
-v localai-tmp:/tmp/generated \
quay.io/go-skynet/local-ai:master-cublas-cuda12-ffmpeg
$GPU_CONTAINER_IMAGE
# The second (8081) will be our frontend. We'll protect it with basic auth.
# Use the below to generate a quadlet for /etc/containers/systemd/local-ai-webui.container
@@ -101,7 +107,7 @@ podman run \
--restart always \
-v $MODEL_DIR:/build/models \
-v localai-tmp:/tmp/generated \
quay.io/go-skynet/local-ai:master-ffmpeg
$CPU_CONTAINER_IMAGE
```
## Running Local AI on Arch with AMD GPU
@@ -252,7 +258,7 @@ client_body_timeout 1h;
client_header_timeout 1h;
proxy_connect_timeout 1h;
proxy_read_timeout 1h;
proxy_send_timeout 1h;s
proxy_send_timeout 1h;
```
Now write your nginx http config files. You'll need two: