diff --git a/podman/incubating/ollama/README.md b/podman/incubating/ollama/README.md
index a70d35c..3fecc5a 100644
--- a/podman/incubating/ollama/README.md
+++ b/podman/incubating/ollama/README.md
@@ -2,88 +2,90 @@
-## Running with Podman
+## Run natively with GPU support
 
 ```bash
-podman network create localai
+# Install script
+curl -fsSL https://ollama.com/install.sh | sh
+# Check the service is running
+systemctl status ollama
+```
+
+Remember to add `Environment="OLLAMA_HOST=0.0.0.0"` to `/etc/systemd/system/ollama.service` to
+make it accessible on the network.
+
+For Radeon 6000 cards you'll need to add `Environment="HSA_OVERRIDE_GFX_VERSION=10.3.0"` as well.
+
+```bash
+# Pull models
+# Try to use higher-parameter models. Grab the q5_K_M variant at minimum.
+
+# For a 24GB VRAM card I'd recommend:
+
+# Anything-LLM Coding
+ollama pull qwen2.5-coder:14b-instruct-q5_K_M
+# Anything-LLM Math
+ollama pull qwen2-math:7b-instruct-fp16
+# Anything-LLM Chat
+ollama pull llama3.2-vision:11b-instruct-q8_0
+
+# VSCode Continue Autocomplete
+ollama pull starcoder2:15b-q5_K_M
+# VSCode Continue Chat
+ollama pull llama3.1:8b-instruct-fp16
+# VSCode Continue Embedder
+ollama pull nomic-embed-text:137m-v1.5-fp16
+```
+
+Note: your ollama instance will be available to podman containers at `http://host.containers.internal:11434`.
+
+## Run Anything LLM Interface
+
+```bash
 podman run \
   -d \
-  -v ollama:/root/.ollama \
-  -p 127.0.0.1:11434:po \
-  --network localai \
-  --name ollama \
-  docker.io/ollama/ollama
-
-# Pull new models
-podman container exec ollama ollama pull llama3.2:3b
-podman container exec ollama ollama pull llama3.2:1b
-podman container exec ollama ollama pull llama3.2-vision:11b
-podman container exec ollama ollama pull llava-llama3:8b
-podman container exec ollama ollama pull deepseek-coder-v2:16b
-podman container exec ollama ollama pull opencoder:8b
-podman container exec ollama ollama pull codestral:22b
-
-# Talk to an existing model via cli
-podman container exec -it ollama ollama run llama3.2:3b
-
-podman run \
-  -d \
-  -p 127.0.0.1:3001:3001 \
+  -p 3001:3001 \
   --name anything-llm \
-  --network localai \
   --cap-add SYS_ADMIN \
   -v anything-llm:/app/server \
   -e STORAGE_DIR="/app/server/storage" \
-  mintplexlabs/anythingllm
+  docker.io/mintplexlabs/anythingllm
 ```
 
-### Quadlets with Podlet
+This should now be accessible on port 3001. Note: you'll need to allow traffic from the podman
+network to the host:
+
+Use `podman network ls` to see which networks podman is running on and `podman network inspect`
+to get the IP address range.
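+
+For example, to find that range (a sketch; `podman` is the default network name — substitute
+whatever `podman network ls` lists for your container):
+
+```bash
+# The "subnets" entry in the output is the range to allow (e.g. 10.89.0.0/24)
+podman network inspect podman
+```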
+
+Then allow traffic from that range to port 11434 (ollama):
 
 ```bash
-# Create volume for ollama
-mkdir /ollama
+ufw allow from 10.89.0.1/24 to any port 11434
+```
 
-podman run --rm ghcr.io/containers/podlet --install --description "Local AI Network" \
-  podman network create localai
+## Anything LLM Quadlet with Podlet
 
-podman run --rm ghcr.io/containers/podlet --install --description "Ollama" \
-  podman run \
-    -d \
-    -v /ollama:/root/.ollama \
-    -p 127.0.0.1:11434:11434 \
-    --network localai \
-    --name ollama \
-    docker.io/ollama/ollama
-
-export STORAGE_LOCATION=/anything-llm && \
+```bash
 podman run --rm ghcr.io/containers/podlet --install --description "Anything LLM" \
   podman run \
     -d \
-    -p 127.0.0.1:3001:3001 \
+    -p 3001:3001 \
     --name anything-llm \
-    --network localai \
     --cap-add SYS_ADMIN \
-    -v ${STORAGE_LOCATION}:/app/server/storage \
-    -v ${STORAGE_LOCATION}/.env:/app/server/.env \
+    --restart always \
+    -v anything-llm:/app/server \
     -e STORAGE_DIR="/app/server/storage" \
-    mintplexlabs/anythingllm
-```
-
-Make sure to add
-
-```conf
-[Service]
-Restart=always
+    docker.io/mintplexlabs/anythingllm
 ```
 
-To the service to have them autostart. Put the generated files in `/usr/share/containers/systemd/`.
+Put the generated files in `/usr/share/containers/systemd/` so the generated services autostart.
 
-## Podman systemd service
+## Now with Nginx and Certbot
 
 See [generating AWS credentials](cloud/graduated/aws_iam/README.md)
 
@@ -99,9 +101,14 @@ aws configure
 Open http/s in firewalld:
 
 ```bash
+# Remember to set the default zone first: firewall-cmd --set-default-zone=public
 firewall-cmd --permanent --zone=public --add-service=http
 firewall-cmd --permanent --zone=public --add-service=https
 firewall-cmd --reload
+
+# or
+ufw allow 80/tcp
+ufw allow 443/tcp
 ```
 
 Here are the detailed instructions for installing and setting up Nginx on Fedora Linux with Certbot
@@ -109,33 +116,44 @@ using the Route53 DNS challenge to put in front of a service called "Anything LL
 3001 with WebSockets. The domain will be chatreesept.reeseapps.com.
 
 1. Install Nginx:
-   ```
-   dnf install -y nginx
-   ```
+
+   ```
+   dnf install -y nginx
+   ```
 
 2. Start and enable Nginx service:
-   ```
-   systemctl enable --now nginx
-   ```
+
+   ```
+   systemctl enable --now nginx
+   ```
 
 3. Install Certbot and the Route53 DNS plugin:
-   ```
-   dnf install -y certbot python3-certbot-dns-route53
-   ```
+
+   ```
+   # Fedora
+   dnf install -y certbot python3-certbot-dns-route53
+
+   # Arch
+   pacman -S certbot certbot-dns-route53
+   ```
 
 4. Request a certificate for your domain using the Route53 DNS challenge:
-   ```
-   certbot certonly --dns-route53 -d chatreesept.reeseapps.com
-   ```
-   Follow the prompts to provide your Route53 credentials and email address.
+
+   ```
+   certbot certonly --dns-route53 -d chatreesept.reeseapps.com
+   ```
+
+   Follow the prompts to provide your Route53 credentials and email address.
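+
+   Optionally, confirm the certificate was issued before pointing Nginx at it (a quick check):
+
+   ```
+   certbot certificates
+   ```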
 
 5. Configure Nginx for your domain: Create a new Nginx configuration file for your domain:
-   ```
-   vim /etc/nginx/conf.d/chatreesept.reeseapps.com.conf
-   ```
-   Add the following configuration to the file:
-   ```
+
+   First, update your main nginx conf with the following timeouts:
+
+   ```
+   vim /etc/nginx/nginx.conf
+   ```
+
+   ```
    keepalive_timeout 1h;
    send_timeout 1h;
    client_body_timeout 1h;
@@ -143,7 +161,47 @@ using the Route53 DNS challenge to put in front of a service called "Anything LL
    proxy_connect_timeout 1h;
    proxy_read_timeout 1h;
    proxy_send_timeout 1h;
+   ```
+
+   ```
+   vim /etc/nginx/conf.d/ollama.reeselink.com.conf
+   ```
+
+   ```
+   server {
+       listen 80;
+       server_name ollama.reeselink.com;
+
+       location / {
+           return 301 https://$host$request_uri;
+       }
+   }
+
+   server {
+       listen 443 ssl;
+       server_name ollama.reeselink.com;
+
+       ssl_certificate /etc/letsencrypt/live/ollama.reeselink.com/fullchain.pem;
+       ssl_certificate_key /etc/letsencrypt/live/ollama.reeselink.com/privkey.pem;
+
+       location / {
+           proxy_pass http://localhost:11434;
+           proxy_http_version 1.1;
+           proxy_set_header Upgrade $http_upgrade;
+           proxy_set_header Connection "upgrade";
+           proxy_set_header Host $host;
+           proxy_cache_bypass $http_upgrade;
+           proxy_buffering off;
+       }
+   }
+   ```
+
+   ```
+   vim /etc/nginx/conf.d/chatreesept.reeseapps.com.conf
+   ```
+
+   Add the following configuration to the file:
+   ```
   server {
       listen 80;
       server_name chatreesept.reeseapps.com;
@@ -169,67 +227,118 @@ using the Route53 DNS challenge to put in front of a service called "Anything LL
           proxy_set_header Connection "upgrade";
           proxy_set_header Host $host;
           proxy_cache_bypass $http_upgrade;
+          proxy_buffering off;
       }
   }
-   ```
+   ```
 
 6. Test your Nginx configuration for syntax errors:
-   ```
-   nginx -t
-   ```
-   If there are no errors, reload Nginx to apply the changes:
-   ```
-   systemctl reload nginx
-   ```
+
+   ```
+   nginx -t
+   ```
+
+   If there are no errors, reload Nginx to apply the changes:
+
+   ```
+   systemctl reload nginx
+   ```
 
 7. Set up automatic certificate renewal: Add the following line to your crontab to renew the
-   certificate daily:
-   ```
-   sudo crontab -e
-   ```
-   Add the following line to the end of the file:
-   ```
-   0 0 * * * certbot renew --quiet --no-self-upgrade --pre-hook "systemctl stop nginx" --post-hook "systemctl start nginx"
-   ```
+   certificate daily:
+
+   ```
+   # Arch: install cron first
+   pacman -S cronie
+   sudo crontab -e
+   ```
+
+   Add the following line to the end of the file:
+
+   ```
+   0 0 * * * certbot renew --quiet
+   ```
 
 Now, your "Anything LLM" service running on port 3001 with WebSockets is accessible through the
 domain chatreesept.reeseapps.com with a valid SSL certificate from Let's Encrypt. The certificate
 will be automatically renewed daily.
 
-## Nginx
+## Custom Models
+
+### From Existing Model
 
 ```bash
-certbot-3 certonly --dns-route53 -d chatreesept.reeseapps.com
+ollama show --modelfile opencoder > Modelfile
+# Raise the context window, then rebuild the model from the edited Modelfile
+echo "PARAMETER num_ctx 8192" >> Modelfile
+ollama create opencoder-fix -f Modelfile
 ```
 
-Make sure to add the following timeout configurations to your http block:
+### From Scratch
 
-```conf
-server {
-  # Enable websocket connections for agent protocol.
-  location ~* ^/api/agent-invocation/(.*) {
-    proxy_pass http://0.0.0.0:3001;
-    proxy_http_version 1.1;
-    proxy_set_header Upgrade $http_upgrade;
-    proxy_set_header Connection "Upgrade";
-  }
+Install git lfs and clone the model you're interested in:
 
-  listen 80;
-  server_name [insert FQDN here];
-  location / {
-    # Prevent timeouts on long-running requests.
-    proxy_connect_timeout 605;
-    proxy_send_timeout 605;
-    proxy_read_timeout 605;
-    send_timeout 605;
-    keepalive_timeout 605;
+```bash
+# Make sure you have git-lfs installed (https://git-lfs.com)
+git lfs install
 
-    # Enable readable HTTP Streaming for LLM streamed responses
-    proxy_buffering off;
-    proxy_cache off;
+git clone https://huggingface.co/bartowski/Starling-LM-7B-beta-GGUF
+```
 
-    # Proxy your locally running service
-    proxy_pass http://0.0.0.0:3001;
-  }
-}
+Create a Modelfile
+
+```
+# Modelfile
+FROM "./path/to/gguf"
+
+TEMPLATE """{{ if .Prompt }}<|im_start|>
+{{ .Prompt }}<|im_end|>
+{{ end }}
+"""
+
+SYSTEM You are OpenCoder, created by OpenCoder Team.
+
+PARAMETER stop <|im_start|>
+PARAMETER stop <|im_end|>
+PARAMETER stop <|fim_prefix|>
+PARAMETER stop <|fim_middle|>
+PARAMETER stop <|fim_suffix|>
+PARAMETER stop <|fim_end|>
+PARAMETER stop """
+
+
+"""
+
+```
+
+Build the model
+
+```bash
+ollama create "Starling-LM-7B-beta-Q6_K" -f Modelfile
+```
+
+Run the model
+
+```bash
+ollama run Starling-LM-7B-beta-Q6_K:latest
+```
+
+## Converting to GGUF
+
+1. Clone the llama.cpp repository and install its dependencies:
+
+```bash
+git clone https://github.com/ggerganov/llama.cpp.git ~/llama.cpp
+cd ~/llama.cpp
+python3 -m venv venv && source venv/bin/activate
+pip3 install -r requirements.txt
+
+mkdir -p ~/llama.cpp/models/mistral
+huggingface-cli login  # necessary to download gated models
+huggingface-cli download mistralai/Mistral-7B-Instruct-v0.3 --local-dir ~/llama.cpp/models/mistral/
+
+# Convert a model already in the Hugging Face cache:
+python3 convert_hf_to_gguf.py ~/.cache/huggingface/hub/models--infly--OpenCoder-8B-Instruct/snapshots/01badbbf10c2dfd7e2a0b5f570065ef44548576c
 ```
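+
+To convert the Mistral download above, a sketch that names the output file and produces a q8_0
+quantization directly (the `--outfile`/`--outtype` flags assume a reasonably recent llama.cpp
+checkout, and the output filename is arbitrary); the resulting file can then go in a Modelfile
+`FROM` line as in the From Scratch section:
+
+```bash
+# Write a q8_0 gguf with a predictable name next to the downloaded weights
+python3 convert_hf_to_gguf.py ~/llama.cpp/models/mistral/ \
+  --outtype q8_0 \
+  --outfile ~/llama.cpp/models/mistral/mistral-7b-instruct-v0.3-q8_0.gguf
+```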