From 87a578f1def3c78c8ccf2544749a016545af9a88 Mon Sep 17 00:00:00 2001
From: ducoterra <git@ducoterra.net>
Date: Sat, 23 May 2026 23:56:03 -0400
Subject: [PATCH] everything working again after cleanup

---
 .vscode/launch.json                  |  13 +-
 README.md                            | 288 ++++++++++++++++-----------
 pyproject.toml                       |   3 +
 uv.lock                              |  19 ++
 vibe_bot/database.py                 |   9 +-
 vibe_bot/llama_wrapper.py            |  82 ++++++--
 vibe_bot/main.py                     |  32 ++-
 vibe_bot/tests/conftest.py           |  31 +--
 vibe_bot/tests/test_config.py        |   6 +-
 vibe_bot/tests/test_database.py      |  36 +++-
 vibe_bot/tests/test_llama_wrapper.py |  33 +--
 vibe_bot/tests/test_main.py          |  12 +-
 vibe_bot/tests/test_tts.py           |  16 +-
 13 files changed, 380 insertions(+), 200 deletions(-)
diff --git a/.vscode/launch.json b/.vscode/launch.json
index c4b6f68..0769182 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -1,9 +1,14 @@
 {
-    // Use IntelliSense to learn about possible attributes.
-    // Hover to view descriptions of existing attributes.
-    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
     "version": "0.2.0",
     "configurations": [
+        {
+            "name": "Vibe Bot: Module",
+            "type": "debugpy",
+            "request": "launch",
+            "module": "vibe_bot.main",
+            "console": "integratedTerminal",
+            "envFile": "${workspaceFolder}/.env"
+        },
         {
             "name": "Python Debugger: Current File",
             "type": "debugpy",
@@ -13,4 +18,4 @@
             "envFile": "${workspaceFolder}/.env"
         }
     ]
-}
\ No newline at end of file
+}
diff --git a/README.md b/README.md
index fc69db8..369ead9 100644
--- a/README.md
+++ b/README.md
@@ -1,217 +1,271 @@
 # Vibe Discord Bot with RAG Chat History
 
-A Discord bot that stores long-term chat history using SQLite database with RAG (Retrieval-Augmented Generation) capabilities powered by custom embedding models.
+A Discord bot that stores long-term chat history using SQLite with RAG (Retrieval-Augmented Generation) capabilities. It supports custom bots with personalities, text-to-speech via Kokoro, image generation, and image editing.
 
 - [Vibe Discord Bot with RAG Chat History](#vibe-discord-bot-with-rag-chat-history)
-  - [Quick Start - Available Commands](#quick-start---available-commands)
-    - [Pre-built Bots](#pre-built-bots)
+  - [Available Commands](#available-commands)
     - [Custom Bot Management](#custom-bot-management)
     - [Using Custom Bots](#using-custom-bots)
+    - [Text-to-Speech](#text-to-speech)
+    - [Image Commands](#image-commands)
+    - [Bot Conversations](#bot-conversations)
   - [Features](#features)
   - [Setup](#setup)
     - [Prerequisites](#prerequisites)
     - [Environment Variables](#environment-variables)
     - [Installation](#installation)
+    - [Running the Bot](#running-the-bot)
   - [How It Works](#how-it-works)
     - [Database Structure](#database-structure)
     - [RAG Process](#rag-process)
-    - [Configuration Options](#configuration-options)
-  - [Usage](#usage)
   - [File Structure](#file-structure)
-  - [Build](#build)
-    - [Using uv](#using-uv)
+  - [Building](#building)
+    - [Local](#local)
     - [Container](#container)
-  - [Docs](#docs)
-    - [Open AI](#open-ai)
-  - [Models](#models)
-    - [Qwen3.5](#qwen35)
+  - [Testing](#testing)
+  - [Configuration](#configuration)
 
-
-## Quick Start - Available Commands
-
-### Pre-built Bots
-
-| Command      | Description                   | Example Usage                              |
-| ------------ | ----------------------------- | ------------------------------------------ |
-| `!doodlebob` | Generate images from text     | `!doodlebob a cat sitting on a moon`       |
-| `!retcon`    | Edit images with text prompts | `!retcon <image attachment> Make it sunny` |
+## Available Commands
 
 ### Custom Bot Management
 
-| Command                        | Description                                   | Example Usage                                    |
-| ------------------------------ | --------------------------------------------- | ------------------------------------------------ |
-| `!custom <name> <personality>` | Create a custom bot with specific personality | `!custom alfred you are a proper british butler` |
-| `!list-custom-bots`            | List all available custom bots                | `!list-custom-bots`                              |
-| `!delete-custom-bot <name>`    | Delete your custom bot                        | `!delete-custom-bot alfred`                      |
+| Command                            | Description                            | Example Usage                                        |
+| ---------------------------------- | -------------------------------------- | ---------------------------------------------------- |
+| `!custom-bot <name> <personality>` | Create a custom bot with a personality | `!custom-bot alfred you are a proper british butler` |
+| `!list-custom-bots`                | List all available custom bots         | `!list-custom-bots`                                  |
+| `!delete-custom-bot <name>`        | Delete your custom bot (owner only)    | `!delete-custom-bot alfred`                          |
 
 ### Using Custom Bots
 
-Once you create a custom bot, you can interact with it directly by prefixing your message with the bot name:
+Once you create a custom bot, interact with it by prefixing your message with `!` followed by the bot name:
 
-```bash
+```text
 !<bot_name> <your message>
 ```
 
 **Example:**
 
-1. Create a bot: `!custom alfred you are a proper british butler`
+1. Create a bot: `!custom-bot alfred you are a proper british butler`
 2. Use the bot: `alfred Could you fetch me some tea?`
 3. The bot will respond in character as a British butler
 
+### Text-to-Speech
+
+| Command                    | Description                             | Example Usage                   |
+| -------------------------- | --------------------------------------- | ------------------------------- |
+| `!speak <text>`            | Convert text to speech (MP3 attachment) | `!speak hello world`            |
+| `!speak <bot_name> <text>` | Have a custom bot respond and speak     | `!speak alfred what time is it` |
+
+### Image Commands
+
+| Command      | Description                          | Example Usage                              |
+| ------------ | ------------------------------------ | ------------------------------------------ |
+| `!doodlebob` | Generate an image from a text prompt | `!doodlebob a cat sitting on the moon`     |
+| `!retcon`    | Edit an attached image with text     | `!retcon <image attachment> Make it sunny` |
+
+### Bot Conversations
+
+| Command                                | Description                                 | Example Usage                                    |
+| -------------------------------------- | ------------------------------------------- | ------------------------------------------------ |
+| `!talkforme <bot1> <bot2> <n> <topic>` | Have two bots discuss a topic for n replies | `!talkforme alfred jarvis 4 the meaning of life` |
+
 ## Features
 
-- **Long-term chat history storage**: Persistent storage of all bot interactions
+- **Long-term chat history storage**: Persistent storage of all bot interactions in SQLite
 - **RAG-based context retrieval**: Smart retrieval of relevant conversation history using vector embeddings
-- **Custom embedding model**: Uses qwen3-embed-4b for semantic search capabilities
-- **Efficient message management**: Automatic cleanup of old messages based on configurable limits
-
-- **Long-term chat history storage**: Persistent storage of all bot interactions
-- **RAG-based context retrieval**: Smart retrieval of relevant conversation history using vector embeddings
-- **Custom embedding model**: Uses qwen3-embed-4b for semantic search capabilities
-- **Efficient message management**: Automatic cleanup of old messages based on configurable limits
+- **Custom bots**: Create unlimited bots with unique personalities
+- **Text-to-speech**: Kokoro TTS engine converts bot responses to MP3 audio
+- **Image generation**: Generate images from text prompts via an OpenAI-compatible API
+- **Image editing**: Edit uploaded images with text instructions
+- **Bot conversations**: Two custom bots can discuss a topic autonomously
+- **Automatic message cleanup**: Configurable limits on stored messages
 
 ## Setup
 
 ### Prerequisites
 
-- Python 3.10 or higher
+- Python 3.13 or higher
 - [uv](https://docs.astral.sh/uv/) package manager
-- Embedding API key
 - Discord bot token
+- OpenAI-compatible API endpoints (for chat, embeddings, and image generation)
 
 ### Environment Variables
 
-Create a `.env` file or export the following variables:
+Create a `.env` file with the following variables:
 
 ```bash
-# Discord Bot Token
-export DISCORD_TOKEN=your_discord_bot_token
+# Discord Bot Token (required)
+DISCORD_TOKEN=your_discord_bot_token
 
-# Embedding API Configuration
-export OPENAI_API_KEY=your_embedding_api_key
-export OPENAI_API_ENDPOINT=https://llama-embed.reeselink.com/embedding
+# Chat/Completion API (required)
+CHAT_ENDPOINT=https://your-api.com/v1
+COMPLETION_ENDPOINT=https://your-api.com/v1
+CHAT_ENDPOINT_KEY=your_api_key
+COMPLETION_ENDPOINT_KEY=your_api_key
+CHAT_MODEL=your_model_name
+COMPLETION_MODEL=your_model_name
 
-# Image Generation (optional)
-export IMAGE_GEN_ENDPOINT=http://toybox.reeselink.com:1234/v1
-export IMAGE_EDIT_ENDPOINT=http://toybox.reeselink.com:1235/v1
+# Image Generation (required)
+IMAGE_GEN_ENDPOINT=https://your-api.com/v1
+IMAGE_EDIT_ENDPOINT=https://your-api.com/v1
+IMAGE_GEN_ENDPOINT_KEY=your_api_key
+IMAGE_EDIT_ENDPOINT_KEY=your_api_key
+IMAGE_GEN_MODEL=gen
+IMAGE_EDIT_MODEL=edit
 
-# Database Configuration (optional)
-export CHAT_DB_PATH=chat_history.db
-export EMBEDDING_MODEL=qwen3-embed-4b
-export EMBEDDING_DIMENSION=2048
-export MAX_HISTORY_MESSAGES=1000
-export SIMILARITY_THRESHOLD=0.7
-export TOP_K_RESULTS=5
+# Embedding API (required)
+EMBEDDING_ENDPOINT=https://your-api.com/v1
+EMBEDDING_ENDPOINT_KEY=your_api_key
+EMBEDDING_MODEL=your_embed_model
+
+# Optional: TTS Configuration
+TTS_MODEL_PATH=kokoro-v1.0.onnx
+TTS_VOICES_PATH=voices-v1.0.bin
+TTS_VOICE=af_sarah
+TTS_SPEED=1.0
+
+# Optional: Database/Chat Settings
+DB_PATH=chat_history.db
+MAX_COMPLETION_TOKENS=1000
+MAX_HISTORY_MESSAGES=1000
+SIMILARITY_THRESHOLD=0.7
+TOP_K_RESULTS=5
 ```
 
 ### Installation
 
-1. Sync dependencies with uv:
-```bash
-uv sync
-```
+1. Clone the repository and sync dependencies:
+
+    ```bash
+    uv sync
+    ```
+
+2. Ensure the TTS model files are present in the project root:
+
+   - `kokoro-v1.0.onnx`
+   - `voices-v1.0.bin`
+
+### Running the Bot
 
-2. Run the bot:
 ```bash
-uv run main.py
+uv run python -m vibe_bot.main
 ```
 
 ## How It Works
 
 ### Database Structure
 
-The system uses two SQLite tables:
+The system uses SQLite with three tables:
 
 1. **chat_messages**: Stores message metadata
-   - message_id, user_id, username, content, timestamp, channel_id, guild_id
+   - `message_id`, `user_id`, `username`, `content`, `timestamp`, `channel_id`, `guild_id`
 
 2. **message_embeddings**: Stores vector embeddings for RAG
-   - message_id, embedding (as binary blob)
+   - `message_id` (PK), `embedding` (binary blob of float32 values)
+
+3. **custom_bots**: Stores custom bot configurations
+   - `bot_name` (PK), `system_prompt`, `created_by`, `created_at`, `is_active`
 
 ### RAG Process
 
-1. When a message is received, it's stored in the database
-2. An embedding is generated using OpenAI's embedding API
-3. The embedding is stored alongside the message
-4. When a new message is sent to the bot:
-   - The system searches for similar messages using vector similarity
-   - Relevant context is retrieved and added to the prompt
+1. When a message is sent to a custom bot, it's stored in `chat_messages`
+2. An embedding is generated via the configured embedding API and stored in `message_embeddings`
+3. When a new message is sent:
+   - The system retrieves recent messages from the same user
+   - It searches for semantically similar messages using cosine similarity on embeddings
+   - Relevant context (user + bot message pairs) is prepended to the prompt
    - The LLM generates a response with awareness of past conversations
 
-### Configuration Options
-
-- **MAX_HISTORY_MESSAGES**: Maximum number of messages to keep (default: 1000)
-- **SIMILARITY_THRESHOLD**: Minimum similarity score for context retrieval (default: 0.7)
-- **TOP_K_RESULTS**: Number of similar messages to retrieve (default: 5)
-- **EMBEDDING_MODEL**: OpenAI embedding model to use (default: text-embedding-3-small)
-
-## Usage
-
-The bot maintains conversation context automatically. When you ask a question, it will:
-
-1. Search for similar past conversations
-2. Include relevant context in the prompt
-3. Generate responses that are aware of the conversation history
-
 ## File Structure
 
 ```text
 vibe_discord_bots/
-├── main.py              # Main bot application
-├── database.py          # SQLite database with RAG support
-├── pyproject.toml       # Project dependencies (uv)
-├── .env                 # Environment variables
-├── .venv/               # Virtual environment (created by uv)
-└── README.md           # This file
+├── vibe_bot/
+│   ├── __init__.py            # Package marker
+│   ├── main.py                # Main bot application (commands, event handlers)
+│   ├── config.py              # Environment variable loading and validation
+│   ├── database.py            # SQLite database with RAG + CustomBotManager
+│   ├── llama_wrapper.py       # OpenAI-compatible API wrappers (chat, images, embeddings)
+│   ├── tts.py                 # Kokoro TTS engine
+│   └── tests/
+│       ├── conftest.py        # Shared test fixtures
+│       ├── test_main.py       # Bot command tests
+│       ├── test_config.py     # Config loading tests
+│       ├── test_database.py   # Database + CustomBotManager tests
+│       ├── test_llama_wrapper.py  # API wrapper tests
+│       └── test_tts.py        # TTS engine tests
+├── pyproject.toml             # Project dependencies (uv)
+├── uv.lock                    # Locked dependency versions
+├── .env                       # Environment variables
+├── kokoro-v1.0.onnx           # Kokoro TTS model
+├── voices-v1.0.bin            # Kokoro voice definitions
+├── Containerfile              # Podman/Docker build file
+└── README.md                  # This file
 ```
 
-## Build
+## Building
 
-### Using uv
+### Local
 
 ```bash
-# Set environment variables
-export DISCORD_TOKEN=$(cat .token)
-export OPENAI_API_KEY=your_api_key
-export OPENAI_API_ENDPOINT="https://llama-cpp.reeselink.com"
-export IMAGE_GEN_ENDPOINT="http://toybox.reeselink.com:1234/v1"
-export IMAGE_EDIT_ENDPOINT="http://toybox.reeselink.com:1235/v1"
+# Sync dependencies
+uv sync
 
-# Run with uv
-uv run main.py
+# Run the bot
+uv run python -m vibe_bot.main
 ```
 
 ### Container
 
 ```bash
-# Build
+# Build the container image
 podman build -t vibe-bot:latest .
 
-# Run
+# Run with environment file
 podman run --env-file .env localhost/vibe-bot:latest
 ```
 
-## Docs
+## Testing
 
-### Open AI
+Run the full test suite:
 
-Chat
+```bash
+uv run pytest vibe_bot/tests/ -v
+```
 
-<https://developers.openai.com/api/reference/resources/chat/subresources/completions/methods/create>
+Run linters:
 
-Images
+```bash
+# Ruff (linter)
+uv run ruff check vibe_bot/
 
-<https://developers.openai.com/api/reference/python/resources/images/methods/edit>
+# Mypy (type checking)
+uv run mypy vibe_bot/
 
-## Models
+# Pyright (type checking)
+uv run pyright vibe_bot/
 
-### Qwen3.5
+# Black (formatter check)
+uv run black --check vibe_bot/
+```
 
-> We recommend using the following set of sampling parameters for generation
+## Configuration
 
-- Non-thinking mode for text tasks: temperature=1.0, top_p=1.00, top_k=20, min_p=0.0, presence_penalty=2.0, repetition_penalty=1.0
-- Non-thinking mode for VL tasks: temperature=0.7, top_p=0.80, top_k=20, min_p=0.0, presence_penalty=1.5, repetition_penalty=1.0
-- Thinking mode for text tasks: temperature=1.0, top_p=0.95, top_k=20, min_p=0.0, presence_penalty=1.5, repetition_penalty=1.0
-- Thinking mode for VL or precise coding (e.g. WebDev) tasks : temperature=0.6, top_p=0.95, top_k=20, min_p=0.0, presence_penalty=0.0, repetition_penalty=1.0
-
-> Please note that the support for sampling parameters varies according to inference frameworks.
+| Variable                | Default            | Description                           |
+| ----------------------- | ------------------ | ------------------------------------- |
+| `DISCORD_TOKEN`         | *(required)*       | Discord bot authentication token      |
+| `CHAT_ENDPOINT`         | *(required)*       | OpenAI-compatible chat API URL        |
+| `CHAT_MODEL`            | *(required)*       | Model name for chat completions       |
+| `IMAGE_GEN_ENDPOINT`    | *(required)*       | Image generation API URL              |
+| `IMAGE_EDIT_ENDPOINT`   | *(required)*       | Image editing API URL                 |
+| `EMBEDDING_ENDPOINT`    | *(required)*       | Embedding API URL                     |
+| `EMBEDDING_MODEL`       | *(required)*       | Model name for text embeddings        |
+| `MAX_COMPLETION_TOKENS` | `1000`             | Max tokens in LLM responses           |
+| `MAX_HISTORY_MESSAGES`  | `1000`             | Max messages kept in the database     |
+| `SIMILARITY_THRESHOLD`  | `0.7`              | Min cosine similarity for RAG context |
+| `TOP_K_RESULTS`         | `5`                | Number of similar messages retrieved  |
+| `TTS_MODEL_PATH`        | `kokoro-v1.0.onnx` | Path to Kokoro ONNX model file        |
+| `TTS_VOICES_PATH`       | `voices-v1.0.bin`  | Path to Kokoro voices binary file     |
+| `TTS_VOICE`             | `af_sarah`         | Default voice for TTS                 |
+| `TTS_SPEED`             | `1.0`              | Speech speed multiplier               |
+| `DB_PATH`               | `chat_history.db`  | SQLite database file path             |
diff --git a/pyproject.toml b/pyproject.toml
index d49f63a..dd31e12 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -23,6 +23,7 @@ dev = [
     "pyright>=1.1.398",
     "mypy>=1.17.0",
     "black>=25.1.0",
+    "debugpy>=1.8.0",
 ]
 
 [tool.uv]
@@ -56,6 +57,8 @@ ignore = [
     "EM101",    # string literals in exceptions
     "TC003",    # stdlib import in type-checking block
     "F401",     # unused imports (bytesio used in isinstance)
+    "SIM117",   # nested with statements (needed for test fixtures)
+    "PT018",    # assertion breakdown (test-specific patterns)
 ]
 
 [tool.ruff.lint.isort]
diff --git a/uv.lock b/uv.lock
index 5fc5c39..ea0ecbe 100644
--- a/uv.lock
+++ b/uv.lock
@@ -434,6 +434,23 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/3a/88/9713d1ecac111742d60e1d9c2c15fec56fd121940f97a73d014dc9a7d521/csvw-4.0.0-py2.py3-none-any.whl", hash = "sha256:df875fcb1505afd15061b5f370268522bf162640de0662a724453dcb4db6a88b", size = 69424, upload-time = "2026-05-05T06:25:24.646Z" },
 ]
 
+[[package]]
+name = "debugpy"
+version = "1.8.20"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/e0/b7/cd8080344452e4874aae67c40d8940e2b4d47b01601a8fd9f44786c757c7/debugpy-1.8.20.tar.gz", hash = "sha256:55bc8701714969f1ab89a6d5f2f3d40c36f91b2cbe2f65d98bf8196f6a6a2c33", size = 1645207, upload-time = "2026-01-29T23:03:28.199Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/15/e2/fc500524cc6f104a9d049abc85a0a8b3f0d14c0a39b9c140511c61e5b40b/debugpy-1.8.20-cp313-cp313-macosx_15_0_universal2.whl", hash = "sha256:5dff4bb27027821fdfcc9e8f87309a28988231165147c31730128b1c983e282a", size = 2539560, upload-time = "2026-01-29T23:03:48.738Z" },
+    { url = "https://files.pythonhosted.org/packages/90/83/fb33dcea789ed6018f8da20c5a9bc9d82adc65c0c990faed43f7c955da46/debugpy-1.8.20-cp313-cp313-manylinux_2_34_x86_64.whl", hash = "sha256:84562982dd7cf5ebebfdea667ca20a064e096099997b175fe204e86817f64eaf", size = 4293272, upload-time = "2026-01-29T23:03:50.169Z" },
+    { url = "https://files.pythonhosted.org/packages/a6/25/b1e4a01bfb824d79a6af24b99ef291e24189080c93576dfd9b1a2815cd0f/debugpy-1.8.20-cp313-cp313-win32.whl", hash = "sha256:da11dea6447b2cadbf8ce2bec59ecea87cc18d2c574980f643f2d2dfe4862393", size = 5331208, upload-time = "2026-01-29T23:03:51.547Z" },
+    { url = "https://files.pythonhosted.org/packages/13/f7/a0b368ce54ffff9e9028c098bd2d28cfc5b54f9f6c186929083d4c60ba58/debugpy-1.8.20-cp313-cp313-win_amd64.whl", hash = "sha256:eb506e45943cab2efb7c6eafdd65b842f3ae779f020c82221f55aca9de135ed7", size = 5372930, upload-time = "2026-01-29T23:03:53.585Z" },
+    { url = "https://files.pythonhosted.org/packages/33/2e/f6cb9a8a13f5058f0a20fe09711a7b726232cd5a78c6a7c05b2ec726cff9/debugpy-1.8.20-cp314-cp314-macosx_15_0_universal2.whl", hash = "sha256:9c74df62fc064cd5e5eaca1353a3ef5a5d50da5eb8058fcef63106f7bebe6173", size = 2538066, upload-time = "2026-01-29T23:03:54.999Z" },
+    { url = "https://files.pythonhosted.org/packages/c5/56/6ddca50b53624e1ca3ce1d1e49ff22db46c47ea5fb4c0cc5c9b90a616364/debugpy-1.8.20-cp314-cp314-manylinux_2_34_x86_64.whl", hash = "sha256:077a7447589ee9bc1ff0cdf443566d0ecf540ac8aa7333b775ebcb8ce9f4ecad", size = 4269425, upload-time = "2026-01-29T23:03:56.518Z" },
+    { url = "https://files.pythonhosted.org/packages/c5/d9/d64199c14a0d4c476df46c82470a3ce45c8d183a6796cfb5e66533b3663c/debugpy-1.8.20-cp314-cp314-win32.whl", hash = "sha256:352036a99dd35053b37b7803f748efc456076f929c6a895556932eaf2d23b07f", size = 5331407, upload-time = "2026-01-29T23:03:58.481Z" },
+    { url = "https://files.pythonhosted.org/packages/e0/d9/1f07395b54413432624d61524dfd98c1a7c7827d2abfdb8829ac92638205/debugpy-1.8.20-cp314-cp314-win_amd64.whl", hash = "sha256:a98eec61135465b062846112e5ecf2eebb855305acc1dfbae43b72903b8ab5be", size = 5372521, upload-time = "2026-01-29T23:03:59.864Z" },
+    { url = "https://files.pythonhosted.org/packages/e0/c3/7f67dea8ccf8fdcb9c99033bbe3e90b9e7395415843accb81428c441be2d/debugpy-1.8.20-py2.py3-none-any.whl", hash = "sha256:5be9bed9ae3be00665a06acaa48f8329d2b9632f15fd09f6a9a8c8d9907e54d7", size = 5337658, upload-time = "2026-01-29T23:04:17.404Z" },
+]
+
 [[package]]
 name = "decorator"
 version = "5.3.1"
@@ -2215,6 +2232,7 @@ dependencies = [
 [package.optional-dependencies]
 dev = [
     { name = "black" },
+    { name = "debugpy" },
     { name = "mypy" },
     { name = "pyright" },
     { name = "ruff" },
@@ -2223,6 +2241,7 @@ dev = [
 [package.metadata]
 requires-dist = [
     { name = "black", marker = "extra == 'dev'", specifier = ">=25.1.0" },
+    { name = "debugpy", marker = "extra == 'dev'", specifier = ">=1.8.0" },
     { name = "discord", specifier = ">=2.3.2" },
     { name = "kokoro-tts", specifier = ">=2.3.1" },
     { name = "mypy", specifier = ">=2.1.0" },
diff --git a/vibe_bot/database.py b/vibe_bot/database.py
index a8c2ad0..4850548 100644
--- a/vibe_bot/database.py
+++ b/vibe_bot/database.py
@@ -330,7 +330,7 @@ class ChatDatabase:
         results.sort(key=lambda x: x[2], reverse=True)
         return results[:top_k]
 
-    def get_user_history(self, _user_id: str, limit: int = 20) -> list[tuple[str, str]]:
+    def get_user_history(self, user_id: str, limit: int = 20) -> list[tuple[str, str]]:
         """Get message history for a specific user."""
         conn = sqlite3.connect(self.db_path)
         cursor = conn.cursor()
@@ -340,11 +340,11 @@ class ChatDatabase:
             """
             SELECT message_id, content, timestamp
             FROM chat_messages
-            WHERE username != 'vibe-bot'
+            WHERE user_id = ? AND username != 'vibe-bot'
             ORDER BY timestamp DESC
             LIMIT ?
         """,
-            (limit,),
+            (user_id, limit),
         )
 
         messages = cursor.fetchall()
@@ -528,9 +528,10 @@ class CustomBotManager:
                 """
                 SELECT bot_name, system_prompt, created_by
                 FROM custom_bots
-                WHERE is_active = 1
+                WHERE is_active = 1 AND created_by = ?
                 ORDER BY created_at DESC
             """,
+                (user_id,),
             )
         else:
             cursor.execute(
diff --git a/vibe_bot/llama_wrapper.py b/vibe_bot/llama_wrapper.py
index e793e2d..395ffbe 100644
--- a/vibe_bot/llama_wrapper.py
+++ b/vibe_bot/llama_wrapper.py
@@ -6,9 +6,11 @@ Allows custom endpoints for each of the above supported functions.
 
 from __future__ import annotations
 
+import json
 from typing import TYPE_CHECKING, cast
 
 import openai
+import requests
 
 if TYPE_CHECKING:
     from io import BufferedReader, BytesIO
@@ -54,8 +56,12 @@ def chat_completion(
         model=model,
         messages=messages,
         max_tokens=max_tokens,
+        timeout=60.0,
     )
 
+    if not response.choices:
+        return ""
+
     content = response.choices[0].message.content
     if content:
         return content.strip()
@@ -101,8 +107,12 @@ def chat_completion_with_history(
         messages=messages,
         max_tokens=max_tokens,
         seed=-1,
+        timeout=60.0,
     )
 
+    if not response.choices:
+        return ""
+
     content = response.choices[0].message.content
     if content:
         return content.strip()
@@ -148,8 +158,12 @@ def chat_completion_instruct(
         messages=messages,
         max_tokens=max_tokens,
         seed=-1,
+        timeout=60.0,
     )
 
+    if not response.choices:
+        return ""
+
     content = response.choices[0].message.content
     if content:
         return content.strip()
@@ -158,8 +172,10 @@ def chat_completion_instruct(
 
 def image_generation(
     prompt: str,
+    *,
     openai_url: str,
     openai_api_key: str,
+    model: str = "gen",
     n: int = 1,
 ) -> str:
     """Generate an image using the given prompt.
@@ -168,19 +184,28 @@ def image_generation(
         prompt: The image generation prompt.
         openai_url: The OpenAI-compatible API URL.
         openai_api_key: The API key for authentication.
+        model: The model to use for image generation.
         n: Number of images to generate.
 
     Returns:
         The base64 encoded image data. Decode and write to a file.
 
     """
-    client = openai.OpenAI(base_url=openai_url, api_key=openai_api_key)
-    response = client.images.generate(
-        prompt=prompt,
-        n=n,
-        size="1024x1024",
-        model="gen",
+    client = openai.OpenAI(
+        base_url=openai_url,
+        api_key=openai_api_key,
+        max_retries=0,
     )
+    try:
+        response = client.images.generate(
+            prompt=prompt,
+            n=n,
+            size="1024x1024",
+            model=model,
+            timeout=120.0,
+        )
+    except openai.APIConnectionError:
+        return ""
     if response.data:
         return response.data[0].b64_json or ""
     return ""
@@ -189,8 +214,10 @@ def image_generation(
 def image_edit(
     image: BufferedReader | BytesIO | list[BufferedReader] | list[BytesIO],
     prompt: str,
+    *,
     openai_url: str,
     openai_api_key: str,
+    model: str = "edit",
     n: int = 1,
 ) -> str:
     """Edit an existing image using a prompt.
@@ -200,6 +227,7 @@ def image_edit(
         prompt: The edit instruction.
         openai_url: The OpenAI-compatible API URL.
         openai_api_key: The API key for authentication.
+        model: The model to use for image editing.
         n: Number of edited images to generate.
 
     Returns:
@@ -212,7 +240,7 @@ def image_edit(
         prompt=prompt,
         n=n,
         size="1024x1024",
-        model="edit",
+        model=model,
     )
     if response.data:
         return response.data[0].b64_json or ""
@@ -228,6 +256,9 @@ def embedding(
 ) -> list[float]:
     """Generate an embedding vector for the given text.
 
+    Uses a raw HTTP request to avoid the OpenAI SDK injecting
+    unsupported parameters like encoding_format.
+
     Args:
         text: The text to embed.
         openai_url: The OpenAI-compatible API URL.
@@ -238,17 +269,26 @@ def embedding(
         The embedding vector as a list of floats, or an empty list on failure.
 
     """
-    client = openai.OpenAI(base_url=openai_url, api_key=openai_api_key)
-    response = client.embeddings.create(
-        input=[text],
-        model=model,
-        encoding_format="float",
-    )
-    if response:
-        data = response.data
-        raw_data = data[0].embedding
-        # The result could be an array of floats or a single float.
-        if not isinstance(raw_data, float):
-            return list(raw_data)
-        return [raw_data]
-    return []
+    url = f"{openai_url.rstrip('/')}/embeddings"
+    headers = {
+        "Authorization": f"Bearer {openai_api_key}",
+        "Content-Type": "application/json",
+    }
+    payload = {"model": model, "input": [text]}
+
+    try:
+        resp = requests.post(url, headers=headers, json=payload, timeout=30)
+        resp.raise_for_status()
+    except requests.RequestException:
+        return []
+
+    data = resp.json()
+    if not data.get("data"):
+        return []
+
+    raw = data["data"][0].get("embedding")
+    if isinstance(raw, str):
+        raw = json.loads(raw)
+    if not isinstance(raw, list):
+        raw = list(raw)
+    return raw
diff --git a/vibe_bot/main.py b/vibe_bot/main.py
index 6340e0d..1e1c7f8 100644
--- a/vibe_bot/main.py
+++ b/vibe_bot/main.py
@@ -20,6 +20,10 @@ from vibe_bot.config import (
     DISCORD_TOKEN,
     IMAGE_EDIT_ENDPOINT,
     IMAGE_EDIT_ENDPOINT_KEY,
+    IMAGE_EDIT_MODEL,
+    IMAGE_GEN_ENDPOINT,
+    IMAGE_GEN_ENDPOINT_KEY,
+    IMAGE_GEN_MODEL,
     MAX_COMPLETION_TOKENS,
     TTS_MODEL_PATH,
     TTS_SPEED,
@@ -415,7 +419,7 @@ async def _speak_with_bot(
                 message_id=f"{ctx.message.id}_response",
                 user_id=str(ctx.bot.user.id),
                 username=ctx.bot.user.name,
-                content=f"Bot: {bot_response}",
+                content=bot_response,
                 channel_id=str(ctx.channel.id),
                 guild_id=str(ctx.guild.id) if ctx.guild else None,
             )
@@ -497,14 +501,23 @@ async def doodlebob(ctx: CommandsContext[Bot], *, message: str) -> None:
 
     image_b64 = llama_wrapper.image_generation(
         prompt=image_prompt,
-        openai_url=IMAGE_EDIT_ENDPOINT,
-        openai_api_key=IMAGE_EDIT_ENDPOINT_KEY,
+        openai_url=IMAGE_GEN_ENDPOINT,
+        openai_api_key=IMAGE_GEN_ENDPOINT_KEY,
+        model=IMAGE_GEN_MODEL,
     )
 
-    # Save the image to a file
-    edited_image_data = BytesIO(base64.b64decode(image_b64))
-    send_img = discord.File(edited_image_data, filename="image.png")
-    await ctx.send(file=send_img)
+    if not image_b64:
+        logger.warning("Image generation returned empty response.")
+        await ctx.send("Failed to generate image. The server may be busy.")
+        return
+
+    try:
+        edited_image_data = BytesIO(base64.b64decode(image_b64))
+        send_img = discord.File(edited_image_data, filename="image.png")
+        await ctx.send(file=send_img)
+    except Exception:
+        logger.exception("Failed to decode image data")
+        await ctx.send("Failed to process the generated image.")
 
 
 @bot.command(name="retcon")
@@ -529,6 +542,7 @@ async def retcon(ctx: CommandsContext[Bot], *, message: str) -> None:
         prompt=message,
         openai_url=IMAGE_EDIT_ENDPOINT,
         openai_api_key=IMAGE_EDIT_ENDPOINT_KEY,
+        model=IMAGE_EDIT_MODEL,
     )
 
     # Save the image to a file
@@ -621,7 +635,7 @@ async def talkforme(ctx: CommandsContext[Bot], *, message: str) -> None:
         bot_response = llama_wrapper.chat_completion_with_history(
             system_prompt=(
                 current_bot[1] + f"\nKeep your responses under 2-3 sentences. "
-                f"{current_bot[flip_counter(bot_counter)]}"
+                f"You are talking to {current_bot[flip_counter(bot_counter)][0]}"
             ),
             prompts=prompt_histories[bot_counter],
             openai_url=CHAT_ENDPOINT,
@@ -709,7 +723,7 @@ async def handle_chat(
                 message_id=f"{ctx.message.id}_response",
                 user_id=str(ctx.bot.user.id),
                 username=ctx.bot.user.name,
-                content=f"Bot: {bot_response}",
+                content=bot_response,
                 channel_id=str(ctx.channel.id),
                 guild_id=str(ctx.guild.id) if ctx.guild else None,
             )
diff --git a/vibe_bot/tests/conftest.py b/vibe_bot/tests/conftest.py
index b738c36..f697cd2 100644
--- a/vibe_bot/tests/conftest.py
+++ b/vibe_bot/tests/conftest.py
@@ -117,17 +117,22 @@ def mock_kokoro_tts() -> Generator[dict[str, Any]]:
     mock_samples = np.array([0.1, 0.2, 0.3], dtype=np.float32)
     mock_process = MagicMock(return_value=(mock_samples, 24000))
 
-    with patch("vibe_bot.tts.Kokoro", return_value=mock_kokoro_instance):  # noqa: SIM117
-        with patch("vibe_bot.tts.chunk_text", mock_chunk):
-            with patch("vibe_bot.tts.process_chunk_sequential", mock_process):
-                yield {
-                    "Kokoro": mock_kokoro,
-                    "chunk_text": mock_chunk,
-                    "process_chunk_sequential": mock_process,
-                    "kokoro_instance": mock_kokoro_instance,
-                    "mock_samples": mock_samples,
-                    "mock_sr": 24000,
-                }
+    with (
+        patch(
+            "vibe_bot.tts.Kokoro",
+            return_value=mock_kokoro_instance,
+        ),
+        patch("vibe_bot.tts.chunk_text", mock_chunk),
+    ):
+        with patch("vibe_bot.tts.process_chunk_sequential", mock_process):
+            yield {
+                "Kokoro": mock_kokoro,
+                "chunk_text": mock_chunk,
+                "process_chunk_sequential": mock_process,
+                "kokoro_instance": mock_kokoro_instance,
+                "mock_samples": mock_samples,
+                "mock_sr": 24000,
+            }
 
 
 @pytest.fixture
@@ -143,7 +148,7 @@ def mock_discord() -> Generator[dict[str, MagicMock]]:
     mock_bot_instance.user.name = "test-bot"
     mock_bot_instance.user.id = "123456789"
 
-    with patch("vibe_bot.main.discord") as mock_discord_module:  # noqa: SIM117
+    with patch("vibe_bot.main.discord") as mock_discord_module:
         with patch("vibe_bot.main.commands", MagicMock()):
             with patch("vibe_bot.main.commands.Bot", mock_bot_class):
                 mock_bot_class.return_value = mock_bot_instance
@@ -162,7 +167,7 @@ def mock_tts_engine() -> Generator[MagicMock]:
     """Provide a mock TTSEngine."""
     mock_engine = MagicMock()
     mock_engine.generate_audio.return_value = MagicMock()
-    with patch("vibe_bot.main.tts_engine", mock_engine):  # noqa: SIM117
+    with patch("vibe_bot.main.tts_engine", mock_engine):
         with patch("vibe_bot.main.tts.TTSEngine", return_value=mock_engine):
             yield mock_engine
 
diff --git a/vibe_bot/tests/test_config.py b/vibe_bot/tests/test_config.py
index dd7d54c..9786607 100644
--- a/vibe_bot/tests/test_config.py
+++ b/vibe_bot/tests/test_config.py
@@ -106,9 +106,9 @@ except Exception as e:
         timeout=30,
     )
     output = result.stdout.strip()
-    assert output.startswith("ERROR:") and expected_error in output, (  # noqa: PT018
-        f"Expected error '{expected_error}' but got: {output}"
-    )
+    assert (
+        output.startswith("ERROR:") and expected_error in output
+    ), f"Expected error '{expected_error}' but got: {output}"
 
 
 def test_config_missing_discord_token() -> None:
diff --git a/vibe_bot/tests/test_database.py b/vibe_bot/tests/test_database.py
index 06c154b..905ee9a 100644
--- a/vibe_bot/tests/test_database.py
+++ b/vibe_bot/tests/test_database.py
@@ -129,13 +129,22 @@ def test_get_recent_messages(
 ) -> None:
     """Test retrieving recent messages."""
     chat_db.add_message(
-        message_id="msg-1", user_id="u1", username="alice", content="First",
+        message_id="msg-1",
+        user_id="u1",
+        username="alice",
+        content="First",
     )
     chat_db.add_message(
-        message_id="msg-2", user_id="u2", username="bob", content="Second",
+        message_id="msg-2",
+        user_id="u2",
+        username="bob",
+        content="Second",
     )
     chat_db.add_message(
-        message_id="msg-3", user_id="u1", username="alice", content="Third",
+        message_id="msg-3",
+        user_id="u1",
+        username="alice",
+        content="Third",
     )
 
     messages = chat_db.get_recent_messages(limit=2)
@@ -167,10 +176,16 @@ def test_clear_all_messages(
 ) -> None:
     """Test clearing all messages."""
     chat_db.add_message(
-        message_id="msg-1", user_id="u1", username="alice", content="Hello",
+        message_id="msg-1",
+        user_id="u1",
+        username="alice",
+        content="Hello",
     )
     chat_db.add_message(
-        message_id="msg-2", user_id="u2", username="bob", content="World",
+        message_id="msg-2",
+        user_id="u2",
+        username="bob",
+        content="World",
     )
 
     chat_db.clear_all_messages()
@@ -185,7 +200,10 @@ def test_get_user_history(
 ) -> None:
     """Test retrieving user message history."""
     chat_db.add_message(
-        message_id="msg-1", user_id="u1", username="alice", content="User question",
+        message_id="msg-1",
+        user_id="u1",
+        username="alice",
+        content="User question",
     )
     chat_db.add_message(
         message_id="msg-1_response",
@@ -422,7 +440,9 @@ def test_custom_bot_delete_with_error(
 ) -> None:
     """Test that delete_custom_bot returns False on error."""
     with patch.object(
-        custom_bot_manager, "_initialize_custom_bots_table", side_effect=Exception("db error"),  # noqa: E501
+        custom_bot_manager,
+        "_initialize_custom_bots_table",
+        side_effect=Exception("db error"),
     ):
         pass
     result = custom_bot_manager.delete_custom_bot("nonexistent")
@@ -433,6 +453,7 @@ def test_database_get_database_singleton(temp_db_path: str) -> None:
     """Test that get_database returns the same instance."""
     import vibe_bot.database as db_module
     from vibe_bot.database import ChatDatabase, get_database
+
     db_module._chat_db = None
 
     db1 = get_database()
@@ -453,6 +474,7 @@ def test_database_init_creates_tables(temp_db_path: str) -> None:
     db.client.close()
 
     import sqlite3
+
     conn = sqlite3.connect(temp_db_path)
     cursor = conn.cursor()
     cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
diff --git a/vibe_bot/tests/test_llama_wrapper.py b/vibe_bot/tests/test_llama_wrapper.py
index ee729d9..8e5f423 100644
--- a/vibe_bot/tests/test_llama_wrapper.py
+++ b/vibe_bot/tests/test_llama_wrapper.py
@@ -6,6 +6,7 @@ import base64
 import tempfile
 from io import BytesIO
 from pathlib import Path
+from typing import Any
 from unittest.mock import MagicMock, patch
 
 import numpy as np
@@ -106,24 +107,24 @@ EMBEDDING_SIMILARITY_LOW = 0.5
 
 def test_embeddings() -> None:
     """Test embedding similarity for similar and different texts."""
-    with patch("vibe_bot.llama_wrapper.openai.OpenAI") as mock_openai:
-        mock_horse_vec = [0.8] * 1024 + [0.6] * 1024
-        mock_horse_also_vec = [0.79] * 1024 + [0.61] * 1024
-        mock_donkey_vec = [-0.8] * 1024 + [-0.6] * 1024
+    mock_horse_vec = [0.8] * 1024 + [0.6] * 1024
+    mock_horse_also_vec = [0.79] * 1024 + [0.61] * 1024
+    mock_donkey_vec = [-0.8] * 1024 + [-0.6] * 1024
 
-        mock_response1 = MagicMock()
-        mock_response1.data = [MagicMock(embedding=mock_horse_vec)]
-        mock_response2 = MagicMock()
-        mock_response2.data = [MagicMock(embedding=mock_horse_also_vec)]
-        mock_response3 = MagicMock()
-        mock_response3.data = [MagicMock(embedding=mock_donkey_vec)]
-
-        mock_openai.return_value.embeddings.create.side_effect = [
-            mock_response1,
-            mock_response2,
-            mock_response3,
-        ]
+    def mock_post(*args: Any, **kwargs: Any) -> MagicMock:
+        """Fake ``requests.post``: pick an embedding from the input text."""
+        text = kwargs.get("json", {})["input"][0]
+        if "also" in text:
+            vector = mock_horse_also_vec
+        elif "horse" in text and "donkey" not in text:
+            vector = mock_horse_vec
+        else:
+            vector = mock_donkey_vec
+        response = MagicMock()
+        response.json.return_value = {"data": [{"embedding": vector}]}
+        return response
 
+    with patch("vibe_bot.llama_wrapper.requests.post", side_effect=mock_post):
         result1 = embedding(
             "this is a horse",
             openai_url=EMBEDDING_ENDPOINT,
diff --git a/vibe_bot/tests/test_main.py b/vibe_bot/tests/test_main.py
index 1cb9942..93210a1 100644
--- a/vibe_bot/tests/test_main.py
+++ b/vibe_bot/tests/test_main.py
@@ -125,7 +125,9 @@ def test_custom_bot_command_success(
 
     asyncio.run(
         main_module.custom_bot(
-            mock_ctx, bot_name="alfred", personality="you are a british butler",
+            mock_ctx,
+            bot_name="alfred",
+            personality="you are a british butler",
         ),
     )
 
@@ -199,7 +201,9 @@ def test_custom_bot_command_create_fails(
 
     asyncio.run(
         main_module.custom_bot(
-            mock_ctx, bot_name="alfred", personality="you are a british butler",
+            mock_ctx,
+            bot_name="alfred",
+            personality="you are a british butler",
         ),
     )
     call_args = mock_ctx.send.call_args[0][0]
@@ -347,7 +351,9 @@ def test_handle_chat_success(
 
     import vibe_bot.main as main_module
 
-    mock_llama_wrapper.chat_completion_with_history.return_value = "This is a bot response"  # noqa: E501
+    mock_llama_wrapper.chat_completion_with_history.return_value = (
+        "This is a bot response"
+    )
 
     asyncio.run(
         main_module.handle_chat(
diff --git a/vibe_bot/tests/test_tts.py b/vibe_bot/tests/test_tts.py
index 71f9021..5908eb0 100644
--- a/vibe_bot/tests/test_tts.py
+++ b/vibe_bot/tests/test_tts.py
@@ -63,9 +63,15 @@ def test_generate_audio_multiple_chunks(mock_kokoro_tts: MagicMock) -> None:
 
     from vibe_bot.tts import TTSEngine
 
-    mock_kokoro_tts["chunk_text"].return_value = ["chunk one", "chunk two", "chunk three"]  # noqa: E501
+    mock_kokoro_tts["chunk_text"].return_value = [
+        "chunk one",
+        "chunk two",
+        "chunk three",
+    ]
     engine = TTSEngine("/tmp/test-model.onnx", "/tmp/test-voices.bin")
-    result = engine.generate_audio("this text is long enough to be split into multiple chunks")  # noqa: E501
+    result = engine.generate_audio(
+        "this text is long enough to be split into multiple chunks",
+    )
 
     assert isinstance(result, BytesIO)
     assert mock_kokoro_tts["process_chunk_sequential"].call_count == 3
@@ -88,7 +94,11 @@ def test_generate_audio_chunk_failure(mock_kokoro_tts: MagicMock) -> None:
             raise Exception("processing error")
         return np.array([0.1, 0.2], dtype=np.float32), 24000
 
-    mock_kokoro_tts["chunk_text"].return_value = ["good chunk", "bad chunk", "another good"]  # noqa: E501
+    mock_kokoro_tts["chunk_text"].return_value = [
+        "good chunk",
+        "bad chunk",
+        "another good",
+    ]
     mock_kokoro_tts["process_chunk_sequential"].side_effect = process_with_failure
 
     engine = TTSEngine("/tmp/test-model.onnx", "/tmp/test-voices.bin")