Compare commits

...

6 Commits

6 changed files with 163 additions and 20 deletions

View File

@@ -2,3 +2,6 @@
.mypy_cache .mypy_cache
.vscode .vscode
.token .token
*.db
*.env
*.png

View File

@@ -2,6 +2,9 @@ FROM ghcr.io/astral-sh/uv:python3.13-alpine
# Copy the project into the image # Copy the project into the image
COPY vibe_bot /app COPY vibe_bot /app
COPY uv.lock /app
COPY .python-version /app
COPY pyproject.toml /app
# Disable development dependencies # Disable development dependencies
ENV UV_NO_DEV=1 ENV UV_NO_DEV=1

View File

@@ -188,8 +188,7 @@ uv run main.py
podman build -t vibe-bot:latest . podman build -t vibe-bot:latest .
# Run # Run
export DISCORD_TOKEN=$(cat .token) podman run --env-file .env localhost/vibe-bot:latest
podman run -e DISCORD_TOKEN localhost/vibe-bot:latest
``` ```
## Docs ## Docs

View File

@@ -346,7 +346,7 @@ class ChatDatabase:
def get_conversation_context( def get_conversation_context(
self, user_id: str, current_message: str, max_context: int = 5 self, user_id: str, current_message: str, max_context: int = 5
) -> str: ) -> list[dict[str, str]]:
"""Get relevant conversation context for RAG.""" """Get relevant conversation context for RAG."""
# Get recent messages from the user # Get recent messages from the user
recent_messages = self.get_user_history(user_id, limit=max_context * 2) recent_messages = self.get_user_history(user_id, limit=max_context * 2)
@@ -357,22 +357,21 @@ class ChatDatabase:
) )
# Combine contexts # Combine contexts
context_parts = [] context_parts: list[dict[str, str]] = []
# Add recent messages # Add recent messages
for user_message, bot_message in recent_messages: for user_message, bot_message in recent_messages:
combined_content = f"[Recent chat]\n{user_message}\n{bot_message}" context_parts.append({"role": "assistant", "content": bot_message})
context_parts.append(combined_content) context_parts.append({"role": "user", "content": user_message})
# Add similar messages # Add similar messages
for user_message, bot_message, similarity in similar_messages: for user_message, bot_message, similarity in similar_messages:
combined_content = f"{user_message}\n{bot_message}" context_parts.append({"role": "assistant", "content": bot_message})
if combined_content not in "\n".join(context_parts): context_parts.append({"role": "user", "content": user_message})
context_parts.append(f"[You remember]\n{combined_content}")
# Conversation history needs to be delivered in "newest context last" order # Conversation history needs to be delivered in "newest context last" order
context_parts.reverse() context_parts.reverse()
return "\n".join(context_parts[-max_context * 4 :]) # Limit total context return context_parts
def clear_all_messages(self): def clear_all_messages(self):
"""Clear all messages and embeddings from the database.""" """Clear all messages and embeddings from the database."""

View File

@@ -44,6 +44,42 @@ def chat_completion(
return "" return ""
def chat_completion_with_history(
    system_prompt: str,
    prompts: Iterable[ChatCompletionMessageParam],
    openai_url: str,
    openai_api_key: str,
    model: str,
    max_tokens: int = 1000,
) -> str:
    """Run a chat completion against a full conversation history.

    Args:
        system_prompt: Instructions prepended as the system message.
        prompts: Prior conversation turns, oldest first.
        openai_url: Base URL of the OpenAI-compatible endpoint.
        openai_api_key: API key for the endpoint.
        model: Model identifier to request.
        max_tokens: Upper bound on tokens generated for the completion.

    Returns:
        The stripped completion text, or "" when the model returned none.
    """
    client = openai.OpenAI(base_url=openai_url, api_key=openai_api_key)
    # Unpack rather than `+` so any Iterable (not just list) is accepted,
    # matching the declared parameter type.
    messages: list[ChatCompletionMessageParam] = [
        {
            "role": "system",
            "content": system_prompt,
        },
        *prompts,
    ]
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        max_tokens=max_tokens,
        extra_body={
            # Ask the server's chat template to skip the "thinking" phase.
            "chat_template_kwargs": {"enable_thinking": False},
        },
        seed=-1,
    )
    # NOTE(review): the previous version asserted that `reasoning_content`
    # was present even though thinking is explicitly disabled above; that
    # contradictory assert (also stripped under -O) has been removed.
    content = response.choices[0].message.content
    return content.strip() if content else ""
def chat_completion_instruct( def chat_completion_instruct(
system_prompt: str, system_prompt: str,
user_prompt: str, user_prompt: str,

View File

@@ -297,6 +297,112 @@ async def retcon(ctx, *, message: str):
await ctx.send(file=send_img) await ctx.send(file=send_img)
@bot.command(name="talkforme")
async def talkforme(ctx, *, message: str):
    """Have two bots talk to each other about a topic.

    Usage: !talkforme bot1 bot2 4 some conversation topic
    """
    # Hard cap on replies regardless of what the caller asks for.
    TALK_LIMIT = 20
    # Split once: name, name, count, then the remainder is the topic.
    parts = message.split(" ", 3)
    if len(parts) < 4:
        await ctx.send("Usage: !talkforme <bot1> <bot2> <replies> <topic>")
        return
    bot1_name, bot2_name, limit, topic = parts
    custom_bot_manager = CustomBotManager()
    bot1 = custom_bot_manager.get_custom_bot(bot1_name)
    if not bot1:
        await ctx.send(f"{bot1_name} is not a real bot...")
        return
    _, bot1_prompt, _, _ = bot1
    bot2 = custom_bot_manager.get_custom_bot(bot2_name)
    if not bot2:
        await ctx.send(f"{bot2_name} is not a real bot...")
        return
    _, bot2_prompt, _, _ = bot2
    try:
        message_limit = int(limit)
    except ValueError:
        await ctx.send(f"{limit} is not a number of replies...")
        return
    await ctx.send(
        f'{bot1_name} is going to talk to {bot2_name} about "{topic[:50]}" for {limit} replies.'
    )
    bot_list = [(bot1_name, bot1_prompt), (bot2_name, bot2_prompt)]

    def flip_counter(counter: int) -> int:
        """Return the index of the other bot (0 <-> 1)."""
        return 1 - counter

    # Each bot keeps its own view of the conversation: the partner's lines
    # are "user" turns, its own lines are "assistant" turns. The topic seeds
    # bot 0 as if the user said it, and bot 1 as if it said it.
    prompt_histories = [
        [{"role": "user", "content": topic}],
        [{"role": "assistant", "content": topic}],
    ]
    message_counter = 0
    bot_counter = 0
    # +1 so the opening reply (previously sent before the loop) still
    # happens in addition to the requested number of replies.
    total_messages = min(message_limit, TALK_LIMIT) + 1
    while message_counter < total_messages:
        current_name, current_prompt = bot_list[bot_counter]
        # BUGFIX: the partner's NAME must come from bot_list — indexing the
        # (name, prompt) tuple and then [0] previously injected a prompt or
        # a single character into the system prompt.
        partner_name = bot_list[flip_counter(bot_counter)][0]
        logger.info(f"Current bot is {current_name}")
        bot_response = llama_wrapper.chat_completion_with_history(
            system_prompt=current_prompt
            + f"\nKeep your responses under 2-3 sentences. You are talking to {partner_name}",
            prompts=prompt_histories[bot_counter],  # type: ignore
            openai_url=CHAT_ENDPOINT,
            openai_api_key=CHAT_ENDPOINT_KEY,
            model=CHAT_MODEL,
            max_tokens=MAX_COMPLETION_TOKENS,
        )
        message_counter += 1
        prompt_histories[bot_counter].append(
            {"role": "assistant", "content": bot_response}
        )
        prompt_histories[flip_counter(bot_counter)].append(
            {"role": "user", "content": bot_response}
        )
        await ctx.send(f"## {current_name}")
        # Discord caps message length, so stream long replies in chunks.
        while bot_response:
            send_chunk = bot_response[:1000]
            bot_response = bot_response[1000:]
            await ctx.send(send_chunk)
        bot_counter = flip_counter(bot_counter)
        logger.info(f"Message counter is {message_counter}/{limit}")
async def handle_chat( async def handle_chat(
ctx, *, bot_name: str, message: str, system_prompt: str, response_prefix: str ctx, *, bot_name: str, message: str, system_prompt: str, response_prefix: str
): ):
@@ -310,22 +416,19 @@ async def handle_chat(
user_id=str(ctx.author.id), current_message=message, max_context=5 user_id=str(ctx.author.id), current_message=message, max_context=5
) )
prompts = [{"role": "user", "content": message}]
if context: if context:
user_message = f"\n\nRelevant conversation history:\n{context}\n\n{message}" prompts = context + prompts
else:
user_message = message
logger.info(user_message) logger.info(prompts)
system_prompt_edit = ( system_prompt_edit = f"{system_prompt}\nKeep your responses under 2-3 sentences."
"Keep your responses somewhat short, limited to 500 words or less. "
f"{system_prompt}"
)
try: try:
bot_response = llama_wrapper.chat_completion_instruct( bot_response = llama_wrapper.chat_completion_with_history(
system_prompt=system_prompt_edit, system_prompt=system_prompt_edit,
user_prompt=user_message, prompts=prompts, # type: ignore
openai_url=CHAT_ENDPOINT, openai_url=CHAT_ENDPOINT,
openai_api_key=CHAT_ENDPOINT_KEY, openai_api_key=CHAT_ENDPOINT_KEY,
model=CHAT_MODEL, model=CHAT_MODEL,