diff --git a/vibe_bot/database.py b/vibe_bot/database.py
index 445cea9..9da4b5e 100644
--- a/vibe_bot/database.py
+++ b/vibe_bot/database.py
@@ -346,7 +346,7 @@ class ChatDatabase:
 
     def get_conversation_context(
         self, user_id: str, current_message: str, max_context: int = 5
-    ) -> str:
+    ) -> list[dict[str, str]]:
         """Get relevant conversation context for RAG."""
         # Get recent messages from the user
         recent_messages = self.get_user_history(user_id, limit=max_context * 2)
@@ -357,22 +357,21 @@ class ChatDatabase:
         )
 
         # Combine contexts
-        context_parts = []
+        context_parts: list[dict[str, str]] = []
 
         # Add recent messages
         for user_message, bot_message in recent_messages:
-            combined_content = f"[Recent chat]\n{user_message}\n{bot_message}"
-            context_parts.append(combined_content)
+            context_parts.append({"role": "assistant", "content": bot_message})
+            context_parts.append({"role": "user", "content": user_message})
 
         # Add similar messages
         for user_message, bot_message, similarity in similar_messages:
-            combined_content = f"{user_message}\n{bot_message}"
-            if combined_content not in "\n".join(context_parts):
-                context_parts.append(f"[You remember]\n{combined_content}")
+            context_parts.append({"role": "assistant", "content": bot_message})
+            context_parts.append({"role": "user", "content": user_message})
 
         # Conversation history needs to be delivered in "newest context last" order
         context_parts.reverse()
-        return "\n".join(context_parts[-max_context * 4 :])  # Limit total context
+        return context_parts
 
     def clear_all_messages(self):
         """Clear all messages and embeddings from the database."""
diff --git a/vibe_bot/llama_wrapper.py b/vibe_bot/llama_wrapper.py
index 150a892..fab13a2 100644
--- a/vibe_bot/llama_wrapper.py
+++ b/vibe_bot/llama_wrapper.py
@@ -66,6 +66,7 @@ def chat_completion_with_history(
         extra_body={
             "chat_template_kwargs": {"enable_thinking": False},
         },
+        seed=-1,
     )
 
     # Assert that thinking was used
diff --git a/vibe_bot/main.py b/vibe_bot/main.py
index 296235b..2f5a721 100644
--- a/vibe_bot/main.py
+++ b/vibe_bot/main.py
@@ -416,22 +416,19 @@ async def handle_chat(
         user_id=str(ctx.author.id), current_message=message, max_context=5
     )
 
+    prompts = [{"role": "user", "content": message}]
+
     if context:
-        user_message = f"\n\nRelevant conversation history:\n{context}\n\n{message}"
-    else:
-        user_message = message
+        prompts = context + prompts
 
-    logger.info(user_message)
+    logger.info(prompts)
 
-    system_prompt_edit = (
-        "Keep your responses somewhat short, limited to 500 words or less. "
-        f"{system_prompt}"
-    )
+    system_prompt_edit = f"{system_prompt}\nKeep your responses under 2-3 sentences."
 
     try:
-        bot_response = llama_wrapper.chat_completion_instruct(
+        bot_response = llama_wrapper.chat_completion_with_history(
             system_prompt=system_prompt_edit,
-            user_prompt=user_message,
+            prompts=prompts,  # type: ignore
             openai_url=CHAT_ENDPOINT,
             openai_api_key=CHAT_ENDPOINT_KEY,
             model=CHAT_MODEL,