diff --git a/vibe_bot/database.py b/vibe_bot/database.py index 445cea9..9da4b5e 100644 --- a/vibe_bot/database.py +++ b/vibe_bot/database.py @@ -346,7 +346,7 @@ class ChatDatabase: def get_conversation_context( self, user_id: str, current_message: str, max_context: int = 5 - ) -> str: + ) -> list[dict[str, str]]: """Get relevant conversation context for RAG.""" # Get recent messages from the user recent_messages = self.get_user_history(user_id, limit=max_context * 2) @@ -357,22 +357,21 @@ class ChatDatabase: ) # Combine contexts - context_parts = [] + context_parts: list[dict[str, str]] = [] # Add recent messages for user_message, bot_message in recent_messages: - combined_content = f"[Recent chat]\n{user_message}\n{bot_message}" - context_parts.append(combined_content) + context_parts.append({"role": "assistant", "content": bot_message}) + context_parts.append({"role": "user", "content": user_message}) # Add similar messages for user_message, bot_message, similarity in similar_messages: - combined_content = f"{user_message}\n{bot_message}" - if combined_content not in "\n".join(context_parts): - context_parts.append(f"[You remember]\n{combined_content}") + context_parts.append({"role": "assistant", "content": bot_message}) + context_parts.append({"role": "user", "content": user_message}) # Conversation history needs to be delivered in "newest context last" order context_parts.reverse() - return "\n".join(context_parts[-max_context * 4 :]) # Limit total context + return context_parts def clear_all_messages(self): """Clear all messages and embeddings from the database.""" diff --git a/vibe_bot/llama_wrapper.py b/vibe_bot/llama_wrapper.py index 150a892..fab13a2 100644 --- a/vibe_bot/llama_wrapper.py +++ b/vibe_bot/llama_wrapper.py @@ -66,6 +66,7 @@ def chat_completion_with_history( extra_body={ "chat_template_kwargs": {"enable_thinking": False}, }, + seed=-1, ) # Assert that thinking was used diff --git a/vibe_bot/main.py b/vibe_bot/main.py index 296235b..2f5a721 100644 --- a/vibe_bot/main.py +++ b/vibe_bot/main.py @@ -416,22 +416,19 @@ async def handle_chat( user_id=str(ctx.author.id), current_message=message, max_context=5 ) + prompts = [{"role": "user", "content": message}] + if context: - user_message = f"\n\nRelevant conversation history:\n{context}\n\n{message}" - else: - user_message = message + prompts = context + prompts - logger.info(user_message) + logger.info(prompts) - system_prompt_edit = ( - "Keep your responses somewhat short, limited to 500 words or less. " - f"{system_prompt}" - ) + system_prompt_edit = f"{system_prompt}\nKeep your responses under 2-3 sentences." try: - bot_response = llama_wrapper.chat_completion_instruct( + bot_response = llama_wrapper.chat_completion_with_history( system_prompt=system_prompt_edit, - user_prompt=user_message, + prompts=prompts, # type: ignore openai_url=CHAT_ENDPOINT, openai_api_key=CHAT_ENDPOINT_KEY, model=CHAT_MODEL,