Properly feed conversation history to the model as role-tagged chat messages

This commit is contained in:
2026-03-10 16:02:19 -04:00
parent c9b49121ea
commit 677ea83fa1
3 changed files with 15 additions and 18 deletions

View File

@@ -346,7 +346,7 @@ class ChatDatabase:
def get_conversation_context(
self, user_id: str, current_message: str, max_context: int = 5
) -> str:
) -> list[dict[str, str]]:
"""Get relevant conversation context for RAG."""
# Get recent messages from the user
recent_messages = self.get_user_history(user_id, limit=max_context * 2)
@@ -357,22 +357,21 @@ class ChatDatabase:
)
# Combine contexts
context_parts = []
context_parts: list[dict[str, str]] = []
# Add recent messages
for user_message, bot_message in recent_messages:
combined_content = f"[Recent chat]\n{user_message}\n{bot_message}"
context_parts.append(combined_content)
context_parts.append({"role": "assistant", "content": bot_message})
context_parts.append({"role": "user", "content": user_message})
# Add similar messages
for user_message, bot_message, similarity in similar_messages:
combined_content = f"{user_message}\n{bot_message}"
if combined_content not in "\n".join(context_parts):
context_parts.append(f"[You remember]\n{combined_content}")
context_parts.append({"role": "assistant", "content": bot_message})
context_parts.append({"role": "user", "content": user_message})
# Conversation history needs to be delivered in "newest context last" order
context_parts.reverse()
return "\n".join(context_parts[-max_context * 4 :]) # Limit total context
return context_parts
def clear_all_messages(self):
"""Clear all messages and embeddings from the database."""

View File

@@ -66,6 +66,7 @@ def chat_completion_with_history(
extra_body={
"chat_template_kwargs": {"enable_thinking": False},
},
seed=-1,
)
# Assert that thinking was used

View File

@@ -416,22 +416,19 @@ async def handle_chat(
user_id=str(ctx.author.id), current_message=message, max_context=5
)
prompts = [{"role": "user", "content": message}]
if context:
user_message = f"\n\nRelevant conversation history:\n{context}\n\n{message}"
else:
user_message = message
prompts = context + prompts
logger.info(user_message)
logger.info(prompts)
system_prompt_edit = (
"Keep your responses somewhat short, limited to 500 words or less. "
f"{system_prompt}"
)
system_prompt_edit = f"{system_prompt}\nKeep your responses under 2-3 sentences."
try:
bot_response = llama_wrapper.chat_completion_instruct(
bot_response = llama_wrapper.chat_completion_with_history(
system_prompt=system_prompt_edit,
user_prompt=user_message,
prompts=prompts, # type: ignore
openai_url=CHAT_ENDPOINT,
openai_api_key=CHAT_ENDPOINT_KEY,
model=CHAT_MODEL,