add tts

2026-05-22 14:35:49 -04:00
parent 36b164af16
commit 2e162c928a
8 changed files with 1213 additions and 14 deletions
@@ -2,6 +2,7 @@ import discord
 from discord.ext import commands
 import os
 import base64
+import traceback
 from io import BytesIO
 from openai import OpenAI
 import logging
@@ -15,7 +16,12 @@ from config import (  # type: ignore
    IMAGE_GEN_ENDPOINT,
    IMAGE_EDIT_ENDPOINT,
    MAX_COMPLETION_TOKENS,
+    TTS_MODEL_PATH,
+    TTS_VOICES_PATH,
+    TTS_VOICE,
+    TTS_SPEED,
 )
+import tts  # type: ignore
 import llama_wrapper  # type: ignore
 import requests

@@ -30,6 +36,15 @@ intents = discord.Intents.default()
 intents.message_content = True
 bot = commands.Bot(command_prefix="!", intents=intents)

+# Bring up the shared TTS engine once at import time. `tts_engine` stays None
+# when construction fails, which is how the rest of the module detects that
+# speech is unavailable.
+tts_engine = None
+try:
+    tts_engine = tts.TTSEngine(TTS_MODEL_PATH, TTS_VOICES_PATH)
+except Exception as init_error:
+    # Startup must not abort just because the model files are missing.
+    logger.error(f"Failed to initialize TTS engine: {init_error}")
+    logger.info("Make sure kokoro-v1.0.onnx and voices-v1.0.bin are in the project directory")
+else:
+    logger.info("TTS engine initialized successfully")
+

@bot.event
 async def on_ready():
@@ -232,6 +247,84 @@ async def on_message(message):
    await bot.process_commands(message)


+async def _run_blocking(func, *args, **kwargs):
+    """Run a blocking callable in the event loop's default executor.
+
+    The chat completion and TTS synthesis are invoked as plain synchronous
+    calls; awaiting them through an executor keeps the Discord event loop
+    free to process heartbeats and other commands while they run.
+    """
+    # Function-scope imports so this block does not rely on the file's import header.
+    import asyncio
+    import functools
+
+    loop = asyncio.get_running_loop()
+    return await loop.run_in_executor(None, functools.partial(func, *args, **kwargs))
+
+
+async def _send_speech(ctx, text):
+    """Synthesize *text* with TTS_VOICE / TTS_SPEED and upload it as speech.mp3."""
+    # NOTE(review): assumes tts_engine.generate_audio may be called from a
+    # worker thread — confirm against tts.TTSEngine.
+    audio_buffer = await _run_blocking(
+        tts_engine.generate_audio, text, voice=TTS_VOICE, speed=TTS_SPEED
+    )
+    await ctx.send(file=discord.File(audio_buffer, filename="speech.mp3"))
+
+
+@bot.command(name="speak")
+async def speak(ctx, *, message: str):
+    """Have the bot speak the given text using Kokoro TTS, or have a custom bot speak
+
+    Usage: !speak <text> - plain text to speech
+    Usage: !speak <bot_name> <text> - have a custom bot respond and speak
+    Example: !speak hello world
+    Example: !speak alfred what time is it
+    """
+    # The docstring is kept verbatim: discord.py uses it as the command's help text.
+    if tts_engine is None:
+        await ctx.send("❌ TTS engine not initialized. Make sure kokoro-v1.0.onnx and voices-v1.0.bin are present.")
+        return
+
+    # Split once: words[0] is the candidate bot name and words[1] (when present)
+    # is the remainder with its leading whitespace already removed. Slicing by
+    # len(bot_name) would cut the wrong characters if the message ever started
+    # with whitespace.
+    words = message.split(maxsplit=1)
+    if not words:
+        await ctx.send("❌ Please provide text to speak.")
+        return
+
+    custom_bot_manager = CustomBotManager()
+    bot_names = [b[0] for b in custom_bot_manager.list_custom_bots()]
+
+    bot_name = words[0]
+    if bot_name not in bot_names:
+        # Plain text-to-speech: read the whole message aloud.
+        try:
+            await ctx.send("🔊 Generating speech...")
+            await _send_speech(ctx, message)
+        except Exception as e:
+            # Top-level command boundary: log the traceback and tell the user.
+            logger.exception("Error in !speak command")
+            await ctx.send(f"❌ Error generating speech: {str(e)}")
+        return
+
+    # Custom-bot mode: everything after the bot name is the user's prompt.
+    text_to_speak = words[1] if len(words) > 1 else ""
+    if not text_to_speak:
+        await ctx.send("❌ Please provide text for the bot to respond to.")
+        return
+
+    # Look the bot up before announcing anything, so a bot that cannot be
+    # loaded does not first report that it "is thinking".
+    bot_info = custom_bot_manager.get_custom_bot(bot_name)
+    if not bot_info:
+        await ctx.send(f"❌ Custom bot '{bot_name}' not found.")
+        return
+
+    await ctx.send(f"🔊 **{bot_name}** is thinking...")
+
+    _, system_prompt, _, _ = bot_info
+    # Ask for a short reply so the synthesized audio stays brief.
+    system_prompt_edit = f"{system_prompt}\nKeep your responses under 2-3 sentences."
+
+    try:
+        bot_response = await _run_blocking(
+            llama_wrapper.chat_completion_with_history,
+            system_prompt=system_prompt_edit,
+            prompts=[{"role": "user", "content": text_to_speak}],
+            openai_url=CHAT_ENDPOINT,
+            openai_api_key=CHAT_ENDPOINT_KEY,
+            model=CHAT_MODEL,
+            max_tokens=MAX_COMPLETION_TOKENS,
+        )
+
+        if not bot_response:
+            await ctx.send(f"❌ **{bot_name}** failed to generate a response.")
+            return
+
+        await ctx.send(f"🔊 Generating speech for **{bot_name}**...")
+        await _send_speech(ctx, bot_response)
+    except Exception as e:
+        # Top-level command boundary: log the traceback and tell the user.
+        logger.exception("Error in !speak command with bot '%s'", bot_name)
+        await ctx.send(f"❌ Error generating speech: {str(e)}")
+
+
@bot.command(name="doodlebob")
 async def doodlebob(ctx, *, message: str):
    # add some logging