add tts
This commit is contained in:
@@ -2,6 +2,7 @@ import discord
|
||||
from discord.ext import commands
|
||||
import os
|
||||
import base64
|
||||
import traceback
|
||||
from io import BytesIO
|
||||
from openai import OpenAI
|
||||
import logging
|
||||
@@ -15,7 +16,12 @@ from config import ( # type: ignore
|
||||
IMAGE_GEN_ENDPOINT,
|
||||
IMAGE_EDIT_ENDPOINT,
|
||||
MAX_COMPLETION_TOKENS,
|
||||
TTS_MODEL_PATH,
|
||||
TTS_VOICES_PATH,
|
||||
TTS_VOICE,
|
||||
TTS_SPEED,
|
||||
)
|
||||
import tts # type: ignore
|
||||
import llama_wrapper # type: ignore
|
||||
import requests
|
||||
|
||||
@@ -30,6 +36,15 @@ intents = discord.Intents.default()
|
||||
intents.message_content = True
|
||||
bot = commands.Bot(command_prefix="!", intents=intents)
|
||||
|
||||
# Initialize the TTS engine once at import time.
# Failure is deliberately non-fatal: tts_engine is left as None so the rest of
# the bot can still start, and the !speak command checks for None and tells the
# user that TTS is unavailable.
try:
    tts_engine = tts.TTSEngine(TTS_MODEL_PATH, TTS_VOICES_PATH)
except Exception as e:
    # logger.exception logs at ERROR level and appends the traceback, so the
    # root cause (missing file, bad model, import problem, ...) is not lost.
    logger.exception("Failed to initialize TTS engine: %s", e)
    # Report the paths that were actually configured rather than hard-coded
    # file names, which may not match the values in config.
    logger.info(
        "Make sure the TTS model file (%s) and voices file (%s) exist",
        TTS_MODEL_PATH,
        TTS_VOICES_PATH,
    )
    tts_engine = None
else:
    logger.info("TTS engine initialized successfully")
|
||||
|
||||
|
||||
@bot.event
|
||||
async def on_ready():
|
||||
@@ -232,6 +247,84 @@ async def on_message(message):
|
||||
await bot.process_commands(message)
|
||||
|
||||
|
||||
async def _run_blocking(func, *args, **kwargs):
    """Run a synchronous callable in the event loop's default executor.

    Awaiting this keeps the event loop free while *func* runs, and returns
    whatever *func* returns (or re-raises what it raises).
    """
    # Imported locally so this block does not depend on the file's import list.
    import asyncio
    import functools

    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, functools.partial(func, *args, **kwargs))


async def _send_speech(ctx, text):
    """Synthesize *text* with the configured voice/speed and upload it to *ctx*."""
    audio_buffer = await _run_blocking(
        tts_engine.generate_audio, text, voice=TTS_VOICE, speed=TTS_SPEED
    )
    await ctx.send(file=discord.File(audio_buffer, filename="speech.mp3"))


# NOTE: discord.py uses a command's docstring as its help text, so the
# docstring below is user-facing; developer notes live in comments instead.
#
# Flow: if the first word of the message is the name of a registered custom
# bot, the remainder is sent to the chat model using that bot's system prompt
# and the model's reply is spoken; otherwise the message itself is spoken.
@bot.command(name="speak")
async def speak(ctx, *, message: str):
    """Have the bot speak the given text using Kokoro TTS, or have a custom bot speak

    Usage: !speak <text> - plain text to speech
    Usage: !speak <bot_name> <text> - have a custom bot respond and speak
    Example: !speak hello world
    Example: !speak alfred what time is it
    """
    if tts_engine is None:
        await ctx.send("❌ TTS engine not initialized. Make sure kokoro-v1.0.onnx and voices-v1.0.bin are present.")
        return

    # Split once: words[0] is the candidate bot name, words[1] (if any) is the
    # rest of the message. An empty list means the message was only whitespace.
    words = message.split(maxsplit=1)
    if not words:
        await ctx.send("❌ Please provide text to speak.")
        return

    custom_bot_manager = CustomBotManager()
    first_word = words[0]
    is_custom_bot = any(
        entry[0] == first_word for entry in custom_bot_manager.list_custom_bots()
    )

    if not is_custom_bot:
        # Plain text-to-speech: speak the message exactly as given.
        try:
            await ctx.send("🔊 Generating speech...")
            await _send_speech(ctx, message)
        except Exception as e:
            logger.exception("Error in !speak command")
            await ctx.send(f"❌ Error generating speech: {str(e)}")
        return

    bot_name = first_word
    text_to_speak = words[1] if len(words) > 1 else ""
    if not text_to_speak:
        await ctx.send("❌ Please provide text for the bot to respond to.")
        return

    await ctx.send(f"🔊 **{bot_name}** is thinking...")

    # The bot could have been removed between listing and lookup.
    bot_info = custom_bot_manager.get_custom_bot(bot_name)
    if not bot_info:
        await ctx.send(f"❌ Custom bot '{bot_name}' not found.")
        return

    _, system_prompt, _, _ = bot_info

    # Ask for a short reply so the generated audio stays brief.
    system_prompt_edit = f"{system_prompt}\nKeep your responses under 2-3 sentences."

    try:
        # The chat call is synchronous; run it off the event loop so the bot
        # keeps processing other events while waiting for the model.
        bot_response = await _run_blocking(
            llama_wrapper.chat_completion_with_history,
            system_prompt=system_prompt_edit,
            prompts=[{"role": "user", "content": text_to_speak}],
            openai_url=CHAT_ENDPOINT,
            openai_api_key=CHAT_ENDPOINT_KEY,
            model=CHAT_MODEL,
            max_tokens=MAX_COMPLETION_TOKENS,
        )

        if not bot_response:
            await ctx.send(f"❌ **{bot_name}** failed to generate a response.")
            return

        await ctx.send(f"🔊 Generating speech for **{bot_name}**...")
        await _send_speech(ctx, bot_response)
    except Exception as e:
        logger.exception("Error in !speak command with bot '%s'", bot_name)
        await ctx.send(f"❌ Error generating speech: {str(e)}")
|
||||
|
||||
|
||||
@bot.command(name="doodlebob")
|
||||
async def doodlebob(ctx, *, message: str):
|
||||
# add some logging
|
||||
|
||||
Reference in New Issue
Block a user