add custom voices
This commit is contained in:
@@ -52,10 +52,13 @@ Once you create a custom bot, interact with it by prefixing your message with th
|
||||
|
||||
### Text-to-Speech
|
||||
|
||||
| Command | Description | Example Usage |
|
||||
| -------------------------- | --------------------------------------- | ------------------------------- |
|
||||
| `!speak <text>` | Convert text to speech (MP3 attachment) | `!speak hello world` |
|
||||
| `!speak <bot_name> <text>` | Have a custom bot respond and speak | `!speak alfred what time is it` |
|
||||
| Command | Description | Example Usage |
|
||||
| ------------------------------------ | ----------------------------------------------------- | ------------------------------------------ |
|
||||
| `!speak <text>` | Convert text to speech (MP3 attachment) | `!speak hello world` |
|
||||
| `!speak <text> --voice <voice>` | Convert text to speech with a specific voice | `!speak hello world --voice af_bella` |
|
||||
| `!speak <bot_name> <text>` | Have a custom bot respond and speak | `!speak alfred what time is it` |
|
||||
| `!speak <bot_name> <text> --voice <voice>` | Have a custom bot respond and speak with a specific voice | `!speak alfred what time is it --voice am_puck` |
|
||||
| `!voices` | List all available TTS voices by category | `!voices` |
|
||||
|
||||
### Image Commands
|
||||
|
||||
|
||||
@@ -101,6 +101,83 @@ TTS_VOICES_PATH: str = os.getenv("TTS_VOICES_PATH", "voices-v1.0.bin")
|
||||
TTS_VOICE: str = os.getenv("TTS_VOICE", "af_sarah")
|
||||
TTS_SPEED: float = float(os.getenv("TTS_SPEED", "1.0"))
|
||||
|
||||
# Available voices organized by category.
#
# Each key is a display label for one group of voices. Each value is a
# mapping with two entries:
#   "language" - the language code shared by every voice in the group
#   "voices"   - the list of voice identifiers that belong to the group
#
# NOTE(review): the identifiers presumably have to match the voices shipped
# in the file referenced by TTS_VOICES_PATH - confirm when adding entries.
VOICES_LIST: dict[str, dict[str, str | list[str]]] = {
    "🇺🇸 👩": {
        "language": "en-us",
        "voices": [
            "af_alloy",
            "af_aoede",
            "af_bella",
            "af_heart",
            "af_jessica",
            "af_kore",
            "af_nicole",
            "af_nova",
            "af_river",
            "af_sarah",
            "af_sky",
        ],
    },
    "🇺🇸 👨": {
        "language": "en-us",
        "voices": [
            "am_adam",
            "am_echo",
            "am_eric",
            "am_fenrir",
            "am_liam",
            "am_michael",
            "am_onyx",
            "am_puck",
        ],
    },
    # This group keeps both the bf_* and bm_* identifiers under one label.
    "🇬🇧": {
        "language": "en-gb",
        "voices": [
            "bf_alice",
            "bf_emma",
            "bf_isabella",
            "bf_lily",
            "bm_daniel",
            "bm_fable",
            "bm_george",
            "bm_lewis",
        ],
    },
    "🇫🇷": {
        "language": "fr-fr",
        "voices": ["ff_siwis"],
    },
    "🇮🇹": {
        "language": "it",
        "voices": ["if_sara", "im_nicola"],
    },
    "🇯🇵": {
        "language": "ja",
        "voices": [
            "jf_alpha",
            "jf_gongitsune",
            "jf_nezumi",
            "jf_tebukuro",
            "jm_kumo",
        ],
    },
    "🇨🇳": {
        "language": "cmn",
        "voices": [
            "zf_xiaobei",
            "zf_xiaoni",
            "zf_xiaoxiao",
            "zf_xiaoyi",
            "zm_yunjian",
            "zm_yunxi",
            "zm_yunxia",
            "zm_yunyang",
        ],
    },
}
|
||||
|
||||
logger.info("CHAT_ENDPOINT set to %s", CHAT_ENDPOINT)
|
||||
logger.info("COMPLETION_ENDPOINT set to %s", COMPLETION_ENDPOINT)
|
||||
logger.info("IMAGE_GEN_ENDPOINT set to %s", IMAGE_GEN_ENDPOINT)
|
||||
|
||||
+82
-8
@@ -13,6 +13,7 @@ from discord import Message
|
||||
from discord.ext import commands
|
||||
|
||||
from vibe_bot import llama_wrapper, tts
|
||||
from vibe_bot.tts import DEFAULT_LANG
|
||||
from vibe_bot.config import (
|
||||
CHAT_ENDPOINT,
|
||||
CHAT_ENDPOINT_KEY,
|
||||
@@ -29,6 +30,7 @@ from vibe_bot.config import (
|
||||
TTS_SPEED,
|
||||
TTS_VOICE,
|
||||
TTS_VOICES_PATH,
|
||||
VOICES_LIST,
|
||||
)
|
||||
from vibe_bot.database import CustomBotManager, get_database
|
||||
|
||||
@@ -326,14 +328,47 @@ async def on_message(message: Message) -> None:
|
||||
await bot.process_commands(message)
|
||||
|
||||
|
||||
@bot.command(name="voices")
async def voices(ctx: CommandsContext[Bot]) -> None:
    """List all available TTS voices organized by category."""
    # The docstring above is kept verbatim: discord.py exposes a command's
    # docstring as its help text, so rewording it would change bot output.

    # Assemble the listing from pieces and join once at the end.
    parts: list[str] = ["Available Voices:\n\n"]
    for label, entry in VOICES_LIST.items():
        parts.append(f"{label} ({entry['language']}):\n")
        parts.extend(f" - {name}\n" for name in entry["voices"])
        parts.append("\n")
    parts.append("Use `!speak <text> --voice <voice_name>` to choose a voice.")
    remaining = "".join(parts)

    # Send the listing in pieces of at most `max_len` characters, cutting at
    # the last newline inside the window so a line is never split in two
    # unless the window contains no newline at all.
    max_len = 1900
    while remaining:
        if len(remaining) <= max_len:
            await ctx.send(remaining)
            return
        cut = remaining.rfind("\n", 0, max_len)
        if cut < 0:
            cut = max_len
        piece = remaining[:cut]
        # Drop the newline(s) at the cut so the next piece starts on text.
        remaining = remaining[cut:].lstrip("\n")
        await ctx.send(piece)
|
||||
|
||||
|
||||
@bot.command(name="speak")
|
||||
async def speak(ctx: CommandsContext[Bot], *, message: str) -> None:
|
||||
async def speak(
|
||||
ctx: CommandsContext[Bot],
|
||||
*,
|
||||
message: str,
|
||||
) -> None:
|
||||
"""Have the bot speak the given text using Kokoro TTS, or have a custom bot speak.
|
||||
|
||||
Usage: !speak <text> - plain text to speech
|
||||
Usage: !speak <bot_name> <text> - have a custom bot respond and speak
|
||||
Usage: !speak <text> --voice <voice_name> - plain text to speech
|
||||
Usage: !speak <bot_name> <text> --voice <voice_name> - have a custom bot respond and speak
|
||||
Example: !speak hello world
|
||||
Example: !speak alfred what time is it
|
||||
Example: !speak hello world --voice af_bella
|
||||
Example: !speak alfred what time is it --voice am_puck
|
||||
"""
|
||||
if tts_engine is None:
|
||||
await ctx.send(
|
||||
@@ -342,19 +377,37 @@ async def speak(ctx: CommandsContext[Bot], *, message: str) -> None:
|
||||
)
|
||||
return
|
||||
|
||||
# Parse --voice flag from the message
|
||||
voice = None
|
||||
voice_match = message.rsplit("--voice ", 1)
|
||||
if len(voice_match) == 2:
|
||||
voice = voice_match[1].strip()
|
||||
message = voice_match[0].rstrip()
|
||||
|
||||
if not message or not message.strip():
|
||||
await ctx.send("Please provide text to speak.")
|
||||
return
|
||||
|
||||
# Validate voice if provided
|
||||
if voice:
|
||||
all_voices = [v for cat in VOICES_LIST.values() for v in cat["voices"]]
|
||||
if voice not in all_voices:
|
||||
await ctx.send(
|
||||
f"Unknown voice '{voice}'. Use `!voices` to see available voices."
|
||||
)
|
||||
return
|
||||
|
||||
custom_bot_manager = CustomBotManager()
|
||||
custom_bots = custom_bot_manager.list_custom_bots()
|
||||
bot_names = [b[0] for b in custom_bots]
|
||||
|
||||
first_word = message.split(maxsplit=1)[0] if message.split() else ""
|
||||
if first_word in bot_names:
|
||||
await _speak_with_bot(ctx, first_word, message, tts_engine, custom_bot_manager)
|
||||
await _speak_with_bot(
|
||||
ctx, first_word, message, tts_engine, custom_bot_manager, voice
|
||||
)
|
||||
else:
|
||||
await _speak_plain(ctx, message, tts_engine)
|
||||
await _speak_plain(ctx, message, tts_engine, voice)
|
||||
|
||||
|
||||
async def _speak_with_bot(
|
||||
@@ -363,6 +416,7 @@ async def _speak_with_bot(
|
||||
message: str,
|
||||
engine: tts.TTSEngine,
|
||||
custom_bot_manager: CustomBotManager,
|
||||
voice: str | None = None,
|
||||
) -> None:
|
||||
"""Handle speak command for a custom bot."""
|
||||
text_to_speak = message[len(bot_name) :].lstrip()
|
||||
@@ -380,6 +434,14 @@ async def _speak_with_bot(
|
||||
_, system_prompt, _, _ = bot_info
|
||||
system_prompt_edit = f"{system_prompt}\nKeep your responses under 2-3 sentences."
|
||||
|
||||
# Determine language for the chosen voice
|
||||
chosen_voice = voice or TTS_VOICE
|
||||
lang = DEFAULT_LANG
|
||||
for cat in VOICES_LIST.values():
|
||||
if chosen_voice in cat["voices"]:
|
||||
lang = str(cat["language"])
|
||||
break
|
||||
|
||||
try:
|
||||
db = get_database()
|
||||
context = db.get_conversation_context(
|
||||
@@ -429,8 +491,9 @@ async def _speak_with_bot(
|
||||
await ctx.send(f"Generating speech for **{bot_name}**...")
|
||||
audio_buffer = engine.generate_audio(
|
||||
bot_response,
|
||||
voice=TTS_VOICE,
|
||||
voice=chosen_voice,
|
||||
speed=TTS_SPEED,
|
||||
lang=lang,
|
||||
)
|
||||
|
||||
audio_file = discord.File(audio_buffer, filename="speech.mp3")
|
||||
@@ -447,14 +510,25 @@ async def _speak_plain(
|
||||
ctx: CommandsContext[Bot],
|
||||
message: str,
|
||||
engine: tts.TTSEngine,
|
||||
voice: str | None = None,
|
||||
) -> None:
|
||||
"""Handle speak command for plain text."""
|
||||
chosen_voice = voice or TTS_VOICE
|
||||
|
||||
# Determine language for the chosen voice
|
||||
lang = DEFAULT_LANG
|
||||
for cat in VOICES_LIST.values():
|
||||
if chosen_voice in cat["voices"]:
|
||||
lang = str(cat["language"])
|
||||
break
|
||||
|
||||
try:
|
||||
await ctx.send("Generating speech...")
|
||||
audio_buffer = engine.generate_audio(
|
||||
message,
|
||||
voice=TTS_VOICE,
|
||||
voice=chosen_voice,
|
||||
speed=TTS_SPEED,
|
||||
lang=lang,
|
||||
)
|
||||
|
||||
audio_file = discord.File(audio_buffer, filename="speech.mp3")
|
||||
|
||||
Reference in New Issue
Block a user