"""Discord cog for trying out several text-to-speech (TTS) providers.

The cog exposes a ``/ttsprovider`` slash command that synthesizes a phrase
with the selected provider (first in a helper subprocess, then in-process as
a fallback) and a ``ttscheck`` prefix command that reports which providers
are installed. Generated audio lives in ``./SOUND`` and is purged hourly.
"""

import asyncio
import contextlib
import glob
import importlib.util
import io
import os
import shutil
import subprocess
import sys
import tempfile
import time
import uuid

import discord
from discord import app_commands
from discord.ext import commands

try:
    from google.cloud import texttospeech
except ImportError:
    # Google Cloud TTS is an optional provider; the cog must still load
    # (and report the provider as missing) when the library is absent.
    texttospeech = None

# Directory that receives every generated audio file.
_SOUND_DIR = "./SOUND"
# Generated files older than this many seconds are deleted by the cleanup.
_MAX_FILE_AGE_SECONDS = 3600
# Delay between two periodic cleanup passes.
_CLEANUP_INTERVAL_SECONDS = 3600
# Hard limit Discord imposes on the length of a message body.
_DISCORD_MESSAGE_LIMIT = 2000
# Maximum number of characters of the user text echoed back in a message.
_TEXT_PREVIEW_LIMIT = 1500

# provider value -> (availability flag printed by the test script,
#                    prefix of the error line printed by the test script)
_PROVIDER_DIAGNOSTICS = {
    "gtts": ("GTTS_AVAILABLE", "Error with Google TTS"),
    "pyttsx3": ("PYTTSX3_AVAILABLE", "Error with pyttsx3"),
    "coqui": ("COQUI_AVAILABLE", "Error with Coqui TTS"),
    "espeak": ("ESPEAK_AVAILABLE", "Error with espeak-ng"),
    "google_cloud_tts": ("GCLOUD_TTS_AVAILABLE", "Error with Google Cloud TTS"),
}

# Stand-alone diagnostic script run in a child interpreter. It is a plain
# constant (NOT an f-string): the provider and the text are passed as command
# line arguments so user input is never interpolated into Python source.
_TTS_TEST_SCRIPT = """
import importlib
import importlib.util
import os
import shutil
import subprocess
import sys
import traceback
import uuid

provider_arg = sys.argv[1]
text_arg = sys.argv[2]

# Print Python version and path for debugging
print(f"Python version: {sys.version}")
print(f"Python executable: {sys.executable}")
print(f"Current working directory: {os.getcwd()}")

# Check for TTS libraries
try:
    import pkg_resources
    installed_packages = [pkg.key for pkg in pkg_resources.working_set]
    print(f"Installed packages: {installed_packages}")
except Exception as e:
    print(f"Error getting installed packages: {e}")


def check_module(flag_name, module_name):
    # Report whether module_name can be found AND imported.
    try:
        available = importlib.util.find_spec(module_name) is not None
        if available:
            module = importlib.import_module(module_name)
            version = getattr(module, "__version__", "unknown")
            print(f"{module_name} version: {version}")
    except Exception as e:
        print(f"Error checking {module_name}: {e}")
        available = False
    print(f"{flag_name}: {available}")
    return available


GTTS_AVAILABLE = check_module("GTTS_AVAILABLE", "gtts")
PYTTSX3_AVAILABLE = check_module("PYTTSX3_AVAILABLE", "pyttsx3")
COQUI_AVAILABLE = check_module("COQUI_AVAILABLE", "TTS")
GCLOUD_TTS_AVAILABLE = check_module(
    "GCLOUD_TTS_AVAILABLE", "google.cloud.texttospeech"
)

# Check for espeak-ng
try:
    ESPEAK_AVAILABLE = shutil.which("espeak-ng") is not None
    print(f"ESPEAK_AVAILABLE: {ESPEAK_AVAILABLE}")
    if ESPEAK_AVAILABLE:
        # Try to get version
        version_result = subprocess.run(
            ["espeak-ng", "--version"], capture_output=True, text=True
        )
        if version_result.returncode == 0:
            print(f"espeak-ng version: {version_result.stdout.strip()}")
        else:
            print("espeak-ng found but couldn't get version")
except Exception as e:
    print(f"Error checking espeak-ng: {e}")
    ESPEAK_AVAILABLE = False


def generate_tts_audio(provider, text, output_file):
    print(f"Testing TTS provider: {provider}")
    print(f"Text: {text}")
    print(f"Output file: {output_file}")

    if provider == "gtts" and GTTS_AVAILABLE:
        try:
            from gtts import gTTS
            tts = gTTS(text=text, lang='en')
            tts.save(output_file)
            print(f"Google TTS audio saved to {output_file}")
            return True
        except Exception as e:
            print(f"Error with Google TTS: {e}")
            traceback.print_exc()
            return False
    elif provider == "pyttsx3" and PYTTSX3_AVAILABLE:
        try:
            import pyttsx3
            engine = pyttsx3.init()
            engine.save_to_file(text, output_file)
            engine.runAndWait()
            print(f"pyttsx3 audio saved to {output_file}")
            return True
        except Exception as e:
            print(f"Error with pyttsx3: {e}")
            traceback.print_exc()
            return False
    elif provider == "coqui" and COQUI_AVAILABLE:
        try:
            from TTS.api import TTS
            tts = TTS("tts_models/en/ljspeech/tacotron2-DDC")
            tts.tts_to_file(text=text, file_path=output_file)
            print(f"Coqui TTS audio saved to {output_file}")
            return True
        except Exception as e:
            print(f"Error with Coqui TTS: {e}")
            traceback.print_exc()
            return False
    elif provider == "espeak" and ESPEAK_AVAILABLE:
        try:
            # Create a WAV file first
            wants_mp3 = output_file.endswith(".mp3")
            wav_file = output_file[:-4] + ".wav" if wants_mp3 else output_file

            # "--" ends option parsing so text starting with "-" is spoken
            cmd = ["espeak-ng", "-w", wav_file, "--", text]
            process = subprocess.run(cmd, capture_output=True, text=True)
            if process.returncode != 0:
                print(f"Error running espeak-ng: {process.stderr}")
                return False

            # Convert WAV to MP3 if needed
            if wants_mp3:
                try:
                    # Try to use pydub for conversion
                    from pydub import AudioSegment
                    sound = AudioSegment.from_wav(wav_file)
                    sound.export(output_file, format="mp3")
                    # Remove the temporary WAV file
                    os.remove(wav_file)
                except Exception as e:
                    # If pydub fails, just use the WAV file
                    print(f"Warning: Could not convert WAV to MP3: {e}")
                    print(f"Using WAV file instead: {wav_file}")
                    output_file = wav_file
            print(f"espeak-ng audio saved to {output_file}")
            return True
        except Exception as e:
            print(f"Error with espeak-ng: {e}")
            traceback.print_exc()
            return False
    elif provider == "google_cloud_tts" and GCLOUD_TTS_AVAILABLE:
        try:
            from google.cloud import texttospeech as gcloud_tts
            client = gcloud_tts.TextToSpeechClient()
            input_text = gcloud_tts.SynthesisInput(text=text)
            voice = gcloud_tts.VoiceSelectionParams(
                language_code="en-US", name="en-US-Chirp3-HD-Autonoe"
            )
            audio_config = gcloud_tts.AudioConfig(
                audio_encoding=gcloud_tts.AudioEncoding.MP3
            )
            response = client.synthesize_speech(
                request={
                    "input": input_text,
                    "voice": voice,
                    "audio_config": audio_config,
                }
            )
            with open(output_file, "wb") as out:
                out.write(response.audio_content)
            print(f"Google Cloud TTS audio saved to {output_file}")
            return True
        except Exception as e:
            print(f"Error with Google Cloud TTS: {e}")
            traceback.print_exc()
            return False
    else:
        print(f"TTS provider {provider} not available.")
        return False


# Create output directory if it doesn't exist
os.makedirs("./SOUND", exist_ok=True)

# Generate a unique filename
unique_id = uuid.uuid4().hex
output_file = f"./SOUND/tts_test_{unique_id}.mp3"
print(f"Using output file: {output_file}")

# Generate TTS audio
try:
    success = generate_tts_audio(provider_arg, text_arg, output_file)
    print(f"TTS generation {'' if success else 'un'}successful")
except Exception as e:
    print(f"Unexpected error: {e}")
    traceback.print_exc()
    success = False

# Verify file exists and has content
if os.path.exists(output_file):
    file_size = os.path.getsize(output_file)
    print(f"Output file exists, size: {file_size} bytes")
else:
    print("Output file does not exist")
"""


def _module_available(name):
    """Return True when module *name* can be located, False otherwise.

    ``importlib.util.find_spec`` imports the parent package of a dotted name
    and raises ``ModuleNotFoundError`` when that parent is missing, so the
    lookup is wrapped instead of being allowed to propagate.
    """
    try:
        return importlib.util.find_spec(name) is not None
    except (ImportError, ValueError):
        return False


def _is_nonempty_file(path):
    """Return True when *path* names an existing file with at least one byte."""
    try:
        return os.path.getsize(path) > 0
    except OSError:
        return False


def _truncate(message, limit):
    """Return *message* cut to at most *limit* characters (ellipsis-terminated)."""
    if len(message) <= limit:
        return message
    return message[: limit - 1] + "…"


def _wav_sibling(path):
    """Return the ``.wav`` counterpart of an ``.mp3`` path, else *path* itself."""
    if path.endswith(".mp3"):
        return path[: -len(".mp3")] + ".wav"
    return path


def _locate_test_output(stdout_text):
    """Work out which audio file the test script produced.

    The script announces its target with a ``Using output file:`` line. When
    that line is missing, the newest ``tts_test_*.mp3`` created within the
    last minute is used, and finally the legacy fixed name. If the chosen MP3
    is absent but its WAV sibling exists (espeak-ng without an MP3 encoder),
    the WAV path is returned instead.
    """
    output_filename = None
    for line in stdout_text.splitlines():
        if line.startswith("Using output file:"):
            output_filename = line.split(":", 1)[1].strip()
            break

    if not output_filename:
        cutoff = time.time() - 60
        recent = []
        for path in glob.glob("./SOUND/tts_test_*.mp3"):
            try:
                modified = os.path.getmtime(path)
            except OSError:
                continue  # vanished between glob() and stat()
            if modified > cutoff:
                recent.append((modified, path))
        if recent:
            # Use the most recently created file
            output_filename = max(recent)[1]
        else:
            # Fallback to the old filename pattern
            output_filename = "./SOUND/tts_test.mp3"

    if not _is_nonempty_file(output_filename):
        wav_candidate = _wav_sibling(output_filename)
        if wav_candidate != output_filename and _is_nonempty_file(wav_candidate):
            return wav_candidate
    return output_filename


def _build_failure_message(provider, returncode, direct_error, full_output):
    """Compose the user-facing report sent when both generation paths fail.

    *returncode* is the test script's exit status (``None`` when it could not
    be started), *direct_error* the in-process error text (``None`` when the
    in-process call claimed success), and *full_output* the combined
    stdout/stderr of the script, scanned for known diagnostic markers.
    """
    parts = [f"❌ Failed to generate TTS audio with provider: {provider}\n\n"]

    if returncode is None:
        parts.append("Test process could not be started\n\n")
    elif returncode != 0:
        parts.append(f"Process returned error code: {returncode}\n\n")

    if direct_error is not None:
        parts.append(f"Direct method error: {direct_error}\n\n")

    # Create a summary of the most important information
    parts.append("Error Summary:\n")
    flag_name, error_prefix = _PROVIDER_DIAGNOSTICS.get(
        provider, (f"{provider.upper()}_AVAILABLE", "Error with " + provider)
    )
    if f"{flag_name}: False" in full_output:
        parts.append(
            f"- The {provider} library is not available or not properly installed\n"
        )
    error_line = next(
        (line for line in full_output.split("\n") if error_prefix in line), ""
    )
    if error_line:
        parts.append(f"- {error_line}\n")
    parts.append("\n")

    # Add instructions for fixing the issue
    parts.append("To fix this issue, try:\n")
    parts.append("1. Make sure the required packages are installed:\n")
    if provider == "gtts":
        parts.append("   - Run: pip install gtts\n")
    elif provider == "pyttsx3":
        parts.append("   - Run: pip install pyttsx3\n")
        parts.append(
            "   - On Linux, you may need additional packages: sudo apt-get install espeak\n"
        )
    elif provider == "coqui":
        parts.append("   - Run: pip install TTS\n")
        parts.append(
            "   - This may require additional dependencies based on your system\n"
        )
    elif provider == "espeak":
        parts.append("   - Install espeak-ng and make sure it's in your PATH\n")
    elif provider == "google_cloud_tts":
        parts.append("   - Run: pip install google-cloud-texttospeech\n")
        parts.append(
            "   - Ensure GOOGLE_APPLICATION_CREDENTIALS environment variable is set correctly.\n"
        )
    parts.append("2. Restart the bot after installing the packages\n")

    # Add a note about the full output
    parts.append(
        "\nFull diagnostic output is available but may be too long to display here."
    )
    return _truncate("".join(parts), _DISCORD_MESSAGE_LIMIT)


class TTSProviderCog(commands.Cog):
    """Commands for testing TTS providers and inspecting their installation."""

    def __init__(self, bot):
        """Store *bot*, purge stale audio once and schedule the hourly purge."""
        self.bot = bot
        print("TTSProviderCog initialized!")
        self.cleanup_old_files()
        # Schedule periodic cleanup
        self.cleanup_task = self.bot.loop.create_task(self.periodic_cleanup())

    async def periodic_cleanup(self):
        """Periodically clean up old TTS files until the bot is closed."""
        while not self.bot.is_closed():
            await asyncio.sleep(_CLEANUP_INTERVAL_SECONDS)
            self.cleanup_old_files()

    def cog_unload(self):
        """Cancel the cleanup task when the cog is unloaded."""
        task = getattr(self, "cleanup_task", None)
        if task:
            task.cancel()

    def cleanup_old_files(self):
        """Delete generated TTS files older than one hour.

        Every ``tts_*`` MP3 in the sound directory is considered, together
        with the WAV files espeak-ng leaves behind when MP3 conversion fails.
        Each file is visited once, so a file is never "removed" twice.
        Failures are printed and never raised: cleanup is best-effort.
        """
        try:
            os.makedirs(_SOUND_DIR, exist_ok=True)
            cutoff = time.time() - _MAX_FILE_AGE_SECONDS

            # "tts_*" already covers the tts_direct_* and tts_test_* names;
            # a set guards against any overlap between the two patterns.
            candidates = set()
            for pattern in ("./SOUND/tts_*.mp3", "./SOUND/tts_*.wav"):
                candidates.update(glob.glob(pattern))

            removed = 0
            for path in sorted(candidates):
                try:
                    if os.path.getmtime(path) >= cutoff:
                        continue
                    os.remove(path)
                except FileNotFoundError:
                    continue  # already gone; nothing left to clean
                except OSError as e:
                    print(f"Error removing old TTS file {path}: {e}")
                else:
                    removed += 1
                    print(f"Cleaned up old TTS file: {path}")

            print(f"Cleaned up {removed} old TTS files")
        except Exception as e:
            print(f"Error during cleanup: {e}")

    async def generate_tts_directly(self, provider, text, output_file=None):
        """Generate TTS audio in-process, without using a subprocess.

        Args:
            provider: One of ``gtts``, ``pyttsx3``, ``coqui``, ``espeak`` or
                ``google_cloud_tts``.
            text: The text to synthesize.
            output_file: Destination path; a unique ``./SOUND/tts_direct_*.mp3``
                name is generated when omitted.

        Returns:
            ``(True, path)`` on success, where *path* may be a ``.wav`` file
            if espeak-ng output could not be converted to MP3, or
            ``(False, error_message)`` on failure.

        NOTE: the provider calls below are synchronous and run on the event
        loop; long syntheses delay other bot work while they execute.
        """
        # Create a unique output file if none is provided
        if output_file is None:
            output_file = f"./SOUND/tts_direct_{uuid.uuid4().hex}.mp3"

        # Create output directory if it doesn't exist
        os.makedirs(_SOUND_DIR, exist_ok=True)

        if provider == "gtts":
            if not _module_available("gtts"):
                return (
                    False,
                    "Google TTS (gtts) is not installed. Run: pip install gtts",
                )
            try:
                from gtts import gTTS

                tts = gTTS(text=text, lang="en")
                tts.save(output_file)
                return True, output_file
            except Exception as e:
                return False, f"Error with Google TTS: {str(e)}"

        elif provider == "pyttsx3":
            if not _module_available("pyttsx3"):
                return False, "pyttsx3 is not installed. Run: pip install pyttsx3"
            try:
                import pyttsx3

                engine = pyttsx3.init()
                engine.save_to_file(text, output_file)
                engine.runAndWait()
                return True, output_file
            except Exception as e:
                return False, f"Error with pyttsx3: {str(e)}"

        elif provider == "coqui":
            if not _module_available("TTS"):
                return False, "Coqui TTS is not installed. Run: pip install TTS"
            try:
                from TTS.api import TTS

                tts = TTS("tts_models/en/ljspeech/tacotron2-DDC")
                tts.tts_to_file(text=text, file_path=output_file)
                return True, output_file
            except Exception as e:
                return False, f"Error with Coqui TTS: {str(e)}"

        elif provider == "espeak":
            try:
                if shutil.which("espeak-ng") is None:
                    return (
                        False,
                        "espeak-ng is not installed or not in PATH. Install espeak-ng and make sure it's in your PATH.",
                    )

                # espeak-ng writes WAV; only the file extension is swapped so
                # a ".mp3" inside a directory name is left untouched.
                wav_file = _wav_sibling(output_file)

                # "--" ends option parsing so user text that begins with "-"
                # is spoken instead of being interpreted as an option.
                cmd = ["espeak-ng", "-w", wav_file, "--", text]
                process = subprocess.run(cmd, capture_output=True, text=True)
                if process.returncode != 0:
                    return False, f"Error running espeak-ng: {process.stderr}"

                # Convert WAV to MP3 if needed
                if output_file.endswith(".mp3"):
                    try:
                        # Try to use pydub for conversion
                        from pydub import AudioSegment

                        sound = AudioSegment.from_wav(wav_file)
                        sound.export(output_file, format="mp3")
                        # Remove the temporary WAV file
                        os.remove(wav_file)
                    except Exception as e:
                        # If pydub fails, just use the WAV file
                        print(f"Warning: Could not convert WAV to MP3: {e}")
                        output_file = wav_file
                else:
                    # Not an MP3 request: the WAV file is the final output
                    output_file = wav_file

                return True, output_file
            except Exception as e:
                return False, f"Error with espeak-ng: {str(e)}"

        elif provider == "google_cloud_tts":
            if texttospeech is None:
                return (
                    False,
                    "Google Cloud TTS library is not installed. Run: pip install google-cloud-texttospeech",
                )
            try:
                # Assumes GOOGLE_APPLICATION_CREDENTIALS is set
                client = texttospeech.TextToSpeechClient()
                input_text = texttospeech.SynthesisInput(text=text)
                voice = texttospeech.VoiceSelectionParams(
                    language_code="en-US", name="en-US-Chirp3-HD-Autonoe"
                )
                # MP3 output
                audio_config = texttospeech.AudioConfig(
                    audio_encoding=texttospeech.AudioEncoding.MP3
                )
                response = client.synthesize_speech(
                    request={
                        "input": input_text,
                        "voice": voice,
                        "audio_config": audio_config,
                    }
                )
                # The response's audio_content is binary.
                with open(output_file, "wb") as out:
                    out.write(response.audio_content)
                return True, output_file
            except Exception as e:
                error_message = f"Error with Google Cloud TTS: {str(e)}"
                lowered = str(e).lower()
                if "quota" in lowered:
                    error_message += (
                        " This might be a quota issue with your Google Cloud project."
                    )
                elif "credentials" in lowered:
                    error_message += " Please ensure GOOGLE_APPLICATION_CREDENTIALS environment variable is set correctly."
                return False, error_message

        else:
            return False, f"Unknown TTS provider: {provider}"

    async def _run_test_script(self, provider, text):
        """Run the diagnostic script in a child interpreter.

        The script is written to a uniquely named temporary file (so that
        concurrent invocations cannot overwrite each other) which is removed
        afterwards. *provider* and *text* travel as command-line arguments.

        Returns:
            ``(returncode, stdout_text, stderr_text)``; *returncode* is
            ``None`` when the child could not be started.
        """
        try:
            handle = tempfile.NamedTemporaryFile(
                "w", suffix=".py", prefix="tts_test_", encoding="utf8", delete=False
            )
        except OSError as e:
            return None, "", f"Failed to run TTS test script: {e}"

        script_path = handle.name
        # Force UTF-8 on the child's stdio so arbitrary text can be printed
        # there and decoded here regardless of the platform's locale.
        child_env = dict(os.environ, PYTHONIOENCODING="utf-8")
        try:
            with handle:
                handle.write(_TTS_TEST_SCRIPT)
            process = await asyncio.create_subprocess_exec(
                sys.executable,
                script_path,
                provider,
                text,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
                env=child_env,
            )
            stdout, stderr = await process.communicate()
        except (OSError, ValueError) as e:
            # ValueError: e.g. an embedded NUL character in an argument
            return None, "", f"Failed to run TTS test script: {e}"
        finally:
            with contextlib.suppress(OSError):
                os.remove(script_path)

        stdout_text = stdout.decode("utf-8", errors="replace") if stdout else ""
        stderr_text = stderr.decode("utf-8", errors="replace") if stderr else ""
        return process.returncode, stdout_text, stderr_text

    @app_commands.command(
        name="ttsprovider", description="Test different TTS providers"
    )
    @app_commands.describe(
        provider="Select the TTS provider to use", text="Text to be spoken"
    )
    @app_commands.choices(
        provider=[
            app_commands.Choice(name="Google TTS (Online)", value="gtts"),
            app_commands.Choice(name="pyttsx3 (Offline)", value="pyttsx3"),
            app_commands.Choice(name="Coqui TTS (AI Voice)", value="coqui"),
            app_commands.Choice(name="eSpeak-NG (Offline)", value="espeak"),
            app_commands.Choice(
                name="Google Cloud TTS (Chirp HD)", value="google_cloud_tts"
            ),
        ]
    )
    async def ttsprovider_slash(
        self,
        interaction: discord.Interaction,
        provider: str,
        text: str = "This is a test of text to speech",
    ):
        """Test different TTS providers"""
        await interaction.response.defer(thinking=True)

        # The echoed text is shortened so replies stay under Discord's limit.
        text_preview = _truncate(text, _TEXT_PREVIEW_LIMIT)

        returncode, stdout_text, stderr_text = await self._run_test_script(
            provider, text
        )
        # Combine stdout and stderr for complete output
        full_output = f"STDOUT:\n{stdout_text}\n\nSTDERR:\n{stderr_text}"
        output_filename = _locate_test_output(stdout_text)

        if _is_nonempty_file(output_filename):
            # Success! Send the audio file
            await interaction.followup.send(
                f"✅ Successfully tested TTS provider: {provider}\nText: {text_preview}\nFile: {os.path.basename(output_filename)}",
                file=discord.File(output_filename),
            )
            return

        # Failed to generate audio with subprocess, try direct method as fallback
        await interaction.followup.send(
            f"Subprocess method failed. Trying direct TTS generation with {provider}..."
        )
        success, result = await self.generate_tts_directly(provider, text)

        if success and _is_nonempty_file(result):
            # Direct method succeeded!
            await interaction.followup.send(
                f"✅ Successfully generated TTS audio with {provider} (direct method)\nText: {text_preview}",
                file=discord.File(result),
            )
            return

        # Both methods failed, send detailed error information
        await interaction.followup.send(
            _build_failure_message(
                provider, returncode, None if success else result, full_output
            )
        )

        # Short logs fit in a code block (1900 leaves room for the fence);
        # longer ones are attached from memory, avoiding a shared temp file.
        if len(full_output) <= 1900:
            await interaction.followup.send(f"```\n{full_output}\n```")
        else:
            log_bytes = io.BytesIO(full_output.encode("utf8"))
            await interaction.followup.send(
                "Detailed error log:",
                file=discord.File(log_bytes, filename="tts_error_log.txt"),
            )

    @commands.command(name="ttscheck")
    async def tts_check(self, ctx):
        """Check if TTS libraries are installed and working."""
        await ctx.send("Checking TTS libraries...")

        # Check for gtts
        gtts_version = "Not installed"
        if _module_available("gtts"):
            try:
                import gtts

                gtts_version = getattr(gtts, "__version__", "Unknown version")
            except Exception as e:
                gtts_version = f"Error importing: {str(e)}"

        # Check for pyttsx3
        pyttsx3_version = "Not installed"
        if _module_available("pyttsx3"):
            try:
                import pyttsx3  # noqa: F401 - imported only to prove it loads

                pyttsx3_version = "Installed (no version info available)"
            except Exception as e:
                pyttsx3_version = f"Error importing: {str(e)}"

        # Check for TTS (Coqui)
        coqui_version = "Not installed"
        if _module_available("TTS"):
            try:
                import TTS

                coqui_version = getattr(TTS, "__version__", "Unknown version")
            except Exception as e:
                coqui_version = f"Error importing: {str(e)}"

        # Check for espeak-ng
        espeak_version = "Not installed"
        try:
            if shutil.which("espeak-ng") is not None:
                # Try to get version
                version_result = subprocess.run(
                    ["espeak-ng", "--version"], capture_output=True, text=True
                )
                if version_result.returncode == 0:
                    espeak_version = version_result.stdout.strip()
                else:
                    espeak_version = "Installed (version unknown)"
        except Exception as e:
            espeak_version = f"Error checking: {str(e)}"

        # Check for Google Cloud TTS
        gcloud_tts_version = "Not installed"
        if _module_available("google.cloud.texttospeech"):
            try:
                import google.cloud.texttospeech as gcloud_tts_module

                gcloud_tts_version = getattr(
                    gcloud_tts_module, "__version__", "Unknown version"
                )
            except Exception as e:
                gcloud_tts_version = f"Error importing: {str(e)}"

        # Create a report
        report = "**TTS Libraries Status:**\n"
        report += f"- Google TTS (gtts): {gtts_version}\n"
        report += f"- pyttsx3: {pyttsx3_version}\n"
        report += f"- Coqui TTS: {coqui_version}\n"
        report += f"- eSpeak-NG: {espeak_version}\n"
        report += f"- Google Cloud TTS: {gcloud_tts_version}\n\n"

        # Add installation instructions
        report += "**Installation Instructions:**\n"
        report += "- Google TTS: `pip install gtts`\n"
        report += "- pyttsx3: `pip install pyttsx3`\n"
        report += "- Coqui TTS: `pip install TTS`\n"
        report += "- eSpeak-NG: Install from https://github.com/espeak-ng/espeak-ng/releases\n"
        report += "- Google Cloud TTS: `pip install google-cloud-texttospeech` (ensure `GOOGLE_APPLICATION_CREDENTIALS` is set)\n\n"
        report += "After installing, restart the bot for the changes to take effect."

        await ctx.send(_truncate(report, _DISCORD_MESSAGE_LIMIT))


async def setup(bot: commands.Bot):
    """Extension entry point: register :class:`TTSProviderCog` on *bot*."""
    print("Loading TTSProviderCog...")
    await bot.add_cog(TTSProviderCog(bot))
    print("TTSProviderCog loaded successfully!")