discordbot/webdrivertorso_template.py

import random
from PIL import Image, ImageDraw, ImageFont
import math
import wave
import struct
from pydub import AudioSegment
import os
import moviepy.video.io.ImageSequenceClip
import glob
import json
import numpy as np
import importlib.util
import sys

# Check for TTS libraries
GTTS_AVAILABLE = importlib.util.find_spec("gtts") is not None
PYTTSX3_AVAILABLE = importlib.util.find_spec("pyttsx3") is not None
COQUI_AVAILABLE = importlib.util.find_spec("TTS") is not None

# Check for espeak-ng
try:
    import subprocess
    import platform

    if platform.system() == "Windows":
        # On Windows, we'll check if the command exists
        result = subprocess.run(["where", "espeak-ng"], capture_output=True, text=True)
        ESPEAK_AVAILABLE = result.returncode == 0
    else:
        # On Linux/Mac, we'll use which
        result = subprocess.run(["which", "espeak-ng"], capture_output=True, text=True)
        ESPEAK_AVAILABLE = result.returncode == 0
except Exception as e:
    print(f"Error checking espeak-ng: {e}")
    ESPEAK_AVAILABLE = False


class JSON:
    def read(file):
        with open(f"{file}.json", "r", encoding="utf8") as file:
            data = json.load(file, strict=False)
        return data

    def dump(file, data):
        with open(f"{file}.json", "w", encoding="utf8") as file:
            json.dump(data, file, indent=4)


config_data = JSON.read("config")

# SETTINGS #
w = config_data["WIDTH"]
h = config_data["HEIGHT"]
maxW = config_data["MAX_WIDTH"]
maxH = config_data["MAX_HEIGHT"]
minW = config_data["MIN_WIDTH"]
minH = config_data["MIN_HEIGHT"]
LENGTH = config_data["SLIDES"]
AMOUNT = config_data["VIDEOS"]
min_shapes = config_data["MIN_SHAPES"]
max_shapes = config_data["MAX_SHAPES"]
sample_rate = config_data["SOUND_QUALITY"]
tts_enabled = config_data.get("TTS_ENABLED", False)
tts_text = config_data.get("TTS_TEXT", "This is a default text for TTS.")
tts_provider = config_data.get("TTS_PROVIDER", "gtts")  # Options: gtts, pyttsx3, coqui
audio_wave_type = config_data.get(
    "AUDIO_WAVE_TYPE", "sawtooth"
)  # Options: sawtooth, sine, square, triangle, noise, pulse, harmonic
slide_duration = config_data.get("SLIDE_DURATION", 1000)  # Duration in milliseconds
deform_level = config_data.get(
    "DEFORM_LEVEL", "none"
)  # Options: none, low, medium, high
color_mode = config_data.get("COLOR_MODE", "random")  # Options: random, scheme, solid
color_scheme = config_data.get(
    "COLOR_SCHEME", "default"
)  # Placeholder for color schemes
solid_color = config_data.get("SOLID_COLOR", "#FFFFFF")  # Default solid color
allowed_shapes = config_data.get(
    "ALLOWED_SHAPES", ["rectangle", "ellipse", "polygon", "triangle", "circle"]
)
wave_vibe = config_data.get("WAVE_VIBE", "calm")  # New config option for wave vibe
top_left_text_enabled = config_data.get("TOP_LEFT_TEXT_ENABLED", True)
top_left_text_mode = config_data.get(
    "TOP_LEFT_TEXT_MODE", "random"
)  # Options: random, word
words_topic = config_data.get(
    "WORDS_TOPIC", "random"
)  # Options: random, introspective, action, nature, technology
text_color = config_data.get("TEXT_COLOR", "#000000")
text_size = config_data.get("TEXT_SIZE", 0)  # 0 means auto-scale
text_position = config_data.get("TEXT_POSITION", "top-left")

# Get color schemes from config if available
color_schemes_data = config_data.get(
    "COLOR_SCHEMES",
    {
        "pastel": [
            [255, 182, 193],
            [176, 224, 230],
            [240, 230, 140],
            [221, 160, 221],
            [152, 251, 152],
        ],
        "dark_gritty": [
            [47, 79, 79],
            [105, 105, 105],
            [0, 0, 0],
            [85, 107, 47],
            [139, 69, 19],
        ],
        "nature": [
            [34, 139, 34],
            [107, 142, 35],
            [46, 139, 87],
            [32, 178, 170],
            [154, 205, 50],
        ],
        "vibrant": [
            [255, 0, 0],
            [0, 255, 0],
            [0, 0, 255],
            [255, 255, 0],
            [255, 0, 255],
        ],
        "ocean": [
            [0, 105, 148],
            [72, 209, 204],
            [70, 130, 180],
            [135, 206, 250],
            [176, 224, 230],
        ],
    },
)

# Convert color schemes from lists to tuples for PIL
color_schemes = {}
for scheme_name, colors in color_schemes_data.items():
    color_schemes[scheme_name] = [tuple(color) for color in colors]

# Default color scheme if the specified one doesn't exist
if color_scheme not in color_schemes:
    color_schemes[color_scheme] = [(128, 128, 128)]

# Vibe presets for wave sound
wave_vibes = config_data.get(
    "WAVE_VIBES",
    {
        "calm": {"frequency": 200, "amplitude": 0.3, "modulation": 0.1},
        "eerie": {"frequency": 600, "amplitude": 0.5, "modulation": 0.7},
        "random": {},  # Randomized values will be generated
        "energetic": {"frequency": 800, "amplitude": 0.7, "modulation": 0.2},
        "dreamy": {"frequency": 400, "amplitude": 0.4, "modulation": 0.5},
        "chaotic": {"frequency": 1000, "amplitude": 1.0, "modulation": 1.0},
    },
)

# Word topics
word_topics = config_data.get(
    "WORD_TOPICS",
    {
        "introspective": [
            "reflection",
            "thought",
            "solitude",
            "ponder",
            "meditation",
            "introspection",
            "awareness",
            "contemplation",
            "silence",
            "stillness",
        ],
        "action": [
            "run",
            "jump",
            "climb",
            "race",
            "fight",
            "explore",
            "build",
            "create",
            "overcome",
            "achieve",
        ],
        "nature": [
            "tree",
            "mountain",
            "river",
            "ocean",
            "flower",
            "forest",
            "animal",
            "sky",
            "valley",
            "meadow",
        ],
        "technology": [
            "computer",
            "robot",
            "network",
            "data",
            "algorithm",
            "innovation",
            "digital",
            "machine",
            "software",
            "hardware",
        ],
    },
)

# Font scaling based on video size
if text_size <= 0:
    font_size = max(w, h) // 40  # Scales font size to make it smaller and more readable
else:
    font_size = text_size

fnt = ImageFont.truetype("./FONT/sys.ttf", font_size)

files = glob.glob("./IMG/*")
for f in files:
    os.remove(f)

print("REMOVED OLD FILES")


def generate_string(
    length, charset="abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
):
    result = ""
    for i in range(length):
        result += random.choice(charset)
    return result


def generate_word(theme="random"):
    if theme == "random" or theme not in word_topics:
        if random.random() < 0.5 and len(word_topics) > 0:
            # 50% chance to use a word from a random topic
            random_topic = random.choice(list(word_topics.keys()))
            return random.choice(word_topics[random_topic])
        else:
            # Generate a random string
            return generate_string(random.randint(3, 10))
    else:
        # Use a word from the specified topic
        return random.choice(word_topics[theme])


def generate_wave_sample(x, freq, wave_type, amplitude=1.0):
    """Generate a sample for different wave types"""
    t = x / sample_rate

    if wave_type == "sine":
        return amplitude * math.sin(2 * math.pi * freq * t)
    elif wave_type == "square":
        return amplitude * (1 if math.sin(2 * math.pi * freq * t) > 0 else -1)
    elif wave_type == "triangle":
        return amplitude * (2 * abs(2 * (t * freq - math.floor(t * freq + 0.5))) - 1)
    elif wave_type == "sawtooth":
        return amplitude * (2 * (t * freq - math.floor(t * freq + 0.5)))
    elif wave_type == "noise":
        return amplitude * (random.random() * 2 - 1)
    elif wave_type == "pulse":
        return amplitude * (1 if math.sin(2 * math.pi * freq * t) > 0.7 else 0)
    elif wave_type == "harmonic":
        return amplitude * (
            math.sin(2 * math.pi * freq * t) * 0.6
            + math.sin(2 * math.pi * freq * 2 * t) * 0.3
            + math.sin(2 * math.pi * freq * 3 * t) * 0.1
        )
    else:  # Default to sawtooth
        return amplitude * (2 * (t * freq - math.floor(t * freq + 0.5)))


def append_wave(freq=None, duration_milliseconds=1000, volume=1.0):

    global audio

    vibe_params = wave_vibes.get(wave_vibe, wave_vibes["calm"])
    if wave_vibe == "random":
        freq = random.uniform(100, 1000) if freq is None else freq
        amplitude = random.uniform(0.1, 1.0)
        modulation = random.uniform(0.1, 1.0)
    else:
        base_freq = vibe_params["frequency"]
        freq = (
            random.uniform(base_freq * 0.7, base_freq * 1.3) if freq is None else freq
        )
        amplitude = vibe_params["amplitude"] * random.uniform(0.7, 1.3)
        modulation = vibe_params["modulation"] * random.uniform(0.6, 1.4)

    num_samples = duration_milliseconds * (sample_rate / 1000.0)

    for x in range(int(num_samples)):
        wave_sample = generate_wave_sample(x, freq, audio_wave_type, amplitude)
        modulated_sample = wave_sample * (
            1 + modulation * math.sin(2 * math.pi * 0.5 * x / sample_rate)
        )
        audio.append(volume * modulated_sample)
    return


def save_wav(file_name):
    wav_file = wave.open(file_name, "w")

    nchannels = 1

    sampwidth = 2

    nframes = len(audio)
    comptype = "NONE"
    compname = "not compressed"
    wav_file.setparams((nchannels, sampwidth, sample_rate, nframes, comptype, compname))

    for sample in audio:
        wav_file.writeframes(struct.pack("h", int(sample * 32767.0)))

    wav_file.close()

    return


# Generate TTS audio using different providers
def generate_tts_audio(text, output_file):
    if tts_provider == "gtts" and GTTS_AVAILABLE:
        from gtts import gTTS

        tts = gTTS(text=text, lang="en")
        tts.save(output_file)
        print(f"Google TTS audio saved to {output_file}")
        return True
    elif tts_provider == "pyttsx3" and PYTTSX3_AVAILABLE:
        import pyttsx3

        engine = pyttsx3.init()
        engine.save_to_file(text, output_file)
        engine.runAndWait()
        print(f"pyttsx3 audio saved to {output_file}")
        return True
    elif tts_provider == "coqui" and COQUI_AVAILABLE:
        try:
            from TTS.api import TTS

            tts = TTS("tts_models/en/ljspeech/tacotron2-DDC")
            tts.tts_to_file(text=text, file_path=output_file)
            print(f"Coqui TTS audio saved to {output_file}")
            return True
        except Exception as e:
            print(f"Error with Coqui TTS: {e}")
            return False
    elif tts_provider == "espeak" and ESPEAK_AVAILABLE:
        try:
            # Create a WAV file first
            wav_file = output_file.replace(".mp3", ".wav")

            # Run espeak-ng to generate the audio
            cmd = ["espeak-ng", "-w", wav_file, text]
            process = subprocess.run(cmd, capture_output=True, text=True)

            if process.returncode != 0:
                print(f"Error running espeak-ng: {process.stderr}")
                return False

            # Convert WAV to MP3 if needed
            if output_file.endswith(".mp3"):
                try:
                    # Try to use pydub for conversion
                    sound = AudioSegment.from_wav(wav_file)
                    sound.export(output_file, format="mp3")
                    # Remove the temporary WAV file
                    os.remove(wav_file)
                    print(f"espeak-ng audio saved to {output_file}")
                except Exception as e:
                    # If pydub fails, just use the WAV file
                    print(f"Warning: Could not convert WAV to MP3: {e}")
                    print(f"Using WAV file instead: {wav_file}")
                    output_file = wav_file
            else:
                # If the output file doesn't end with .mp3, we're already using the WAV file
                output_file = wav_file
                print(f"espeak-ng audio saved to {output_file}")

            return True
        except Exception as e:
            print(f"Error with espeak-ng: {e}")
            return False
    else:
        print(f"TTS provider {tts_provider} not available. Falling back to no TTS.")
        return False


if tts_enabled:
    tts_audio_file = "./SOUND/tts_output.mp3"
    tts_success = generate_tts_audio(tts_text, tts_audio_file)
    if not tts_success:
        tts_enabled = False

for xyz in range(AMOUNT):
    video_name = generate_string(6)  # Generate a consistent video name

    for i in range(LENGTH):
        img = Image.new("RGB", (w, h))

        img1 = ImageDraw.Draw(img)

        img1.rectangle([(0, 0), (w, h)], fill="white", outline="white")

        num_shapes = random.randint(min_shapes, max_shapes)
        for _ in range(num_shapes):
            shape_type = random.choice(allowed_shapes)
            x1, y1 = random.randint(0, w), random.randint(0, h)

            if deform_level == "none":
                x2, y2 = minW + (maxW - minW) // 2, minH + (maxH - minH) // 2
            elif deform_level == "low":
                x2 = random.randint(minW, minW + (maxW - minW) // 4)
                y2 = random.randint(minH, minH + (maxH - minH) // 4)
            elif deform_level == "medium":
                x2 = random.randint(minW, minW + (maxW - minW) // 2)
                y2 = random.randint(minH, minH + (maxH - minH) // 2)
            elif deform_level == "high":
                x2 = random.randint(minW, maxW)
                y2 = random.randint(minH, maxH)

            if color_mode == "random":
                color = (
                    random.randint(0, 255),
                    random.randint(0, 255),
                    random.randint(0, 255),
                )
            elif color_mode == "scheme":
                scheme_colors = color_schemes.get(color_scheme, [(128, 128, 128)])
                color = random.choice(scheme_colors)
            elif color_mode == "solid":
                try:
                    color = tuple(
                        int(solid_color.lstrip("#")[i : i + 2], 16) for i in (0, 2, 4)
                    )
                except:
                    color = (255, 255, 255)  # Default to white if invalid hex

            if shape_type == "rectangle":
                img1.rectangle(
                    [(x1, y1), (x1 + x2, y1 + y2)], fill=color, outline=color
                )
            elif shape_type == "ellipse":
                img1.ellipse([(x1, y1), (x1 + x2, y1 + y2)], fill=color, outline=color)
            elif shape_type == "polygon":
                num_points = random.randint(3, 6)
                points = [
                    (random.randint(0, w), random.randint(0, h))
                    for _ in range(num_points)
                ]
                img1.polygon(points, fill=color, outline=color)
            elif shape_type == "triangle":
                points = [
                    (x1, y1),
                    (x1 + random.randint(-x2, x2), y1 + y2),
                    (x1 + x2, y1 + random.randint(-y2, y2)),
                ]
                img1.polygon(points, fill=color, outline=color)
            elif shape_type == "circle":
                radius = min(x2, y2) // 2
                img1.ellipse(
                    [(x1 - radius, y1 - radius), (x1 + radius, y1 + radius)],
                    fill=color,
                    outline=color,
                )

        # Parse text color
        try:
            if text_color.startswith("#"):
                parsed_text_color = tuple(
                    int(text_color.lstrip("#")[i : i + 2], 16) for i in (0, 2, 4)
                )
            else:
                # Named colors (basic support)
                color_map = {
                    "black": (0, 0, 0),
                    "white": (255, 255, 255),
                    "red": (255, 0, 0),
                    "green": (0, 255, 0),
                    "blue": (0, 0, 255),
                    "yellow": (255, 255, 0),
                    "purple": (128, 0, 128),
                    "orange": (255, 165, 0),
                    "gray": (128, 128, 128),
                }
                parsed_text_color = color_map.get(text_color.lower(), (0, 0, 0))
        except:
            parsed_text_color = (0, 0, 0)  # Default to black

        if top_left_text_enabled:
            if top_left_text_mode == "random":
                random_top_left_text = generate_string(
                    30,
                    charset="abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!@#$%^&*()_+-=[]{}|;:',.<>?/",
                )
            elif top_left_text_mode == "word":
                random_top_left_text = generate_word(words_topic)
            else:
                random_top_left_text = ""

            # Position text based on text_position setting
            if (
                text_position == "top-left"
                or text_position == "random"
                and random.random() < 0.2
            ):
                img1.text(
                    (10, 10), random_top_left_text, font=fnt, fill=parsed_text_color
                )
            elif (
                text_position == "top-right"
                or text_position == "random"
                and random.random() < 0.2
            ):
                text_width = img1.textlength(random_top_left_text, font=fnt)
                img1.text(
                    (w - text_width - 10, 10),
                    random_top_left_text,
                    font=fnt,
                    fill=parsed_text_color,
                )
            elif (
                text_position == "bottom-left"
                or text_position == "random"
                and random.random() < 0.2
            ):
                img1.text(
                    (10, h - font_size - 10),
                    random_top_left_text,
                    font=fnt,
                    fill=parsed_text_color,
                )
            elif (
                text_position == "bottom-right"
                or text_position == "random"
                and random.random() < 0.2
            ):
                text_width = img1.textlength(random_top_left_text, font=fnt)
                img1.text(
                    (w - text_width - 10, h - font_size - 10),
                    random_top_left_text,
                    font=fnt,
                    fill=parsed_text_color,
                )
            elif text_position == "center" or text_position == "random":
                text_width = img1.textlength(random_top_left_text, font=fnt)
                img1.text(
                    (w // 2 - text_width // 2, h // 2 - font_size // 2),
                    random_top_left_text,
                    font=fnt,
                    fill=parsed_text_color,
                )

        # Add video name to bottom-left corner
        video_name_text = f"{video_name}.mp4"
        video_name_width = img1.textlength(video_name_text, font=fnt)
        video_name_height = font_size
        img1.text(
            (10, h - video_name_height - 10),
            video_name_text,
            font=fnt,
            fill=parsed_text_color,
        )

        # Move slide info text to the top right corner
        slide_text = f"Slide {i}"
        text_width = img1.textlength(slide_text, font=fnt)
        text_height = font_size
        img1.text(
            (w - text_width - 10, 10), slide_text, font=fnt, fill=parsed_text_color
        )

        img.save(f"./IMG/{str(i).zfill(4)}_{random.randint(1000, 9999)}.png")

    print("IMAGE GENERATION DONE")

    audio = []

    for i in range(LENGTH):
        append_wave(None, duration_milliseconds=slide_duration, volume=0.25)

    save_wav("./SOUND/output.wav")

    print("WAV GENERATED")

    wav_audio = AudioSegment.from_file("./SOUND/output.wav", format="wav")

    if tts_enabled:
        try:
            tts_audio = AudioSegment.from_file(tts_audio_file, format="mp3")
            combined_audio = wav_audio.overlay(tts_audio, position=0)
        except Exception as e:
            print(f"Error overlaying TTS audio: {e}")
            combined_audio = wav_audio
    else:
        combined_audio = wav_audio

    combined_audio.export("./SOUND/output.m4a", format="adts")

    print("AUDIO GENERATED")

    image_folder = "./IMG"
    fps = 1000 / slide_duration  # Ensure fps is precise to handle timing discrepancies

    image_files = sorted(
        [f for f in glob.glob(f"{image_folder}/*.png")],
        key=lambda x: int(os.path.basename(x).split("_")[0]),
    )

    # Ensure all frames have the same dimensions
    frames = []
    first_frame = np.array(Image.open(image_files[0]))
    for idx, file in enumerate(image_files):
        frame = np.array(Image.open(file))
        if frame.shape != first_frame.shape:
            print(
                f"Frame {idx} has inconsistent dimensions: {frame.shape} vs {first_frame.shape}"
            )
            frame = np.resize(frame, first_frame.shape)  # Resize if necessary
        frames.append(frame)

    print("Starting video compilation...")
    clip = moviepy.video.io.ImageSequenceClip.ImageSequenceClip(frames, fps=fps)
    clip.write_videofile(
        f"./OUTPUT/{video_name}.mp4",
        audio="./SOUND/output.m4a",
        codec="libx264",
        audio_codec="aac",
    )

    print("Video compilation finished successfully!")