import random
from PIL import Image, ImageDraw, ImageFont
import math
import wave
import struct
from pydub import AudioSegment
import os
import moviepy.video.io.ImageSequenceClip
import glob
import json
import numpy as np
import importlib.util
import sys

# Check for TTS libraries
GTTS_AVAILABLE = importlib.util.find_spec("gtts") is not None
PYTTSX3_AVAILABLE = importlib.util.find_spec("pyttsx3") is not None
COQUI_AVAILABLE = importlib.util.find_spec("TTS") is not None

# Check for espeak-ng
try:
    import subprocess
    import platform
    if platform.system() == "Windows":
        # On Windows, check whether the command is on PATH with "where"
        result = subprocess.run(["where", "espeak-ng"], capture_output=True, text=True)
    else:
        # On Linux/macOS, use "which"
        result = subprocess.run(["which", "espeak-ng"], capture_output=True, text=True)
    ESPEAK_AVAILABLE = result.returncode == 0
except Exception as e:
    print(f"Error checking espeak-ng: {e}")
    ESPEAK_AVAILABLE = False

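# Minimal helper for reading/writing "<name>.json" files (used for config.json below).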
class JSON:
    @staticmethod
    def read(name):
        with open(f"{name}.json", "r", encoding="utf8") as f:
            return json.load(f, strict=False)

    @staticmethod
    def dump(name, data):
        with open(f"{name}.json", "w", encoding="utf8") as f:
            json.dump(data, f, indent=4)


config_data = JSON.read("config")

# SETTINGS #
w = config_data["WIDTH"]
h = config_data["HEIGHT"]
maxW = config_data["MAX_WIDTH"]
maxH = config_data["MAX_HEIGHT"]
minW = config_data["MIN_WIDTH"]
minH = config_data["MIN_HEIGHT"]
LENGTH = config_data["SLIDES"]
AMOUNT = config_data["VIDEOS"]
min_shapes = config_data["MIN_SHAPES"]
max_shapes = config_data["MAX_SHAPES"]
sample_rate = config_data["SOUND_QUALITY"]
tts_enabled = config_data.get("TTS_ENABLED", False)
tts_text = config_data.get("TTS_TEXT", "This is a default text for TTS.")
tts_provider = config_data.get("TTS_PROVIDER", "gtts")  # Options: gtts, pyttsx3, coqui, espeak
audio_wave_type = config_data.get("AUDIO_WAVE_TYPE", "sawtooth")  # Options: sawtooth, sine, square, triangle, noise, pulse, harmonic
slide_duration = config_data.get("SLIDE_DURATION", 1000)  # Duration in milliseconds
deform_level = config_data.get("DEFORM_LEVEL", "none")  # Options: none, low, medium, high
color_mode = config_data.get("COLOR_MODE", "random")  # Options: random, scheme, solid
color_scheme = config_data.get("COLOR_SCHEME", "default")  # Key into COLOR_SCHEMES below
solid_color = config_data.get("SOLID_COLOR", "#FFFFFF")  # Hex color used when COLOR_MODE is "solid"
allowed_shapes = config_data.get("ALLOWED_SHAPES", ["rectangle", "ellipse", "polygon", "triangle", "circle"])
wave_vibe = config_data.get("WAVE_VIBE", "calm")  # Key into WAVE_VIBES below
top_left_text_enabled = config_data.get("TOP_LEFT_TEXT_ENABLED", True)
top_left_text_mode = config_data.get("TOP_LEFT_TEXT_MODE", "random")  # Options: random, word
words_topic = config_data.get("WORDS_TOPIC", "random")  # Options: random, introspective, action, nature, technology
text_color = config_data.get("TEXT_COLOR", "#000000")
text_size = config_data.get("TEXT_SIZE", 0)  # 0 means auto-scale
text_position = config_data.get("TEXT_POSITION", "top-left")  # Options: top-left, top-right, bottom-left, bottom-right, center, random

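# Illustrative config.json (values here are assumptions, not shipped defaults;
# the keys read without .get() fallbacks above are required):
#
# {
#     "WIDTH": 640, "HEIGHT": 480,
#     "MAX_WIDTH": 200, "MAX_HEIGHT": 200,
#     "MIN_WIDTH": 20, "MIN_HEIGHT": 20,
#     "SLIDES": 10, "VIDEOS": 1,
#     "MIN_SHAPES": 3, "MAX_SHAPES": 8,
#     "SOUND_QUALITY": 44100,
#     "TTS_ENABLED": false,
#     "AUDIO_WAVE_TYPE": "sawtooth",
#     "SLIDE_DURATION": 1000,
#     "DEFORM_LEVEL": "none",
#     "COLOR_MODE": "random"
# }
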
# Get color schemes from config if available
color_schemes_data = config_data.get("COLOR_SCHEMES", {
    "pastel": [[255, 182, 193], [176, 224, 230], [240, 230, 140], [221, 160, 221], [152, 251, 152]],
    "dark_gritty": [[47, 79, 79], [105, 105, 105], [0, 0, 0], [85, 107, 47], [139, 69, 19]],
    "nature": [[34, 139, 34], [107, 142, 35], [46, 139, 87], [32, 178, 170], [154, 205, 50]],
    "vibrant": [[255, 0, 0], [0, 255, 0], [0, 0, 255], [255, 255, 0], [255, 0, 255]],
    "ocean": [[0, 105, 148], [72, 209, 204], [70, 130, 180], [135, 206, 250], [176, 224, 230]]
})

# Convert color schemes from lists to tuples for PIL
color_schemes = {}
for scheme_name, colors in color_schemes_data.items():
    color_schemes[scheme_name] = [tuple(color) for color in colors]

# Fall back to a single gray if the specified scheme doesn't exist
if color_scheme not in color_schemes:
    color_schemes[color_scheme] = [(128, 128, 128)]

# Vibe presets for the generated wave sound
wave_vibes = config_data.get("WAVE_VIBES", {
    "calm": {"frequency": 200, "amplitude": 0.3, "modulation": 0.1},
    "eerie": {"frequency": 600, "amplitude": 0.5, "modulation": 0.7},
    "random": {},  # Parameters are randomized per call
    "energetic": {"frequency": 800, "amplitude": 0.7, "modulation": 0.2},
    "dreamy": {"frequency": 400, "amplitude": 0.4, "modulation": 0.5},
    "chaotic": {"frequency": 1000, "amplitude": 1.0, "modulation": 1.0}
})

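# Each vibe maps to a base frequency in Hz, a peak amplitude in [0, 1], and a
# modulation depth for the slow (0.5 Hz) amplitude wobble applied in append_wave().
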
# Word topics
word_topics = config_data.get("WORD_TOPICS", {
    "introspective": ["reflection", "thought", "solitude", "ponder", "meditation", "introspection", "awareness", "contemplation", "silence", "stillness"],
    "action": ["run", "jump", "climb", "race", "fight", "explore", "build", "create", "overcome", "achieve"],
    "nature": ["tree", "mountain", "river", "ocean", "flower", "forest", "animal", "sky", "valley", "meadow"],
    "technology": ["computer", "robot", "network", "data", "algorithm", "innovation", "digital", "machine", "software", "hardware"]
})

# Font scaling based on video size
if text_size <= 0:
    font_size = max(w, h) // 40  # Auto-scale: keeps text small and readable relative to the frame
else:
    font_size = text_size

fnt = ImageFont.truetype("./FONT/sys.ttf", font_size)

# Clear out frames left over from previous runs
files = glob.glob('./IMG/*')
for f in files:
    os.remove(f)

print("REMOVED OLD FILES")

def generate_string(length, charset="abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"):
    return "".join(random.choice(charset) for _ in range(length))

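# e.g. generate_string(6) might return "aB3kZ9" (illustrative output only).
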
def generate_word(theme="random"):
    if theme == "random" or theme not in word_topics:
        if random.random() < 0.5 and len(word_topics) > 0:
            # 50% chance to use a word from a random topic
            random_topic = random.choice(list(word_topics.keys()))
            return random.choice(word_topics[random_topic])
        else:
            # Generate a random string
            return generate_string(random.randint(3, 10))
    else:
        # Use a word from the specified topic
        return random.choice(word_topics[theme])

def generate_wave_sample(x, freq, wave_type, amplitude=1.0):
    """Generate a single sample (in [-amplitude, amplitude]) for the given wave type."""
    t = x / sample_rate

    if wave_type == "sine":
        return amplitude * math.sin(2 * math.pi * freq * t)
    elif wave_type == "square":
        return amplitude * (1 if math.sin(2 * math.pi * freq * t) > 0 else -1)
    elif wave_type == "triangle":
        return amplitude * (2 * abs(2 * (t * freq - math.floor(t * freq + 0.5))) - 1)
    elif wave_type == "sawtooth":
        return amplitude * (2 * (t * freq - math.floor(t * freq + 0.5)))
    elif wave_type == "noise":
        return amplitude * (random.random() * 2 - 1)
    elif wave_type == "pulse":
        return amplitude * (1 if math.sin(2 * math.pi * freq * t) > 0.7 else 0)
    elif wave_type == "harmonic":
        # Fundamental plus two weaker overtones
        return amplitude * (
            math.sin(2 * math.pi * freq * t) * 0.6 +
            math.sin(2 * math.pi * freq * 2 * t) * 0.3 +
            math.sin(2 * math.pi * freq * 3 * t) * 0.1
        )
    else:  # Default to sawtooth
        return amplitude * (2 * (t * freq - math.floor(t * freq + 0.5)))

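# Worked example: with sample_rate 44100 and freq 200, sample x = 110 gives
# t ~= 0.0025 s, so t * freq ~= 0.5 and the sawtooth sits at the top of its
# ramp (~ +1) just before wrapping back to -1.
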
def append_wave(freq=None, duration_milliseconds=1000, volume=1.0):
    global audio

    vibe_params = wave_vibes.get(wave_vibe, wave_vibes["calm"])
    if wave_vibe == "random":
        freq = random.uniform(100, 1000) if freq is None else freq
        amplitude = random.uniform(0.1, 1.0)
        modulation = random.uniform(0.1, 1.0)
    else:
        base_freq = vibe_params["frequency"]
        freq = random.uniform(base_freq * 0.7, base_freq * 1.3) if freq is None else freq
        amplitude = vibe_params["amplitude"] * random.uniform(0.7, 1.3)
        modulation = vibe_params["modulation"] * random.uniform(0.6, 1.4)

    num_samples = duration_milliseconds * (sample_rate / 1000.0)

    for x in range(int(num_samples)):
        wave_sample = generate_wave_sample(x, freq, audio_wave_type, amplitude)
        # Slow amplitude wobble at 0.5 Hz, scaled by the vibe's modulation depth
        modulated_sample = wave_sample * (1 + modulation * math.sin(2 * math.pi * 0.5 * x / sample_rate))
        audio.append(volume * modulated_sample)
    return

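# Samples accumulate in the global `audio` list as floats roughly in [-1, 1];
# save_wav() below clamps and scales them to 16-bit PCM.
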
def save_wav(file_name):
    # Mono, 16-bit PCM
    nchannels = 1
    sampwidth = 2
    nframes = len(audio)
    comptype = "NONE"
    compname = "not compressed"

    with wave.open(file_name, "w") as wav_file:
        wav_file.setparams((nchannels, sampwidth, sample_rate, nframes, comptype, compname))
        for sample in audio:
            # Clamp before packing: modulation can push samples slightly past
            # +/-1, which would overflow the signed 16-bit range
            clamped = max(-1.0, min(1.0, sample))
            wav_file.writeframes(struct.pack('h', int(clamped * 32767.0)))
    return

# Generate TTS audio using the configured provider
def generate_tts_audio(text, output_file):
    if tts_provider == "gtts" and GTTS_AVAILABLE:
        from gtts import gTTS
        tts = gTTS(text=text, lang='en')
        tts.save(output_file)
        print(f"Google TTS audio saved to {output_file}")
        return True
    elif tts_provider == "pyttsx3" and PYTTSX3_AVAILABLE:
        import pyttsx3
        engine = pyttsx3.init()
        engine.save_to_file(text, output_file)
        engine.runAndWait()
        print(f"pyttsx3 audio saved to {output_file}")
        return True
    elif tts_provider == "coqui" and COQUI_AVAILABLE:
        try:
            from TTS.api import TTS
            tts = TTS("tts_models/en/ljspeech/tacotron2-DDC")
            tts.tts_to_file(text=text, file_path=output_file)
            print(f"Coqui TTS audio saved to {output_file}")
            return True
        except Exception as e:
            print(f"Error with Coqui TTS: {e}")
            return False
    elif tts_provider == "espeak" and ESPEAK_AVAILABLE:
        try:
            # espeak-ng writes WAV, so synthesize to a WAV file first
            wav_file = output_file.replace(".mp3", ".wav")

            # Run espeak-ng to generate the audio
            cmd = ["espeak-ng", "-w", wav_file, text]
            process = subprocess.run(cmd, capture_output=True, text=True)

            if process.returncode != 0:
                print(f"Error running espeak-ng: {process.stderr}")
                return False

            # Convert WAV to MP3 if an .mp3 path was requested
            if output_file.endswith(".mp3"):
                try:
                    # Try to use pydub for conversion
                    sound = AudioSegment.from_wav(wav_file)
                    sound.export(output_file, format="mp3")
                    os.remove(wav_file)  # Remove the temporary WAV file
                    print(f"espeak-ng audio saved to {output_file}")
                except Exception as e:
                    # If pydub fails, fall back to the WAV file.  Note this only
                    # rebinds the local name: the caller still looks for the .mp3
                    # path, and the overlay step below tolerates the missing file.
                    print(f"Warning: Could not convert WAV to MP3: {e}")
                    print(f"Using WAV file instead: {wav_file}")
                    output_file = wav_file
            else:
                # A non-.mp3 path was requested, so the WAV file is already in place
                output_file = wav_file
                print(f"espeak-ng audio saved to {output_file}")

            return True
        except Exception as e:
            print(f"Error with espeak-ng: {e}")
            return False
    else:
        print(f"TTS provider {tts_provider} not available. Falling back to no TTS.")
        return False

if tts_enabled:
    tts_audio_file = "./SOUND/tts_output.mp3"
    tts_success = generate_tts_audio(tts_text, tts_audio_file)
    if not tts_success:
        tts_enabled = False

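# Per-video pipeline: draw SLIDES frames into ./IMG, synthesize a matching
# soundtrack in ./SOUND, then stitch frames + audio into ./OUTPUT/<name>.mp4.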
for xyz in range(AMOUNT):
    # Clear frames left over from the previous video so they don't get swept
    # into this one's image glob
    for f in glob.glob('./IMG/*'):
        os.remove(f)

    video_name = generate_string(6)  # Consistent name used for the file and the on-frame label

    for i in range(LENGTH):
        img = Image.new("RGB", (w, h))
        img1 = ImageDraw.Draw(img)
        img1.rectangle([(0, 0), (w, h)], fill="white", outline="white")

        num_shapes = random.randint(min_shapes, max_shapes)
        for _ in range(num_shapes):
            shape_type = random.choice(allowed_shapes)
            x1, y1 = random.randint(0, w), random.randint(0, h)

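            # (x1, y1) anchors the shape; (x2, y2) are its extents, sized by DEFORM_LEVEL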
            if deform_level == "none":
                x2, y2 = minW + (maxW - minW) // 2, minH + (maxH - minH) // 2
            elif deform_level == "low":
                x2 = random.randint(minW, minW + (maxW - minW) // 4)
                y2 = random.randint(minH, minH + (maxH - minH) // 4)
            elif deform_level == "medium":
                x2 = random.randint(minW, minW + (maxW - minW) // 2)
                y2 = random.randint(minH, minH + (maxH - minH) // 2)
            elif deform_level == "high":
                x2 = random.randint(minW, maxW)
                y2 = random.randint(minH, maxH)
            else:
                # Unknown DEFORM_LEVEL: fall back to the fixed "none" extents
                # instead of leaving x2/y2 undefined
                x2, y2 = minW + (maxW - minW) // 2, minH + (maxH - minH) // 2

            if color_mode == "random":
                color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
            elif color_mode == "scheme":
                scheme_colors = color_schemes.get(color_scheme, [(128, 128, 128)])
                color = random.choice(scheme_colors)
            elif color_mode == "solid":
                try:
                    color = tuple(int(solid_color.lstrip("#")[i:i + 2], 16) for i in (0, 2, 4))
                except ValueError:
                    color = (255, 255, 255)  # Default to white if the hex string is invalid
            else:
                color = (128, 128, 128)  # Unknown COLOR_MODE: fall back to gray

            if shape_type == "rectangle":
                img1.rectangle([(x1, y1), (x1 + x2, y1 + y2)], fill=color, outline=color)
            elif shape_type == "ellipse":
                img1.ellipse([(x1, y1), (x1 + x2, y1 + y2)], fill=color, outline=color)
            elif shape_type == "polygon":
                num_points = random.randint(3, 6)
                points = [(random.randint(0, w), random.randint(0, h)) for _ in range(num_points)]
                img1.polygon(points, fill=color, outline=color)
            elif shape_type == "triangle":
                points = [
                    (x1, y1),
                    (x1 + random.randint(-x2, x2), y1 + y2),
                    (x1 + x2, y1 + random.randint(-y2, y2))
                ]
                img1.polygon(points, fill=color, outline=color)
            elif shape_type == "circle":
                radius = min(x2, y2) // 2
                img1.ellipse([(x1 - radius, y1 - radius), (x1 + radius, y1 + radius)], fill=color, outline=color)

        # Parse text color
        try:
            if text_color.startswith("#"):
                parsed_text_color = tuple(int(text_color.lstrip("#")[i:i + 2], 16) for i in (0, 2, 4))
            else:
                # Named colors (basic support)
                color_map = {
                    "black": (0, 0, 0),
                    "white": (255, 255, 255),
                    "red": (255, 0, 0),
                    "green": (0, 255, 0),
                    "blue": (0, 0, 255),
                    "yellow": (255, 255, 0),
                    "purple": (128, 0, 128),
                    "orange": (255, 165, 0),
                    "gray": (128, 128, 128)
                }
                parsed_text_color = color_map.get(text_color.lower(), (0, 0, 0))
        except ValueError:
            parsed_text_color = (0, 0, 0)  # Default to black on a malformed hex string

        if top_left_text_enabled:
            if top_left_text_mode == "random":
                random_top_left_text = generate_string(30, charset="abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!@#$%^&*()_+-=[]{}|;:',.<>?/")
            elif top_left_text_mode == "word":
                random_top_left_text = generate_word(words_topic)
            else:
                random_top_left_text = ""  # Unknown mode: draw nothing rather than crash
        else:
            random_top_left_text = ""

        # Position text based on the text_position setting; in "random" mode each
        # branch is tried in order with probability 0.2, with center as the fallback
        if text_position == "top-left" or (text_position == "random" and random.random() < 0.2):
            img1.text((10, 10), random_top_left_text, font=fnt, fill=parsed_text_color)
        elif text_position == "top-right" or (text_position == "random" and random.random() < 0.2):
            text_width = img1.textlength(random_top_left_text, font=fnt)
            img1.text((w - text_width - 10, 10), random_top_left_text, font=fnt, fill=parsed_text_color)
        elif text_position == "bottom-left" or (text_position == "random" and random.random() < 0.2):
            img1.text((10, h - font_size - 10), random_top_left_text, font=fnt, fill=parsed_text_color)
        elif text_position == "bottom-right" or (text_position == "random" and random.random() < 0.2):
            text_width = img1.textlength(random_top_left_text, font=fnt)
            img1.text((w - text_width - 10, h - font_size - 10), random_top_left_text, font=fnt, fill=parsed_text_color)
        elif text_position == "center" or text_position == "random":
            text_width = img1.textlength(random_top_left_text, font=fnt)
            img1.text((w // 2 - text_width // 2, h // 2 - font_size // 2), random_top_left_text, font=fnt, fill=parsed_text_color)

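        # Because the probabilities are evaluated sequentially, "random" placement
        # is biased: top-left 20%, top-right 16%, bottom-left ~12.8%, bottom-right
        # ~10.2%, and the remaining ~41% lands in the center.
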
        # Add the video name to the bottom-left corner
        video_name_text = f"{video_name}.mp4"
        video_name_height = font_size
        img1.text((10, h - video_name_height - 10), video_name_text, font=fnt, fill=parsed_text_color)

        # Slide index in the top-right corner
        slide_text = f"Slide {i}"
        text_width = img1.textlength(slide_text, font=fnt)
        img1.text((w - text_width - 10, 10), slide_text, font=fnt, fill=parsed_text_color)

        # Zero-padded index prefix keeps frames sortable; the random suffix avoids collisions
        img.save(f"./IMG/{str(i).zfill(4)}_{random.randint(1000, 9999)}.png")

print("IMAGE GENERATION DONE")
|
|
|
|
audio = []
|
|
|
|
for i in range(LENGTH):
|
|
append_wave(None, duration_milliseconds=slide_duration, volume=0.25)
|
|
|
|
save_wav("./SOUND/output.wav")
|
|
|
|
print("WAV GENERATED")
|
|
|
|
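    # Timing note: SLIDES x SLIDE_DURATION ms of audio against SLIDES frames at
    # fps = 1000 / SLIDE_DURATION gives identical durations by construction.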
    wav_audio = AudioSegment.from_file("./SOUND/output.wav", format="wav")

    if tts_enabled:
        try:
            tts_audio = AudioSegment.from_file(tts_audio_file, format="mp3")
            combined_audio = wav_audio.overlay(tts_audio, position=0)
        except Exception as e:
            print(f"Error overlaying TTS audio: {e}")
            combined_audio = wav_audio
    else:
        combined_audio = wav_audio

    combined_audio.export("./SOUND/output.m4a", format="adts")

    print("AUDIO GENERATED")

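    # format="adts" writes a raw AAC stream (ADTS framing) despite the .m4a
    # extension; ffmpeg reads it without complaint when muxing below.
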
    image_folder = './IMG'
    fps = 1000 / slide_duration  # Exact fps keeps frame timing aligned with the audio

    image_files = sorted([f for f in glob.glob(f"{image_folder}/*.png")], key=lambda x: int(os.path.basename(x).split('_')[0]))

    # Ensure all frames have the same dimensions
    frames = []
    first_frame = np.array(Image.open(image_files[0]))
    for idx, file in enumerate(image_files):
        image = Image.open(file)
        frame = np.array(image)
        if frame.shape != first_frame.shape:
            print(f"Frame {idx} has inconsistent dimensions: {frame.shape} vs {first_frame.shape}")
            # Resize with PIL; np.resize would tile/truncate raw samples rather
            # than scale the image
            frame = np.array(image.resize((first_frame.shape[1], first_frame.shape[0])))
        frames.append(frame)

print("Starting video compilation...")
|
|
clip = moviepy.video.io.ImageSequenceClip.ImageSequenceClip(
|
|
frames, fps=fps
|
|
)
|
|
clip.write_videofile(
|
|
f'./OUTPUT/{video_name}.mp4',
|
|
audio="./SOUND/output.m4a",
|
|
codec="libx264",
|
|
audio_codec="aac"
|
|
)
|
|
|
|
print("Video compilation finished successfully!")
|