feat: Enhance GIF captioning with text wrapping and dynamic height adjustment

This commit is contained in:
Slipstream 2025-05-20 21:06:38 -06:00
parent 8de629ca36
commit 32eecffdd0
Signed by: slipstream
GPG Key ID: 13E498CE010AC6FD

View File

@ -5,10 +5,19 @@ from PIL import Image, ImageDraw, ImageFont, ImageSequence
import requests
import io
import os
import textwrap # Import textwrap for text wrapping
class CaptionCog(commands.Cog, name="Caption"):
"""Cog for captioning GIFs"""
# Define constants for magic numbers
CAPTION_PADDING = 10
DEFAULT_GIF_DURATION = 100
MIN_FONT_SIZE = 10
MAX_FONT_SIZE = 50
TEXT_COLOR = (0, 0, 0) # Black text
BAR_COLOR = (255, 255, 255) # White bar
def __init__(self, bot):
self.bot = bot
# Define preferred font names/paths
@ -47,47 +56,70 @@ class CaptionCog(commands.Cog, name="Caption"):
# Adjust font size for default font if necessary, as it might render differently.
# This might require re-calculating text_width and text_height if default font is used.
text_color = (0, 0, 0) # Black text
bar_color = (255, 255, 255) # White bar
# Calculate max text width based on image width and padding
max_text_width = gif.width - (2 * self.CAPTION_PADDING)
# Calculate text size and bar height
# Create a dummy draw object to measure text
# Wrap text based on max width
# Estimate characters per line based on font size and image width
# This is a heuristic and might need adjustment based on the font
estimated_char_width = font_size * 0.6
if estimated_char_width == 0: # Avoid division by zero if font_size is somehow 0
estimated_char_width = 1
chars_per_line = int(max_text_width / estimated_char_width)
if chars_per_line <= 0: # Ensure at least one character per line
chars_per_line = 1
wrapped_text = textwrap.wrap(caption_text, width=chars_per_line)
# Calculate total text height and bar height
# Create a dummy draw object to measure text height per line
dummy_image = Image.new("RGB", (1, 1))
dummy_draw = ImageDraw.Draw(dummy_image)
# Pillow >= 8.0.0 provides ImageDraw.textbbox; prefer it (textsize was removed in Pillow 10.0.0)
if hasattr(dummy_draw, 'textbbox'):
text_bbox = dummy_draw.textbbox((0, 0), caption_text, font=font)
text_width = text_bbox[2] - text_bbox[0]
text_height = text_bbox[3] - text_bbox[1]
else: # For older Pillow versions, use textsize (deprecated)
text_width, text_height = dummy_draw.textsize(caption_text, font=font)
bar_height = text_height + 20 # Add some padding (10px top, 10px bottom)
line_heights = []
for line in wrapped_text:
if hasattr(dummy_draw, 'textbbox'):
text_bbox = dummy_draw.textbbox((0, 0), line, font=font)
line_heights.append(text_bbox[3] - text_bbox[1])
else: # For older Pillow versions, use textsize (deprecated)
line_heights.append(dummy_draw.textsize(line, font=font)[1])
total_text_height = sum(line_heights)
bar_height = total_text_height + (2 * self.CAPTION_PADDING)
for frame in ImageSequence.Iterator(gif):
frame = frame.convert("RGBA")
# Create a new image for the frame with space for the text bar
new_frame_width = frame.width
new_frame_height = frame.height + bar_height
new_frame = Image.new("RGBA", (new_frame_width, new_frame_height), (0,0,0,0)) # Transparent background for the new area
# Draw the white bar
draw = ImageDraw.Draw(new_frame)
draw.rectangle([(0, 0), (new_frame_width, bar_height)], fill=bar_color)
draw.rectangle([(0, 0), (new_frame_width, bar_height)], fill=self.BAR_COLOR)
# Paste the original frame below the bar
new_frame.paste(frame, (0, bar_height))
# Calculate text position (centered in the bar)
text_x = (new_frame_width - text_width) / 2
text_y = (bar_height - text_height) / 2
# Add text to the bar
draw.text((text_x, text_y), caption_text, font=font, fill=text_color)
# Add wrapped text to the bar
text_y_offset = self.CAPTION_PADDING
for line in wrapped_text:
# Calculate text position (centered in the bar horizontally)
if hasattr(draw, 'textbbox'):
line_width = draw.textbbox((0, 0), line, font=font)[2] - draw.textbbox((0, 0), line, font=font)[0]
line_height = draw.textbbox((0, 0), line, font=font)[3] - draw.textbbox((0, 0), line, font=font)[1]
else: # For older Pillow versions, use textsize (deprecated)
line_width, line_height = draw.textsize(line, font=font)
text_x = (new_frame_width - line_width) / 2
draw.text((text_x, text_y_offset), line, font=font, fill=self.TEXT_COLOR)
text_y_offset += line_height
# Reduce colors to optimize GIF and ensure compatibility
new_frame_alpha = new_frame.getchannel('A')
new_frame = new_frame.convert("RGB").convert("P", palette=Image.ADAPTIVE, colors=255)
@ -96,7 +128,7 @@ class CaptionCog(commands.Cog, name="Caption"):
new_frame.info['transparency'] = gif.info['transparency'] # Preserve transparency if present
# Masking might be needed here if the original GIF had complex transparency
# For simplicity, we assume simple transparency or opaque.
# If issues arise, more complex alpha compositing might be needed before converting to "P"
# If issues arise, more complex alpha compositing might be needed before converting to "P"
frames.append(new_frame)
@ -106,7 +138,7 @@ class CaptionCog(commands.Cog, name="Caption"):
format="GIF",
save_all=True,
append_images=frames[1:],
duration=gif.info.get("duration", 100), # Use original duration, default to 100ms
duration=gif.info.get("duration", self.DEFAULT_GIF_DURATION), # Use original duration, default to constant
loop=gif.info.get("loop", 0), # Use original loop count, default to infinite
transparency=gif.info.get("transparency", None), # Preserve transparency
disposal=2 # Important for GIFs with transparency and animation