feat: Enhance image MIME type detection for improved compatibility

This commit is contained in:
Slipstream 2025-05-29 11:56:10 -06:00
parent 850e933b51
commit d7e64fdf4b
Signed by: slipstream
GPG Key ID: 13E498CE010AC6FD

View File

@ -1,6 +1,7 @@
from collections import deque from collections import deque
import ssl import ssl
import certifi import certifi
import imghdr # Added for image MIME type detection from raw bytes (NOTE: imghdr is deprecated since Python 3.11 and removed in 3.13, see PEP 594)
from .config import CONTEXT_WINDOW_SIZE from .config import CONTEXT_WINDOW_SIZE
@ -1887,13 +1888,35 @@ async def generate_image_description(
print(f"Failed to download image from {image_url}. Status: {response.status}") print(f"Failed to download image from {image_url}. Status: {response.status}")
return None return None
image_bytes = await response.read() image_bytes = await response.read()
# Validate MIME type (optional, but good practice for Gemini)
# Attempt to infer MIME type from bytes
inferred_type = imghdr.what(None, h=image_bytes)
inferred_mime_type = None
if inferred_type == 'png':
inferred_mime_type = 'image/png'
elif inferred_type == 'jpeg':
inferred_mime_type = 'image/jpeg'
elif inferred_type == 'gif':
inferred_mime_type = 'image/gif'
# WebP is not mapped from the imghdr result above (imghdr only reports 'webp' on Python 3.5+), so detect it from the RIFF/WEBP magic bytes
elif image_bytes.startswith(b'RIFF') and b'WEBP' in image_bytes[:12]:
inferred_mime_type = 'image/webp'
# Add other types as needed
# Prefer the MIME type inferred from the bytes whenever one was inferred and it differs from the provided mime_type
final_mime_type = mime_type.split(';')[0].lower() # Start with provided clean mime
if inferred_mime_type and inferred_mime_type != final_mime_type:
print(f"MIME type mismatch: Provided '{final_mime_type}', Inferred '{inferred_mime_type}'. Using inferred.")
final_mime_type = inferred_mime_type
elif not inferred_mime_type and final_mime_type == "application/octet-stream":
print(f"Warning: Could not infer specific MIME type from bytes. Using provided generic '{final_mime_type}'.")
# Validate against known supported image types for Gemini
supported_image_mimes = ["image/png", "image/jpeg", "image/webp", "image/heic", "image/heif", "image/gif"] supported_image_mimes = ["image/png", "image/jpeg", "image/webp", "image/heic", "image/heif", "image/gif"]
clean_mime_type = mime_type.split(';')[0].lower() if final_mime_type not in supported_image_mimes:
if clean_mime_type not in supported_image_mimes: print(f"Warning: Final image MIME type '{final_mime_type}' from {image_url} is not explicitly supported by Gemini. Proceeding anyway.")
print(f"Warning: Image MIME type '{clean_mime_type}' from {image_url} might not be fully supported for description. Proceeding anyway.")
# Fallback to a generic type if not in list, or handle error print(f"Using final MIME type '{final_mime_type}' for image part.")
# For now, we'll proceed with the provided mime_type
# 2. Prepare contents for AI # 2. Prepare contents for AI
# Ensure item_name is escaped if it contains characters that could break the prompt string. # Ensure item_name is escaped if it contains characters that could break the prompt string.