fix: Improve handling of multimodal message content in get_ai_response function

2025-05-27 23:07:31 -06:00 · 2025-05-27 23:07:31 -06:00 · bb745160ac
commit bb745160ac
parent cc9666f4e3
1 changed file with 18 additions and 5 deletions
--- a/gurt/api.py
+++ b/gurt/api.py
@@ -804,13 +804,19 @@ async def get_ai_response(cog: 'GurtCog', message: discord.Message, model_name:
        # The current message is already included in conversation_context_messages
        for msg in conversation_context_messages:
            role = "assistant" if msg.get('author', {}).get('id') == str(cog.bot.user.id) else "user" # Use get for safety
+            parts: List[types.Part] = [] # Initialize parts for each message

            # Handle potential multimodal content in history (if stored that way)
            if isinstance(msg.get("content"), list):
-                 parts = [types.Part(text=part["text"]) if part["type"] == "text" else types.Part(uri=part["image_url"]["url"], mime_type=part["image_url"]["url"].split(";")[0].split(":")[1]) if part["type"] == "image_url" else None for part in msg["content"]]
-                 parts = [p for p in parts if p] # Filter out None parts
-                 if parts:
-                     contents.append(types.Content(role=role, parts=parts))
+                 # If content is already a list of parts, process them
+                 for part_data in msg["content"]:
+                     if part_data["type"] == "text":
+                         parts.append(types.Part(text=part_data["text"]))
+                     elif part_data["type"] == "image_url":
+                         # Assuming image_url has 'url' and 'mime_type'
+                         parts.append(types.Part(uri=part_data["image_url"]["url"], mime_type=part_data["image_url"]["url"].split(";")[0].split(":")[1]))
+                 # Filter out None parts if any were conditionally added
+                 parts = [p for p in parts if p]
            # Combine text, embeds, and attachments for history messages
            elif isinstance(msg.get("content"), str) or msg.get("embed_content") or msg.get("attachment_descriptions"):
                 text_parts = []
@@ -917,7 +923,14 @@ async def get_ai_response(cog: 'GurtCog', message: discord.Message, model_name:
                     author_identifier_string = f"{final_display_part}{username_part_str}"
                     # Append the text part to the existing parts list for this message
                     parts.append(types.Part(text=f"{author_identifier_string}: {full_text}"))
-                     contents.append(types.Content(role=role, parts=parts)) # Add the content with all parts
+            
+            # Only append to contents if there are parts to add for this message
+            if parts:
+                contents.append(types.Content(role=role, parts=parts))
+            else:
+                # If no parts were generated (e.g., empty message, or only unsupported content),
+                # log a warning and skip adding this message to contents.
+                print(f"Warning: Skipping message from history (ID: {msg.get('id')}) as no valid parts were generated.")


        # --- Prepare the current message content (potentially multimodal) ---