Merge work into master

2025-06-07 00:13:59 +00:00 · 2025-06-07 00:13:59 +00:00 · 0a9eee97e6
commit 0a9eee97e6
parent 07e2beecf6 1ee79a9576
1 changed files with 80 additions and 3 deletions
--- a/cogs/aimod_cog.py
+++ b/cogs/aimod_cog.py
@ -18,6 +18,7 @@ import shutil  # For backing up files
 from typing import Optional, List, Dict, Any, Tuple  # For type hinting
 import asyncio
 import aiofiles
 import re
 # Google Generative AI Imports (using Vertex AI backend)
 from google import genai
@ -446,6 +447,55 @@ class AIModerationCog(commands.Cog):
            print(f"Error processing video: {e}")
            return None, None
    async def process_url_attachment(
        self, url: str
    ) -> Tuple[Optional[str], Optional[bytes], Optional[str], Optional[str]]:
        """Download a directly linked image, GIF, or video.

        The attachment type is chosen from the file extension of the URL's
        *path* component, so a query string or ``#fragment`` never influences
        the classification or the returned filename.

        Args:
            url: Link taken from message text; may be wrapped in ``<`` ``>``.

        Returns:
            ``(mime_type, data, attachment_type, filename)`` on success, where
            ``attachment_type`` is ``"image"``, ``"gif"`` or ``"video"``.
            ``(None, None, None, None)`` when the extension is not in any of
            the cog's extension lists, the URL cannot be parsed, the server
            does not answer with status 200, or the request raises.
        """
        import mimetypes
        from urllib.parse import urlsplit

        failure = (None, None, None, None)
        try:
            cleaned_url = url.strip("<>")
            # urlsplit raises ValueError on malformed URLs; the handler below
            # turns that into the usual "nothing fetched" result.
            filename = os.path.basename(urlsplit(cleaned_url).path)
            _, ext = os.path.splitext(filename.lower())
            if ext in self.image_extensions:
                attachment_type = "image"
            elif ext in self.gif_extensions:
                attachment_type = "gif"
            elif ext in self.video_extensions:
                attachment_type = "video"
            else:
                return failure

            # Imported only once a download is actually going to happen.
            import aiohttp

            async with aiohttp.ClientSession() as session:
                async with session.get(cleaned_url) as resp:
                    if resp.status != 200:
                        print(
                            f"Failed to fetch URL attachment {cleaned_url}: {resp.status}"
                        )
                        return failure
                    # NOTE(review): the URL comes from user-written message
                    # text and the whole body is read into memory with no
                    # size cap or host restriction — consider adding both.
                    data = await resp.read()
                    content_type = resp.headers.get("Content-Type", "")

            # Keep only the bare media type; drop parameters such as
            # "; charset=binary".
            mime_type = content_type.split(";", 1)[0].strip()
            if not mime_type:
                # No usable header: guess from the filename, and only then
                # synthesise a type that matches the attachment family
                # (a video must not be labelled "image/...").
                family = "video" if attachment_type == "video" else "image"
                mime_type = (
                    mimetypes.guess_type(filename)[0]
                    or f"{family}/{ext.lstrip('.')}"
                )
            return mime_type, data, attachment_type, filename
        except Exception as e:
            # Best-effort fetch: any failure is reported and treated as
            # "no attachment" so message handling can continue.
            print(f"Error processing URL attachment {url}: {e}")
            return failure
    def extract_direct_attachment_urls(self, text: str) -> List[str]:
        """Return the direct image/GIF/video links found in ``text``.

        A link qualifies when the file extension of its URL *path* is in one
        of the cog's image, GIF or video extension lists. Looking only at the
        path means a bare host such as ``https://example.mov`` is not mistaken
        for a video, and a query string or ``#fragment`` after the filename
        does not hide a valid extension.

        Args:
            text: Message text to scan; ``None`` or ``""`` yields ``[]``.

        Returns:
            Matching URLs in order of appearance, with any surrounding
            ``<`` ``>`` removed. Query strings and fragments are kept.
        """
        from urllib.parse import urlsplit

        allowed_exts = (
            self.image_extensions + self.gif_extensions + self.video_extensions
        )
        results = []
        for candidate in re.findall(r"https?://\S+", text or ""):
            cleaned = candidate.strip("<>")
            try:
                path = urlsplit(cleaned).path
            except ValueError:
                # Malformed URL (e.g. an unbalanced "[" in the host): skip it.
                continue
            _, ext = os.path.splitext(path.lower())
            if ext in allowed_exts:
                results.append(cleaned)
        return results
    # --- AI Moderation Command Group ---
    aimod_group = app_commands.Group(
        name="aimod", description="AI Moderation commands."
@ -1058,6 +1108,7 @@ Instructions:
 The only rule regarding NSFW content is that **real-life pornography is strictly prohibited**.
 Full-on pornographic images are permitted in designated NSFW channels.
 Stickers and emojis are NOT considered "full-on pornographic images" and are allowed in any channel.
   - Do NOT attempt to moderate AI-generated pornography. You are unlikely to know what it looks like.
   - For general disrespectful behavior, harassment, or bullying (Rule 2 & 3): Only flag a violation if the intent appears **genuinely malicious, targeted, or serious, even after considering conversational history and replies.** Lighthearted insults or "wild" statements within an ongoing banter are generally permissible.
   - For **explicit slurs or severe discriminatory language** (Rule 3): These are violations **regardless of joking intent if they are used in a targeted or hateful manner**. Context from replies and history is still important to assess targeting.
 After considering the above, pay EXTREME attention to rules 5 (Pedophilia) and 5A (IRL Porn) – these are always severe. Rule 4 (AI Porn) is also critical. Prioritize these severe violations.
@ -1107,7 +1158,7 @@ Example Response (Image Violation):
 Example Response (Multiple Attachments Violation):
 {{
-  "reasoning": "While the text content is fine, attachment #3 contains AI-generated pornography, violating rule 4.",
+  "reasoning": "While the text content is fine, attachment #3 contains IRL pornography, violating rule 4.",
  "violation": true,
  "rule_violated": "4",
  "action": "WARN"
@ -2076,8 +2127,13 @@ CRITICAL: Do NOT output anything other than the required JSON response.
        if message.author.bot:
            print(f"Ignoring message {message.id} from bot.")
            return
-        # Ignore messages without content or attachments
+        link_urls = (
-        if not message.content and not message.attachments:
+            self.extract_direct_attachment_urls(message.content)
            if message.content
            else []
        )
        # Ignore messages without content, attachments, or direct attachment links
        if not message.content and not message.attachments and not link_urls:
            print(f"Ignoring message {message.id} with no content or attachments.")
            return
        # Ignore DMs
@ -2124,6 +2180,27 @@ CRITICAL: Do NOT output anything other than the required JSON response.
                    f"Processed {len(image_data_list)} attachments for message {message.id}"
                )
        # Check for direct link attachments in the message content
        if link_urls:
            processed_links = 0
            for url in link_urls:
                mime_type, image_bytes, attachment_type, filename = (
                    await self.process_url_attachment(url)
                )
                if mime_type and image_bytes and attachment_type:
                    image_data_list.append(
                        (mime_type, image_bytes, attachment_type, filename)
                    )
                    processed_links += 1
                    print(
                        f"Processed linked attachment: {filename} as {attachment_type}"
                    )
            if processed_links > 0:
                print(
                    f"Processed {processed_links} linked attachments for message {message.id}"
                )
        # Only proceed with AI analysis if there's text to analyze or attachments
        if not message_content and not image_data_list:
            print(