Merge work into master

2025-06-07 03:14:31 +00:00 · 2025-06-07 03:14:31 +00:00 · 21dada9de0
commit 21dada9de0
parent 07e2beecf6 3229a54270
2 changed files with 143 additions and 13 deletions
--- a/cogs/aimod_cog.py
+++ b/cogs/aimod_cog.py
@ -18,6 +18,7 @@ import shutil  # For backing up files
 from typing import Optional, List, Dict, Any, Tuple  # For type hinting
 import asyncio
 import aiofiles
+import re

 # Google Generative AI Imports (using Vertex AI backend)
 from google import genai
@ -446,6 +447,55 @@ class AIModerationCog(commands.Cog):
            print(f"Error processing video: {e}")
            return None, None

+    async def process_url_attachment(
+        self, url: str
+    ) -> Tuple[Optional[str], Optional[bytes], Optional[str], Optional[str]]:
+        """Download a directly linked image, GIF, or video.
+
+        Args:
+            url: Link to fetch. Surrounding ``<`` and ``>`` are stripped first.
+
+        Returns:
+            ``(mime_type, data, attachment_type, filename)`` on success, where
+            ``attachment_type`` is ``"image"``, ``"gif"`` or ``"video"``.
+            ``(None, None, None, None)`` if the file extension is not in any
+            of the cog's extension lists, the response status is not 200, or
+            any exception is raised while fetching.
+        """
+        import aiohttp
+        from urllib.parse import urlsplit
+
+        failure = (None, None, None, None)
+        try:
+            cleaned_url = url.strip("<>")
+            # Derive the name from the URL path only, so that a "/" or "."
+            # inside the query string or fragment cannot be mistaken for it.
+            filename = urlsplit(cleaned_url).path.rsplit("/", 1)[-1]
+            _, ext = os.path.splitext(filename.lower())
+            if ext in self.image_extensions:
+                attachment_type = "image"
+            elif ext in self.gif_extensions:
+                attachment_type = "gif"
+            elif ext in self.video_extensions:
+                attachment_type = "video"
+            else:
+                return failure
+
+            async with aiohttp.ClientSession() as session:
+                async with session.get(cleaned_url) as resp:
+                    if resp.status != 200:
+                        print(
+                            f"Failed to fetch URL attachment {cleaned_url}: {resp.status}"
+                        )
+                        return failure
+                    data = await resp.read()
+                    # When the server sends no Content-Type, guess one from the
+                    # extension; videos need "video/", not "image/".
+                    family = "video" if attachment_type == "video" else "image"
+                    mime_type = resp.headers.get(
+                        "Content-Type", f"{family}/{ext.lstrip('.')}"
+                    )
+                    return mime_type, data, attachment_type, filename
+        except Exception as e:
+            # Best-effort fetch: report the error and signal failure to the caller.
+            print(f"Error processing URL attachment {url}: {e}")
+            return failure
+
+    def extract_direct_attachment_urls(self, text: str) -> List[str]:
+        """Return the direct image/GIF/video links found in ``text``.
+
+        A link qualifies when the extension of its URL *path* (ignoring host,
+        query string and fragment) is listed in ``self.image_extensions``,
+        ``self.gif_extensions`` or ``self.video_extensions``.
+
+        Args:
+            text: Text to scan; ``None`` or an empty string yields ``[]``.
+
+        Returns:
+            Matching URLs in order of first appearance, with surrounding
+            ``<`` / ``>`` stripped and duplicates removed. Query strings and
+            fragments are kept in the returned URLs.
+        """
+        from urllib.parse import urlsplit
+
+        allowed_exts = (
+            self.image_extensions + self.gif_extensions + self.video_extensions
+        )
+        results: List[str] = []
+        for raw in re.findall(r"https?://\S+", text or ""):
+            cleaned = raw.strip("<>")
+            try:
+                # Only the path decides the file type; a host such as
+                # "example.png" or a trailing "#fragment" must not.
+                path = urlsplit(cleaned).path
+            except ValueError:
+                # urlsplit rejects some malformed URLs (e.g. bad IPv6 brackets).
+                continue
+            _, ext = os.path.splitext(path.lower())
+            if ext in allowed_exts and cleaned not in results:
+                results.append(cleaned)
+        return results
+
    # --- AI Moderation Command Group ---
    aimod_group = app_commands.Group(
        name="aimod", description="AI Moderation commands."
@ -717,6 +767,18 @@ class AIModerationCog(commands.Cog):
            f"Server rules updated from {channel.mention}.", ephemeral=False
        )

+    @config_subgroup.command(
+        name="reset_rules",
+        description="Reset server rules to the default hardcoded version.",
+    )
+    @app_commands.checks.has_permissions(administrator=True)
+    async def reset_rules(self, interaction: discord.Interaction) -> None:
+        """Restore ``SERVER_RULES`` to ``DEFAULT_SERVER_RULES`` and confirm.
+
+        Rebinds ``aimod_config_module.SERVER_RULES`` to the default rules
+        string, then answers the interaction with a non-ephemeral
+        confirmation message. The permission check on this command requires
+        the administrator permission.
+        """
+        default_rules = aimod_config_module.DEFAULT_SERVER_RULES
+        aimod_config_module.SERVER_RULES = default_rules
+        confirmation = "Server rules have been reset to the default."
+        await interaction.response.send_message(confirmation, ephemeral=False)
+
    @infractions_subgroup.command(
        name="view",
        description="View a user's AI moderation infraction history (mod/admin only).",
@ -1052,29 +1114,32 @@ Instructions:
   - **"Replied-to Message" and "Recent Channel History" are vital for understanding banter, jokes, and ongoing discussions. A statement that seems offensive in isolation might be acceptable within the flow of conversation or as a direct reply.**
   - If images, GIFs, or videos are attached, analyze ALL of them for rule violations.
   - Pay special attention to images that may contain NSFW content, pornography, gore, or other prohibited visual content.
+   - **Do not attempt to guess or estimate the ages of characters or people in images.** Only act on explicit textual context indicating they are minors.
   - If multiple attachments are present, a violation in ANY of them should be flagged.
 2. Determine if ANY rule is violated. When evaluating, consider the server's culture where **extremely edgy, dark, and sexual humor, including potentially offensive jokes (e.g., rape jokes, saying you want to be raped), are common and generally permissible IF THEY ARE CLEARLY JOKES, part of an established banter, or a direct non-malicious reply, and not targeted harassment or explicit rule violations.**
 * **NSFW Content:**
 The only rule regarding NSFW content is that **real-life pornography is strictly prohibited**.
 Full-on pornographic images are permitted in designated NSFW channels.
 Stickers and emojis are NOT considered "full-on pornographic images" and are allowed in any channel.
+   - **Completely ignore AI-generated pornography.** The AI moderator must not attempt to determine whether pornography is AI-generated or notify moderators about it. Only real-life pornography should be considered.
   - For general disrespectful behavior, harassment, or bullying (Rule 2 & 3): Only flag a violation if the intent appears **genuinely malicious, targeted, or serious, even after considering conversational history and replies.** Lighthearted insults or "wild" statements within an ongoing banter are generally permissible.
   - For **explicit slurs or severe discriminatory language** (Rule 3): These are violations **regardless of joking intent if they are used in a targeted or hateful manner**. Context from replies and history is still important to assess targeting.
-After considering the above, pay EXTREME attention to rules 5 (Pedophilia) and 5A (IRL Porn) – these are always severe. Rule 4 (AI Porn) is also critical. Prioritize these severe violations.
+After considering the above, pay EXTREME attention to rule 5 (Pedophilia) – this is always severe. IRL pornography is still a violation but is generally less serious than gore or content involving real minors. **Ignore any rules about AI-generated pornography.** Prioritize genuinely severe violations.
 3. Respond ONLY with a single JSON object containing the following keys:
    - "reasoning": string (A concise explanation for your decision, referencing the specific rule and content).
    - "violation": boolean (true if any rule is violated, false otherwise)
-    - "rule_violated": string (The number of the rule violated, e.g., "1", "5A", "None". If multiple rules are violated, state the MOST SEVERE one, prioritizing 5A > 5 > 4 > 3 > 2 > 1).
+    - "rule_violated": string (The number of the rule violated, e.g., "1", "5A", "None". If multiple rules are violated, state the MOST SEVERE one, prioritizing 5 > 5A > 3 > 2 > 1. Ignore any rules about AI-generated pornography.)
    - "action": string (Suggest ONE action from: "IGNORE", "WARN", "DELETE", "TIMEOUT_SHORT", "TIMEOUT_MEDIUM", "TIMEOUT_LONG", "KICK", "BAN", "NOTIFY_MODS", "SUICIDAL".
    - "notify_mods_message": optional string (If the suggested action is "NOTIFY_MODS", provide an optional brief message here for the moderators, e.g., "User's message is slightly ambiguous, human review needed.").
       Consider the user's infraction history. If the user has prior infractions for similar or escalating behavior, suggest a more severe action than if it were a first-time offense for a minor rule.
       Progressive Discipline Guide (unless overridden by severity):
         - First minor offense: "WARN" (and "DELETE" if content is removable like Rule 1/4).
         - Second minor offense / First moderate offense: "TIMEOUT_SHORT" (e.g., 10 minutes).
-         - Repeated moderate offenses: "TIMEOUT_MEDIUM" (e.g., 1 hour).
-         - Multiple/severe offenses: "TIMEOUT_LONG" (e.g., 1 day), "KICK", or "BAN".
-       Spamming:
-         - If a user continuously sends very long messages that are off-topic, repetitive, or appear to be meaningless spam (e.g., character floods, nonsensical text), suggest "TIMEOUT_MEDIUM" or "TIMEOUT_LONG" depending on severity and history, even if the content itself doesn't violate other specific rules. This is to maintain chat readability.
+       - Repeated moderate offenses: "TIMEOUT_MEDIUM" (e.g., 1 hour).
+       - Multiple/severe offenses: "TIMEOUT_LONG" (e.g., 1 day), "KICK", or "BAN".
+      - Use "BAN" on a user's **first infraction only in extremely severe cases** such as posting gore or unmistakable real-life CSAM involving minors. If the content appears animated or ambiguous, do **not** immediately ban; a timeout or moderator review is more appropriate.
+      Spamming:
+        - If a user continuously sends very long messages that are off-topic, repetitive, or appear to be meaningless spam (e.g., character floods, nonsensical text), suggest "TIMEOUT_MEDIUM" or "TIMEOUT_LONG" depending on severity and history, even if the content itself doesn't violate other specific rules. This is to maintain chat readability.
       Rule Severity Guidelines (use your judgment):
         - Consider the severity of each rule violation on its own merits.
         - Consider the user's history of past infractions when determining appropriate action.
@ -1107,9 +1172,9 @@ Example Response (Image Violation):

 Example Response (Multiple Attachments Violation):
 {{
-  "reasoning": "While the text content is fine, attachment #3 contains AI-generated pornography, violating rule 4.",
+  "reasoning": "While the text content is fine, attachment #3 contains IRL pornography, violating rule 5A.",
  "violation": true,
-  "rule_violated": "4",
+  "rule_violated": "5A",
  "action": "WARN"
 }}

@ -1629,6 +1694,38 @@ CRITICAL: Do NOT output anything other than the required JSON response.
        except Exception as e:
            print(f"Failed to POST initial action info: {e}")

+        # --- Adjust action for first-time offenses ---
+        user_history_list = get_user_infraction_history(guild_id, user_id)
+        if action == "BAN" and not user_history_list:
+            combined_text = f"{rule_violated} {reasoning}".lower()
+            severe = False
+            if "gore" in combined_text:
+                severe = True
+            elif "csam" in combined_text:
+                severe = True
+            elif (
+                "pedophilia" in combined_text
+                or "child" in combined_text
+                or "5a" in combined_text
+                or "5" in combined_text
+            ):
+                real_indicators = [
+                    "real",
+                    "real-life",
+                    "real life",
+                    "irl",
+                    "photo",
+                    "photograph",
+                    "video",
+                ]
+                if any(indicator in combined_text for indicator in real_indicators):
+                    severe = True
+            if not severe:
+                print(
+                    "Downgrading BAN to TIMEOUT_LONG due to first offense and lack of severe content."
+                )
+                action = "TIMEOUT_LONG"
+
        # --- Prepare Notification ---
        notification_embed = discord.Embed(
            title="🚨 Rule Violation Detected 🚨",
@ -2076,8 +2173,14 @@ CRITICAL: Do NOT output anything other than the required JSON response.
        if message.author.bot:
            print(f"Ignoring message {message.id} from bot.")
            return
-        # Ignore messages without content or attachments
-        if not message.content and not message.attachments:
+        embed_urls = [embed.url for embed in message.embeds if embed.url]
+        link_urls = (
+            self.extract_direct_attachment_urls(" ".join(embed_urls))
+            if embed_urls
+            else []
+        )
+        # Ignore messages without content, attachments, or direct attachment links
+        if not message.content and not message.attachments and not link_urls:
            print(f"Ignoring message {message.id} with no content or attachments.")
            return
        # Ignore DMs
@ -2124,6 +2227,27 @@ CRITICAL: Do NOT output anything other than the required JSON response.
                    f"Processed {len(image_data_list)} attachments for message {message.id}"
                )

+        # Check for direct link attachments in the message content
+        if link_urls:
+            processed_links = 0
+            for url in link_urls:
+                mime_type, image_bytes, attachment_type, filename = (
+                    await self.process_url_attachment(url)
+                )
+                if mime_type and image_bytes and attachment_type:
+                    image_data_list.append(
+                        (mime_type, image_bytes, attachment_type, filename)
+                    )
+                    processed_links += 1
+                    print(
+                        f"Processed linked attachment: {filename} as {attachment_type}"
+                    )
+
+            if processed_links > 0:
+                print(
+                    f"Processed {processed_links} linked attachments for message {message.id}"
+                )
+
        # Only proceed with AI analysis if there's text to analyze or attachments
        if not message_content and not image_data_list:
            print(
--- a/cogs/aimod_config.py
+++ b/cogs/aimod_config.py
@ -119,7 +119,7 @@ async def add_user_infraction(
    await save_user_infractions()


-SERVER_RULES = """
+DEFAULT_SERVER_RULES = """
 # Server Rules

 * **NSFW Content:**
@ -138,9 +138,13 @@ Stickers and emojis are NOT considered "full-on pornographic images" and are all

 * **No Discrimination:** Discrimination based on race, gender identity, sexual orientation, religion, nationality, disability, or other protected characteristics is prohibited.

-* **AI-Generated Pornography:** Do not post AI-generated pornography.
+* **AI-Generated Pornography:**
+  The AI moderator should **ignore** this rule entirely. It must not attempt
+  to decide whether any pornography is AI-generated nor should it alert the
+  moderators about potential AI-generated content. Human moderators may
+  enforce this rule manually if desired.

-* **Zero Tolerance for Pedophilia:** Any form of pedophilia, including lolicon and shotacon content, is strictly forbidden and will result in an immediate ban.
+* **Pedophilia and Underage Content:** Depicting minors in sexual situations, including lolicon or shotacon, is not allowed and may result in severe moderation.

 * **Channel Usage:** Please use channels for their intended purposes. Bot commands should primarily be used in `#bot-commands`, unless they are part of a bot-based game or event happening in another specific channel.

@ -159,6 +163,8 @@ If you witness someone breaking these rules, please ping an `@Moderator` with de
 Use the bot command `/modapp apply`
 """

+# Rules text currently in effect. It starts out as the default; the
+# reset_rules command in cogs/aimod_cog.py rebinds it to DEFAULT_SERVER_RULES.
+SERVER_RULES = DEFAULT_SERVER_RULES
+
 SUICIDAL_HELP_RESOURCES = """
 Hey, I'm really concerned to hear you're feeling this way. Please know that you're not alone and there are people who want to support you.
 Your well-being is important to us on this server.