mirror of
https://gitlab.com/pancakes1234/wdiscordbotserver.git
synced 2025-06-16 07:14:21 -06:00
fix: Resolve the guild configuration and OpenRouter model data paths against the current working directory instead of hard-coded absolute paths
This commit is contained in:
parent
5ad75e645e
commit
74505979c1
332
cogs/aimod.py
332
cogs/aimod.py
@ -26,7 +26,7 @@ OPENROUTER_MODEL = "google/gemini-2.5-flash-preview-05-20" # Make sure this mode
|
||||
MOD_LOG_API_SECRET_ENV_VAR = "MOD_LOG_API_SECRET"
|
||||
|
||||
# --- Per-Guild Discord Configuration ---
|
||||
GUILD_CONFIG_DIR = "/home/ubuntu/wdiscordbot-json-data" # Using the existing directory for all json data
|
||||
GUILD_CONFIG_DIR = os.path.join(os.getcwd(), "wdiscordbot-json-data") # Using relative path from current working directory
|
||||
GUILD_CONFIG_PATH = os.path.join(GUILD_CONFIG_DIR, "guild_config.json")
|
||||
USER_INFRACTIONS_PATH = os.path.join(GUILD_CONFIG_DIR, "user_infractions.json")
|
||||
|
||||
@ -207,7 +207,7 @@ class ModerationCog(commands.Cog):
|
||||
|
||||
def _load_openrouter_models(self):
|
||||
"""Loads OpenRouter model data from the JSON file."""
|
||||
models_json_path = "/home/ubuntu/wdiscordbot-internal-server-aws/data/openrouter_models.json" # Relative to bot's root
|
||||
models_json_path = os.path.join(os.getcwd(), "data", "openrouter_models.json") # Relative to bot's root
|
||||
try:
|
||||
if os.path.exists(models_json_path):
|
||||
with open(models_json_path, "r", encoding="utf-8") as f:
|
||||
@ -611,200 +611,7 @@ class ModerationCog(commands.Cog):
|
||||
# self.bot.tree.add_command(self.modsetmodel)
|
||||
# self.bot.tree.add_command(self.modgetmodel)
|
||||
|
||||
async def query_openrouter(self, message: discord.Message, message_content: str, user_history: str, image_data_list=None):
|
||||
"""
|
||||
Sends the message content, user history, and additional context to the OpenRouter API for analysis.
|
||||
Optionally includes image data for visual content moderation.
|
||||
|
||||
Args:
|
||||
message: The original discord.Message object.
|
||||
message_content: The text content of the message.
|
||||
user_history: A string summarizing the user's past infractions.
|
||||
image_data_list: Optional list of tuples (mime_type, image_bytes, attachment_type, filename) for image moderation.
|
||||
|
||||
Returns:
|
||||
A dictionary containing the AI's decision, or None if an error occurs.
|
||||
Expected format:
|
||||
{
|
||||
"reasoning": str,
|
||||
"violation": bool,
|
||||
"rule_violated": str ("None", "1", "5A", etc.),
|
||||
"action": str ("IGNORE", "WARN", "DELETE", "BAN", "NOTIFY_MODS")
|
||||
}
|
||||
"""
|
||||
print(f"query_openrouter called. API key available: {self.openrouter_api_key is not None}")
|
||||
# Check if the API key was successfully fetched
|
||||
if not self.openrouter_api_key:
|
||||
print("Error: OpenRouter API Key is not available. Cannot query API.")
|
||||
return None
|
||||
|
||||
# Construct the prompt for the AI model
|
||||
system_prompt_text = f"""You are an AI moderation assistant for a Discord server.
|
||||
Your primary function is to analyze message content and attached media based STRICTLY on the server rules provided below, using all available context.
|
||||
|
||||
Server Rules:
|
||||
---
|
||||
{SERVER_RULES}
|
||||
---
|
||||
|
||||
Context Provided:
|
||||
You will receive the following information to aid your analysis:
|
||||
- User's Server Role: (e.g., "Server Owner", "Admin", "Moderator", "Member").
|
||||
- Channel Category: The name of the category the channel belongs to.
|
||||
- Channel Age-Restricted/NSFW (Discord Setting): Boolean (true/false).
|
||||
- Replied-to Message: If the current message is a reply, the content of the original message will be provided. This is crucial for understanding direct interactions.
|
||||
- Recent Channel History: The last few messages in the channel to understand the flow of conversation.
|
||||
- Attached Media: If the message contains image, GIF, or video attachments, they will be provided as image_url objects in the content array. For GIFs and videos, only the first frame is extracted.
|
||||
|
||||
Instructions:
|
||||
1. Review the "Message Content" and any attached media against EACH rule, considering ALL provided context (User Role, Channel Info, Replied-to Message, Recent Channel History).
|
||||
- The "Channel Age-Restricted/NSFW (Discord Setting)" is the definitive indicator for NSFW content by Discord.
|
||||
- The "Channel Category" provides general context.
|
||||
- **"Replied-to Message" and "Recent Channel History" are vital for understanding banter, jokes, and ongoing discussions. A statement that seems offensive in isolation might be acceptable within the flow of conversation or as a direct reply.**
|
||||
- If images, GIFs, or videos are attached, analyze ALL of them for rule violations. For GIFs and videos, only the first frame is provided.
|
||||
- Pay special attention to images that may contain NSFW content, pornography, gore, or other prohibited visual content.
|
||||
- If multiple attachments are present, a violation in ANY of them should be flagged.
|
||||
2. Determine if ANY rule is violated. When evaluating, consider the server's culture where **extremely edgy, dark, and sexual humor, including potentially offensive jokes (e.g., rape jokes, saying you want to be raped), are common and generally permissible IF THEY ARE CLEARLY JOKES, part of an established banter, or a direct non-malicious reply, and not targeted harassment or explicit rule violations.**
|
||||
* **NSFW Content:**
|
||||
The only rule regarding NSFW content is that **real-life pornography is strictly prohibited**.
|
||||
Full-on pornographic images are permitted in designated NSFW channels.
|
||||
Stickers and emojis are NOT considered "full-on pornographic images" and are allowed in any channel.
|
||||
- For general disrespectful behavior, harassment, or bullying (Rule 2 & 3): Only flag a violation if the intent appears **genuinely malicious, targeted, or serious, even after considering conversational history and replies.** Lighthearted insults or "wild" statements within an ongoing banter are generally permissible.
|
||||
- For **explicit slurs or severe discriminatory language** (Rule 3): These are violations **regardless of joking intent if they are used in a targeted or hateful manner**. Context from replies and history is still important to assess targeting.
|
||||
- CRITICAL: You should NOT consider the word "retard" or "retarded" as a slur in this server, as it is commonly used in a non-offensive context.
|
||||
After considering the above, pay EXTREME attention to rules 5 (Pedophilia) and 5A (IRL Porn) – these are always severe. Rule 4 (AI Porn) is also critical. Prioritize these severe violations.
|
||||
3. Respond ONLY with a single JSON object containing the following keys:
|
||||
- "reasoning": string (A concise explanation for your decision, referencing the specific rule and content).
|
||||
- "violation": boolean (true if any rule is violated, false otherwise)
|
||||
- "rule_violated": string (The number of the rule violated, e.g., "1", "5A", "None". If multiple rules are violated, state the MOST SEVERE one, prioritizing 5A > 5 > 4 > 3 > 2 > 1).
|
||||
- "action": string (Suggest ONE action from: "IGNORE", "WARN", "DELETE", "TIMEOUT_SHORT", "TIMEOUT_MEDIUM", "TIMEOUT_LONG", "KICK", "BAN", "NOTIFY_MODS", "SUICIDAL".
|
||||
Consider the user's infraction history. If the user has prior infractions for similar or escalating behavior, suggest a more severe action than if it were a first-time offense for a minor rule.
|
||||
Progressive Discipline Guide (unless overridden by severity):
|
||||
- First minor offense: "WARN" (and "DELETE" if content is removable like Rule 1/4).
|
||||
- Second minor offense / First moderate offense: "TIMEOUT_SHORT" (e.g., 10 minutes).
|
||||
- Repeated moderate offenses: "TIMEOUT_MEDIUM" (e.g., 1 hour).
|
||||
- Multiple/severe offenses: "TIMEOUT_LONG" (e.g., 1 day), "KICK", or "BAN".
|
||||
Spamming:
|
||||
- If a user continuously sends very long messages that are off-topic, repetitive, or appear to be meaningless spam (e.g., character floods, nonsensical text), suggest "TIMEOUT_MEDIUM" or "TIMEOUT_LONG" depending on severity and history, even if the content itself doesn't violate other specific rules. This is to maintain chat readability.
|
||||
Rule Severity Guidelines (use your judgment):
|
||||
- Consider the severity of each rule violation on its own merits.
|
||||
- Consider the user's history of past infractions when determining appropriate action.
|
||||
- Consider the context of the message and channel when evaluating violations.
|
||||
- You have full discretion to determine the most appropriate action for any violation.
|
||||
Suicidal Content:
|
||||
If the message content expresses **clear, direct, and serious suicidal ideation, intent, planning, or recent attempts** (e.g., 'I am going to end my life and have a plan', 'I survived my attempt last night', 'I wish I hadn't woken up after trying'), ALWAYS use "SUICIDAL" as the action, and set "violation" to true, with "rule_violated" as "Suicidal Content".
|
||||
For casual, edgy, hyperbolic, or ambiguous statements like 'imma kms', 'just kill me now', 'I want to die (lol)', or phrases that are clearly part of edgy humor/banter rather than a genuine cry for help, you should lean towards "IGNORE" or "NOTIFY_MODS" if there's slight ambiguity but no clear serious intent. **Do NOT flag 'imma kms' as "SUICIDAL" unless there is very strong supporting context indicating genuine, immediate, and serious intent.**
|
||||
If unsure but suspicious, or if the situation is complex: "NOTIFY_MODS".
|
||||
Default action for minor first-time rule violations should be "WARN" or "DELETE" (if applicable).
|
||||
Do not suggest "KICK" or "BAN" lightly; reserve for severe or repeated major offenses.
|
||||
Timeout durations: TIMEOUT_SHORT (approx 10 mins), TIMEOUT_MEDIUM (approx 1 hour), TIMEOUT_LONG (approx 1 day to 1 week).
|
||||
The system will handle the exact timeout duration; you just suggest the category.)
|
||||
|
||||
Example Response (Violation):
|
||||
{{
|
||||
"reasoning": "The message content clearly depicts IRL non-consensual sexual content involving minors, violating rule 5A.",
|
||||
"violation": true,
|
||||
"rule_violated": "5A",
|
||||
"action": "BAN"
|
||||
}}
|
||||
|
||||
Example Response (No Violation):
|
||||
{{
|
||||
"reasoning": "The message is a respectful discussion and contains no prohibited content.",
|
||||
"violation": false,
|
||||
"rule_violated": "None",
|
||||
"action": "IGNORE"
|
||||
}}
|
||||
|
||||
Example Response (Suicidal Content):
|
||||
{{
|
||||
"reasoning": "The user's message 'I want to end my life' indicates clear suicidal intent.",
|
||||
"violation": true,
|
||||
"rule_violated": "Suicidal Content",
|
||||
"action": "SUICIDAL"
|
||||
}}
|
||||
"""
|
||||
|
||||
system_prompt_text = f"""You are an AI moderation assistant for a Discord server.
|
||||
Your primary function is to analyze message content and attached media based STRICTLY on the server rules provided below, using all available context.
|
||||
|
||||
Server Rules:
|
||||
---
|
||||
{SERVER_RULES}
|
||||
---
|
||||
|
||||
Context Provided:
|
||||
You will receive the following information to aid your analysis:
|
||||
- User's Server Role: (e.g., "Server Owner", "Admin", "Moderator", "Member").
|
||||
- Channel Category: The name of the category the channel belongs to.
|
||||
- Channel Age-Restricted/NSFW (Discord Setting): Boolean (true/false).
|
||||
- Replied-to Message: If the current message is a reply, the content of the original message will be provided. This is crucial for understanding direct interactions.
|
||||
- Recent Channel History: The last few messages in the channel to understand the flow of conversation.
|
||||
|
||||
Instructions:
|
||||
1. Review the "Message Content" against EACH rule, considering ALL provided context (User Role, Channel Info, Replied-to Message, Recent Channel History).
|
||||
- The "Channel Age-Restricted/NSFW (Discord Setting)" is the definitive indicator for NSFW content by Discord.
|
||||
- The "Channel Category" provides general context.
|
||||
- **"Replied-to Message" and "Recent Channel History" are vital for understanding banter, jokes, and ongoing discussions. A statement that seems offensive in isolation might be acceptable within the flow of conversation or as a direct reply.**
|
||||
2. Determine if ANY rule is violated. When evaluating, consider the server's culture where **extremely edgy, dark, and sexual humor, including potentially offensive jokes (e.g., rape jokes, saying you want to be raped), are common and generally permissible IF THEY ARE CLEARLY JOKES, part of an established banter, or a direct non-malicious reply, and not targeted harassment or explicit rule violations.**
|
||||
- For Rule 1 (NSFW content):
|
||||
The only rules regarding NSFW content is that **real-life pornography is strictly prohibited**, and Full-on pornographic images are only permitted in designated NSFW channels.
|
||||
Stickers and emojis are NOT considered "full-on pornographic images" and are allowed in any channel.
|
||||
- For general disrespectful behavior, harassment, or bullying (Rule 2 & 3): Only flag a violation if the intent appears **genuinely malicious, targeted, or serious, even after considering conversational history and replies.** Lighthearted insults or "wild" statements within an ongoing banter are generally permissible.
|
||||
- For **explicit slurs or severe discriminatory language** (Rule 3): These are violations **regardless of joking intent if they are used in a targeted or hateful manner**. Context from replies and history is still important to assess targeting.
|
||||
- CRITICAL: You should NOT consider the word "retard" or "retarded" as a slur in this server, as it is commonly used in a non-offensive context.
|
||||
After considering the above, pay EXTREME attention to rules 5 (Pedophilia) and 5A (IRL Porn) – these are always severe. Rule 4 (AI Porn) is also critical. Prioritize these severe violations.
|
||||
3. Respond ONLY with a single JSON object containing the following keys:
|
||||
- "reasoning": string (A concise explanation for your decision, referencing the specific rule and content).
|
||||
- "violation": boolean (true if any rule is violated, false otherwise)
|
||||
- "rule_violated": string (The number of the rule violated, e.g., "1", "5A", "None". If multiple rules are violated, state the MOST SEVERE one, prioritizing 5A > 5 > 4 > 3 > 2 > 1).
|
||||
- "action": string (Suggest ONE action from: "IGNORE", "WARN", "DELETE", "TIMEOUT_SHORT", "TIMEOUT_MEDIUM", "TIMEOUT_LONG", "KICK", "BAN", "NOTIFY_MODS", "SUICIDAL".
|
||||
Consider the user's infraction history. If the user has prior infractions for similar or escalating behavior, suggest a more severe action than if it were a first-time offense for a minor rule.
|
||||
Progressive Discipline Guide (unless overridden by severity):
|
||||
- First minor offense: "WARN" (and "DELETE" if content is removable like Rule 1/4).
|
||||
- Second minor offense / First moderate offense: "TIMEOUT_SHORT" (e.g., 10 minutes).
|
||||
- Repeated moderate offenses: "TIMEOUT_MEDIUM" (e.g., 1 hour).
|
||||
- Multiple/severe offenses: "TIMEOUT_LONG" (e.g., 1 day), "KICK", or "BAN".
|
||||
Spamming:
|
||||
- If a user continuously sends very long messages that are off-topic, repetitive, or appear to be meaningless spam (e.g., character floods, nonsensical text), suggest "TIMEOUT_MEDIUM" or "TIMEOUT_LONG" depending on severity and history, even if the content itself doesn't violate other specific rules. This is to maintain chat readability.
|
||||
Rule Severity Guidelines (use your judgment):
|
||||
- Consider the severity of each rule violation on its own merits.
|
||||
- Consider the user's history of past infractions when determining appropriate action.
|
||||
- Consider the context of the message and channel when evaluating violations.
|
||||
- You have full discretion to determine the most appropriate action for any violation.
|
||||
Suicidal Content:
|
||||
If the message content expresses **clear, direct, and serious suicidal ideation, intent, planning, or recent attempts** (e.g., 'I am going to end my life and have a plan', 'I survived my attempt last night', 'I wish I hadn't woken up after trying'), ALWAYS use "SUICIDAL" as the action, and set "violation" to true, with "rule_violated" as "Suicidal Content".
|
||||
For casual, edgy, hyperbolic, or ambiguous statements like 'imma kms', 'just kill me now', 'I want to die (lol)', or phrases that are clearly part of edgy humor/banter rather than a genuine cry for help, you should lean towards "IGNORE" or "NOTIFY_MODS" if there's slight ambiguity but no clear serious intent. **Do NOT flag 'imma kms' as "SUICIDAL" unless there is very strong supporting context indicating genuine, immediate, and serious intent.**
|
||||
If unsure but suspicious, or if the situation is complex: "NOTIFY_MODS".
|
||||
Default action for minor first-time rule violations should be "WARN" or "DELETE" (if applicable).
|
||||
Do not suggest "KICK" or "BAN" lightly; reserve for severe or repeated major offenses.
|
||||
Timeout durations: TIMEOUT_SHORT (approx 10 mins), TIMEOUT_MEDIUM (approx 1 hour), TIMEOUT_LONG (approx 1 day to 1 week).
|
||||
The system will handle the exact timeout duration; you just suggest the category.)
|
||||
|
||||
Example Response (Violation):
|
||||
{{
|
||||
"reasoning": "The message content clearly depicts IRL non-consensual sexual content involving minors, violating rule 5A.",
|
||||
"violation": true,
|
||||
"rule_violated": "5A",
|
||||
"action": "BAN"
|
||||
}}
|
||||
|
||||
Example Response (No Violation):
|
||||
{{
|
||||
"reasoning": "The message is a respectful discussion and contains no prohibited content.",
|
||||
"violation": false,
|
||||
"rule_violated": "None",
|
||||
"action": "IGNORE"
|
||||
}}
|
||||
|
||||
Example Response (Suicidal Content):
|
||||
{{
|
||||
"reasoning": "The user's message 'I want to end my life' indicates clear suicidal intent.",
|
||||
"violation": true,
|
||||
"rule_violated": "Suicidal Content",
|
||||
"action": "SUICIDAL"
|
||||
}}
|
||||
"""
|
||||
|
||||
async def query_openrouter(self, message: discord.Message, message_content: str, user_history: str, image_data_list=None):
|
||||
"""
|
||||
@ -920,13 +727,144 @@ Example Response (Suicidal Content):
|
||||
}}
|
||||
"""
|
||||
|
||||
# Get the model from guild config, fall back to global default
|
||||
guild_id = message.guild.id
|
||||
model_used = get_guild_config(guild_id, "AI_MODEL", OPENROUTER_MODEL)
|
||||
|
||||
# Gather context information
|
||||
user_role = "Member" # Default
|
||||
if message.author.guild_permissions.administrator:
|
||||
user_role = "Admin"
|
||||
elif message.author.guild_permissions.manage_messages:
|
||||
user_role = "Moderator"
|
||||
elif message.guild.owner_id == message.author.id:
|
||||
user_role = "Server Owner"
|
||||
|
||||
# Get channel category
|
||||
channel_category = message.channel.category.name if message.channel.category else "No Category"
|
||||
|
||||
# Check if channel is NSFW
|
||||
is_nsfw_channel = getattr(message.channel, 'nsfw', False)
|
||||
|
||||
# Get replied-to message content if this is a reply
|
||||
replied_to_content = ""
|
||||
if message.reference and message.reference.message_id:
|
||||
try:
|
||||
replied_message = await message.channel.fetch_message(message.reference.message_id)
|
||||
replied_to_content = f"Replied-to Message: {replied_message.author.display_name}: {replied_message.content[:200]}"
|
||||
except:
|
||||
replied_to_content = "Replied-to Message: [Could not fetch]"
|
||||
|
||||
# Get recent channel history (last 3 messages before this one)
|
||||
recent_history = []
|
||||
try:
|
||||
async for hist_message in message.channel.history(limit=4, before=message):
|
||||
if not hist_message.author.bot:
|
||||
recent_history.append(f"{hist_message.author.display_name}: {hist_message.content[:100]}")
|
||||
except:
|
||||
recent_history = ["[Could not fetch recent history]"]
|
||||
|
||||
recent_history_text = "\n".join(recent_history[:3]) if recent_history else "No recent history available."
|
||||
|
||||
# Construct the user prompt with context
|
||||
user_prompt = f"""
|
||||
**Context Information:**
|
||||
- User's Server Role: {user_role}
|
||||
- Channel Category: {channel_category}
|
||||
- Channel Age-Restricted/NSFW (Discord Setting): {is_nsfw_channel}
|
||||
- {replied_to_content}
|
||||
- Recent Channel History:
|
||||
{recent_history_text}
|
||||
|
||||
**User's Infraction History:**
|
||||
{user_history}
|
||||
|
||||
**Message Content:**
|
||||
{message_content if message_content else "[No text content]"}
|
||||
"""
|
||||
|
||||
# Prepare the messages array for the API
|
||||
messages = [
|
||||
{"role": "system", "content": system_prompt_text},
|
||||
{"role": "user", "content": [{"type": "text", "text": user_prompt}]}
|
||||
]
|
||||
|
||||
# Add images to the user message if present
|
||||
if image_data_list:
|
||||
for mime_type, image_bytes, attachment_type, filename in image_data_list:
|
||||
# Convert image bytes to base64
|
||||
image_base64 = base64.b64encode(image_bytes).decode('utf-8')
|
||||
image_url = f"data:{mime_type};base64,{image_base64}"
|
||||
|
||||
messages[1]["content"].append({
|
||||
"type": "image_url",
|
||||
"image_url": {"url": image_url}
|
||||
})
|
||||
print(f"Added {attachment_type} attachment to AI analysis: {filename}")
|
||||
|
||||
# Prepare the API request
|
||||
headers = {
|
||||
"Authorization": f"Bearer {self.openrouter_api_key}",
|
||||
"Content-Type": "application/json"
|
||||
}
|
||||
|
||||
payload = {
|
||||
"model": model_used,
|
||||
"messages": messages,
|
||||
"max_tokens": 500,
|
||||
"temperature": 0.1
|
||||
}
|
||||
|
||||
try:
|
||||
async with self.session.post(OPENROUTER_API_URL, headers=headers, json=payload, timeout=30) as response:
|
||||
if response.status == 200:
|
||||
response_data = await response.json()
|
||||
ai_response_text = response_data.get("choices", [{}])[0].get("message", {}).get("content", "")
|
||||
|
||||
if not ai_response_text:
|
||||
print("Error: Empty response from OpenRouter API.")
|
||||
return None
|
||||
|
||||
# Parse the JSON response from the AI
|
||||
try:
|
||||
# Clean the response text (remove markdown code blocks if present)
|
||||
clean_response = ai_response_text.strip()
|
||||
if clean_response.startswith("```json"):
|
||||
clean_response = clean_response[7:]
|
||||
if clean_response.endswith("```"):
|
||||
clean_response = clean_response[:-3]
|
||||
clean_response = clean_response.strip()
|
||||
|
||||
ai_decision = json.loads(clean_response)
|
||||
|
||||
# Validate the response structure
|
||||
required_keys = ["reasoning", "violation", "rule_violated", "action"]
|
||||
if not all(key in ai_decision for key in required_keys):
|
||||
print(f"Error: AI response missing required keys. Got: {ai_decision}")
|
||||
return None
|
||||
|
||||
print(f"AI Decision: {ai_decision}")
|
||||
return ai_decision
|
||||
|
||||
except json.JSONDecodeError as e:
|
||||
print(f"Error parsing AI response as JSON: {e}")
|
||||
print(f"Raw AI response: {ai_response_text}")
|
||||
return None
|
||||
|
||||
else:
|
||||
error_text = await response.text()
|
||||
print(f"OpenRouter API error {response.status}: {error_text}")
|
||||
return None
|
||||
|
||||
except Exception as e:
|
||||
print(f"Exception during OpenRouter API call: {e}")
|
||||
return None
|
||||
|
||||
async def handle_violation(self, message: discord.Message, ai_decision: dict, notify_mods_message: str = None):
|
||||
"""
|
||||
Takes action based on the AI's violation decision.
|
||||
Also transmits action info via HTTP POST with API key header.
|
||||
"""
|
||||
import datetime
|
||||
import aiohttp
|
||||
|
||||
rule_violated = ai_decision.get("rule_violated", "Unknown")
|
||||
reasoning = ai_decision.get("reasoning", "No reasoning provided.")
|
||||
|
Loading…
x
Reference in New Issue
Block a user