fix: Improve message scraping logic to handle rate limits and ensure valid message collection

This commit is contained in:
Slipstream 2025-05-28 13:14:08 -06:00
parent 4b6632973d
commit 8f529750b2
Signed by: slipstream
GPG Key ID: 13E498CE010AC6FD

View File

@@ -13,12 +13,16 @@ class MessageScraperCog(commands.Cog):
Scrapes the last N messages from the current channel, excluding bots,
and includes reply information. Uploads the results as a .txt file.
"""
if limit > 500:
return await ctx.send("Please keep the limit under 500 messages to avoid rate limits.")
# The user wants exactly 'limit' messages, excluding bots and empty content.
# We need to fetch more than 'limit' and then filter.
# Set a reasonable max_fetch_limit to prevent excessive fetching in very sparse channels.
max_fetch_limit = limit * 5 if limit * 5 < 1000 else 1000 # Fetch up to 5x the limit, or 1000, whichever is smaller
messages_data = []
async for message in ctx.channel.history(limit=limit):
if message.author.bot:
fetched_count = 0
async for message in ctx.channel.history(limit=max_fetch_limit):
fetched_count += 1
if message.author.bot or not message.content:
continue
reply_info = ""
@@ -37,8 +41,14 @@ class MessageScraperCog(commands.Cog):
f"{message.content}{reply_info}"
)
if len(messages_data) >= limit:
break
if not messages_data:
return await ctx.send("No messages found matching the criteria.")
return await ctx.send("No valid messages found matching the criteria.")
# Trim messages_data to the requested limit if more were collected
messages_data = messages_data[:limit]
output_content = "\n".join(messages_data)