feat: Implement caching and semantic memory embedding for voice transcriptions in on_voice_transcription_received_listener

This commit is contained in:
Slipstream 2025-05-30 22:17:34 -06:00
parent 1c5db9f9e9
commit 540ab69220
Signed by: slipstream
GPG Key ID: 13E498CE010AC6FD

View File

@ -822,6 +822,64 @@ async def on_voice_transcription_received_listener(cog: 'GurtCog', guild: discor
# Update cog's current_channel for the context of this interaction
original_current_channel = cog.current_channel
cog.current_channel = text_channel
# --- Cache the transcribed voice message as if it were a text message ---
# Best-effort: any failure here is caught below and logged without aborting
# the rest of the listener.
try:
formatted_pseudo_message = format_message(cog, pseudo_message_obj) # Normalize via the shared formatting utility
# Source channel_id / user_id from the pseudo message so they match what
# format_message embedded in the formatted dict.
msg_channel_id = pseudo_message_obj.channel.id
msg_user_id = pseudo_message_obj.author.id # author is a discord.User/Member object
# Deduplicate by message ID before appending. The original _dedup_and_append
# helper lives in on_message_listener and is not in scope here, so its logic
# is replicated below as a local closure.
# TODO(review): consider promoting _dedup_and_append to a shared utility so
# this copy and the one in on_message_listener cannot drift apart.
# Helper for deduplication (copied from on_message_listener for now)
def _dedup_and_append_local(cache_deque, msg_dict_to_add):
if not any(m.get("id") == msg_dict_to_add.get("id") for m in cache_deque):
cache_deque.append(msg_dict_to_add)
# Mirror the pseudo message into the per-channel, per-user, and global caches.
_dedup_and_append_local(cog.message_cache['by_channel'].setdefault(msg_channel_id, deque(maxlen=CONTEXT_WINDOW_SIZE)), formatted_pseudo_message)
_dedup_and_append_local(cog.message_cache['by_user'].setdefault(msg_user_id, deque(maxlen=CONTEXT_WINDOW_SIZE*2)), formatted_pseudo_message) # User cache is sized twice the channel window
_dedup_and_append_local(cog.message_cache['global_recent'], formatted_pseudo_message)
# No thread_id for pseudo_message currently
# No mention check for pseudo_message currently
# NOTE(review): conversation_history uses a plain append (no dedup), unlike the
# caches above — confirm a duplicate pseudo-message id cannot reach this point twice.
cog.conversation_history.setdefault(msg_channel_id, deque(maxlen=CONTEXT_WINDOW_SIZE)).append(formatted_pseudo_message)
cog.channel_activity[msg_channel_id] = time.time() # Update activity timestamp
cog.user_conversation_mapping.setdefault(msg_user_id, set()).add(msg_channel_id)
# Register/refresh the active-conversation record for this channel.
if msg_channel_id not in cog.active_conversations:
cog.active_conversations[msg_channel_id] = {'participants': set(), 'start_time': time.time(), 'last_activity': time.time(), 'topic': None}
cog.active_conversations[msg_channel_id]['participants'].add(msg_user_id)
cog.active_conversations[msg_channel_id]['last_activity'] = time.time()
print(f"Cached voice transcription from {user.name} into history of channel {text_channel.name} ({msg_channel_id}).")
# --- Add message to semantic memory (if applicable) ---
# Only embed when there is actual transcribed text AND the semantic collection exists.
if text and cog.memory_manager.semantic_collection: # Check if 'text' (original transcription) is not empty
semantic_metadata = {
"user_id": str(msg_user_id), "user_name": user.name, "display_name": user.display_name,
"channel_id": str(msg_channel_id), "channel_name": getattr(text_channel, 'name', 'VoiceContext'),
"guild_id": str(guild.id) if guild else None,
"timestamp": pseudo_message_obj.created_at.timestamp(),
"is_voice_transcription": True # Flag so voice-derived memories are distinguishable later
}
# Fire-and-forget: the task handle is not retained, so embedding failures
# surface only via the task's own logging — TODO confirm that is intended.
asyncio.create_task(
cog.memory_manager.add_message_embedding(
message_id=str(pseudo_message_obj.id), formatted_message_data=formatted_pseudo_message, metadata=semantic_metadata
)
)
print(f"Scheduled voice transcription from {user.name} for semantic embedding.")
except Exception as e:
# Broad catch is deliberate: caching/embedding is best-effort and must not
# prevent the transcription from being processed further below.
print(f"Error during voice transcription caching/embedding: {e}")
import traceback
traceback.print_exc()
# --- End Caching & Embedding ---
try:
# Process the transcribed text as if it were a regular message