feat: Implement caching and semantic memory embedding for voice transcriptions in on_voice_transcription_received_listener

2025-05-30 22:17:34 -06:00 · 2025-05-30 22:17:34 -06:00 · 540ab69220
commit 540ab69220
parent 1c5db9f9e9
1 changed files with 58 additions and 0 deletions
--- a/gurt/listeners.py
+++ b/gurt/listeners.py
@@ -822,6 +822,64 @@ async def on_voice_transcription_received_listener(cog: 'GurtCog', guild: discor
    # Update cog's current_channel for the context of this interaction
    original_current_channel = cog.current_channel
    cog.current_channel = text_channel
+
+    # --- Cache the transcribed voice message as if it were a text message ---
+    try:
+        formatted_pseudo_message = format_message(cog, pseudo_message_obj) # Use utility function
+        # Ensure channel_id and user_id are correctly sourced from the pseudo_message_obj or its components
+        msg_channel_id = pseudo_message_obj.channel.id
+        msg_user_id = pseudo_message_obj.author.id # author is a discord.User/Member object; .id is its ID
+
+        # Deduplicate by message ID before appending to each cache.
+        # on_message_listener has an equivalent _dedup_and_append helper, but it is not
+        # reused here; its logic is replicated in the local helper defined below.
+        # TODO: consider making _dedup_and_append a static method or shared utility so
+        # both listeners use a single implementation.
+        
+        # Local dedup helper (copied from on_message_listener for now): appends msg_dict_to_add to cache_deque only when no cached entry has the same "id".
+        def _dedup_and_append_local(cache_deque, msg_dict_to_add):
+            if not any(m.get("id") == msg_dict_to_add.get("id") for m in cache_deque):  # linear scan; entries are compared by their "id" value only
+                cache_deque.append(msg_dict_to_add)  # modifies cache_deque in place; nothing is returned
+
+        _dedup_and_append_local(cog.message_cache['by_channel'].setdefault(msg_channel_id, deque(maxlen=CONTEXT_WINDOW_SIZE)), formatted_pseudo_message)
+        _dedup_and_append_local(cog.message_cache['by_user'].setdefault(msg_user_id, deque(maxlen=CONTEXT_WINDOW_SIZE*2)), formatted_pseudo_message) # User cache might be larger
+        _dedup_and_append_local(cog.message_cache['global_recent'], formatted_pseudo_message)
+        # No thread_id for pseudo_message currently
+        # No mention check for pseudo_message currently
+
+        cog.conversation_history.setdefault(msg_channel_id, deque(maxlen=CONTEXT_WINDOW_SIZE)).append(formatted_pseudo_message)
+        
+        cog.channel_activity[msg_channel_id] = time.time() # Update activity timestamp
+        cog.user_conversation_mapping.setdefault(msg_user_id, set()).add(msg_channel_id)
+
+        if msg_channel_id not in cog.active_conversations:
+            cog.active_conversations[msg_channel_id] = {'participants': set(), 'start_time': time.time(), 'last_activity': time.time(), 'topic': None}
+        cog.active_conversations[msg_channel_id]['participants'].add(msg_user_id)
+        cog.active_conversations[msg_channel_id]['last_activity'] = time.time()
+
+        print(f"Cached voice transcription from {user.name} into history of channel {text_channel.name} ({msg_channel_id}).")
+
+        # --- Add message to semantic memory (if applicable) ---
+        if text and cog.memory_manager.semantic_collection: # Check if 'text' (original transcription) is not empty
+            semantic_metadata = {
+                "user_id": str(msg_user_id), "user_name": user.name, "display_name": user.display_name,
+                "channel_id": str(msg_channel_id), "channel_name": getattr(text_channel, 'name', 'VoiceContext'),
+                "guild_id": str(guild.id) if guild else None,
+                "timestamp": pseudo_message_obj.created_at.timestamp(),
+                "is_voice_transcription": True # Add a flag
+            }
+            asyncio.create_task(
+                cog.memory_manager.add_message_embedding(
+                    message_id=str(pseudo_message_obj.id), formatted_message_data=formatted_pseudo_message, metadata=semantic_metadata
+                )
+            )
+            print(f"Scheduled voice transcription from {user.name} for semantic embedding.")
+
+    except Exception as e:
+        print(f"Error during voice transcription caching/embedding: {e}")
+        import traceback
+        traceback.print_exc()
+    # --- End Caching & Embedding ---
    
    try:
        # Process the transcribed text as if it were a regular message