feat: Implement caching and semantic memory embedding for voice transcriptions in on_voice_transcription_received_listener

This commit is contained in:
Slipstream 2025-05-30 22:17:34 -06:00
parent 1c5db9f9e9
commit 540ab69220
Signed by: slipstream
GPG Key ID: 13E498CE010AC6FD

View File

@ -822,6 +822,64 @@ async def on_voice_transcription_received_listener(cog: 'GurtCog', guild: discor
# Update cog's current_channel for the context of this interaction
original_current_channel = cog.current_channel
cog.current_channel = text_channel
# --- Cache the transcribed voice message as if it were a text message ---
# Best-effort: any failure here is caught below and logged without aborting
# the rest of the listener.
try:
formatted_pseudo_message = format_message(cog, pseudo_message_obj) # Normalize via the shared formatting utility
# Source channel_id / user_id from the pseudo message so they match what
# format_message embedded in the formatted dict.
msg_channel_id = pseudo_message_obj.channel.id
msg_user_id = pseudo_message_obj.author.id # author is a discord.User/Member object
# Deduplicate by message ID before appending. The original _dedup_and_append
# helper lives in on_message_listener and is not in scope here, so its logic
# is replicated below as a local closure.
# TODO(review): consider promoting _dedup_and_append to a shared utility so
# this copy and the one in on_message_listener cannot drift apart.
# Helper for deduplication (copied from on_message_listener for now)
def _dedup_and_append_local(cache_deque, msg_dict_to_add):
if not any(m.get("id") == msg_dict_to_add.get("id") for m in cache_deque):
cache_deque.append(msg_dict_to_add)
# Mirror the pseudo message into the per-channel, per-user, and global caches.
_dedup_and_append_local(cog.message_cache['by_channel'].setdefault(msg_channel_id, deque(maxlen=CONTEXT_WINDOW_SIZE)), formatted_pseudo_message)
_dedup_and_append_local(cog.message_cache['by_user'].setdefault(msg_user_id, deque(maxlen=CONTEXT_WINDOW_SIZE*2)), formatted_pseudo_message) # User cache is sized twice the channel window
_dedup_and_append_local(cog.message_cache['global_recent'], formatted_pseudo_message)
# No thread_id for pseudo_message currently
# No mention check for pseudo_message currently
# NOTE(review): conversation_history uses a plain append (no dedup), unlike the
# caches above — confirm a duplicate pseudo-message id cannot reach this point twice.
cog.conversation_history.setdefault(msg_channel_id, deque(maxlen=CONTEXT_WINDOW_SIZE)).append(formatted_pseudo_message)
cog.channel_activity[msg_channel_id] = time.time() # Update activity timestamp
cog.user_conversation_mapping.setdefault(msg_user_id, set()).add(msg_channel_id)
# Register/refresh the active-conversation record for this channel.
if msg_channel_id not in cog.active_conversations:
cog.active_conversations[msg_channel_id] = {'participants': set(), 'start_time': time.time(), 'last_activity': time.time(), 'topic': None}
cog.active_conversations[msg_channel_id]['participants'].add(msg_user_id)
cog.active_conversations[msg_channel_id]['last_activity'] = time.time()
print(f"Cached voice transcription from {user.name} into history of channel {text_channel.name} ({msg_channel_id}).")
# --- Add message to semantic memory (if applicable) ---
# Only embed when there is actual transcribed text AND the semantic collection exists.
if text and cog.memory_manager.semantic_collection: # Check if 'text' (original transcription) is not empty
semantic_metadata = {
"user_id": str(msg_user_id), "user_name": user.name, "display_name": user.display_name,
"channel_id": str(msg_channel_id), "channel_name": getattr(text_channel, 'name', 'VoiceContext'),
"guild_id": str(guild.id) if guild else None,
"timestamp": pseudo_message_obj.created_at.timestamp(),
"is_voice_transcription": True # Flag so voice-derived memories are distinguishable later
}
# Fire-and-forget: the task handle is not retained, so embedding failures
# surface only via the task's own logging — TODO confirm that is intended.
asyncio.create_task(
cog.memory_manager.add_message_embedding(
message_id=str(pseudo_message_obj.id), formatted_message_data=formatted_pseudo_message, metadata=semantic_metadata
)
)
print(f"Scheduled voice transcription from {user.name} for semantic embedding.")
except Exception as e:
# Broad catch is deliberate: caching/embedding is best-effort and must not
# prevent the transcription from being processed further below.
print(f"Error during voice transcription caching/embedding: {e}")
import traceback
traceback.print_exc()
# --- End Caching & Embedding ---
try:
# Process the transcribed text as if it were a regular message