aa
parent de09605313
commit ac298213a8
gurt/api.py
@@ -151,11 +151,12 @@ except Exception as e:
 # Use actual types if import succeeded, otherwise fallback to Any
 _HarmCategory = getattr(generative_models, 'HarmCategory', Any)
 _HarmBlockThreshold = getattr(generative_models, 'HarmBlockThreshold', Any)
+# To disable blocking, set all thresholds to BLOCK_NONE (or equivalent).
 STANDARD_SAFETY_SETTINGS = {
-    getattr(_HarmCategory, 'HARM_CATEGORY_HATE_SPEECH', 'HARM_CATEGORY_HATE_SPEECH'): getattr(_HarmBlockThreshold, 'BLOCK_MEDIUM_AND_ABOVE', 'BLOCK_MEDIUM_AND_ABOVE'),
-    getattr(_HarmCategory, 'HARM_CATEGORY_DANGEROUS_CONTENT', 'HARM_CATEGORY_DANGEROUS_CONTENT'): getattr(_HarmBlockThreshold, 'BLOCK_MEDIUM_AND_ABOVE', 'BLOCK_MEDIUM_AND_ABOVE'),
-    getattr(_HarmCategory, 'HARM_CATEGORY_SEXUALLY_EXPLICIT', 'HARM_CATEGORY_SEXUALLY_EXPLICIT'): getattr(_HarmBlockThreshold, 'BLOCK_MEDIUM_AND_ABOVE', 'BLOCK_MEDIUM_AND_ABOVE'),
-    getattr(_HarmCategory, 'HARM_CATEGORY_HARASSMENT', 'HARM_CATEGORY_HARASSMENT'): getattr(_HarmBlockThreshold, 'BLOCK_MEDIUM_AND_ABOVE', 'BLOCK_MEDIUM_AND_ABOVE'),
+    getattr(_HarmCategory, 'HARM_CATEGORY_HATE_SPEECH', 'HARM_CATEGORY_HATE_SPEECH'): getattr(_HarmBlockThreshold, 'BLOCK_NONE', 'BLOCK_NONE'),
+    getattr(_HarmCategory, 'HARM_CATEGORY_DANGEROUS_CONTENT', 'HARM_CATEGORY_DANGEROUS_CONTENT'): getattr(_HarmBlockThreshold, 'BLOCK_NONE', 'BLOCK_NONE'),
+    getattr(_HarmCategory, 'HARM_CATEGORY_SEXUALLY_EXPLICIT', 'HARM_CATEGORY_SEXUALLY_EXPLICIT'): getattr(_HarmBlockThreshold, 'BLOCK_NONE', 'BLOCK_NONE'),
+    getattr(_HarmCategory, 'HARM_CATEGORY_HARASSMENT', 'HARM_CATEGORY_HARASSMENT'): getattr(_HarmBlockThreshold, 'BLOCK_NONE', 'BLOCK_NONE'),
 }

 # --- API Call Helper ---
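For context only, not part of the diff: a minimal sketch of how a dict like STANDARD_SAFETY_SETTINGS is typically passed to a Vertex AI generation call, assuming the vertexai SDK imported successfully so the real enum types (rather than the string fallbacks) are in play. The project, location, model name, and prompt below are placeholders.

import vertexai
from vertexai.generative_models import GenerativeModel, HarmCategory, HarmBlockThreshold

vertexai.init(project="my-gcp-project", location="us-central1")  # placeholder project/location

# Same intent as the new STANDARD_SAFETY_SETTINGS: lowest blocking threshold for all four categories.
safety_settings = {
    HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
    HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
    HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
    HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
}

model = GenerativeModel("gemini-1.5-flash-001")  # illustrative model name
response = model.generate_content("Hello", safety_settings=safety_settings)
print(response.text)

Even with BLOCK_NONE on the configurable categories, responses can still be blocked by non-configurable filters, which is what the finish-reason check added in the next hunk is meant to surface.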
@@ -203,7 +204,23 @@ async def call_vertex_api_with_retry(
                 tool_config=tool_config # Pass tool_config here
             )

-            # --- Success Logging ---
+            # --- Check Finish Reason (Safety) ---
+            # This check is primarily for non-streaming responses where a single finish_reason is available.
+            if not stream and response and response.candidates:
+                candidate = response.candidates[0]
+                # Ensure FinishReason is accessible (it should be if vertexai imported correctly)
+                _FinishReason = globals().get('FinishReason')
+                if _FinishReason and candidate.finish_reason == _FinishReason.SAFETY:
+                    safety_ratings_str = ", ".join([f"{rating.category}: {rating.probability.name}" for rating in candidate.safety_ratings]) if candidate.safety_ratings else "N/A"
+                    print(f"⚠️ SAFETY BLOCK: API request for {request_desc} ({model_name}) was blocked by safety filters. Finish Reason: SAFETY. Ratings: [{safety_ratings_str}]")
+                    # Optionally, raise a specific exception here if needed downstream
+                    # raise SafetyBlockError(f"Blocked by safety filters. Ratings: {safety_ratings_str}")
+                elif _FinishReason and candidate.finish_reason != _FinishReason.STOP and candidate.finish_reason != _FinishReason.MAX_TOKENS and candidate.finish_reason != _FinishReason.FUNCTION_CALL:
+                    # Log other unexpected finish reasons
+                    print(f"⚠️ UNEXPECTED FINISH REASON: API request for {request_desc} ({model_name}) finished with reason: {candidate.finish_reason.name}")
+
+
+            # --- Success Logging (Proceed even if safety blocked, but log occurred) ---
             elapsed_time = time.monotonic() - start_time
             # Ensure model_name exists in stats before incrementing
             if model_name not in cog.api_stats:
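The raise above stays commented out, and SafetyBlockError is not defined anywhere in this diff. A hypothetical sketch of what such an exception could look like, so callers could catch a typed error instead of scraping the printed warning:

class SafetyBlockError(Exception):
    """Raised when a candidate finishes with FinishReason.SAFETY (hypothetical; not part of this commit)."""

    def __init__(self, message: str, ratings: str = "N/A"):
        super().__init__(message)
        self.ratings = ratings  # e.g. the safety_ratings_str built above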
@@ -620,12 +620,25 @@ async def _check_command_safety(cog: commands.Cog, command: str) -> Dict[str, An
         "type": "object",
         "properties": {
             "is_safe": {"type": "boolean", "description": "True if safe for restricted container, False otherwise."},
-            "reason": {"type": "string", "description": "Brief explanation."}
+            "reason": {"type": "string", "description": "Brief explanation why the command is safe or unsafe."}
         }, "required": ["is_safe", "reason"]
     }
+    # Enhanced system prompt with more examples of safe commands
+    system_prompt_content = (
+        f"Analyze shell command safety for execution in an isolated, network-disabled Docker container ({DOCKER_EXEC_IMAGE}) "
+        f"with CPU ({DOCKER_CPU_LIMIT} core) and Memory ({DOCKER_MEM_LIMIT}) limits. "
+        "Focus on preventing: data destruction (outside container's ephemeral storage), resource exhaustion (fork bombs, crypto mining), "
+        "container escape vulnerabilities, network attacks (network is disabled), sensitive environment variable leakage (assume only safe vars are mounted). "
+        "Commands that only read system info, list files, print text, or manipulate text are generally SAFE. "
+        "Examples of SAFE commands: whoami, id, uname, hostname, pwd, ls, echo, cat, grep, sed, awk, date, time, env, df, du, ps, top, htop, find (read-only), file. "
+        "Examples of UNSAFE commands: rm, mkfs, shutdown, reboot, poweroff, wget, curl, apt, yum, apk, pip install, npm install, git clone (network disabled, but still potentially risky), "
+        "any command trying to modify system files outside /tmp or /home, fork bombs like ':(){ :|:& };:', commands enabling network access. "
+        "Be cautious with file writing/modification commands if not clearly limited to temporary directories. "
+        "Respond ONLY with the raw JSON object matching the provided schema."
+    )
     prompt_messages = [
-        {"role": "system", "content": f"Analyze shell command safety for execution in isolated, network-disabled Docker ({DOCKER_EXEC_IMAGE}) with CPU/Mem limits. Focus on data destruction, resource exhaustion, container escape, network attacks (disabled), env var leaks. Simple echo/ls/pwd safe. rm/mkfs/shutdown/wget/curl/install/fork bombs unsafe. Respond ONLY with JSON matching the provided schema."},
-        {"role": "user", "content": f"Analyze safety: ```{command}```"}
+        {"role": "system", "content": system_prompt_content},
+        {"role": "user", "content": f"Analyze safety of this command: ```\n{command}\n```"}
     ]
     safety_response = await get_internal_ai_json_response(
         cog=cog,
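For illustration, not from the commit: the JSON shape the schema above asks the model to return, with a minimal check mirroring its required fields. The verdict and reason text are invented.

import json

raw = '{"is_safe": false, "reason": "rm is destructive even inside the ephemeral container filesystem."}'
parsed = json.loads(raw)

# Mirror the schema: both fields required, is_safe boolean, reason string.
assert isinstance(parsed.get("is_safe"), bool)
assert isinstance(parsed.get("reason"), str)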
@@ -642,8 +655,10 @@ async def _check_command_safety(cog: commands.Cog, command: str) -> Dict[str, An
         print(f"AI Safety Check Result: is_safe={is_safe}, reason='{reason}'")
         return {"safe": is_safe, "reason": reason}
     else:
-        error_msg = "AI safety check failed or returned invalid format."
-        print(f"AI Safety Check Error: Response was {safety_response}")
+        # Include part of the invalid response in the error for debugging
+        raw_response_excerpt = str(safety_response)[:200] # Get first 200 chars
+        error_msg = f"AI safety check failed or returned invalid format. Response: {raw_response_excerpt}"
+        print(f"AI Safety Check Error: {error_msg}")
         return {"safe": False, "reason": error_msg}

 async def run_terminal_command(cog: commands.Cog, command: str) -> Dict[str, Any]:
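A hypothetical caller-side sketch, not in this diff, of how the {"safe": ..., "reason": ...} dict returned above might gate execution. guarded_run is an invented helper; the real wiring between _check_command_safety and run_terminal_command is not shown here.

async def guarded_run(cog, command: str):
    # Invented helper: refuse to run anything the safety check rejects.
    verdict = await _check_command_safety(cog, command)
    if not verdict["safe"]:
        return {"status": "rejected", "reason": verdict["reason"]}
    return await run_terminal_command(cog, command)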