Skip to content

Commit 1bbd830

Browse files
committed
Refine AI moderator token calculation and config
Improved the token calculation logic in AIModerator to better estimate prompt and message overhead, ensuring more accurate content token allocation. Updated OPENAI_MAX_OUTPUT_TOKENS default in config.py to 500, reflecting the typical size of AI moderator responses.
1 parent 46329bd commit 1bbd830

2 files changed

Lines changed: 7 additions & 6 deletions

File tree

app/services/ai/ai_moderator.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,6 @@ def calculate_max_content_tokens(self, custom_prompt=None):
4343
Calculate the maximum tokens available for content based on prompt size.
4444
Dynamically adjusts for custom prompts to prevent exceeding context window.
4545
"""
46-
# Base system prompt size estimation
47-
base_system_tokens = 100 # System message overhead
48-
4946
# Calculate custom prompt tokens if provided
5047
prompt_tokens = 0
5148
if custom_prompt:
@@ -63,9 +60,12 @@ def calculate_max_content_tokens(self, custom_prompt=None):
6360
Does content violate this rule? JSON only:"""
6461
# Count tokens for the prompt parts (excluding content placeholder)
6562
prompt_tokens = self.count_tokens(system_message) + self.count_tokens(user_template)
63+
else:
64+
# For default moderation, estimate prompt overhead
65+
prompt_tokens = 150 # Typical system + user message without content
6666

67-
# Total overhead = base + prompt + output + safety margin
68-
total_overhead = base_system_tokens + prompt_tokens + self.max_output_tokens
67+
# Total overhead = prompt + output tokens + small buffer for message formatting
68+
total_overhead = prompt_tokens + self.max_output_tokens + 50 # 50 for message structure overhead
6969
safety_margin = 0.90 # Use 90% of available capacity
7070

7171
available_for_content = int(

config/config.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,9 @@ class Config:
2121
OPENAI_CONTEXT_WINDOW = int(os.environ.get(
2222
'OPENAI_CONTEXT_WINDOW', '272000'))
2323
# Upper bound for output tokens; actual requests may use much less
24+
# AI moderator only returns small JSON responses (~100-200 tokens), so 500 is plenty
2425
OPENAI_MAX_OUTPUT_TOKENS = int(os.environ.get(
25-
'OPENAI_MAX_OUTPUT_TOKENS', '128000'))
26+
'OPENAI_MAX_OUTPUT_TOKENS', '500'))
2627
ADMIN_EMAIL = os.environ.get('ADMIN_EMAIL')
2728
ADMIN_PASSWORD = os.environ.get('ADMIN_PASSWORD')
2829

0 commit comments

Comments (0)