@@ -42,6 +42,7 @@ def calculate_max_content_tokens(self, custom_prompt=None):
4242 """
4343 Calculate the maximum tokens available for content based on prompt size.
4444 Dynamically adjusts for custom prompts to prevent exceeding context window.
45+ Uses conservative estimates to account for tokenizer inaccuracies.
4546 """
4647 # Calculate custom prompt tokens if provided
4748 prompt_tokens = 0
@@ -60,19 +61,37 @@ def calculate_max_content_tokens(self, custom_prompt=None):
 Does content violate this rule? JSON only:"""
             # Count tokens for the prompt parts (excluding content placeholder)
             prompt_tokens = self.count_tokens(system_message) + self.count_tokens(user_template)
+
+            # Log large prompts for debugging
+            if prompt_tokens > 10000:
+                current_app.logger.warning(
+                    f"Large custom prompt detected: {prompt_tokens} tokens. "
+                    f"This will significantly reduce available content space."
+                )
         else:
             # For default moderation, estimate prompt overhead
             prompt_tokens = 150  # Typical system + user message without content

-        # Total overhead = prompt + output tokens + small buffer for message formatting
-        total_overhead = prompt_tokens + self.max_output_tokens + 50  # 50 for message structure overhead
-        safety_margin = 0.90  # Use 90% of available capacity
+        # Total overhead = prompt + output tokens + larger buffer for safety
+        # Add 15% buffer to prompt tokens to account for tokenizer inaccuracies
+        # (being extra conservative for large prompts)
+        safe_prompt_tokens = int(prompt_tokens * 1.15)
+        total_overhead = safe_prompt_tokens + self.max_output_tokens + 500  # 500 for message structure overhead
+
+        # Use VERY conservative safety margin (70%) to account for tokenizer differences
+        # between our counting and OpenAI's counting. Better to chunk more than fail.
+        safety_margin = 0.70

         available_for_content = int(
             (self.model_context_window - total_overhead) * safety_margin)

+        # Hard cap: never allow more than 180k tokens for content
+        # This ensures we stay well under the 272k limit even with large prompts
+        available_for_content = min(available_for_content, 180000)
+
         # Ensure a sensible lower bound
-        return max(12000, available_for_content)
+        available_for_content = max(12000, available_for_content)
+        return available_for_content

     def split_text_into_chunks(self, text, max_tokens_per_chunk):
         """