
Commit 0f7c462

Make token calculation more conservative in AIModerator
Adjusts the calculation of available tokens for content to use a larger safety margin and buffer, accounting for tokenizer inaccuracies and large custom prompts. Adds logging for unusually large prompts and enforces a hard cap on content tokens to prevent exceeding model context limits.
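To make the change concrete, here is a rough before/after budget calculation. The 272k figure comes from the diff below; the exact context window, output budget, and prompt size used here are illustrative assumptions, not values taken from the code.

    # Illustrative numbers only: window, output budget, and prompt size are assumed.
    context_window = 272_000   # assumes the 272k limit in the diff is the full context window
    max_output = 4_096         # assumed stand-in for self.max_output_tokens
    prompt = 12_000            # a "large" custom prompt, above the new 10k warning threshold

    # Before this commit: flat 50-token buffer, 90% safety margin, no hard cap
    old_budget = int((context_window - (prompt + max_output + 50)) * 0.90)    # 230_268

    # After this commit: 15% prompt padding, 500-token buffer, 70% margin, 180k cap
    padded = int(prompt * 1.15)                                               # 13_800
    new_budget = int((context_window - (padded + max_output + 500)) * 0.70)   # 177_522
    new_budget = max(12_000, min(new_budget, 180_000))

With these assumptions the content budget drops from roughly 230k to roughly 178k tokens, which is the intended effect: chunk more often rather than risk overflowing the context window.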
1 parent 82fc73a commit 0f7c462

1 file changed: 23 additions & 4 deletions


app/services/ai/ai_moderator.py

@@ -42,6 +42,7 @@ def calculate_max_content_tokens(self, custom_prompt=None):
         """
         Calculate the maximum tokens available for content based on prompt size.
         Dynamically adjusts for custom prompts to prevent exceeding context window.
+        Uses conservative estimates to account for tokenizer inaccuracies.
         """
         # Calculate custom prompt tokens if provided
         prompt_tokens = 0
@@ -60,19 +61,37 @@ def calculate_max_content_tokens(self, custom_prompt=None):
 Does content violate this rule? JSON only:"""
             # Count tokens for the prompt parts (excluding content placeholder)
             prompt_tokens = self.count_tokens(system_message) + self.count_tokens(user_template)
+
+            # Log large prompts for debugging
+            if prompt_tokens > 10000:
+                current_app.logger.warning(
+                    f"Large custom prompt detected: {prompt_tokens} tokens. "
+                    f"This will significantly reduce available content space."
+                )
         else:
             # For default moderation, estimate prompt overhead
             prompt_tokens = 150  # Typical system + user message without content
 
-        # Total overhead = prompt + output tokens + small buffer for message formatting
-        total_overhead = prompt_tokens + self.max_output_tokens + 50  # 50 for message structure overhead
-        safety_margin = 0.90  # Use 90% of available capacity
+        # Total overhead = prompt + output tokens + larger buffer for safety
+        # Add 15% buffer to prompt tokens to account for tokenizer inaccuracies
+        # (being extra conservative for large prompts)
+        safe_prompt_tokens = int(prompt_tokens * 1.15)
+        total_overhead = safe_prompt_tokens + self.max_output_tokens + 500  # 500 for message structure overhead
+
+        # Use VERY conservative safety margin (70%) to account for tokenizer differences
+        # between our counting and OpenAI's counting. Better to chunk more than fail.
+        safety_margin = 0.70
 
         available_for_content = int(
             (self.model_context_window - total_overhead) * safety_margin)
 
+        # Hard cap: never allow more than 180k tokens for content
+        # This ensures we stay well under the 272k limit even with large prompts
+        available_for_content = min(available_for_content, 180000)
+
         # Ensure a sensible lower bound
-        return max(12000, available_for_content)
+        available_for_content = max(12000, available_for_content)
+        return available_for_content
 
     def split_text_into_chunks(self, text, max_tokens_per_chunk):
         """
