@@ -295,30 +295,40 @@ def moderate_content(self, content, content_type='text', custom_prompt=None):
295295
296296 # STEP 1: If custom prompt is provided, use ONLY custom prompt analysis
297297 if custom_prompt :
298- # Calculate max content tokens based on custom prompt size
299- max_content_tokens = self .calculate_max_content_tokens (custom_prompt )
298+ # CRITICAL FIX: Hard limit on character count (tiktoken is broken for some content)
299+ # Assume worst case: 1 char = 1 token for safety
300+ MAX_CHARS_PER_CHUNK = 50000 # ~50k tokens worst case, safe for any prompt
301+ content_chars = len (content )
302+
300303 current_app .logger .info (
301- f"Content has { content_tokens } tokens, max allowed: { max_content_tokens } " )
304+ f"Content: { content_tokens } tokens (tiktoken), { content_chars } chars" )
305+
306+ # Force chunking if content is too large BY CHARACTER COUNT
307+ if content_chars > MAX_CHARS_PER_CHUNK :
308+ current_app .logger .warning (
309+ f"FORCING CHUNKING: Content too large ({ content_chars } chars > { MAX_CHARS_PER_CHUNK } )" )
310+
311+ # Split by character count, not tokens
312+ chunks = []
313+ for i in range (0 , content_chars , MAX_CHARS_PER_CHUNK ):
314+ chunks .append (content [i :i + MAX_CHARS_PER_CHUNK ])
302315
303- if content_tokens <= max_content_tokens :
304- return self ._analyze_with_custom_prompt (content , custom_prompt )
305- else :
306- # Split content and analyze each chunk
307- chunks = self .split_text_into_chunks (content , max_content_tokens )
308316 current_app .logger .info (
309- f"Split content into { len (chunks )} chunks for custom prompt analysis " )
317+ f"Split content into { len (chunks )} chunks by character count " )
310318
311319 chunk_results = []
312320 for i , chunk in enumerate (chunks ):
313- result = self ._analyze_with_custom_prompt (
314- chunk , custom_prompt )
321+ result = self ._analyze_with_custom_prompt (chunk , custom_prompt )
315322 chunk_results .append (result )
316323
317- # Early exit if chunk is rejected (for efficiency)
324+ # Early exit if chunk is rejected
318325 if result ['decision' ] == 'rejected' :
319326 break
320327
321328 return self ._combine_chunk_results (chunk_results , len (content ))
329+ else :
330+ # Content is small enough, process normally
331+ return self ._analyze_with_custom_prompt (content , custom_prompt )
322332
323333 # STEP 2: For default moderation, run baseline check first
324334 # Note: OpenAI moderation API has its own limits, but typically handles larger content
0 commit comments