Commit 3a26a50

Enable Flask context in parallel AI moderation tasks
Introduces a context wrapper in AIModerator to ensure Flask application context is available during parallel chunk processing. Also updates logging for chunking and refines cache info reporting in ModerationOrchestrator for improved clarity and accuracy.
1 parent 9e632da commit 3a26a50
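
Why the wrapper matters, as a minimal self-contained sketch (illustrative names, not code from this repo): Flask's current_app is a context-local proxy, so worker threads started by ThreadPoolExecutor do not inherit the application context, and touching current_app in one raises "Working outside of application context". Capturing the real app object once and re-entering an app context inside each task restores access, which is exactly the pattern this commit introduces.

```python
# Minimal sketch (assumed, not repo code) of Flask app-context propagation
# into ThreadPoolExecutor workers; analyze_chunk is a hypothetical task.
from concurrent.futures import ThreadPoolExecutor

from flask import Flask, current_app

app = Flask(__name__)
app.config["OPENAI_CHAT_MODEL"] = "gpt-5-2025-08-07"


def analyze_chunk(chunk):
    # current_app only resolves inside an application context; in a bare
    # worker thread this line would raise RuntimeError.
    return current_app.config["OPENAI_CHAT_MODEL"], len(chunk)


def run_in_app_context(flask_app, func, *args, **kwargs):
    # Same idea as the _context_wrapper introduced in this commit.
    with flask_app.app_context():
        return func(*args, **kwargs)


with app.app_context():
    # Unwrap the proxy to get the actual Flask instance, as __init__ now does.
    real_app = current_app._get_current_object()

chunks = ["first chunk", "second chunk"]
with ThreadPoolExecutor(max_workers=2) as executor:
    futures = [
        executor.submit(run_in_app_context, real_app, analyze_chunk, c)
        for c in chunks
    ]
    print([f.result() for f in futures])
```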

2 files changed: 18 additions & 10 deletions

app/services/ai/ai_moderator.py

Lines changed: 16 additions & 6 deletions
```diff
@@ -16,6 +16,8 @@ class AIModerator:
     def __init__(self):
         self.client_manager = OpenAIClient()
         self.cache = ResultCache()
+        # Capture Flask app instance for context propagation in parallel processing
+        self.app = current_app._get_current_object()
         # Load model and token settings from config
         cfg = current_app.config
         self.model_name = cfg.get('OPENAI_CHAT_MODEL', 'gpt-5-2025-08-07')
@@ -30,6 +32,14 @@ def __init__(self):
         except (KeyError, ValueError):
             self.tokenizer = tiktoken.get_encoding("cl100k_base")
 
+    def _context_wrapper(self, func, *args, **kwargs):
+        """
+        Wrapper to execute functions within Flask application context.
+        Required for parallel processing to access current_app.
+        """
+        with self.app.app_context():
+            return func(*args, **kwargs)
+
     def count_tokens(self, text):
         """Count the number of tokens in a text string"""
         try:
@@ -303,8 +313,8 @@ def moderate_content(self, content, content_type='text', custom_prompt=None):
 
         # Force chunking if content is too large BY CHARACTER COUNT
         if content_chars > MAX_CHARS_PER_CHUNK:
-            current_app.logger.warning(
-                f"FORCING CHUNKING: Content too large ({content_chars} chars > {MAX_CHARS_PER_CHUNK})")
+            current_app.logger.debug(f"Chunking content: {content_chars} chars split into {
+                (content_chars // MAX_CHARS_PER_CHUNK) + 1} chunks")
 
         # Split by character count, not tokens
         chunks = []
@@ -314,9 +324,9 @@ def moderate_content(self, content, content_type='text', custom_prompt=None):
         # Process all chunks IN PARALLEL for maximum speed
         chunk_results = []
         with ThreadPoolExecutor(max_workers=min(len(chunks), 10)) as executor:
-            # Submit all chunks at once
+            # Submit all chunks at once with context wrapper
             future_to_chunk = {
-                executor.submit(self._analyze_with_custom_prompt, chunk, custom_prompt): i
+                executor.submit(self._context_wrapper, self._analyze_with_custom_prompt, chunk, custom_prompt): i
                 for i, chunk in enumerate(chunks)
             }
 
@@ -349,9 +359,9 @@ def moderate_content(self, content, content_type='text', custom_prompt=None):
         # Process all chunks IN PARALLEL for maximum speed
         chunk_results = []
         with ThreadPoolExecutor(max_workers=min(len(chunks), 10)) as executor:
-            # Submit all chunks at once
+            # Submit all chunks at once with context wrapper
             future_to_chunk = {
-                executor.submit(self._run_enhanced_default_moderation, chunk): i
+                executor.submit(self._context_wrapper, self._run_enhanced_default_moderation, chunk): i
                 for i, chunk in enumerate(chunks)
             }
 
```
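The hunks above only show the submit side. For completeness, a hedged sketch of the future-to-index pattern they rely on (the collection loop lives outside these hunks, so the repo's exact code may differ): each future is keyed to its chunk index so results can be slotted back in original order as they complete.

```python
# Self-contained sketch of the future_to_chunk pattern: submit all chunks
# at once, then place each result at its original position on completion.
# analyze() is a stand-in for the real per-chunk moderation call.
from concurrent.futures import ThreadPoolExecutor, as_completed


def analyze(chunk):
    return chunk.upper()


chunks = ["alpha", "beta", "gamma"]
results = [None] * len(chunks)
with ThreadPoolExecutor(max_workers=min(len(chunks), 10)) as executor:
    future_to_chunk = {executor.submit(analyze, c): i for i, c in enumerate(chunks)}
    for future in as_completed(future_to_chunk):
        results[future_to_chunk[future]] = future.result()

print(results)  # ['ALPHA', 'BETA', 'GAMMA'], in original chunk order
```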
app/services/moderation_orchestrator.py

Lines changed: 2 additions & 4 deletions
```diff
@@ -80,10 +80,8 @@ async def moderate_content(self, content_id, request_start_time=None):
 
         # Get cache summary for this request
         cache_summary = self.ai_moderator.cache.get_request_cache_summary()
-        if cache_summary['stores'] > 0:
-            cache_info = f" [Cached {cache_summary['stores']} results, total: {cache_summary['total']}]"
-        elif total_time < 1.0:
-            cache_info = " [Cache hit]"
+        if total_time < 0.5 and cache_summary['hits'] > 0:
+            cache_info = " [cached]"
         else:
             cache_info = ""
 
```
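The revised reporting rule is easier to see in isolation. A small sketch, assuming (per the diff) that the per-request summary exposes a 'hits' count; the helper name is illustrative. A request is tagged cached only when it both returned quickly and actually hit the cache, instead of labeling any sub-second request a cache hit.

```python
# Hedged sketch of the revised cache_info rule; the 0.5 s threshold and the
# 'hits' key come from the diff, cache_label() is a hypothetical helper.
def cache_label(total_time, cache_summary):
    if total_time < 0.5 and cache_summary.get("hits", 0) > 0:
        return " [cached]"
    return ""


print(cache_label(0.12, {"hits": 3, "stores": 0}))  # " [cached]"
print(cache_label(0.12, {"hits": 0, "stores": 2}))  # ""  (fast, but no hits)
print(cache_label(1.40, {"hits": 3, "stores": 0}))  # ""  (hits, but slow)
```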