
Commit 464fb0e

Increase concurrency and cache limits for AI moderation
- Raised ThreadPoolExecutor worker caps from 10 to 50 in AI moderation and rule processing for better parallelism under load.
- Increased the max cache size in result_cache.py from 10k to 50k entries (cleanup threshold 9k -> 45k) to handle higher concurrency.
- Raised the OpenAI client's httpx connection pool limits (keepalive 200 -> 500, total 1000 -> 2000) for better burst handling.
- Increased the max chunk size for content processing from 100k to 150k characters: fewer chunks per request with large-context models.
1 parent: bede582

4 files changed: 14 additions & 10 deletions


app/services/ai/ai_moderator.py

Lines changed: 7 additions & 4 deletions
@@ -308,8 +308,9 @@ def moderate_content(self, content, content_type='text', custom_prompt=None):
         if custom_prompt:
             # CRITICAL FIX: Hard limit on character count (tiktoken is broken for some content)
             # Assume worst case: 1 char = 1 token for safety
-            # Increased from 50k to 100k for better performance (fewer chunks = faster)
-            MAX_CHARS_PER_CHUNK = 100000  # ~100k tokens worst case, safe for large context models
+            # Increased from 50k -> 100k -> 150k for better performance (fewer chunks = faster)
+            # With 400k context window and 70% safety margin, 150k is safe
+            MAX_CHARS_PER_CHUNK = 150000  # ~150k tokens worst case, safe for large context models
             content_chars = len(content)

             # Force chunking if content is too large BY CHARACTER COUNT
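As a sanity check on the new limit: a 400k-token context window with a 70% safety margin leaves about 280k usable tokens, so a 150,000-character chunk fits even at the assumed worst case of one token per character. A minimal standalone sketch of this character-based splitting (the function name is hypothetical; the real logic lives inside moderate_content):

MAX_CHARS_PER_CHUNK = 150_000  # worst case: 1 char = 1 token

def split_by_chars(content: str) -> list[str]:
    # 400_000 * 0.7 = 280_000 usable tokens, so a 150k-char slice
    # is safe even when every character costs a full token.
    return [content[i:i + MAX_CHARS_PER_CHUNK]
            for i in range(0, len(content), MAX_CHARS_PER_CHUNK)]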
@@ -324,8 +325,9 @@ def moderate_content(self, content, content_type='text', custom_prompt=None):
                 chunks.append(content[i:i + MAX_CHARS_PER_CHUNK])

             # Process all chunks IN PARALLEL for maximum speed
+            # Increased from 10 to 50 workers for better concurrency under load
             chunk_results = []
-            with ThreadPoolExecutor(max_workers=min(len(chunks), 10)) as executor:
+            with ThreadPoolExecutor(max_workers=min(len(chunks), 50)) as executor:
                 # Submit all chunks at once with context wrapper
                 future_to_chunk = {
                     executor.submit(self._context_wrapper, self._analyze_with_custom_prompt, chunk, custom_prompt): i
@@ -373,8 +375,9 @@ def moderate_content(self, content, content_type='text', custom_prompt=None):
             chunks = self.split_text_into_chunks(content, max_content_tokens)

             # Process all chunks IN PARALLEL for maximum speed
+            # Increased from 10 to 50 workers for better concurrency under load
             chunk_results = []
-            with ThreadPoolExecutor(max_workers=min(len(chunks), 10)) as executor:
+            with ThreadPoolExecutor(max_workers=min(len(chunks), 50)) as executor:
                 # Submit all chunks at once with context wrapper
                 future_to_chunk = {
                     executor.submit(self._context_wrapper, self._run_enhanced_default_moderation, chunk): i
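Both hunks fan chunks out with the same pattern, and rule_processor.py below reuses it for rules. A minimal sketch, with a hypothetical analyze callable standing in for the wrapped moderation methods:

from concurrent.futures import ThreadPoolExecutor, as_completed

def moderate_chunks(chunks, analyze):
    # One task per chunk, capped at 50 workers as in the diff;
    # min() keeps small inputs from spawning idle threads.
    results = [None] * len(chunks)
    with ThreadPoolExecutor(max_workers=min(len(chunks), 50)) as executor:
        future_to_index = {executor.submit(analyze, chunk): i
                           for i, chunk in enumerate(chunks)}
        for future in as_completed(future_to_index):
            results[future_to_index[future]] = future.result()
    return results  # original chunk order, whatever the completion order

The future-to-index map is why results come back in chunk order even though as_completed yields futures as they finish.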

app/services/ai/openai_client.py

Lines changed: 3 additions & 3 deletions
@@ -37,9 +37,9 @@ def _get_or_create_client(cls, api_key):
                 pool=2.0
             ),
             limits=httpx.Limits(
-                max_keepalive_connections=200,  # Increased connection pool
-                max_connections=1000,  # Increased total connections
-                keepalive_expiry=300.0  # Longer keepalive
+                max_keepalive_connections=500,  # Increased from 200 to 500 for high concurrency
+                max_connections=2000,  # Increased from 1000 to 2000 for burst handling
+                keepalive_expiry=300.0  # Longer keepalive (5 minutes)
             ),
             http2=True
         )
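For reference, a standalone httpx client with the new pool values. Only pool=2.0 and the Limits arguments come from this diff; the 30-second default timeout and the client construction around them are assumptions:

import httpx

client = httpx.Client(
    timeout=httpx.Timeout(30.0, pool=2.0),  # 30s default is assumed; pool=2.0 is in the diff
    limits=httpx.Limits(
        max_keepalive_connections=500,  # idle connections kept warm for reuse
        max_connections=2000,           # hard cap during bursts
        keepalive_expiry=300.0,         # recycle idle sockets after 5 minutes
    ),
    http2=True,  # requires the httpx[http2] extra
)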

app/services/ai/result_cache.py

Lines changed: 2 additions & 2 deletions
@@ -13,8 +13,8 @@ class ResultCache:
     _shared_cache_ttl = 3600  # 1 hour
     _current_request_stores = 0  # Track stores per request
     _cache_lock = RLock()  # Thread-safe operations
-    _max_cache_size = 10000  # Maximum cache entries (increased to handle high volume)
-    _cleanup_threshold = 9000  # Start cleanup when reaching 90% capacity
+    _max_cache_size = 50000  # Maximum cache entries (increased from 10k to 50k for high concurrency)
+    _cleanup_threshold = 45000  # Start cleanup when reaching 90% capacity
     _last_cleanup_time = 0
     _cleanup_interval = 900  # Check for expired entries every 15 minutes
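A sketch of how the two constants interact: stores trigger cleanup at 90% of capacity, so the cache never hits its hard limit mid-request. The oldest-first eviction shown is an assumption; this diff doesn't show the real policy:

import time
from threading import RLock

class BoundedCache:
    # Sketch of threshold-triggered cleanup; not the real ResultCache.
    _cache = {}                  # key -> (value, stored_at)
    _lock = RLock()
    _max_cache_size = 50_000
    _cleanup_threshold = 45_000  # 90% of capacity

    @classmethod
    def store(cls, key, value):
        with cls._lock:
            if len(cls._cache) >= cls._cleanup_threshold:
                cls._evict_oldest()
            cls._cache[key] = (value, time.time())

    @classmethod
    def _evict_oldest(cls):
        # Assumed policy: drop the oldest 10% of capacity so cleanup
        # runs only occasionally rather than on every store.
        oldest = sorted(cls._cache, key=lambda k: cls._cache[k][1])
        for key in oldest[:cls._max_cache_size // 10]:
            del cls._cache[key]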

app/services/moderation/rule_processor.py

Lines changed: 2 additions & 1 deletion
@@ -103,7 +103,8 @@ def process_single_ai_rule(rule):
            return (rule.id, None)

    # Execute in parallel
-    with ThreadPoolExecutor(max_workers=min(len(ai_rules), 10)) as executor:
+    # Increased from 10 to 50 workers for better concurrency under load
+    with ThreadPoolExecutor(max_workers=min(len(ai_rules), 50)) as executor:
        futures = {executor.submit(
            process_single_ai_rule, rule): rule for rule in ai_rules}
