Skip to content

Commit 2ab0169

Browse files
refactor: separate staleness control from workflow execution (#444)
* isolate staleness control * . * pass test * polish docstring * polish docstring * rename to staleness manager * minor fix test_megatron_engine * fix naming * Update areal/core/staleness_manager.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --------- Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
1 parent 93fb172 commit 2ab0169

9 files changed

Lines changed: 879 additions & 70 deletions

areal/api/workflow_api.py

Lines changed: 70 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
from areal.api.cli_args import InferenceEngineConfig
1919
from areal.api.engine_api import InferenceEngine
20-
from areal.api.io_struct import RolloutStat
20+
from areal.core.staleness_manager import StalenessManager
2121
from areal.experimental.openai.types import CompletionWithTokenLogpReward
2222
from areal.utils import logging
2323
from areal.utils.data import concat_padded_tensors, cycle_dataloader
@@ -256,24 +256,28 @@ def __init__(
256256
self,
257257
config: InferenceEngineConfig,
258258
inference_engine: "InferenceEngine",
259+
staleness_manager: StalenessManager | None = None,
259260
):
260261
self.max_concurrent_rollouts = (
261262
config.max_concurrent_rollouts or config.consumer_batch_size
262263
)
264+
self.consumer_batch_size = config.consumer_batch_size
265+
263266
self.config = config
264267
self.exiting = threading.Event()
265268
self.paused = threading.Event()
266-
self.lock = threading.Lock()
267269

268270
self.inference_engine = inference_engine
269271

272+
# Use provided staleness manager or create a default one
273+
# The manager will be properly initialized in initialize()
274+
self.staleness_manager = staleness_manager
275+
270276
qsize = config.queue_size or self.max_concurrent_rollouts * 16
271277
self.input_queue = queue.Queue(maxsize=qsize)
272278
self.output_queue = queue.Queue(maxsize=qsize)
273279
self.result_cache: List[_TimedResult] = []
274280

275-
self.rollout_stat = RolloutStat()
276-
277281
# For trajectory format checking
278282
self._expected_trajectory_keys: set | None = None
279283

@@ -282,18 +286,31 @@ def initialize(self, logger=None, train_data_parallel_size: int | None = None):
282286
logger = logging.getLogger("WorkflowExecutor")
283287
self.logger = logger
284288

285-
if train_data_parallel_size is not None:
286-
self.dp_world_size = train_data_parallel_size
287-
else:
288-
if dist.is_initialized():
289-
if not mpu.is_initialized():
290-
self.dp_world_size = dist.get_world_size()
291-
else:
292-
self.dp_world_size = mpu.get_data_parallel_world_size()
289+
# Initialize staleness manager if not provided
290+
if self.staleness_manager is None:
291+
if train_data_parallel_size is not None:
292+
dp_world_size = train_data_parallel_size
293293
else:
294-
self.dp_world_size = 1
294+
if dist.is_initialized():
295+
if not mpu.is_initialized():
296+
dp_world_size = dist.get_world_size()
297+
else:
298+
dp_world_size = mpu.get_data_parallel_world_size()
299+
else:
300+
dp_world_size = 1
301+
302+
# Apply data parallel scaling
303+
max_concurrent_rollouts = max(
304+
1, self.max_concurrent_rollouts // dp_world_size
305+
)
306+
consumer_batch_size = max(1, self.consumer_batch_size // dp_world_size)
307+
308+
self.staleness_manager = StalenessManager(
309+
max_concurrent_rollouts=max_concurrent_rollouts,
310+
consumer_batch_size=consumer_batch_size,
311+
max_staleness=self.config.max_head_offpolicyness,
312+
)
295313

296-
self.rollout_tasks: Dict[str, _RolloutTask] = {}
297314
self.rollout_thread = threading.Thread(
298315
target=self._rollout_thread, daemon=True
299316
) # set daemon=True to automatically exit when error occurs
@@ -304,17 +321,8 @@ def destroy(self):
304321
self.rollout_thread.join()
305322

306323
def get_capacity(self):
    """Return how many new rollouts may be launched right now.

    All staleness/concurrency arithmetic is delegated to the staleness
    manager, keyed on the inference engine's current weight version.
    The result can be zero or negative when we are at or over capacity.
    """
    current_version = self.inference_engine.get_version()
    return self.staleness_manager.get_capacity(current_version)
319327

320328
def _rollout_thread(self):
@@ -325,14 +333,13 @@ def _rollout_thread(self):
325333
traceback.print_exc()
326334

327335
async def _rollout_thread_async(self):
328-
rollout_tasks = self.rollout_tasks
336+
rollout_tasks: Dict[str, _RolloutTask] = {}
329337
rid = 0
330338
try:
331339
while not self.exiting.is_set():
332340
# Check capacity
333341
capacity = self.get_capacity()
334342
# Create new rollout task
335-
self.lock.acquire()
336343
while (
337344
capacity > 0
338345
and not self.paused.is_set()
@@ -348,19 +355,19 @@ async def _rollout_thread_async(self):
348355
rollout_tasks[str(rid)] = _RolloutTask(
349356
create_time=time.monotonic_ns(), task=task, task_input=x
350357
)
351-
self.rollout_stat.submitted += 1
352-
self.rollout_stat.running += 1
358+
# Notify staleness manager
359+
self.staleness_manager.on_rollout_submitted()
353360
if self.config.enable_rollout_tracing:
361+
stat = self.staleness_manager.get_stats()
354362
self.logger.info(
355363
f"Submit rollout rid {rid}. "
356-
f"Submit: {self.rollout_stat.submitted}, "
357-
f"running: {self.rollout_stat.running}, "
358-
f"accepted: {self.rollout_stat.accepted}."
364+
f"Submit: {stat.submitted}, "
365+
f"running: {stat.running}, "
366+
f"accepted: {stat.accepted}."
359367
)
360368
capacity -= 1
361369
rid += 1
362370
tasks = [x.task for x in rollout_tasks.values()]
363-
self.lock.release()
364371

365372
# Wait for rollout completion
366373
done = []
@@ -396,26 +403,25 @@ async def _rollout_thread_async(self):
396403
)
397404
assert traj is None or isinstance(traj, dict), traj
398405
task_rid = task.get_name()
399-
with self.lock:
400-
task_obj = rollout_tasks.pop(task_rid)
401-
self.rollout_stat.accepted += 1
402-
self.rollout_stat.running -= 1
403-
if self.config.enable_rollout_tracing:
404-
self.logger.info(
405-
f"Finish rollout {task_rid}. "
406-
f"Submit: {self.rollout_stat.submitted}, "
407-
f"running: {self.rollout_stat.running}, "
408-
f"accepted: {self.rollout_stat.accepted}."
409-
)
406+
task_obj = rollout_tasks.pop(task_rid)
410407

411408
task_input = task_obj.task_input
412-
if traj is not None and (
409+
# Check if trajectory should be accepted
410+
should_accept_traj = traj is not None and (
413411
task_input.should_accept is None
414412
or task_input.should_accept(traj)
415-
):
413+
)
414+
415+
if should_accept_traj:
416+
# Notify staleness manager of accepted rollout
417+
self.staleness_manager.on_rollout_accepted()
416418
if self.config.enable_rollout_tracing:
419+
stat = self.staleness_manager.get_stats()
417420
self.logger.info(
418-
f"Accept rollout result of task {task_rid}."
421+
f"Finish and accept rollout {task_rid}. "
422+
f"Submit: {stat.submitted}, "
423+
f"running: {stat.running}, "
424+
f"accepted: {stat.accepted}."
419425
)
420426
try:
421427
self.output_queue.put_nowait(
@@ -426,24 +432,30 @@ async def _rollout_thread_async(self):
426432
"Output queue full. Please increase queue_size."
427433
)
428434
else:
435+
# Rollout completed but was rejected
436+
# Only decrement running count since it was never accepted
437+
self.staleness_manager.on_rollout_rejected()
429438
if self.config.enable_rollout_tracing:
430-
self.logger.info(f"Rollout is rejected.")
431-
with self.lock:
432-
self.rollout_stat.accepted -= 1
439+
stat = self.staleness_manager.get_stats()
440+
self.logger.info(
441+
f"Finish but reject rollout {task_rid}. "
442+
f"Submit: {stat.submitted}, "
443+
f"running: {stat.running}, "
444+
f"accepted: {stat.accepted}."
445+
)
433446

434447
await asyncio.sleep(1)
435448
except Exception:
436449
traceback.print_exc()
437450
finally:
438451
# Cancel remaining tasks
439-
with self.lock:
440-
for task_obj in rollout_tasks.values():
441-
if not task_obj.task.done():
442-
task_obj.task.cancel()
443-
try:
444-
await task_obj.task
445-
except asyncio.CancelledError:
446-
pass
452+
for task_obj in rollout_tasks.values():
453+
if not task_obj.task.done():
454+
task_obj.task.cancel()
455+
try:
456+
await task_obj.task
457+
except asyncio.CancelledError:
458+
pass
447459

448460
def submit(
449461
self,

areal/core/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
"""Core components for AREAL."""

areal/core/staleness_manager.py

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
"""Staleness-aware capacity manager for rollout generation.
2+
3+
This module provides the StalenessManager class which manages capacity
4+
and staleness constraints for asynchronous rollout generation in RL training.
5+
"""
6+
7+
from threading import Lock
8+
9+
from areal.api.io_struct import RolloutStat
10+
11+
12+
class StalenessManager:
    """Thread-safe bookkeeping of rollout capacity under staleness limits.

    Two constraints are enforced when handing out capacity:

    1. Concurrency: no more than ``max_concurrent_rollouts`` rollouts may
       be in flight simultaneously.
    2. Staleness: the total sample count produced so far (accepted plus
       running) may not outrun the trainer by more than ``max_staleness``
       versions' worth of consumer batches, so samples are never too
       off-policy by the time they are consumed.

    Parameters
    ----------
    max_concurrent_rollouts : int
        Upper bound on simultaneously running rollouts.
    consumer_batch_size : int
        Batch size the trainer consumes per training step.
    max_staleness : int
        Largest tolerated version gap (off-policyness) for a rollout.
    """

    def __init__(
        self,
        max_concurrent_rollouts: int,
        consumer_batch_size: int,
        max_staleness: int,
    ):
        """Store the capacity limits and set up thread-safe counters.

        Parameters
        ----------
        max_concurrent_rollouts : int
            Upper bound on simultaneously running rollouts.
        consumer_batch_size : int
            Batch size the trainer consumes per training step.
        max_staleness : int
            Largest tolerated version gap (off-policyness) for a rollout.
        """
        self.max_concurrent_rollouts = max_concurrent_rollouts
        self.consumer_batch_size = consumer_batch_size
        self.max_staleness = max_staleness

        # All reads/writes of rollout_stat go through this lock.
        self.lock = Lock()
        self.rollout_stat = RolloutStat()

    def get_capacity(self, current_version: int) -> int:
        """Compute how many new rollouts may be launched right now.

        The returned value is the tighter of two budgets:

        * concurrency budget: ``max_concurrent_rollouts - running``
        * staleness budget:
          ``(max_staleness + current_version + 1) * consumer_batch_size
          - (accepted + running)``

        Parameters
        ----------
        current_version : int
            Current version of the model weights.

        Returns
        -------
        int
            Available rollout slots; may be negative when over capacity.
        """
        with self.lock:
            running = self.rollout_stat.running
            produced = self.rollout_stat.accepted + running

            # Concurrency constraint (clamped so at least one slot exists).
            concurrency_room = max(1, self.max_concurrent_rollouts) - running

            # Staleness constraint: cap total samples relative to the
            # trainer's progress so nothing exceeds max_staleness by
            # consumption time.
            batch = max(1, self.consumer_batch_size)
            budget = (self.max_staleness + current_version + 1) * batch
            staleness_room = budget - produced

            return min(concurrency_room, staleness_room)

    def on_rollout_submitted(self) -> None:
        """Record that a rollout was submitted (thread-safe).

        Increments both the submitted and running counters.
        """
        with self.lock:
            self.rollout_stat.submitted += 1
            self.rollout_stat.running += 1

    def on_rollout_accepted(self) -> None:
        """Record that a rollout finished and was accepted (thread-safe).

        Moves one rollout from running to accepted.
        """
        with self.lock:
            self.rollout_stat.accepted += 1
            self.rollout_stat.running -= 1

    def on_rollout_rejected(self) -> None:
        """Record that a rollout finished but was rejected (thread-safe).

        Used when the workflow returned None or ``should_accept`` filtered
        the trajectory out. The rollout was never counted as accepted, so
        only the running counter is decremented.
        """
        with self.lock:
            self.rollout_stat.running -= 1

    def get_stats(self) -> RolloutStat:
        """Return a consistent snapshot of the rollout statistics.

        Returns
        -------
        RolloutStat
            Copy of the current counters (submitted, accepted, running).
        """
        with self.lock:
            snapshot = RolloutStat(
                submitted=self.rollout_stat.submitted,
                accepted=self.rollout_stat.accepted,
                running=self.rollout_stat.running,
            )
        return snapshot

0 commit comments

Comments (0)