Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions autofit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from . import conf
from . import exc
from . import mock as m
from .aggregator.base import AggBase
from .database.aggregator.aggregator import GridSearchAggregator
from .graphical.expectation_propagation.history import EPHistory
from .graphical.declarative.factor.analysis import AnalysisFactor
Expand Down
171 changes: 171 additions & 0 deletions autofit/aggregator/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from functools import partial
from typing import List, Optional, Generator

import autofit as af


class AggBase(ABC):
    def __init__(self, aggregator: af.Aggregator):
        """
        Base aggregator wrapper, which makes it straightforward to compute generators of instances of objects
        from specific samples of a non-linear search.

        The standard aggregator makes it straightforward to create instances from the model. However, if there
        are other classes which are generated from the model, but are not part of the model itself, creating
        instances of them with samples from the non-linear search, via a generator, requires manual code to be
        written.

        The base aggregator can be used to streamline this process and create a concise API for generating these
        instances.

        This is achieved by overwriting the `object_via_gen_from` method, which is used to create the object from
        the non-linear search samples. This method is then used to create generators of the object from the
        non-linear search samples.

        Parameters
        ----------
        aggregator
            A PyAutoFit aggregator containing the results of non-linear searches performed by PyAutoFit.
        """
        self.aggregator = aggregator

    @abstractmethod
    def object_via_gen_from(
        self, fit: af.Fit, instance: Optional[af.ModelInstance] = None
    ) -> object:
        """
        Create the object of interest from a database `Fit` and (optionally) a model instance.

        For example, in the `PlaneAgg` object, this function is overwritten such that it creates a `Plane` from a
        `ModelInstance` that contains the galaxies of a sample from a non-linear search.

        Parameters
        ----------
        fit
            A PyAutoFit database Fit object containing the generators of the results of PyAutoGalaxy model-fits.
        instance
            A manual instance that overwrites the max log likelihood instance in fit (e.g. for drawing the instance
            randomly from the PDF). If omitted, implementations are expected to use the maximum log likelihood
            instance of the fit.

        Returns
        -------
        Generator
            A generator that creates an object used in the model-fitting process of a non-linear search.
        """

    def max_log_likelihood_gen_from(self) -> Generator:
        """
        Returns a generator using the maximum likelihood instance of a non-linear search.

        This generator creates a list containing the maximum log likelihood instance of every result loaded in
        the aggregator.

        For example, in **PyAutoLens**, by overwriting the `object_via_gen_from` method this returns a generator
        of `Plane` objects from a PyAutoFit aggregator. This generator then generates a list of the maximum log
        likelihood `Plane` objects for all aggregator results.
        """

        def func_gen(fit: af.Fit) -> Generator:
            # No explicit instance is passed, so the overridden `object_via_gen_from`
            # uses the maximum log likelihood instance of the fit.
            return self.object_via_gen_from(fit=fit)

        return self.aggregator.map(func=func_gen)

    def weights_above_gen_from(self, minimum_weight: float) -> List:
        """
        Returns a list of all weights above a minimum weight for every result.

        Parameters
        ----------
        minimum_weight
            The minimum weight of a non-linear sample, such that samples with a weight below this value are discarded
            and not included in the generator.
        """

        def func_gen(fit: af.Fit, minimum_weight: float) -> List[object]:
            samples = fit.value(name="samples")

            weight_list = []

            for sample in samples.sample_list:
                if sample.weight > minimum_weight:
                    weight_list.append(sample.weight)

            return weight_list

        # Bind `minimum_weight` so that `map` can call the function with a `fit` alone.
        func = partial(func_gen, minimum_weight=minimum_weight)

        return self.aggregator.map(func=func)

    def all_above_weight_gen_from(self, minimum_weight: float) -> Generator:
        """
        Returns a generator which for every result generates a list of objects whose parameter values are all those
        in the non-linear search with a weight above an input `minimum_weight` value. This enables straightforward
        error estimation.

        This generator creates lists containing instances whose non-linear sample weight are above the value of
        `minimum_weight`. For example, if the aggregator contains 10 results and each result has 100 samples above the
        `minimum_weight`, a list of 10 entries will be returned, where each entry in this list contains 100 objects
        paired with each non-linear sample.

        For example, in **PyAutoLens**, by overwriting the `object_via_gen_from` method this returns a generator
        of `Plane` objects from a PyAutoFit aggregator. This generator then generates lists of `Plane` objects
        corresponding to all non-linear search samples above the `minimum_weight`.

        Parameters
        ----------
        minimum_weight
            The minimum weight of a non-linear sample, such that samples with a weight below this value are discarded
            and not included in the generator.
        """

        def func_gen(fit: af.Fit, minimum_weight: float) -> List[object]:
            samples = fit.value(name="samples")

            all_above_weight_list = []

            for sample in samples.sample_list:
                if sample.weight > minimum_weight:
                    instance = sample.instance_for_model(model=samples.model)

                    all_above_weight_list.append(
                        self.object_via_gen_from(fit=fit, instance=instance)
                    )

            return all_above_weight_list

        func = partial(func_gen, minimum_weight=minimum_weight)

        return self.aggregator.map(func=func)

    def randomly_drawn_via_pdf_gen_from(self, total_samples: int) -> Generator:
        """
        Returns a generator which for every result generates a list of objects whose parameter values are drawn
        randomly from the PDF. This enables straightforward error estimation.

        This generator creates lists containing instances that are drawn randomly from the PDF for every result loaded
        in the aggregator. For example, if the aggregator contains 10 results and `total_samples=100`, a list of 10
        entries will be returned, where each entry in this list contains 100 objects paired with non-linear samples
        randomly drawn from the PDF.

        For example, in **PyAutoLens**, by overwriting the `object_via_gen_from` method this returns a generator
        of `Plane` objects from a PyAutoFit aggregator. This generator then generates lists of `Plane` objects
        corresponding to non-linear search samples randomly drawn from the PDF.

        Parameters
        ----------
        total_samples
            The total number of non-linear search samples that should be randomly drawn from the PDF.
        """

        def func_gen(fit: af.Fit, total_samples: int) -> List[object]:
            samples = fit.value(name="samples")

            # Each draw is an independent random sample from the PDF; the loop
            # index itself is unused.
            return [
                self.object_via_gen_from(
                    fit=fit,
                    instance=samples.draw_randomly_via_pdf(),
                )
                for _ in range(total_samples)
            ]

        func = partial(func_gen, total_samples=total_samples)

        return self.aggregator.map(func=func)
141 changes: 104 additions & 37 deletions autofit/non_linear/search/nest/nautilus/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import logging
import os
import sys
from typing import Dict, Optional
from typing import Dict, Optional, Tuple

from autofit import jax_wrapper
from autofit.database.sqlalchemy_ import sa
Expand Down Expand Up @@ -143,15 +143,13 @@ def _fit(self, model: AbstractPriorModel, analysis):
fitness=fitness,
model=model,
analysis=analysis,
checkpoint_exists=checkpoint_exists,
)
else:
if not self.using_mpi:
search_internal = self.fit_multiprocessing(
fitness=fitness,
model=model,
analysis=analysis,
checkpoint_exists=checkpoint_exists,
)
else:
search_internal = self.fit_mpi(
Expand Down Expand Up @@ -194,7 +192,7 @@ def checkpoint_file(self):
except TypeError:
pass

def fit_x1_cpu(self, fitness, model, analysis, checkpoint_exists: bool):
def fit_x1_cpu(self, fitness, model, analysis):
"""
Perform the non-linear search, using one CPU core.

Expand All @@ -211,8 +209,6 @@ def fit_x1_cpu(self, fitness, model, analysis, checkpoint_exists: bool):
analysis
Contains the data and the log likelihood function which fits an instance of the model to the data, returning
the log likelihood the search maximizes.
checkpoint_exists
Does the checkpoint file corresponding do a previous run of this search exist?
"""

self.logger.info(
Expand All @@ -231,25 +227,9 @@ def fit_x1_cpu(self, fitness, model, analysis, checkpoint_exists: bool):
**self.config_dict_search,
)

if checkpoint_exists:
self.output_sampler_results(search_internal=search_internal)

self.perform_update(
model=model,
analysis=analysis,
during_analysis=True,
search_internal=search_internal,
)
return self.call_search(search_internal=search_internal, model=model, analysis=analysis)

search_internal.run(
**self.config_dict_run,
)

self.output_sampler_results(search_internal=search_internal)

return search_internal

def fit_multiprocessing(self, fitness, model, analysis, checkpoint_exists: bool):
def fit_multiprocessing(self, fitness, model, analysis):
"""
Perform the non-linear search, using multiple CPU cores parallelized via Python's multiprocessing module.

Expand All @@ -269,8 +249,6 @@ def fit_multiprocessing(self, fitness, model, analysis, checkpoint_exists: bool)
analysis
Contains the data and the log likelihood function which fits an instance of the model to the data, returning
the log likelihood the search maximizes.
checkpoint_exists
Does the checkpoint file corresponding do a previous run of this search exist?
"""

search_internal = self.sampler_cls(
Expand All @@ -283,21 +261,68 @@ def fit_multiprocessing(self, fitness, model, analysis, checkpoint_exists: bool)
**self.config_dict_search,
)

if checkpoint_exists:
self.output_sampler_results(search_internal=search_internal)
return self.call_search(search_internal=search_internal, model=model, analysis=analysis)

self.perform_update(
model=model,
analysis=analysis,
during_analysis=True,
search_internal=search_internal,
def call_search(self, search_internal, model, analysis):
"""
The x1 CPU and multiprocessing searches both call this function to perform the non-linear search.

This function calls the search a reduced number of times, corresponding to the `iterations_per_update` of the
search. This allows the search to output results on-the-fly, for example writing to the hard-disk the latest
model and samples.

It tracks how often to do this update alongside the maximum number of iterations the search will perform.
This ensures that on-the-fly output is performed at regular intervals and that the search does not perform more
iterations than the `n_like_max` input variable.

Parameters
----------
search_internal
The single CPU or multiprocessing search which is run and performs nested sampling.
model
The model which maps parameters chosen via the non-linear search (e.g. via the priors or sampling) to
instances of the model, which are passed to the fitness function.
analysis
Contains the data and the log likelihood function which fits an instance of the model to the data, returning
the log likelihood the search maximizes.
"""

finished = False

while not finished:

iterations, total_iterations = self.iterations_from(
search_internal=search_internal
)

search_internal.run(
**self.config_dict_run,
)
config_dict_run = {
key: value
for key, value in self.config_dict_run.items()
if key != "n_like_max"
}
search_internal.run(
**config_dict_run,
n_like_max=iterations,
)

iterations_after_run = self.iterations_from(search_internal=search_internal)[1]

self.output_sampler_results(search_internal=search_internal)
if (
total_iterations == iterations_after_run
or iterations_after_run == self.config_dict_run["n_like_max"]
):
finished = True

if not finished:

self.perform_update(
model=model,
analysis=analysis,
during_analysis=True,
search_internal=search_internal
)

self.output_sampler_results(search_internal=search_internal)

return search_internal

Expand Down Expand Up @@ -361,6 +386,48 @@ def fit_mpi(self, fitness, model, analysis, checkpoint_exists: bool):

return search_internal

def iterations_from(
    self, search_internal
) -> Tuple[int, int]:
    """
    Returns the next number of iterations that a sampler call will use and the total number of iterations
    that have been performed so far.

    This is used so that the `iterations_per_update` input leads to on-the-fly output of results at regular
    intervals during the non-linear search.

    It also ensures the sampler does not perform more samples than the `n_like_max` input variable.

    NOTE(review): the docstring originally referred to "dynesty", but this method lives in the Nautilus
    search module — presumably it applies to the Nautilus sampler; confirm against the caller.

    Parameters
    ----------
    search_internal
        The internal sampler which is run and performs nested sampling.

    Returns
    -------
    The next number of iterations that a sampler run will perform and the total number of iterations
    it has performed so far.
    """

    # With NullPaths there is no output directory, so no checkpointed progress exists:
    # run either up to n_like_max, or effectively without limit.
    if isinstance(self.paths, NullPaths):
        n_like_max = self.config_dict_run.get("n_like_max")

        if n_like_max is not None:
            return n_like_max, n_like_max
        # Sentinel "unlimited" iteration count when no maximum is configured.
        return int(1e99), int(1e99)

    # The number of iterations performed so far is inferred from the size of the
    # sampler's posterior; a ValueError indicates no samples exist yet.
    try:
        total_iterations = len(search_internal.posterior()[1])
    except ValueError:
        total_iterations = 0

    # Advance by one update interval, clamped so the total never exceeds n_like_max.
    iterations = total_iterations + self.iterations_per_update

    # NOTE(review): direct indexing here contrasts with the .get() used in the NullPaths
    # branch above — this assumes "n_like_max" is always present in config_dict_run
    # outside the NullPaths case; confirm the config guarantees the key.
    if self.config_dict_run["n_like_max"] is not None:
        if iterations > self.config_dict_run["n_like_max"]:
            iterations = self.config_dict_run["n_like_max"]

    return iterations, total_iterations

def output_sampler_results(self, search_internal):
"""
Output the sampler results to hard-disk in a generalized PyAutoFit format.
Expand Down