diff --git a/autofit/__init__.py b/autofit/__init__.py index d27ed9600..ed2300b50 100644 --- a/autofit/__init__.py +++ b/autofit/__init__.py @@ -8,6 +8,7 @@ from . import conf from . import exc from . import mock as m +from .aggregator.base import AggBase from .database.aggregator.aggregator import GridSearchAggregator from .graphical.expectation_propagation.history import EPHistory from .graphical.declarative.factor.analysis import AnalysisFactor diff --git a/autofit/aggregator/base.py b/autofit/aggregator/base.py new file mode 100644 index 000000000..fbd7fd175 --- /dev/null +++ b/autofit/aggregator/base.py @@ -0,0 +1,171 @@ +from __future__ import annotations +from abc import ABC, abstractmethod +from functools import partial +from typing import List, Optional, Generator + +import autofit as af + + +class AggBase(ABC): + def __init__(self, aggregator: af.Aggregator): + """ + Base aggregator wrapper, which makes it straight forward to compute generators of instances of objects from + specific samples of a non-linear search. + + The stadard aggregator makes it straight forward to create instances from the model. However, if there + are other classes which are generated from the model. but not part of the model itself, creating + instances of them with samples from the non-linear, via a generator, requires manual code to be written. + + The base aggregator can be used to streamline this process and create a concise API for generating these + instances. + + This is achieved by overwriting the `object_via_gen_from` method, which is used to create the object from + the non-linear search samples. This method is then used to create generators of the object from the + non-linear search samples. + + Parameters + ---------- + aggregator + An PyAutoFit aggregator containing the results of non-linear searches performed by PyAutoFit. + """ + self.aggregator = aggregator + + @abstractmethod + def object_via_gen_from( + self, fit: af.Fit, instance: Optional[af.ModelInstance] = None + ) -> object: + """ + For example, in the `PlaneAgg` object, this function is overwritten such that it creates a `Plane` from a + `ModelInstance` that contains the galaxies of a sample from a non-linear search. + + Parameters + ---------- + fit + A PyAutoFit database Fit object containing the generators of the results of PyAutoGalaxy model-fits. + instance + A manual instance that overwrites the max log likelihood instance in fit (e.g. for drawing the instance + randomly from the PDF). + + Returns + ------- + Generator + A generator that creates an object used in the model-fitting process of a non-linear search. + """ + + def max_log_likelihood_gen_from(self) -> Generator: + """ + Returns a generator using the maximum likelihood instance of a non-linear search. + + This generator creates a list containing the maximum log instance of every result loaded in the aggregator. + + For example, in **PyAutoLens**, by overwriting the `make_gen_from` method this returns a generator + of `Plane` objects from a PyAutoFit aggregator. This generator then generates a list of the maximum log + likelihood `Plane` objects for all aggregator results. + """ + + def func_gen(fit: af.Fit) -> Generator: + return self.object_via_gen_from(fit=fit) + + return self.aggregator.map(func=func_gen) + + def weights_above_gen_from(self, minimum_weight: float) -> List: + """ + Returns a list of all weights above a minimum weight for every result. + + Parameters + ---------- + minimum_weight + The minimum weight of a non-linear sample, such that samples with a weight below this value are discarded + and not included in the generator. + """ + + def func_gen(fit: af.Fit, minimum_weight: float) -> List[object]: + samples = fit.value(name="samples") + + weight_list = [] + + for sample in samples.sample_list: + if sample.weight > minimum_weight: + weight_list.append(sample.weight) + + return weight_list + + func = partial(func_gen, minimum_weight=minimum_weight) + + return self.aggregator.map(func=func) + + def all_above_weight_gen_from(self, minimum_weight: float) -> Generator: + """ + Returns a generator which for every result generates a list of objects whose parameter values are all those + in the non-linear search with a weight about an input `minimum_weight` value. This enables straight forward + error estimation. + + This generator creates lists containing instances whose non-linear sample weight are above the value of + `minimum_weight`. For example, if the aggregator contains 10 results and each result has 100 samples above the + `minimum_weight`, a list of 10 entries will be returned, where each entry in this list contains 100 object's + paired with each non-linear sample. + + For example, in **PyAutoLens**, by overwriting the `make_gen_from` method this returns a generator + of `Plane` objects from a PyAutoFit aggregator. This generator then generates lists of `Plane` objects + corresponding to all non-linear search samples above the `minimum_weight`. + + Parameters + ---------- + minimum_weight + The minimum weight of a non-linear sample, such that samples with a weight below this value are discarded + and not included in the generator. + """ + + def func_gen(fit: af.Fit, minimum_weight: float) -> List[object]: + samples = fit.value(name="samples") + + all_above_weight_list = [] + + for sample in samples.sample_list: + if sample.weight > minimum_weight: + instance = sample.instance_for_model(model=samples.model) + + all_above_weight_list.append( + self.object_via_gen_from(fit=fit, instance=instance) + ) + + return all_above_weight_list + + func = partial(func_gen, minimum_weight=minimum_weight) + + return self.aggregator.map(func=func) + + def randomly_drawn_via_pdf_gen_from(self, total_samples: int): + """ + Returns a generator which for every result generates a list of objects whose parameter values are drawn + randomly from the PDF. This enables straight forward error estimation. + + This generator creates lists containing instances that are drawn randomly from the PDF for every result loaded + in the aggregator. For example, the aggregator contains 10 results and if `total_samples=100`, a list of 10 + entries will be returned, where each entry in this list contains 100 object's paired with non-linear samples + randomly drawn from the PDF. + + For example, in **PyAutoLens**, by overwriting the `make_gen_from` method this returns a generator + of `Plane` objects from a PyAutoFit aggregator. This generator then generates lists of `Plane` objects + corresponding to non-linear search samples randomly drawn from the PDF. + + Parameters + ---------- + total_samples + The total number of non-linear search samples that should be randomly drawn from the PDF. + """ + + def func_gen(fit: af.Fit, total_samples: int) -> List[object]: + samples = fit.value(name="samples") + + return [ + self.object_via_gen_from( + fit=fit, + instance=samples.draw_randomly_via_pdf(), + ) + for i in range(total_samples) + ] + + func = partial(func_gen, total_samples=total_samples) + + return self.aggregator.map(func=func) diff --git a/autofit/non_linear/search/nest/nautilus/search.py b/autofit/non_linear/search/nest/nautilus/search.py index 3861f92c1..656ad4b63 100644 --- a/autofit/non_linear/search/nest/nautilus/search.py +++ b/autofit/non_linear/search/nest/nautilus/search.py @@ -2,7 +2,7 @@ import logging import os import sys -from typing import Dict, Optional +from typing import Dict, Optional, Tuple from autofit import jax_wrapper from autofit.database.sqlalchemy_ import sa @@ -143,7 +143,6 @@ def _fit(self, model: AbstractPriorModel, analysis): fitness=fitness, model=model, analysis=analysis, - checkpoint_exists=checkpoint_exists, ) else: if not self.using_mpi: @@ -151,7 +150,6 @@ def _fit(self, model: AbstractPriorModel, analysis): fitness=fitness, model=model, analysis=analysis, - checkpoint_exists=checkpoint_exists, ) else: search_internal = self.fit_mpi( @@ -194,7 +192,7 @@ def checkpoint_file(self): except TypeError: pass - def fit_x1_cpu(self, fitness, model, analysis, checkpoint_exists: bool): + def fit_x1_cpu(self, fitness, model, analysis): """ Perform the non-linear search, using one CPU core. @@ -211,8 +209,6 @@ def fit_x1_cpu(self, fitness, model, analysis, checkpoint_exists: bool): analysis Contains the data and the log likelihood function which fits an instance of the model to the data, returning the log likelihood the search maximizes. - checkpoint_exists - Does the checkpoint file corresponding do a previous run of this search exist? """ self.logger.info( @@ -231,25 +227,9 @@ def fit_x1_cpu(self, fitness, model, analysis, checkpoint_exists: bool): **self.config_dict_search, ) - if checkpoint_exists: - self.output_sampler_results(search_internal=search_internal) - - self.perform_update( - model=model, - analysis=analysis, - during_analysis=True, - search_internal=search_internal, - ) + return self.call_search(search_internal=search_internal, model=model, analysis=analysis) - search_internal.run( - **self.config_dict_run, - ) - - self.output_sampler_results(search_internal=search_internal) - - return search_internal - - def fit_multiprocessing(self, fitness, model, analysis, checkpoint_exists: bool): + def fit_multiprocessing(self, fitness, model, analysis): """ Perform the non-linear search, using multiple CPU cores parallelized via Python's multiprocessing module. @@ -269,8 +249,6 @@ def fit_multiprocessing(self, fitness, model, analysis, checkpoint_exists: bool) analysis Contains the data and the log likelihood function which fits an instance of the model to the data, returning the log likelihood the search maximizes. - checkpoint_exists - Does the checkpoint file corresponding do a previous run of this search exist? """ search_internal = self.sampler_cls( @@ -283,21 +261,68 @@ def fit_multiprocessing(self, fitness, model, analysis, checkpoint_exists: bool) **self.config_dict_search, ) - if checkpoint_exists: - self.output_sampler_results(search_internal=search_internal) + return self.call_search(search_internal=search_internal, model=model, analysis=analysis) - self.perform_update( - model=model, - analysis=analysis, - during_analysis=True, - search_internal=search_internal, + def call_search(self, search_internal, model, analysis): + """ + The x1 CPU and multiprocessing searches both call this function to perform the non-linear search. + + This function calls the search a reduced number of times, corresponding to the `iterations_per_update` of the + search. This allows the search to output results on-the-fly, for example writing to the hard-disk the latest + model and samples. + + It tracks how often to do this update alongside the maximum number of iterations the search will perform. + This ensures that on-the-fly output is performed at regular intervals and that the search does not perform more + iterations than the `n_like_max` input variable. + + Parameters + ---------- + search_internal + The single CPU or multiprocessing search which is run and performs nested sampling. + model + The model which maps parameters chosen via the non-linear search (e.g. via the priors or sampling) to + instances of the model, which are passed to the fitness function. + analysis + Contains the data and the log likelihood function which fits an instance of the model to the data, returning + the log likelihood the search maximizes. + """ + + finished = False + + while not finished: + + iterations, total_iterations = self.iterations_from( + search_internal=search_internal ) - search_internal.run( - **self.config_dict_run, - ) + config_dict_run = { + key: value + for key, value in self.config_dict_run.items() + if key != "n_like_max" + } + search_internal.run( + **config_dict_run, + n_like_max=iterations, + ) + + iterations_after_run = self.iterations_from(search_internal=search_internal)[1] - self.output_sampler_results(search_internal=search_internal) + if ( + total_iterations == iterations_after_run + or iterations_after_run == self.config_dict_run["n_like_max"] + ): + finished = True + + if not finished: + + self.perform_update( + model=model, + analysis=analysis, + during_analysis=True, + search_internal=search_internal + ) + + self.output_sampler_results(search_internal=search_internal) return search_internal @@ -361,6 +386,48 @@ def fit_mpi(self, fitness, model, analysis, checkpoint_exists: bool): return search_internal + def iterations_from( + self, search_internal + ) -> Tuple[int, int]: + """ + Returns the next number of iterations that a dynesty call will use and the total number of iterations + that have been performed so far. + + This is used so that the `iterations_per_update` input leads to on-the-fly output of dynesty results. + + It also ensures dynesty does not perform more samples than the `n_like_max` input variable. + + Parameters + ---------- + search_internal + The Dynesty sampler (static or dynamic) which is run and performs nested sampling. + + Returns + ------- + The next number of iterations that a dynesty run sampling will perform and the total number of iterations + it has performed so far. + """ + + if isinstance(self.paths, NullPaths): + n_like_max = self.config_dict_run.get("n_like_max") + + if n_like_max is not None: + return n_like_max, n_like_max + return int(1e99), int(1e99) + + try: + total_iterations = len(search_internal.posterior()[1]) + except ValueError: + total_iterations = 0 + + iterations = total_iterations + self.iterations_per_update + + if self.config_dict_run["n_like_max"] is not None: + if iterations > self.config_dict_run["n_like_max"]: + iterations = self.config_dict_run["n_like_max"] + + return iterations, total_iterations + def output_sampler_results(self, search_internal): """ Output the sampler results to hard-disk in a generalized PyAutoFit format.