Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions autofit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from . import conf
from . import exc
from . import mock as m
from .aggregator.base import AggBase
from .database.aggregator.aggregator import GridSearchAggregator
from .graphical.expectation_propagation.history import EPHistory
from .graphical.declarative.factor.analysis import AnalysisFactor
Expand Down
171 changes: 171 additions & 0 deletions autofit/aggregator/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from functools import partial
from typing import List, Optional, Generator

import autofit as af


class AggBase(ABC):
    def __init__(self, aggregator: af.Aggregator):
        """
        Base aggregator wrapper, which makes it straightforward to compute generators of instances of objects
        from specific samples of a non-linear search.

        The standard aggregator makes it straightforward to create instances from the model. However, if there
        are other classes which are generated from the model, but are not part of the model itself, creating
        instances of them with samples from the non-linear search, via a generator, requires manual code to be
        written.

        The base aggregator can be used to streamline this process and create a concise API for generating these
        instances.

        This is achieved by overwriting the `object_via_gen_from` method, which is used to create the object from
        the non-linear search samples. This method is then used to create generators of the object from the
        non-linear search samples.

        Parameters
        ----------
        aggregator
            A PyAutoFit aggregator containing the results of non-linear searches performed by PyAutoFit.
        """
        self.aggregator = aggregator

    @abstractmethod
    def object_via_gen_from(
        self, fit: af.Fit, instance: Optional[af.ModelInstance] = None
    ) -> object:
        """
        Create the object of interest from a database `Fit` and (optionally) a model instance.

        For example, in the `PlaneAgg` object, this function is overwritten such that it creates a `Plane` from a
        `ModelInstance` that contains the galaxies of a sample from a non-linear search.

        Parameters
        ----------
        fit
            A PyAutoFit database Fit object containing the generators of the results of PyAutoGalaxy model-fits.
        instance
            A manual instance that overwrites the max log likelihood instance in fit (e.g. for drawing the instance
            randomly from the PDF). If omitted, implementations are expected to use the maximum log likelihood
            instance of the fit.

        Returns
        -------
        Generator
            A generator that creates an object used in the model-fitting process of a non-linear search.
        """

    def max_log_likelihood_gen_from(self) -> Generator:
        """
        Returns a generator using the maximum likelihood instance of a non-linear search.

        This generator creates a list containing the maximum log likelihood instance of every result loaded in
        the aggregator.

        For example, in **PyAutoLens**, by overwriting the `object_via_gen_from` method this returns a generator
        of `Plane` objects from a PyAutoFit aggregator. This generator then generates a list of the maximum log
        likelihood `Plane` objects for all aggregator results.
        """

        def func_gen(fit: af.Fit) -> Generator:
            # No explicit instance is passed, so the overridden `object_via_gen_from`
            # uses the maximum log likelihood instance of the fit.
            return self.object_via_gen_from(fit=fit)

        return self.aggregator.map(func=func_gen)

    def weights_above_gen_from(self, minimum_weight: float) -> List:
        """
        Returns a list of all weights above a minimum weight for every result.

        Parameters
        ----------
        minimum_weight
            The minimum weight of a non-linear sample, such that samples with a weight below this value are discarded
            and not included in the generator.
        """

        def func_gen(fit: af.Fit, minimum_weight: float) -> List[object]:
            samples = fit.value(name="samples")

            weight_list = []

            for sample in samples.sample_list:
                if sample.weight > minimum_weight:
                    weight_list.append(sample.weight)

            return weight_list

        # Bind `minimum_weight` so that `map` can call the function with a `fit` alone.
        func = partial(func_gen, minimum_weight=minimum_weight)

        return self.aggregator.map(func=func)

    def all_above_weight_gen_from(self, minimum_weight: float) -> Generator:
        """
        Returns a generator which for every result generates a list of objects whose parameter values are all those
        in the non-linear search with a weight above an input `minimum_weight` value. This enables straightforward
        error estimation.

        This generator creates lists containing instances whose non-linear sample weight are above the value of
        `minimum_weight`. For example, if the aggregator contains 10 results and each result has 100 samples above the
        `minimum_weight`, a list of 10 entries will be returned, where each entry in this list contains 100 objects
        paired with each non-linear sample.

        For example, in **PyAutoLens**, by overwriting the `object_via_gen_from` method this returns a generator
        of `Plane` objects from a PyAutoFit aggregator. This generator then generates lists of `Plane` objects
        corresponding to all non-linear search samples above the `minimum_weight`.

        Parameters
        ----------
        minimum_weight
            The minimum weight of a non-linear sample, such that samples with a weight below this value are discarded
            and not included in the generator.
        """

        def func_gen(fit: af.Fit, minimum_weight: float) -> List[object]:
            samples = fit.value(name="samples")

            all_above_weight_list = []

            for sample in samples.sample_list:
                if sample.weight > minimum_weight:
                    instance = sample.instance_for_model(model=samples.model)

                    all_above_weight_list.append(
                        self.object_via_gen_from(fit=fit, instance=instance)
                    )

            return all_above_weight_list

        func = partial(func_gen, minimum_weight=minimum_weight)

        return self.aggregator.map(func=func)

    def randomly_drawn_via_pdf_gen_from(self, total_samples: int) -> Generator:
        """
        Returns a generator which for every result generates a list of objects whose parameter values are drawn
        randomly from the PDF. This enables straightforward error estimation.

        This generator creates lists containing instances that are drawn randomly from the PDF for every result loaded
        in the aggregator. For example, if the aggregator contains 10 results and `total_samples=100`, a list of 10
        entries will be returned, where each entry in this list contains 100 objects paired with non-linear samples
        randomly drawn from the PDF.

        For example, in **PyAutoLens**, by overwriting the `object_via_gen_from` method this returns a generator
        of `Plane` objects from a PyAutoFit aggregator. This generator then generates lists of `Plane` objects
        corresponding to non-linear search samples randomly drawn from the PDF.

        Parameters
        ----------
        total_samples
            The total number of non-linear search samples that should be randomly drawn from the PDF.
        """

        def func_gen(fit: af.Fit, total_samples: int) -> List[object]:
            samples = fit.value(name="samples")

            # Each draw is an independent random sample from the PDF; the loop
            # index itself is unused.
            return [
                self.object_via_gen_from(
                    fit=fit,
                    instance=samples.draw_randomly_via_pdf(),
                )
                for _ in range(total_samples)
            ]

        func = partial(func_gen, total_samples=total_samples)

        return self.aggregator.map(func=func)
141 changes: 104 additions & 37 deletions autofit/non_linear/search/nest/nautilus/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import logging
import os
import sys
from typing import Dict, Optional
from typing import Dict, Optional, Tuple

from autofit import jax_wrapper
from autofit.database.sqlalchemy_ import sa
Expand Down Expand Up @@ -143,15 +143,13 @@ def _fit(self, model: AbstractPriorModel, analysis):
fitness=fitness,
model=model,
analysis=analysis,
checkpoint_exists=checkpoint_exists,
)
else:
if not self.using_mpi:
search_internal = self.fit_multiprocessing(
fitness=fitness,
model=model,
analysis=analysis,
checkpoint_exists=checkpoint_exists,
)
else:
search_internal = self.fit_mpi(
Expand Down Expand Up @@ -194,7 +192,7 @@ def checkpoint_file(self):
except TypeError:
pass

def fit_x1_cpu(self, fitness, model, analysis, checkpoint_exists: bool):
def fit_x1_cpu(self, fitness, model, analysis):
"""
Perform the non-linear search, using one CPU core.

Expand All @@ -211,8 +209,6 @@ def fit_x1_cpu(self, fitness, model, analysis, checkpoint_exists: bool):
analysis
Contains the data and the log likelihood function which fits an instance of the model to the data, returning
the log likelihood the search maximizes.
checkpoint_exists
Does the checkpoint file corresponding do a previous run of this search exist?
"""

self.logger.info(
Expand All @@ -231,25 +227,9 @@ def fit_x1_cpu(self, fitness, model, analysis, checkpoint_exists: bool):
**self.config_dict_search,
)

if checkpoint_exists:
self.output_sampler_results(search_internal=search_internal)

self.perform_update(
model=model,
analysis=analysis,
during_analysis=True,
search_internal=search_internal,
)
return self.call_search(search_internal=search_internal, model=model, analysis=analysis)

search_internal.run(
**self.config_dict_run,
)

self.output_sampler_results(search_internal=search_internal)

return search_internal

def fit_multiprocessing(self, fitness, model, analysis, checkpoint_exists: bool):
def fit_multiprocessing(self, fitness, model, analysis):
"""
Perform the non-linear search, using multiple CPU cores parallelized via Python's multiprocessing module.

Expand All @@ -269,8 +249,6 @@ def fit_multiprocessing(self, fitness, model, analysis, checkpoint_exists: bool)
analysis
Contains the data and the log likelihood function which fits an instance of the model to the data, returning
the log likelihood the search maximizes.
checkpoint_exists
Does the checkpoint file corresponding do a previous run of this search exist?
"""

search_internal = self.sampler_cls(
Expand All @@ -283,21 +261,68 @@ def fit_multiprocessing(self, fitness, model, analysis, checkpoint_exists: bool)
**self.config_dict_search,
)

if checkpoint_exists:
self.output_sampler_results(search_internal=search_internal)
return self.call_search(search_internal=search_internal, model=model, analysis=analysis)

self.perform_update(
model=model,
analysis=analysis,
during_analysis=True,
search_internal=search_internal,
def call_search(self, search_internal, model, analysis):
"""
The x1 CPU and multiprocessing searches both call this function to perform the non-linear search.

This function calls the search a reduced number of times, corresponding to the `iterations_per_update` of the
search. This allows the search to output results on-the-fly, for example writing to the hard-disk the latest
model and samples.

It tracks how often to do this update alongside the maximum number of iterations the search will perform.
This ensures that on-the-fly output is performed at regular intervals and that the search does not perform more
iterations than the `n_like_max` input variable.

Parameters
----------
search_internal
The single CPU or multiprocessing search which is run and performs nested sampling.
model
The model which maps parameters chosen via the non-linear search (e.g. via the priors or sampling) to
instances of the model, which are passed to the fitness function.
analysis
Contains the data and the log likelihood function which fits an instance of the model to the data, returning
the log likelihood the search maximizes.
"""

finished = False

while not finished:

iterations, total_iterations = self.iterations_from(
search_internal=search_internal
)

search_internal.run(
**self.config_dict_run,
)
config_dict_run = {
key: value
for key, value in self.config_dict_run.items()
if key != "n_like_max"
}
search_internal.run(
**config_dict_run,
n_like_max=iterations,
)

iterations_after_run = self.iterations_from(search_internal=search_internal)[1]

self.output_sampler_results(search_internal=search_internal)
if (
total_iterations == iterations_after_run
or iterations_after_run == self.config_dict_run["n_like_max"]
):
finished = True

if not finished:

self.perform_update(
model=model,
analysis=analysis,
during_analysis=True,
search_internal=search_internal
)

self.output_sampler_results(search_internal=search_internal)

return search_internal

Expand Down Expand Up @@ -361,6 +386,48 @@ def fit_mpi(self, fitness, model, analysis, checkpoint_exists: bool):

return search_internal

def iterations_from(
    self, search_internal
) -> Tuple[int, int]:
    """
    Returns the next number of iterations that a sampler call will use and the total number of iterations
    that have been performed so far.

    This is used so that the `iterations_per_update` input leads to on-the-fly output of results at regular
    intervals during the non-linear search.

    It also ensures the sampler does not perform more samples than the `n_like_max` input variable.

    NOTE(review): the docstring originally referred to "dynesty", but this method lives in the Nautilus
    search module — presumably it applies to the Nautilus sampler; confirm against the caller.

    Parameters
    ----------
    search_internal
        The internal sampler which is run and performs nested sampling.

    Returns
    -------
    The next number of iterations that a sampler run will perform and the total number of iterations
    it has performed so far.
    """

    # With NullPaths there is no output directory, so no checkpointed progress exists:
    # run either up to n_like_max, or effectively without limit.
    if isinstance(self.paths, NullPaths):
        n_like_max = self.config_dict_run.get("n_like_max")

        if n_like_max is not None:
            return n_like_max, n_like_max
        # Sentinel "unlimited" iteration count when no maximum is configured.
        return int(1e99), int(1e99)

    # The number of iterations performed so far is inferred from the size of the
    # sampler's posterior; a ValueError indicates no samples exist yet.
    try:
        total_iterations = len(search_internal.posterior()[1])
    except ValueError:
        total_iterations = 0

    # Advance by one update interval, clamped so the total never exceeds n_like_max.
    iterations = total_iterations + self.iterations_per_update

    # NOTE(review): direct indexing here contrasts with the .get() used in the NullPaths
    # branch above — this assumes "n_like_max" is always present in config_dict_run
    # outside the NullPaths case; confirm the config guarantees the key.
    if self.config_dict_run["n_like_max"] is not None:
        if iterations > self.config_dict_run["n_like_max"]:
            iterations = self.config_dict_run["n_like_max"]

    return iterations, total_iterations

def output_sampler_results(self, search_internal):
"""
Output the sampler results to hard-disk in a generalized PyAutoFit format.
Expand Down