Merged

81 commits
4b8a365
new implementation for computing latent variables
rhayes777 Mar 19, 2024
ad86a76
integrated new latent variables implementation
rhayes777 Mar 19, 2024
c2eab67
docs
rhayes777 Mar 19, 2024
66d26df
unused import
rhayes777 Mar 19, 2024
aaa2ebe
first refactor, placing samples summary in result
Jammy2211 Mar 22, 2024
73e9df6
nows uses samples summary max likelihood
Jammy2211 Mar 22, 2024
c3975b4
median PDF added to samples summary and used for prior linking
Jammy2211 Mar 22, 2024
ad888c0
remove covariance matrix
Jammy2211 Mar 22, 2024
9eac1af
fix median pdf
Jammy2211 Mar 22, 2024
e3d2112
result now only uses samples summary for prior linking
Jammy2211 Mar 22, 2024
01cfbe5
remove sky save samples
Jammy2211 Mar 22, 2024
56a2ef9
fix creation of Samples Summary so it doesnt use covariance matrix
Jammy2211 Mar 22, 2024
a3d4b3f
samples summary no longer has model
Jammy2211 Mar 22, 2024
e652312
result_via_completed_fit clean and documented
Jammy2211 Mar 22, 2024
e7cf73b
clean up perform update
Jammy2211 Mar 22, 2024
fb1cb19
mockSamplesSummary to fix some test
Jammy2211 Mar 22, 2024
e36dd0e
fix unit tests by using MockResult
Jammy2211 Mar 22, 2024
ce0de40
test_abstract_search uses MockSamplesSummary
Jammy2211 Mar 22, 2024
4e4c59d
fix result json
Jammy2211 Mar 22, 2024
7af128a
hack to fix emcee
Jammy2211 Mar 22, 2024
52fd89e
docs
Jammy2211 Mar 22, 2024
62b2a71
docs refactoring
Jammy2211 Mar 23, 2024
7cd2299
added latent variable output.yaml parameters
Jammy2211 Mar 23, 2024
f2732b7
latent_variables.csv -> latent.csv
Jammy2211 Mar 23, 2024
e8bab2f
docs
Jammy2211 Mar 23, 2024
e8c90e5
added likelihood check to fitness function
Jammy2211 Mar 24, 2024
ace74cf
likelihood check now works
Jammy2211 Mar 24, 2024
186d337
update all Fitness classes with paths
Jammy2211 Mar 24, 2024
987e490
division by zero warning in inv_transform surpressed
Jammy2211 Mar 24, 2024
a19357a
fix DeprecationWarning: invalid escape sequence '\i' warning
Jammy2211 Mar 24, 2024
b7c0ea2
test specific warning caught
Jammy2211 Mar 24, 2024
1757da6
update docstring and make paths for fitness optional
Jammy2211 Mar 24, 2024
676f2c6
Merge branch 'feature/likelihood_check' into feature/keep_the_noise_down
Jammy2211 Mar 24, 2024
c94297b
surpress enviroment variable warnings
Jammy2211 Mar 24, 2024
8c78ceb
transform surpress
Jammy2211 Mar 24, 2024
9e5d3b2
fix eniroment warning unit test
Jammy2211 Mar 24, 2024
25fcab2
silence sneaky map test warning
Jammy2211 Mar 24, 2024
3dd3fb2
close matplotlib figures in graph plots to avoid noise
Jammy2211 Mar 24, 2024
96a394b
move dynesty suprression
Jammy2211 Mar 24, 2024
069a511
more transform warnings
Jammy2211 Mar 24, 2024
4ffc4f7
more quieting
Jammy2211 Mar 24, 2024
9243665
another thing quiet
Jammy2211 Mar 24, 2024
b0145ed
fixed a test
rhayes777 Mar 25, 2024
2213f60
fixing tests...
rhayes777 Mar 25, 2024
d718d08
fixed a test
rhayes777 Mar 25, 2024
bc82d0c
fixed a test
rhayes777 Mar 25, 2024
b2b93a6
fixed a test
rhayes777 Mar 25, 2024
d39c0ca
fixed test
rhayes777 Mar 25, 2024
79d56e3
skip computing median pdf for generic samples summary
rhayes777 Mar 25, 2024
80a4361
ensure ModelAnalysis.make_result conforms to super
rhayes777 Mar 25, 2024
e999d99
mock result with default mock samples summary
rhayes777 Mar 25, 2024
70a3e00
more fixes
rhayes777 Mar 25, 2024
4b7af3f
fixes
rhayes777 Mar 25, 2024
00e659b
fix more tests by moving arguments to MockSamplesSummary
rhayes777 Mar 25, 2024
3d867d5
another fix by providing median pdf sample
rhayes777 Mar 25, 2024
30ebbc2
small fixes
rhayes777 Mar 25, 2024
65639b1
mock samples generates kwargs
rhayes777 Mar 25, 2024
c19f273
fixed a test
rhayes777 Mar 25, 2024
612762d
sample summary first grid search results
rhayes777 Mar 25, 2024
46352ae
format
rhayes777 Mar 25, 2024
e83f23b
fix missing kwargs in grid sensitivity tests
rhayes777 Mar 25, 2024
2a7afd8
praise the lord another test fixed
rhayes777 Mar 25, 2024
ae8f276
remove print statement
rhayes777 Mar 25, 2024
fb02b94
consolidate fixture
rhayes777 Mar 25, 2024
54b3dbe
fix exception
Jammy2211 Mar 26, 2024
8cf85e6
unit test fixed
Jammy2211 Mar 26, 2024
7a0c007
MockSeardh can output Samples for aggregator tests
Jammy2211 Mar 26, 2024
c8a5c3f
fix sensitivfty test
Jammy2211 Mar 26, 2024
91f5c0c
Merge branch 'feature/samples_summary_in_result' into feature/keep_th…
Jammy2211 Mar 26, 2024
c72a4bd
silence output warning
Jammy2211 Mar 26, 2024
e1f356e
silence graphical warnings
Jammy2211 Mar 26, 2024
8afd38f
corner plot only when pdf ocnverged to reduce noise
Jammy2211 Mar 26, 2024
84eba6b
more silencing noise
Jammy2211 Mar 26, 2024
c67e568
merge
Jammy2211 Mar 26, 2024
b37256a
all tess pass
Jammy2211 Mar 29, 2024
f50dd4f
use getattr trick to disable Result lint errors
Jammy2211 Mar 29, 2024
6d6bd43
Result API added to example
Jammy2211 Mar 29, 2024
59c2418
fix latent bug
Jammy2211 Mar 29, 2024
e0809da
docs
Jammy2211 Mar 29, 2024
c97242c
revieew
Jammy2211 Apr 8, 2024
2fce21c
more review
Jammy2211 Apr 8, 2024
2 changes: 2 additions & 0 deletions autofit/__init__.py
@@ -16,6 +16,7 @@
from .graphical.declarative.factor.hierarchical import HierarchicalFactor
from .graphical.laplace import LaplaceOptimiser
from .non_linear.grid.grid_list import GridList
from .non_linear.samples.summary import SamplesSummary
from .non_linear.samples import SamplesMCMC
from .non_linear.samples import SamplesNest
from .non_linear.samples import Samples
@@ -82,6 +83,7 @@
from .non_linear.search.optimize.lbfgs.search import LBFGS
from .non_linear.search.optimize.pyswarms.search.globe import PySwarmsGlobal
from .non_linear.search.optimize.pyswarms.search.local import PySwarmsLocal
from .non_linear.paths.abstract import AbstractPaths
from .non_linear.paths import DirectoryPaths
from .non_linear.paths import DatabasePaths
from .non_linear.result import Result
2 changes: 1 addition & 1 deletion autofit/aggregator/search_output.py
@@ -264,7 +264,7 @@ def latent_variables(self):
"""
The latent variables of the search, parsed from a CSV file.
"""
with open(self.files_path / "latent_variables.csv") as f:
with open(self.files_path / "latent.csv") as f:
reader = csv.reader(f)
headers = next(reader)
from autofit.non_linear.analysis.latent_variables import LatentVariables
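For illustration, a minimal sketch of how the renamed `latent.csv` could be parsed downstream, mirroring the `csv.reader` pattern in the hunk above (the file contents and column names here are hypothetical):

```python
import csv
import io

# Hypothetical latent.csv contents: the real file is written by autofit next
# to samples.csv, with one column per latent variable.
latent_csv = io.StringIO("fwhm,weight\n2.354,0.7\n2.398,0.3\n")

reader = csv.reader(latent_csv)
headers = next(reader)  # the first row holds the column names
rows = [[float(value) for value in row] for row in reader]
```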
1 change: 1 addition & 0 deletions autofit/config/general.yaml
@@ -27,6 +27,7 @@ profiling:
should_profile: false # If True, the ``profile_log_likelihood_function()`` function of an analysis class is called throughout a model-fit, profiling run times.
repeats: 1 # The number of repeat function calls used to measure run-times when profiling.
test:
check_likelihood_function: true # If True, when a search is resumed the likelihood of a previous sample is recalculated to ensure it is consistent with the previous run.
exception_override: false
lh_timeout_seconds: # If a float is input, the log_likelihood_function call is timed out after this many seconds, to diagnose infinite loops. Default is None, meaning no timeout.
parallel_profile: false
3 changes: 0 additions & 3 deletions autofit/config/non_linear/nest.yaml
@@ -26,7 +26,6 @@ DynestyStatic:
logl_max: .inf
maxcall: null
maxiter: null
n_effective: null
initialize: # The method used to generate where walkers are initialized in parameter space {prior}.
method: prior # priors: samples are initialized by randomly drawing from each parameter's prior.
parallel:
@@ -57,8 +56,6 @@ DynestyDynamic:
maxcall_init: null
maxiter: null
maxiter_init: null
n_effective: .inf
n_effective_init: .inf
nlive_init: 500
initialize: # The method used to generate where walkers are initialized in parameter space {prior}.
method: prior # priors: samples are initialized by randomly drawing from each parameter's prior.
57 changes: 42 additions & 15 deletions autofit/config/output.yaml
@@ -1,11 +1,6 @@
# Determines whether files saved by the search are output to the hard-disk. This is true both when saving to the
# directory structure and when saving to database.

# Files can be listed name: bool where the name is the name of the file without a suffix (e.g. model not model.json)
# and bool is true or false.

# If a given file is not listed then the default value is used.

default: true # If true then files which are not explicitly listed here are output anyway. If false then they are not.

### Samples ###
@@ -22,25 +17,29 @@ default: true # If true then files which are not explicitly listed here are outp
samples: true

# The `samples.csv` file contains every accepted sampled value of every free parameter with its log likelihood and
# weight. For certain searches, the majority of samples have a very low weight, which has no numerical impact on the
# results of the model-fit. However, these samples are still output to the `samples.csv` file, taking up hard-disk space
# and slowing down analysis of the samples (e.g. via the database).
# weight. For certain searches, the majority of samples have a very low weight and have no numerical impact on the
# results of the model-fit. However, these samples are still output to the `samples.csv` file, taking up hard-disk
# space and slowing down analysis of the samples (e.g. via the database).

# The `samples_weight_threshold` below specifies the threshold value of the weight such that samples with a weight
# below this value are not output to the `samples.csv` file. This can be used to reduce the size of the `samples.csv`
# file and speed up analysis of the samples.

# Note that for many searches (e.g. MCMC) all samples have equal weight, and thus this threshold has no impact and
# there is no simple way to save hard-disk space. However, for nested sampling, the majority of samples have a very
# low weight and this threshold can be used to save hard-disk space.
# For many searches (e.g. MCMC) all samples have an equal weight of 1.0, and this threshold therefore has no impact.
# For these searches, there is no simple way to save hard-disk space. This input is more suited to nested sampling,
# where the majority of samples have a very low weight.

# Set value to empty (e.g. delete 1.0e-10 below) to disable this feature.

samples_weight_threshold: 1.0e-10
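An illustrative sketch of the thresholding described above; the row values are made up, and the real implementation writes `samples.csv` rather than filtering an in-memory list:

```python
# Samples whose weight falls below samples_weight_threshold are simply not
# written to samples.csv, shrinking the file without affecting the results.
samples = [
    {"centre": 50.1, "weight": 0.6},
    {"centre": 49.8, "weight": 0.4},
    {"centre": 12.3, "weight": 1.0e-12},  # negligible weight, dropped
]
samples_weight_threshold = 1.0e-10
kept = [sample for sample in samples if sample["weight"] >= samples_weight_threshold]
```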

### Search Internal ###

# The search internal folder which contains a saved state of the non-linear search, as a .pickle or .dill file.
# The search internal folder contains a saved state of the non-linear search in its internal representation,
# as a .pickle or .dill file.

# For example, for the nested sampling dynesty, this .dill file is the `DynestySampler` object which is used to
# perform sampling, and it therefore contains all internal dynesty representations of the results, samples, weights, etc.

# If the entry below is false, the folder is still output during the model-fit, as it is required to resume the fit
# from where it left off. Therefore, setting `false` below does not impact model-fitting checkpointing and resumption.
@@ -49,12 +48,40 @@ samples_weight_threshold: 1.0e-10
# The search internal folder file is often large, therefore deleting it after a fit is complete can significantly
# reduce hard-disk space use.

# The search internal representation (e.g. what you can load from the output .pickle file) may have additional
# quantities specific to the non-linear search that you are interested in inspecting. Deleting the folder means this
# information is lost.
# The search internal representation that can be loaded from the .dill file has many additional quantities specific to
# the non-linear search that the standardized autofit forms do not. For example, for emcee, it contains information on
# every walker. This information is required to do certain analyses and make certain plots; deleting the
# folder therefore means this information is lost.

search_internal: false

### Latent Variables ###

# A latent variable is not a model parameter but can be derived from the model. Its value and errors may be of interest
# and aid in the interpretation of a model-fit.

# For example, for the simple 1D Gaussian example, it could be the full-width half maximum (FWHM) of the Gaussian. This
# is not included in the model but can be easily derived from the Gaussian's sigma value.

# By overwriting an Analysis class's `compute_latent_variable` method we can manually specify latent variables that
# are calculated and output to a `latent.csv` file, which mirrors the `samples.csv` file. The `latent.csv` file has
# the same weight thresholding applied as the `samples.csv` file, controlled via the `samples_weight_threshold` above.

# `latent.results` and `latent_summary.json` files may also be output; the inputs below control whether
# they are output and how often.

# Outputting latent variables manually after a fit is complete is simple: just call
# the `analysis.compute_all_latent_variables()` function.

# For many use cases, the best setup may be to disable autofit latent variable output during the fit and perform it
# manually after completing a successful model-fit. This saves computational run time by not computing latent
# variables during any model-fit which is unsuccessful.

latent_during_fit: true # Whether to output the `latent.csv`, `latent.results` and `latent_summary.json` files during the fit when it performs on-the-fly output.
latent_after_fit: true # If `latent_during_fit` is False, whether to output the `latent.csv`, `latent.results` and `latent_summary.json` files after the fit is complete.
latent_csv: true # Whether to output the `latent.csv` file.
latent_results: true # Whether to output the `latent.results` file.

# Other Files:

covariance: true # `covariance.csv`: The [free parameters x free parameters] covariance matrix.
149 changes: 136 additions & 13 deletions autofit/example/analysis.py
@@ -1,7 +1,8 @@
import os
import matplotlib.pyplot as plt
from typing import List, Optional
from typing import Dict, List, Optional

from autofit.example.result import ResultExample
from autofit.jax_wrapper import numpy as np

import autofit as af
@@ -12,6 +13,15 @@
"""

class Analysis(af.Analysis):

"""
This overwrite means the `ResultExample` class is returned after the model-fit.

This result has been extended, based on the model that is input into the analysis, to include a property
`max_log_likelihood_model_data`, which is the model data of the best-fit model.
"""
Result = ResultExample

def __init__(self, data: np.ndarray, noise_map:np.ndarray):
"""
In this example the `Analysis` object only contains the data and noise-map. It can be easily extended,
@@ -43,6 +53,34 @@ def log_likelihood_function(self, instance: af.ModelInstance) -> float:
-------
The log likelihood value indicating how well this model fit the dataset.
"""
model_data_1d = self.model_data_1d_from(instance=instance)

residual_map = self.data - model_data_1d
chi_squared_map = (residual_map / self.noise_map) ** 2.0
log_likelihood = -0.5 * sum(chi_squared_map)
Collaborator: Just return this line rather than defining an ephemeral variable

Collaborator (Author): This is for the example module, which users might read, so intentionally simple.

return log_likelihood

def model_data_1d_from(self, instance : af.ModelInstance) -> np.ndarray:
"""
Returns the model data of the 1D profiles.

The way this is generated changes depending on whether the model is a `Model` (therefore having only one profile)
or a `Collection` (therefore having multiple profiles).

If it is a model, the model component's `model_data_1d_via_xvalues_from` is called and the output returned.
For a collection, each component's `model_data_1d_via_xvalues_from` is called in turn and the outputs are summed
to return the combined model data.

Parameters
----------
instance
The model instance of the profile or collection of profiles.

Returns
-------
The model data of the profiles.
"""

xvalues = np.arange(self.data.shape[0])
model_data_1d = np.zeros(self.data.shape[0])
@@ -56,18 +94,7 @@ def log_likelihood_function(self, instance: af.ModelInstance) -> float:
except TypeError:
model_data_1d += instance.model_data_1d_via_xvalues_from(xvalues=xvalues)

residual_map = self.data - model_data_1d
chi_squared_map = (residual_map / self.noise_map) ** 2.0
log_likelihood = -0.5 * sum(chi_squared_map)

try:
self.save_latent_variables(
fwmh=instance.fwhm
)
except AttributeError:
pass

return log_likelihood
return model_data_1d
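The likelihood in `log_likelihood_function` above is the standard Gaussian chi-squared form. A self-contained numeric sketch, with made-up toy data, shows the arithmetic:

```python
import numpy as np

# Toy data: the "model" matches the data exactly except at one point, so the
# whole chi-squared comes from that single residual.
data = np.array([1.0, 2.0, 3.0])
model_data = np.array([1.0, 2.0, 4.0])
noise_map = np.array([0.5, 0.5, 0.5])

residual_map = data - model_data
chi_squared_map = (residual_map / noise_map) ** 2.0
log_likelihood = -0.5 * float(np.sum(chi_squared_map))
# residual -1.0 / noise 0.5 = -2.0, chi-squared 4.0, log likelihood -2.0
```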

def visualize(self, paths: af.DirectoryPaths, instance: af.ModelInstance, during_analysis : bool):
"""
@@ -122,6 +149,7 @@ def visualize(self, paths: af.DirectoryPaths, instance: af.ModelInstance, during
os.makedirs(paths.image_path, exist_ok=True)
plt.savefig(paths.image_path / "model_fit.png")
plt.clf()
plt.close()

def visualize_combined(
self,
@@ -176,4 +204,99 @@ def save_attributes(self, paths: af.DirectoryPaths):
paths.save_json(name="data", object_dict=self.data.tolist(), prefix="dataset")
paths.save_json(name="noise_map", object_dict=self.noise_map.tolist(), prefix="dataset")

def make_result(
self,
samples_summary: af.SamplesSummary,
paths: af.AbstractPaths,
samples: Optional[af.SamplesPDF] = None,
search_internal: Optional[object] = None,
analysis: Optional[object] = None,
) -> Result:
"""
Returns the `Result` of the non-linear search after it is completed.

The result type is defined as a class variable in the `Analysis` class (see the top of the code, under
`class Analysis(af.Analysis)`).

The result can be manually overwritten by a user to return a user-defined result object, which can be extended
with additional methods and attribute specific to the model-fit.

This example class does exactly this, whereby the analysis result has been overwritten with the `ResultExample`
class, which contains a property `max_log_likelihood_model_data_1d` that returns the model data of the
best-fit model. This API means you can customize your result object to include whatever attributes you want
and therefore make a result object specific to your model-fit and model-fitting problem.

The `Result` object you return can be customized to include:

- The samples summary, which contains the maximum log likelihood instance and median PDF model.

- The paths of the search, which are used for loading the samples and search internal below when a search
is resumed.

- The samples of the non-linear search (e.g. MCMC chains) also stored in `samples.csv`.

- The non-linear search used for the fit in its internal representation, which is used for resuming a search
and making bespoke visualization using the search's internal results.

- The analysis used to fit the model (default disabled to save memory, but option may be useful for certain
projects).

Parameters
----------
samples_summary
The summary of the samples of the non-linear search, which include the maximum log likelihood instance and
median PDF model.
paths
An object describing the paths for saving data (e.g. hard-disk directories or entries in sqlite database).
samples
The samples of the non-linear search, for example the chains of an MCMC run.
search_internal
The internal representation of the non-linear search used to perform the model-fit.
analysis
The analysis used to fit the model.

Returns
-------
Result
The result of the non-linear search, which is defined as a class variable in the `Analysis` class.
"""
return self.Result(
samples_summary=samples_summary,
paths=paths,
samples=samples,
search_internal=search_internal,
analysis=self
)

def compute_latent_variable(self, instance) -> Dict[str, float]:
"""
A latent variable is not a model parameter but can be derived from the model. Its value and errors may be
of interest and aid in the interpretation of a model-fit.

For example, for the simple 1D Gaussian example, it could be the full-width half maximum (FWHM) of the
Gaussian. This is not included in the model but can be easily derived from the Gaussian's sigma value.

By overwriting this method we can manually specify latent variables that are calculated and output to
a `latent.csv` file, which mirrors the `samples.csv` file.

In the example below, the `latent.csv` file will contain one column with the FWHM of every Gaussian model
sampled by the non-linear search.

This function is called for every non-linear search sample, where the `instance` passed in corresponds to
each sample.

Parameters
----------
instance
The instance of the model from which the latent variable is derived.

Returns
-------
A dictionary mapping the name of each latent variable to its value for this instance.
"""
try:
return {
"fwhm": instance.fwhm
}
except AttributeError:
return {}
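For reference, the FWHM latent variable here follows directly from a Gaussian's sigma; a minimal sketch of the relation, assuming a standard Gaussian profile:

```python
import math

def fwhm_from_sigma(sigma: float) -> float:
    # For a Gaussian profile, FWHM = 2 * sqrt(2 * ln 2) * sigma, roughly
    # 2.3548 * sigma, so the latent variable is cheap to derive per sample.
    return 2.0 * math.sqrt(2.0 * math.log(2.0)) * sigma

fwhm = fwhm_from_sigma(1.0)
```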
3 changes: 2 additions & 1 deletion autofit/example/util.py
@@ -49,4 +49,5 @@ def plot_profile_1d(
if not path.exists(output_path):
os.makedirs(output_path)
plt.savefig(output_path / f"{output_filename}.png")
plt.clf()
plt.clf()
plt.close()
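The pattern these hunks adopt, clearing and closing each figure after saving, can be sketched standalone (assuming a headless `Agg` backend):

```python
import os
import tempfile

import matplotlib
matplotlib.use("Agg")  # non-interactive backend, safe for headless runs
import matplotlib.pyplot as plt

# Save a trivial figure, then clear *and* close it: clf() empties the figure,
# close() releases it, so long runs don't accumulate open figures (the source
# of matplotlib's "more than 20 figures" warnings this PR silences).
path = os.path.join(tempfile.mkdtemp(), "model_fit.png")
plt.plot([0, 1, 2], [0, 1, 4])
plt.savefig(path)
plt.clf()
plt.close()
```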
1 change: 1 addition & 0 deletions autofit/graphical/declarative/abstract.py
@@ -151,6 +151,7 @@ def draw_graph(
**kwargs
)
plt.show()
plt.close()

@property
def plates(self):