Source code for xpsi.PostProcessing._postprocessor

from ._global_imports import *

from ._backends import NestedBackend
from ._runs import Runs, ParameterError
from ._run import Run # for Sphinx docstring cross-references

class Params(object):
    """ Information about parameters shared by runs for plotting.

    :param names:
        Names of the parameters selected for plotting. The object must
        support ``len()``.

    """

    def __init__(self, names):
        self._names = names
        # labels are assigned after construction; start from ``None`` so
        # that reading ``labels`` early does not raise ``AttributeError``
        self._labels = None

    @property
    def names(self):
        """ Get the parameter names. """
        return self._names

    @property
    def labels(self):
        """ Get the parameter labels, or ``None`` if not yet assigned. """
        return self._labels

    def __len__(self):
        """ Get the number of parameters. """
        return len(self._names)



[docs]
class PostProcessor(object):
    """ Post-process samples for inference and posterior checking.

    Base class that functions as a container for nested sampling run objects
    representing some set of posterior distributions that share some subset of
    parameters of interest that are to be plotted. The parameters can be
    shared in the sense that their relationship is the identity relation, or
    multiple physical objects in a population context each have an instance
    of a parameter type. An example of the former is the mass of a star, for
    which there are two or more distinct posterior distributions, due, e.g.:

        * to sequential inference conditional on independent data sets, where
          each posterior is an update of the previous in the sequence;
        * inference conditional on independent data sets, where the different
          analyses invoked same [different] prior models and one simply wants
          to overlay the posteriors e.g., to gauge the synergy of different
          experiments [and argue the priors are weakly informative/
          diffuse in the context of the likelihood functions];
        * to analysis of a single data set but the effective existence of
          a discrete hyperparameter resulting in posteriors conditional on
          the discrete label over population-level prior distributions that
          has not been marginalized over (i.e., the problem of choosing an
          appropriate hierarchical prior model);
        * to competing models, where models can differ in the likelihood
          function, prior density, or both.

    An example of the latter is two or more (neutron) stars, each with a mass;
    these masses collectively encode interesting population-level information.

    Subclasses will calculate inferences, usually in the form of approximate
    integrals over the posterior distribution. They will also provide tools
    for visualisation of posterior information and derived quantities, which
    is useful for basic posterior predictive checking.

    :param iterable posteriors:
        An iterable of instances of :class:`~.Runs`.

    """
    def __init__(self, posteriors):
        """ Store the posteriors and create empty result containers.

        :param posteriors:
            An instance, or a list or tuple of instances, of
            :class:`~.Runs`. Validation is done by the ``posteriors``
            property setter.

        """
        # assignment goes through the property setter, which validates
        self.posteriors = posteriors

        # both containers start out empty
        self.val_cred = list()
        self.samples = dict()
    @property
    def posteriors(self):
        """ Get the stored posteriors (:class:`~.Runs` instances). """
        return self._posteriors

    @posteriors.setter
    def posteriors(self, obj):
        """ Validate and store one or more :class:`~.Runs` instances.

        A list or tuple is stored as given after checking that it is
        non-empty, that every element is a :class:`~.Runs` instance, and
        that no two elements share an ID. A lone :class:`~.Runs` instance is
        wrapped in a single-element list.

        :raises TypeError:
            If the container is empty, if an element is not a
            :class:`~.Runs` instance, or if ``obj`` is neither a list/tuple
            nor a :class:`~.Runs` instance.

        :raises AmbiguityError:
            If two elements share the same ID.

        """
        if not isinstance(obj, (list, tuple)):
            # not a container: accept a lone ``Runs`` instance, else reject
            if isinstance(obj, Runs):
                self._posteriors = [obj]
                return
            raise TypeError('Supply an instance or instances of the '
                            '``Runs`` class.')

        if not obj:
            raise TypeError('No objects of type ``Runs`` supplied.')

        seen_ids = []
        for candidate in obj:
            if not isinstance(candidate, Runs):
                raise TypeError('Objects must be instances of the '
                                '``Runs`` class.')
            if candidate.ID in seen_ids:
                raise AmbiguityError('Use distinct IDs for distinct '
                                     'posteriors (run sets).')
            seen_ids.append(candidate.ID)

        self._posteriors = obj


[docs]
    @make_verbose('Curating set of runs for posterior plotting',
                  'Run set curated')
    def set_subset(self, IDs=None,
                   combine=False, combine_all=False,
                   force_combine=True, only_combined=False,
                   overwrite=False, split_modes=False):
        """ Set a current list of :class:`~.Runs` instances.

        Helper function to get and notify which runs will be plotted.

        :param OrderedDict IDs:
            A dictionary of lists, where keys match :class:`~.Runs` instance
            IDs, and list elements match :class:`~.Run` instance IDs. If no
            key matches a :class:`~.Runs` instance ID, it is assumed that
            all associated :class:`~.Run` instances are to be selected as the
            current subset for plotting applications. If ``None``, all
            stored posteriors form the subset.

        :param bool combine:
            Additionally combine the runs into a single run for overplotting?
            The overriding setting if there is more than one underlying
            posterior to be plotted, is to attempt to combine runs on each
            posterior if multiple such runs are available, in order to
            reduce information density. If there is a single underlying
            posterior, the user-specified value is respected.

        :param bool combine_all:
            Combine all runs in each :class:`Runs` instance or only those
            for which IDs are provided? Ignored if ``combine`` is ``False``.

        :param bool force_combine:
            Force recombination of eligible run sets, even if a
            combined run is already cached? Always ``True`` when more than
            one posterior is selected.

        :param bool only_combined:
            Only plot the combined run? Only heeded if a single posterior
            is selected for plotting, and in that case is ignored if
            ``combine`` is ``False``.

        :param bool overwrite:
            Overwrite combined-sample files on disk with the same filename?

        :param bool split_modes:
            Split posteriors by mode? Only useful for cornerplots.

        :raises TypeError:
            If ``IDs`` is neither ``None`` nor an ``OrderedDict``.

        """

        if IDs is None:
            self._subset = self._posteriors
            IDs = {}
        elif isinstance(IDs, OrderedDict):
            self._subset = [self[ID] for ID in IDs]
        else:
            raise TypeError('IDs must be supplied in a '
                            'collections.OrderedDict container.')

        if len(self._subset) > 1:
            # NOTE(review): ``self._subset`` itself is not truncated, so
            # posteriors beyond the ninth stay in the subset without
            # ``set_subset`` having been called on them here -- confirm
            # that this is intended.
            if len(self._subset) > 9:
                print('Warning: Only the first nine positional posteriors '
                      'will be plotted to prevent the plot from '
                      'overcrowding, with IDs: '
                      + ', '.join(str(p.ID) for p in self._subset[:9]))

            # with several posteriors, one combined run per posterior is
            # requested regardless of the user-supplied combine settings
            for posterior in self._subset[:9]:
                posterior.set_subset(IDs.get(posterior.ID, None),
                                     combine = True,
                                     combine_all = combine_all,
                                     force_combine = True,
                                     only_combined = True,
                                     only_principal = True,
                                     overwrite = overwrite,
                                     split_modes = split_modes)
        else:
            posterior = self._subset[0]
            ids = IDs.get(posterior.ID, None)

            # ``ids`` is ``None`` when no run IDs were supplied for this
            # posterior; there is then nothing to truncate here and ``None``
            # is handed on unchanged (previously ``len(None)`` raised)
            if ids is not None and not only_combined:
                # a combined run takes one of the nine slots on the plot, so
                # one fewer individual run fits when combining
                if combine:
                    limit, limit_word = 8, 'eight'
                else:
                    limit, limit_word = 9, 'nine'

                if len(ids) > limit:
                    print('Warning: Only the first %s positional runs will '
                          'be plotted individually to prevent the plot from '
                          'overcrowding, with IDs: ' % limit_word
                          + ', '.join(str(i) for i in ids[:limit]))

                    ids = ids[:limit]

            posterior.set_subset(ids,
                                 combine = combine,
                                 combine_all = combine_all,
                                 force_combine = force_combine,
                                 only_combined = only_combined,
                                 only_principal = False,
                                 overwrite = overwrite,
                                 split_modes = split_modes)


    @property
    def subset(self):
        """ Get the current subset of posteriors for plotting.

        The subset is assigned by :meth:`set_subset`. It is not created in
        ``__init__``, so reading this property before :meth:`set_subset` has
        been called raises ``AttributeError``.

        """
        return self._subset

    @property
    def subset_to_plot(self):
        """ Get the current subset of runs for plotting.

        Runs whose ``use_nestcheck`` attribute is true are placed before
        all other runs; the relative order within each group is preserved.

        When more than one posterior is in the current subset, the subset
        itself is reordered as a side effect so that its posteriors follow
        the order of the returned runs. Posteriors that supplied no run are
        kept at the end.

        :returns:
            A list of runs, nestcheck-compatible runs first.

        """
        several = len(self._subset) > 1

        if several:
            sources = self._subset
        else:
            sources = [self._subset[0]]

        # remember which posterior every run came from, so the bookkeeping
        # stays correct even if a posterior supplies zero or several runs
        pairs = [(run, posterior)
                 for posterior in sources
                 for run in posterior.subset_to_plot]

        # reorder so nestcheck-compatible first; ``sorted`` is stable, and
        # ``False`` (i.e., compatible) sorts before ``True``
        ordered = sorted(pairs, key=lambda pair: not pair[0].use_nestcheck)

        if several:
            reordered_posteriors = []
            candidates = [posterior for _, posterior in ordered]
            candidates += list(self._subset)
            for posterior in candidates:
                # compare by identity so each posterior is kept exactly once
                if not any(posterior is kept
                           for kept in reordered_posteriors):
                    reordered_posteriors.append(posterior)
            self._subset = reordered_posteriors

        return [run for run, _ in ordered]

    def __getitem__(self, ID):
        """ Get a :class:`~.Runs` instance using the associated ID.

        :param ID:
            Either the string ID of a stored posterior, or a 2-tuple whose
            first element is such a string ID and whose second element is
            used to index the matching posterior.

        :returns:
            The matching posterior for a string ID, or the result of
            indexing the matching posterior for a 2-tuple.

        :raises KeyError:
            If no stored posterior has a matching ID.

        :raises TypeError:
            If ``ID`` is neither a string nor a 2-tuple.

        """

        def search(key):
            # only string keys can match; anything else is reported as absent
            if isinstance(key, _six.string_types):
                for posterior in self._posteriors:
                    if key == posterior.ID:
                        return posterior
            raise KeyError('No posterior with ID matching key.')

        if isinstance(ID, _six.string_types):
            return search(ID)

        if isinstance(ID, tuple) and len(ID) == 2:
            return search(ID[0])[ID[1]]

        # previously a key of any other type fell through and silently
        # returned ``None``; reject it like a malformed tuple instead
        raise TypeError('Invalid run ID specification.')

    @property
    def params(self):
        """ Get the current parameter information.

        This is the :class:`Params` instance created by :meth:`set_params`,
        or ``None`` if the most recent call to that method failed.

        """
        return self._params


[docs]
    def set_params(self, names):
        """ Select the parameters to plot; all posteriors must share them.

        :param names:
            Iterable of parameter names. Every name must be present in the
            ``names`` of every posterior in the current subset.

        :raises ParameterError:
            If a name is missing from a posterior in the current subset.

        On any failure, including a failed consistency check of the labels,
        the stored parameter information is reset to ``None`` before the
        exception propagates.

        """
        self._params = Params(names)

        # first (name, posterior) pair for which the name is absent, if any
        absent = next(((name, posterior)
                       for name in names
                       for posterior in self._subset
                       if name not in posterior.names),
                      None)

        if absent is not None:
            self._params = None
            raise ParameterError('No parameter name matching %s in run '
                                 'with ID %s.' % (absent[0], absent[1].ID))

        try:
            self._check_params('labels')
        except Exception:
            self._params = None
            raise


    def _check_params(self, attrs):
        """ Check consistency of parameter information across posteriors.

        For every current parameter name, the entry of the ``attrs``
        mapping on the first posterior of the subset is compared with the
        same entry on every other posterior. The first posterior's values
        are stored, in parameter order, on the current parameter object
        under the attribute name ``'_' + attrs``.

        :param str attrs:
            Name of a posterior attribute that can be indexed by parameter
            name.

        :raises ValueError:
            If two posteriors disagree on the value for a parameter.

        :raises AttributeError, KeyError:
            Re-raised, after printing a message, if a posterior lacks the
            attribute or the attribute has no entry for a parameter.

        """

        private_name = '_' + attrs

        # the list is attached first and then filled in place
        collected = [None] * len(self._params._names)
        setattr(self._params, private_name, collected)

        try:
            for index, param in enumerate(self._params._names):
                for other in self._subset[1:]:
                    reference_value = getattr(self._subset[0], attrs)[param]
                    if reference_value != getattr(other, attrs)[param]:
                        raise ValueError('Inconsistent %s for parameter '
                                         '%s between posteriors %s and %s.'
                                         % (attrs, param,
                                            self._subset[0].ID, other.ID))
                collected[index] = getattr(self._subset[0], attrs)[param]
        except (AttributeError, KeyError):
            print('Parameter %s not specified correctly.' % attrs)
            raise


[docs]
    def get_attr(self, attribute):
        """ Collect an attribute from every run currently set for plotting.

        :param str attribute:
            Name of the attribute to read from each run yielded by
            :attr:`subset_to_plot`.

        :returns:
            A list of attribute values, in the order of the runs.

        """

        values = []
        for run in self.subset_to_plot:
            values.append(getattr(run, attribute))
        return values


    def _filter_nestcheck_compatible(self):
        """ Select the runs to plot that are compatible with nestcheck.

        :returns:
            A tuple ``(backends, runs)`` of two parallel lists: the
            ``nestcheck_backend`` of each run whose ``use_nestcheck`` is
            true, and those runs themselves.

        :raises TypeError:
            If any run to plot is not a :class:`NestedBackend` instance.

        :raises AttributeError:
            Re-raised, after printing a message, if raised while the runs
            to plot are being retrieved and type-checked.

        """
        try:
            if not all(isinstance(run, NestedBackend)
                       for run in self.subset_to_plot):
                raise TypeError('Nested sampling backends are required.')
        except AttributeError:
            print('Nested sampling runs are required.')
            raise

        # keep references to only the nestcheck-compatible runs
        compatible = [run for run in self.subset_to_plot
                      if run.use_nestcheck]
        backends = [run.nestcheck_backend for run in compatible]

        return backends, compatible