Source code for bfade.statistics

from typing import Dict, Any, Tuple, List
import numpy as np
from scipy.stats import t as t_student
from bfade.util import MissingInputException, logger_factory

_log = logger_factory(name=__name__, level="DEBUG")


[docs]
class Distribution():
    """Interface to scipy random variables.
    
    This class instantiates distribution probabilities relying on scipy.stats.
    """

    def __init__(self, dist, **kwargs: Dict[str, Any]) -> None:
        """
        Initialize a distribution.

        Parameters
        ----------
        dist : callable
            Function representing a probability distribution
            (e.g., scipy.stats.norm for a normal distribution).
        kwargs : Dict[str, Any]
            Parameters of the selected distribution.

        Returns
        -------
        None.

        """
        self.dist_pars = kwargs
        self.dist = dist(**kwargs)


[docs]
    def pdf(self, x: np.ndarray) -> float:
        """
        Probability density function (PDF).

        Parameters
        ----------
        x : float
            The point where PDF is evaluated.

        Returns
        -------
        float
            The PDF value at `x`.

        """
        return self.dist.pdf(x)

    

[docs]
    def logpdf(self, x: np.ndarray) -> float:
        """
        Log-probability density function (PDF).

        Parameters
        ----------
        x : float
            The point where PDF is evaluated.

        Returns
        -------
        float
            The Log-PDF value at `x`.

        """
        return self.dist.logpdf(x)

    

[docs]
    def rvs(self, size: int) -> np.ndarray:
        """
        Sample PDF.

        Parameters
        ----------
        size : int
            Size of the sample.
            
        Returns
        -------
        np.ndarray
            Sample of size 'size'

        """
        return self.dist.rvs(size)

    
    def __str__(self) -> str:
        _dict = self.__dict__.pop("dist_pars")
        return f"\n{_dict}"




[docs]
class uniform():
    """Uniform probability distribution.

    This class is provided for convenience. Using this class is totally optional
    and one can alternatively utilise scipy's either. Still scipy's uniform
    returns 0 if the input point is outside the set range. This can cause issues
    to the likelihood for under-conservative choices of the lower and the upper
    bound of the range. Therefore, this custom uniform distribution is designed
    to return a given value everywhere. Obviously, this contrast the CDF but
    facilitates the computation of the likelihood.

    The methods simulate part of the typical interface of scipy's random vars.

    """
    def __init__(self, **kwargs) -> None:
        """
        Initialize Uniform distribution.

        Parameters
        ----------
        unif_value : float
            The uniform value associated with the distribution.

        Raises
        ------
        MissingInputException
            If `unif_value` is not provided.

        """
        _log.info(f"(Non-scipy). {self.__class__.__name__}.{self.__init__.__name__}")
        try:
            self.unif_value = kwargs.pop("unif_value")
        except KeyError as KE:
            raise MissingInputException(f"{KE} not provided")

        self.dist = self.unif_value


[docs]
    def pdf(self, x: np.ndarray) -> np.ndarray:
        """Probability density function (PDF) at given x."""
        return np.ones(np.array(x).shape)*self.unif_value

    

[docs]
    def logpdf(self, x: np.ndarray) -> np.ndarray:
        """Probability density function (PDF) at given x."""
        return np.log(np.ones(np.array(x).shape)*self.unif_value)



[docs]
    def rvs(self, size: int) -> np.ndarray:
        """Draw a random sample of size 'size'"""
        return np.ones(size)*self.unif_value





[docs]
class MonteCarlo:

    def __init__(self, curve, random_state: int = 0) -> None:
        """
        Initialise Monte Carlo simulation

        Parameters
        ----------
        curve : AbstractCurve
            Reference curve.
        random_state: int
            Random state for numpy to sample the posterior. The default is 0.

        Returns
        -------
        None

        """
        _log.debug(f"{self.__class__.__name__}.{self.__init__.__name__}")
        self.curve = curve
        np.random.seed(random_state)


[docs]
    def sample(self, n_samples: int, distribution: str, bayes) -> np.ndarray:
        """
        Sample from the Bayesian model.

        Parameters
        ----------
        n_samples : int
            Number of samples to generate.
        distribution : str
            Distribution type to sample from. Options are "joint" or "marginals".
        bayes : AbstractBayes
            Bayesian model object.

        Returns
        -------
        np.ndarray
            Generated samples.

        Raises
        ------
        Exception
            If an invalid distribution type is provided.

        """
        self.n_samples = n_samples
        self.pars = bayes.pars
        self.deterministic = bayes.deterministic

        if distribution == "joint":
            _log.debug(f"{self.__class__.__name__}.{self.sample.__name__} -- Joint, samples = {self.n_samples}")
            self.samples = bayes.joint.rvs(self.n_samples)
        elif distribution == "marginals":
            _log.debug(f"{self.__class__.__name__}.{self.sample.__name__} -- Marginals, samples = {self.n_samples}")
            self.samples = np.array([getattr(bayes, "marginal_" + p).rvs(self.n_samples) for p in bayes.pars]).T
        else:
            raise Exception("Invalid distribution to sample.")

        return self.samples



[docs]
    def prediction_interval(self, x_edges: List[float], n: int, spacing: str, confidence: float = 95) -> Tuple:
        """
        Compute prediction intervals for a curve.

        .. math::
            P\\Big[\overline{\mathcal{E}^{(\sf M)}} \
                            - \mathcal{P}^{(\sf M)} \
                                \le \mathcal{E}^{({\sf M}+1)} \
                                    \le \overline{\mathcal{E}^{(\sf M)}} \
                                        + \mathcal{P}^{(\sf M)}\\Big] = \\beta

        where :math:`\\beta` is the confidence level. The semi ampliture of \
            the prediction interval is:

        .. math::
            \mathcal{P}^{(\sf M)} = T_{\\beta} S^{(\sf M)}  \sqrt{1 + 1/{\mathsf{M}}}

        Parameters
        ----------
        x_edges : list of float, optional
            Edges of the x-axis over which the curve is plotted.
        n : int, optional
            Resolution of the curve (number of points over x-axis).
        spacing : str, optional
            spacing for x and y axes.
        confidence: float
            Confidence level of the prediction intervals. The default is 95.

        Returns
        -------
        result : Tuple
            A tuple containing the following elements:

            - 'mean': The expected curve data.

            - 'pred': The semi-amplitude of the prediction interval.

            - 'x1': abscissa along with the prediction interval is computed.

        """
        self.confidence = confidence
        _log.info(f"{self.__class__.__name__}.{self.prediction_interval.__name__} -- Confidence = {self.confidence/100}%")
        curves = []
        if spacing == "log":
            x1 = np.logspace(np.log10(x_edges[0]), np.log10(x_edges[1]), n)
        else:
            x1 = np.linspace(x_edges[0], x_edges[1], n)

        for s in self.samples:
            # d = dict(zip(self.pars, s)) | self.deterministic # as of Python 3.9
            d = dict(zip(self.pars, s))
            d.update(self.deterministic)
            curves.append(self.curve(**d).equation(x1))

        curves = np.array(curves)
        mean = curves.mean(axis=0)
        std = curves.std(axis=0)
        pred = t_student(df=len(curves)-1).ppf(self.confidence/100)*std*((1+1/len(curves))**0.5)

        return mean, pred, x1