Source code for bfade.elhaddad

from typing import Dict, Any, List
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy.special import expit

from sklearn.model_selection import train_test_split as tts

from bfade.abstract import AbstractBayes, AbstractCurve
from bfade.dataset import Dataset
from bfade.util import sif_equiv, inv_sif_range, sif_range
from bfade.util import MissingInputException, YieldException, logger_factory

_log = logger_factory(name=__name__, level="DEBUG")


[docs]
class ElHaddadCurve(AbstractCurve):
    """
    El Haddad curve expressed as delta_sigma versus sqrt_area.

    All keyword arguments are forwarded unchanged to ``AbstractCurve``.
    :meth:`equation` reads ``dk_th``, ``ds_w`` and ``Y`` from the instance
    (presumably set by ``AbstractCurve`` from ``pars`` -- verify there).
    """

    def __init__(self, **pars):
        super().__init__(**pars)

    def equation(self, X: np.ndarray) -> np.ndarray:
        r"""
        Evaluate the El Haddad curve over a given :math:`\sqrt{\text{area}}` range.

        .. math::
            \Delta\sigma = \Delta\sigma_w\sqrt{{\sqrt{\text{area}_0}}
                                            \over{\sqrt{\text{area}_0}
                                                    + \sqrt{\text{area}}}}

        where

        .. math::
            \sqrt{\text{area}_0} = {1 \over \pi} \bigg({{\Delta K_{th}}
                                            \over {Y \Delta \sigma_{w}}}\bigg)^2

        Parameters
        ----------
        X : np.ndarray
            range of sqrt_area

        Returns
        -------
        np.ndarray
            Evaluated El Haddad curve along the given sqrt_area values.

        Notes
        -----
        As a side effect, ``self.sqrt_a0`` is recomputed and stored on the
        instance at every call.

        """
        # NOTE(review): dk_th is scaled by 1000 before the inversion; this
        # looks like a unit conversion so that sqrt_a0 comes out in the same
        # units as X -- confirm against bfade.util.inv_sif_range.
        self.sqrt_a0 = inv_sif_range(self.dk_th*1000, self.ds_w, self.Y)
        return self.ds_w * ((self.sqrt_a0/(X+self.sqrt_a0))**0.5)





[docs]
class ElHaddadBayes(AbstractBayes):
    """
    Bayesian model whose predictor is built on :class:`ElHaddadCurve`.

    Positional and keyword arguments are forwarded unchanged to
    ``AbstractBayes``. :meth:`predictor` reads ``self.pars`` and
    ``self.deterministic`` (presumably set by ``AbstractBayes`` -- verify
    there).
    """

    def __init__(self, *pars, **args):
        super().__init__(*pars, **args)

    def predictor(self, D, *P: float) -> np.ndarray:
        r"""
        Perform logistic prediction based on the given parameters and dataset.

        .. math::
            P[\mathbf{x}_i | \theta] = {{1}\over{1+\exp [-\mathcal{H}(\mathbf{x}_i, \theta)]}}

        where :math:`\theta` is the vector of trainable parameters

        .. math::
            \theta = [\Delta K_{th,lc}\ \Delta\sigma_w]

        and :math:`\mathbf{x}_i \in D` is a sample from the given dataset.

        :math:`\mathcal{H}(\mathbf{x}_i, \theta)` is the signed distance of the sample
        to the El Haddad curve of parameters :math:`\theta`. The position of
        the training points wrt the target curve are computed over the log-log plane.

        Parameters
        ----------
        D : Dataset
            Dataset handed to ``ElHaddadCurve.signed_distance_to_dataset``.

        *P : float
            Values of the trainable parameters, given positionally in the
            same order as the names in ``self.pars``.

        Returns
        -------
        numpy.ndarray
            An array containing the logistic predictions.

        """
        # Pair each trainable value with its name, then add the deterministic
        # parameters. They are applied last, so on a name clash the
        # deterministic value wins (same result as ``trainable | deterministic``).
        all_pars = dict(zip(self.pars, P))
        all_pars.update(self.deterministic)
        # metrics=np.log10: distances are evaluated on log10-transformed axes.
        eh = ElHaddadCurve(metrics=np.log10, **all_pars)
        # Only the first of the three returned values (the signed distance)
        # is used here.
        signed_distance, _, _ = eh.signed_distance_to_dataset(D)
        return expit(signed_distance)


    
# Maps internal quantity names and class ids ("0"/"1") to display labels
# written with TeX-style ``$...$`` math markup (presumably used as plot
# labels -- verify against callers). Raw strings keep the backslashes literal
# without relying on invalid escape sequences; every label must contain an
# even number of ``$`` delimiters.
ElHaddadTranslator = {
    "dk_th": r"$\Delta K_{th,lc}$ [MPa $\sqrt{m}$]",
    "dk": r"$\Delta K$ [MPa $\sqrt{m}$]",
    "ds_w": r"$\Delta\sigma_w$ [MPa]",
    "sq_a0": r"$\sqrt{area}_0$ [$\mu$m]",
    "sq_a": r"$\sqrt{area}$ [$\mu$m]",
    "ds": r"$\Delta\sigma$ [MPa]",
    "0": "Runout",
    "1": "Failed",
}



[docs]
class ElHaddadDataset(Dataset):
    """
    Dataset with El Haddad-specific pre-processing.

    :meth:`pre_process` fixes a single reference ``Y``, converts
    ``sqrt_area`` accordingly, adds the SIF range column ``dk`` and exposes
    the arrays ``X``, ``y``, ``Y`` and ``aux`` as attributes.
    """

    def __init__(self, **kwargs: Any) -> None:
        super().__init__(**kwargs)

    def pre_process(self, **kwargs):
        """
        Pre-process the dataset.

            - set 'Y'

            - convert sqrt_area using the SIF equivalence

            - compute SIF

            - set attributes.

        ``self.data`` is modified in place: the original ``Y`` and
        ``sqrt_area`` columns are renamed to ``Y_`` and ``sqrt_area_``, and
        new ``Y``, ``sqrt_area`` and ``dk`` columns are inserted. The columns
        ``Y``, ``sqrt_area``, ``delta_sigma`` and ``failed`` must be present.

        Parameters
        ----------
        kwargs : Dict[str, Any]
            Y_ref to specify the reference value for Y.

        Raises
        ------
        MissingInputException
            Raised if 'Y' is neither unique in the dataset nor provided
            as a keyword argument.

        """
        _log.debug(f"{self.__class__.__name__}.{self.pre_process.__name__}")
        # A membership test is used instead of try/except KeyError so that
        # MissingInputException is not raised from inside an exception
        # handler (which would attach a misleading KeyError to the traceback).
        if "Y_ref" in kwargs:
            self.Y = kwargs.pop("Y_ref")
            _log.warning(f"Y_ref user-provided = {self.Y:.2f}")
        else:
            _log.warning("Y_ref not user-provided")
            _log.warning("Verify uniqueness of Y")
            y_values = set(self.data.Y)
            if len(y_values) != 1:
                _log.error("Y is not unique")
                _log.debug(f"Values found: {y_values}")
                raise MissingInputException("Y_ref is neither unique nor provided")
            (self.Y,) = y_values
            _log.warning(f"Y is unique = {self.Y:.2f}")

        _log.info("Update dataframe")
        # Keep the original per-row values as "Y_" and insert the reference
        # value right after it as the new "Y" column.
        self.data.rename(columns={"Y": "Y_"}, inplace=True)
        self.data.insert(list(self.data.columns).index("Y_")+1, "Y", self.Y)

        _log.warning(f"Convert sqrt_area by {self.Y:.2f}")
        # Same pattern: original values survive as "sqrt_area_", the converted
        # ones take over the "sqrt_area" name.
        self.data.rename(columns={"sqrt_area": "sqrt_area_"}, inplace=True)
        self.data.insert(list(self.data.columns).index("sqrt_area_")+1, "sqrt_area",
                        sif_equiv(self.data.sqrt_area_, self.data.Y_, self.Y))

        _log.info("Compute SIF range")
        # NOTE(review): the 1e-6 factor looks like a micrometre -> metre
        # conversion of sqrt_area -- confirm against bfade.util.sif_range.
        self.data.insert(list(self.data.columns).index("Y")+1, "dk",
                        sif_range(self.data.delta_sigma, self.data.Y, self.data.sqrt_area*1e-6))

        _log.debug("Calculate min max of delta_k for colour bars")
        self.aux = self.data["dk"].to_numpy()
        self.aux_min = self.aux.min()
        self.aux_max = self.aux.max()

        self.X = self.data[["sqrt_area", "delta_sigma"]].to_numpy()
        self.y = self.data["failed"].to_numpy()
        # From here on self.Y is the per-row array, no longer the scalar
        # reference value used above.
        self.Y = self.data["Y"].to_numpy()

    def populate(self, data, X_labels: List[str] = ["sqrt_area", "delta_sigma"], y_label: str ="failed"):
        """
        Overload the method by providing keys pertinent to the El Haddad Curve.

        Parameters
        ----------
        data : pandas.DataFrame
            Frame providing the ``X_labels``, ``y_label`` and ``dk`` columns.
        X_labels : List[str]
            Names of the feature columns. The default list is only read,
            never modified.
        y_label : str
            Name of the target column.

        Returns
        -------
        dict
            Keys ``X``, ``y`` and ``aux`` are extracted from ``data``;
            ``aux_min``, ``aux_max`` and ``Y`` are taken from the instance
            attributes that :meth:`pre_process` sets.

        """
        # NOTE(review): "Y" is the instance-level array, not data["Y"]; when
        # `data` is a subset of self.data the lengths differ -- confirm this
        # is intended.
        return {"X": data[X_labels].to_numpy(),
                "y": data[y_label].to_numpy(),
                "aux": data["dk"].to_numpy(),
                "aux_min": self.aux_min,
                "aux_max": self.aux_max,
                "Y": self.Y}