Source code for tessif.identify.static

# src/tessif/identify/static.py
"""Identify submodule providing identification utilities for static results."""

import functools
import pandas as pd

from tessif.identify.core import (
    cluster,
    Identificier,
)
from tessif.identify.calculate import calc_ardiffs, calc_reldiffs


class StaticIdentificier(Identificier):
    """Identify components of which the static results differ between softwares.

    Components are identified using following logic:

        1. The relative deviations between ``data`` and ``reference`` are
           calculated using :func:`tessif.identify.calculate.calc_reldiffs`.
        2. The absolute values of these deviations are sorted into the
           clusters described by ``conditions_dict`` using
           :func:`tessif.identify.core.cluster`.

    Parameters
    ----------
    data: pandas.DataFrame
        Pandas DataFrame holding the static results. Indexed by
        components/flows, columned by softwares.

        DataFrames of this format can be obtained using
        :attr:`tessif.analyze.ComparativeResultier.all_capacities` or
        :attr:`tessif.analyze.ComparativeResultier.all_original_capacities` or
        :attr:`tessif.analyze.ComparativeResultier.all_net_energy_flows` for
        example.

    conditions_dict: dict, default=None
        Dictionary describing the clustering categories as strings and the
        respective threshold above which a difference between softwares is
        considered to fall within this cluster.

        The dict keys :class:`container(s) <collections.abc.Container>`
        of dicts by the respective cluster labels "high", "medium" and "low".
        The dictionaries inside the tuples need to have following keywords:

            - ``"oprt"``: name of the comparison operator, e.g. ``"ge"``
              (``>=``) or ``"lt"`` (``<``) as used in the default below.
            - ``"thres"``: threshold the deviation is compared against.

        All containers are expected to hold the same number of condition
        dicts; the number is read from the first entry.

        If ``None``, following default is used::

            conditions_dict = {
                "high": (
                    {"oprt": "ge", "thres": 0.3},
                    {"oprt": "ge", "thres": 0.3},
                ),
                "medium": (
                    {"oprt": "lt", "thres": 0.3},
                    {"oprt": "ge", "thres": 0.1},
                ),
                "low": (
                    {"oprt": "lt", "thres": 0.1},
                    {"oprt": "lt", "thres": 0.1},
                ),
            }

        which translates to:

            - high:   0.3 <= delta
            - medium: 0.1 <= delta < 0.3
            - low:    0.0 <= delta < 0.1

    reference: str, None, default=None
        Defines the reference results to be used for calculating the
        absolute relative deviation between softwares.

        In case ``None`` is used (default), the dataframe's average is used as
        returned by :func:`average_timevarying_dataframe_results`.
    """

    def __init__(self, data, conditions_dict=None, reference=None):
        # Build the default inside the call so no mutable default argument
        # is shared between instances.
        if conditions_dict is None:
            conditions_dict = {
                "high": (
                    {"oprt": "ge", "thres": 0.3},
                    {"oprt": "ge", "thres": 0.3},
                ),
                "medium": (
                    {"oprt": "lt", "thres": 0.3},
                    {"oprt": "ge", "thres": 0.1},
                ),
                "low": (
                    {"oprt": "lt", "thres": 0.1},
                    {"oprt": "lt", "thres": 0.1},
                ),
            }

        super().__init__(
            data=data,
            conditions_dict=conditions_dict,
            reference=reference,
        )

    @property
    def relative_deviations(self):
        """Relative deviations between data and reference.

        The value is computed and stored by :meth:`cluster_interest`, so it
        is only available once that method has run.
        NOTE(review): presumably the base class triggers
        :meth:`cluster_interest` during initialization -- confirm.
        """
        return self._rel_devs

    def cluster_interest(self):
        """Cluster inter component results by interest.

        Returns
        -------
        pandas.DataFrame
            DataFrame shaped like the relative deviations (same index and
            columns) holding the result of
            :func:`tessif.identify.core.cluster` for every cell.
        """
        # calculate relative deviations (kept signed for later inspection
        # via the ``relative_deviations`` property)
        self._rel_devs = calc_reldiffs(self.data, self.reference)

        # buffer for increased readability
        num_of_conds = len(tuple(self.cluster_conditions.values())[0])
        rel_devs = self._rel_devs.abs()  # use abs reldiffs for clustering

        # transform cells into tuples of identical values based on number of
        # conditions, so every condition gets its own copy of the value
        dtf = pd.DataFrame(
            {
                col: zip(*tuple(rel_devs[col] for _i in range(num_of_conds)))
                for col in rel_devs.columns
            },
            index=rel_devs.index,
        )

        # DataFrame.applymap is deprecated since pandas 2.1 in favour of
        # DataFrame.map. Probe the class (not the instance) so that a column
        # which happens to be named "map" is never mistaken for the method
        # on older pandas versions.
        if hasattr(pd.DataFrame, "map"):
            elementwise = dtf.map
        else:
            elementwise = dtf.applymap

        clustered_df = elementwise(
            # use functools partial to provide additional params to "cluster"
            functools.partial(
                cluster,
                conditions_dict=self.cluster_conditions,
            ),
        )

        return clustered_df

    def map_interest_results(self, data):
        """Map data to identified interest categories.

        For each of the labels "high", "medium" and "low" the rows of
        ``data`` matching the index of the equally named attribute are
        selected and stored as ``_<label>_interest_results``.

        Parameters
        ----------
        data: pandas.DataFrame
            Results to be split by interest category. Needs to support
            ``.loc`` look-ups with the indices of the clustered results.
        """
        # "label" instead of "cluster" to avoid shadowing the imported
        # clustering function of the same name
        for label in ["high", "medium", "low"]:
            clustered_results = data.loc[getattr(self, label).index]
            setattr(self, f"_{label}_interest_results", clustered_results)