Source code for tessif.identify.auxilliary

# src/tessif/identify/auxilliary.py
# pylint: disable=trailing-whitespace
# pylint error disabled since the dataframe doctest results require those
"""Tessif module providing aux. resullt differences identification tools."""
from numpy import mean as numpy_mean


def flatten_multiindex_flow_data(midx_df):
    """Flatten multiindexed results to each flow being a singular column.

    Parameters
    ----------
    midx_df: pandas.DataFrame
        Multiindexed dataframe to be flattened.

        Design case, the top-level index represents the individual
        components, while the second-level index represents the respective
        outflow targets. Meaning for an energy system like::

            A -> B -> C
                 |
                 v
                 D

        The dataframe would look something like::

                A  B
                B  C   D
            0  10  8   2
            1   0  0   0
            2  20  2  18

        Usually returned by something like
        :attr:`tessif.analyze.ComparativeResultier.all_loads`.

    Returns
    -------
    pandas.DataFrame
        Flattened, singular indexed column data frame. Each column label is
        the string ``"<source> to <target>"``. The input dataframe is left
        unmodified.

    Note
    ----
    Level labels are converted using :class:`str` before joining, so
    non-string labels (e.g. integer node ids) are supported as well.

    Example
    -------
    Picking up on the example flows from above:

    >>> import pandas as pd
    >>> data = [
    ...     [10, 8, 2, ],
    ...     [0, 0, 0, ],
    ...     [20, 2, 18],
    ... ]
    >>> mindex_df = pd.DataFrame(
    ...     data=data,
    ...     columns=pd.MultiIndex.from_tuples(
    ...         [("A", "B"), ("B", "C"), ("B", "D")]),
    ...     index=pd.date_range('2019-01-01', periods=3, freq='H'),
    ... )

    Original multiindexed dataframe:

    >>> print(mindex_df)  # doctest: +NORMALIZE_WHITESPACE
                          A  B
                          B  C   D
    2019-01-01 00:00:00  10  8   2
    2019-01-01 01:00:00   0  0   0
    2019-01-01 02:00:00  20  2  18

    Flattened dataframe:

    >>> print(flatten_multiindex_flow_data(mindex_df))
                         A to B  B to C  B to D
    2019-01-01 00:00:00      10       8       2
    2019-01-01 01:00:00       0       0       0
    2019-01-01 02:00:00      20       2      18
    """
    # Work on a copy so the caller's dataframe keeps its multiindex.
    flattened_data = midx_df.copy()

    # str.join only accepts strings; coerce each level label so that
    # non-string labels do not raise a TypeError.
    flattened_data.columns = [
        " to ".join(str(level) for level in col)
        for col in midx_df.columns
    ]

    return flattened_data
def list_mutually_inclusive_columns(dataframes):
    """Identify columns present in all dataframes.

    Parameters
    ----------
    dataframes: ~collections.abc.Container
        Container of singular column indexed dataframes of which the
        mutually inclusive columns are identified.

    Returns
    -------
    list
        Sorted list of column indices present in every dataframe. An empty
        list is returned if ``dataframes`` is empty.

    Example
    -------
    >>> import pandas as pd
    >>> data = [
    ...     [10, 8, 2, ],
    ...     [0, 0, 0, ],
    ...     [20, 2, 18],
    ... ]
    >>> df1 = pd.DataFrame(
    ...     data=data,
    ...     columns=["A", "C", "D"],
    ... )
    >>> df2 = pd.DataFrame(
    ...     data=data,
    ...     columns=["A", "D", "E"],
    ... )

    >>> print(list_mutually_inclusive_columns([df1, df2]))
    ['A', 'D']

    >>> print(list_mutually_inclusive_columns([]))
    []
    """
    # Iterating a dataframe yields its column labels.
    column_sets = [set(dtf) for dtf in dataframes]

    # set.intersection() without any argument raises a TypeError, so the
    # empty case has to be answered explicitly.
    if not column_sets:
        return []

    return sorted(set.intersection(*column_sets))
def list_not_mutually_inclusive_columns(dataframes):
    """Identify columns not present in all dataframes.

    Parameters
    ----------
    dataframes: ~collections.abc.Container
        Container of singular column indexed dataframes of which the not
        mutually inclusive columns are identified.

    Returns
    -------
    list
        Sorted list of column indices that are missing in at least one of
        the dataframes. An empty list is returned if ``dataframes`` is
        empty.

    Example
    -------
    >>> import pandas as pd
    >>> data = [
    ...     [10, 8, 2, ],
    ...     [0, 0, 0, ],
    ...     [20, 2, 18],
    ... ]
    >>> df1 = pd.DataFrame(
    ...     data=data,
    ...     columns=["A", "C", "D"],
    ... )
    >>> df2 = pd.DataFrame(
    ...     data=data,
    ...     columns=["A", "D", "E"],
    ... )

    >>> print(list_not_mutually_inclusive_columns([df1, df2]))
    ['C', 'E']
    """
    # The dataframes are traversed twice (union here, intersection inside
    # the helper). Materialize them once so one-shot iterables work too.
    frames = list(dataframes)

    # Nothing to compare; also keeps the helper from being called with an
    # empty collection.
    if not frames:
        return []

    all_cols = set().union(*(dtf.columns for dtf in frames))
    common_cols = set(list_mutually_inclusive_columns(frames))

    return sorted(all_cols - common_cols)
def filter_mutually_inclusive_columns(dataframes):
    """Filter a set of dataframes to only include mutually inclusive columns.

    Parameters
    ----------
    dataframes: ~collections.abc.Container
        Container of singular column indexed dataframes of which the
        mutually inclusive columns are identified and kept, while the others
        are dropped.

    Returns
    -------
    list
        List of dataframes only containing mutually inclusive columns, in
        the same order as ``dataframes``. An empty list is returned if
        ``dataframes`` is empty.

    Example
    -------
    >>> import pandas as pd
    >>> data = [
    ...     [10, 8, 2, ],
    ...     [0, 0, 0, ],
    ...     [20, 2, 18],
    ... ]
    >>> df1 = pd.DataFrame(
    ...     data=data,
    ...     columns=["A", "C", "D"],
    ... )
    >>> df2 = pd.DataFrame(
    ...     data=data,
    ...     columns=["A", "D", "E"],
    ... )

    >>> filtered_dfs = filter_mutually_inclusive_columns([df1, df2])

    Filtered df1:

    >>> print(filtered_dfs[0])
        A   D
    0  10   2
    1   0   0
    2  20  18

    Filtered df2:

    >>> print(filtered_dfs[1])
        A  D
    0  10  8
    1   0  0
    2  20  2
    """
    # The dataframes are traversed twice (inside the helper and for the
    # selection below). Materialize them once so one-shot iterables are not
    # exhausted before the selection happens.
    frames = list(dataframes)

    # Nothing to filter; also keeps the helper from being called with an
    # empty collection.
    if not frames:
        return []

    common_cols = list_mutually_inclusive_columns(frames)

    return [dtf[common_cols] for dtf in frames]
def drop_all_zero_columns(dataframe):
    """Remove every column that consists of zeroes (0) only.

    Parameters
    ----------
    dataframe: pandas.DataFrame
        Data frame of which the all-zero columns are dropped.

    Returns
    -------
    pandas.DataFrame
        Data frame holding only those columns that have at least one entry
        different from zero.

    Example
    -------
    >>> import pandas as pd
    >>> data = [
    ...     [10, 8, 0, ],
    ...     [0, 0, 0, ],
    ...     [20, 2, 0],
    ... ]
    >>> df = pd.DataFrame(
    ...     data=data,
    ...     columns=["A", "C", "D"],
    ... )

    >>> print(drop_all_zero_columns(df))
        A  C
    0  10  8
    1   0  0
    2  20  2
    """
    # One boolean per column: True if any row of that column is non-zero.
    has_nonzero_entry = dataframe.ne(0).any(axis=0)

    return dataframe.loc[:, has_nonzero_entry]
def drop_all_zero_rows(dataframe):
    """Remove every row that consists of zeroes (0) only.

    Parameters
    ----------
    dataframe: pandas.DataFrame
        Data frame of which the all-zero rows are dropped.

    Returns
    -------
    pandas.DataFrame
        Data frame holding only those rows that have at least one entry
        different from zero. The remaining rows keep their original index
        labels.

    Example
    -------
    >>> import pandas as pd
    >>> data = [
    ...     [10, 8, 0, ],
    ...     [0, 0, 0, ],
    ...     [20, 2, 0],
    ... ]
    >>> df = pd.DataFrame(
    ...     data=data,
    ...     columns=["A", "C", "D"],
    ... )

    >>> print(drop_all_zero_rows(df))
        A  C  D
    0  10  8  0
    2  20  2  0
    """
    # One boolean per row: True if any column of that row is non-zero.
    has_nonzero_entry = dataframe.ne(0).any(axis=1)

    return dataframe.loc[has_nonzero_entry]
def parse_reference_df(dataframe, reference=None):
    """Determine the reference data of a dataframe.

    Parameters
    ----------
    dataframe: pandas.DataFrame
        Data frame the reference data is taken from.
    reference: optional
        Column label of ``dataframe`` to be used as reference. If ``None``
        (default), the mean over the column axis (one value per row) is
        used instead.

    Returns
    -------
    pandas.Series
        Either ``dataframe[reference]`` or, if no reference is given, the
        mean of all columns for each row.

    Example
    -------
    >>> import pandas as pd
    >>> df = pd.DataFrame(
    ...     data=[[10, 8, 0], [0, 0, 0], [20, 2, 2]],
    ...     columns=["A", "C", "D"],
    ... )

    >>> print(parse_reference_df(df).tolist())
    [6.0, 0.0, 8.0]

    >>> print(parse_reference_df(df, reference="C").tolist())
    [8, 0, 2]
    """
    # An explicitly requested column takes precedence over the average.
    if reference is not None:
        return dataframe[reference]

    return numpy_mean(dataframe, axis="columns")