Source code for sierra.plugins.compare.graphs.preprocess

#
# Copyright 2024 John Harwell, All rights reserved.
#
# SPDX-License-Identifier: MIT
#
"""Preprocess inter-experiment outputs for stage 5.

Basically, gather statistics generated from controllers for graph generation in
previous stages into the correct file(s) for comparison.
"""

# Core packages
import pathlib
import typing as tp

# 3rd party packages
import polars as pl

# Project packages
from sierra.core import utils, config, storage
from sierra.core.variables import batch_criteria as bc


class IntraExpPreparer:
    """Collate generated stats from previous stages into file(s) for comparison.

    For each statistics file extension known to the configuration ("mean",
    "conf95", "bw"), one row is pulled out of the matching input file and
    appended as a named column to a cumulative output dataframe.

    Attributes:
        ipath_stem: Parent path of the input statistics files.

        ipath_leaf: Input file name without the statistics extension.

        opath_stem: Parent path of the output (collated) files.

        criteria: Batch criteria; its ``gen_exp_names()`` seeds the
                  "Experiment ID" column of a new output dataframe.
    """

    def __init__(
        self,
        ipath_stem: pathlib.Path,
        ipath_leaf: str,
        opath_stem: pathlib.Path,
        criteria: bc.XVarBatchCriteria,
    ):
        self.ipath_stem = ipath_stem
        self.ipath_leaf = ipath_leaf
        self.opath_stem = opath_stem
        self.criteria = criteria

    def for_cc(
        self,
        controller: str,
        opath_leaf: str,
        index: int,
        inc_exps: tp.Optional[str],
    ) -> None:
        """Take batch-level dataframes and create a new dataframe.

        The new dataframe has:

        - Experiment names for the index.

        - Controller names as column names (guaranteed to be unique, since
          that's what we are comparing).

        - df[controller] columns as timeslices *across* columns (i.e., across
          experiments in the batch) in the source dataframe.

        Arguments:
            controller: Name of the column to add to the output dataframe.

            opath_leaf: Output file name without the statistics extension.

            index: Row of the input dataframe to extract; negative values
                   count from the end.

            inc_exps: Passed through to :meth:`_cc_for_stat`, which currently
                      does not use it.
        """
        self._collate(controller, opath_leaf, index, inc_exps, self._cc_for_stat)

    def for_sc(
        self,
        scenario: str,
        opath_leaf: str,
        index: int,
        inc_exps: tp.Optional[str],
    ) -> None:
        """Take batch-level dataframes and create a new dataframe.

        The new dataframe has:

        - Experiment names for the index.

        - Scenario names as column names (guaranteed to be unique, since
          that's what we are comparing).

        - df[scenario] columns as timeslices *across* columns (i.e., across
          experiments in the batch) in the source dataframe.

        Arguments:
            scenario: Name of the column to add to the output dataframe.

            opath_leaf: Output file name without the statistics extension.

            index: Row of the input dataframe to extract; negative values
                   count from the end.

            inc_exps: Passed through to :meth:`_sc_for_stat`, which currently
                      does not use it.
        """
        self._collate(scenario, opath_leaf, index, inc_exps, self._sc_for_stat)

    def _cc_for_stat(
        self,
        ipath: pathlib.Path,
        opath: pathlib.Path,
        index: int,
        inc_exps: tp.Optional[str],
        controller: str,
    ) -> tp.Optional[pl.DataFrame]:
        """Build the collated dataframe for one statistic, keyed by controller.

        ``inc_exps`` is accepted for interface compatibility and is unused.
        Returns ``None`` if ``ipath`` does not exist.
        """
        return self._for_stat(ipath, opath, index, controller)

    def _sc_for_stat(
        self,
        ipath: pathlib.Path,
        opath: pathlib.Path,
        index: int,
        inc_exps: tp.Optional[str],
        scenario: str,
    ) -> tp.Optional[pl.DataFrame]:
        """Build the collated dataframe for one statistic, keyed by scenario.

        ``inc_exps`` is accepted for interface compatibility and is unused.
        Returns ``None`` if ``ipath`` does not exist.
        """
        return self._for_stat(ipath, opath, index, scenario)

    @staticmethod
    def _stat_exts() -> dict:
        """Return the merged extension table for mean/conf95/bw statistics.

        A new dict is built on every call so that the tables held by
        ``config.STATS`` are never modified.
        """
        merged = {}
        for stat in ("mean", "conf95", "bw"):
            merged.update(config.STATS[stat].exts)
        return merged

    def _collate(
        self,
        name: str,
        opath_leaf: str,
        index: int,
        inc_exps: tp.Optional[str],
        for_stat: tp.Callable[..., tp.Optional[pl.DataFrame]],
    ) -> None:
        """Run ``for_stat`` for every statistics extension and write results.

        Nothing is written for an extension whose ``for_stat`` call returns
        ``None``.
        """
        for ext in self._stat_exts().values():
            stat_ipath = pathlib.Path(self.ipath_stem, self.ipath_leaf + ext)
            stat_opath = pathlib.Path(self.opath_stem, opath_leaf + ext)

            df = for_stat(stat_ipath, stat_opath, index, inc_exps, name)

            if df is not None:
                storage.df_write(df, stat_opath, "storage.csv")

    def _for_stat(
        self,
        ipath: pathlib.Path,
        opath: pathlib.Path,
        index: int,
        name: str,
    ) -> tp.Optional[pl.DataFrame]:
        """Add row ``index`` of the dataframe at ``ipath`` as column ``name``.

        The column is added to the dataframe already stored at ``opath`` if
        that file exists; otherwise to a new dataframe whose only column is
        "Experiment ID". Returns ``None`` if ``ipath`` does not exist.
        """
        # Without an input file there is nothing to add, so skip
        # reading/creating the cumulative dataframe altogether.
        if not utils.path_exists(ipath):
            return None

        if utils.path_exists(opath):
            cum_df = storage.df_read(opath, "storage.csv")
        else:
            cum_df = pl.DataFrame({"Experiment ID": self.criteria.gen_exp_names()})

        df = storage.df_read(ipath, "storage.csv")

        # Normalize a negative index so it counts from the last row.
        row_data = df.row(index if index >= 0 else len(df) + index)

        # Add the extracted row as a new column of the cumulative dataframe.
        return cum_df.with_columns(pl.Series(name, row_data))
# Names exported by a wildcard import of this module.
__all__ = ["IntraExpPreparer"]