# Source code for sierra.core.pipeline.stage5.pipeline_stage5

# Copyright 2018 John Harwell, All rights reserved.
#
#  SPDX-License-Identifier: MIT

"""Stage 5 of the experimental pipeline: comparing deliverables.

"""

# Core packages
import logging
import pathlib

# 3rd party packages
import yaml

# Project packages
from sierra.core.pipeline.stage5 import intra_scenario_comparator as intrasc
from sierra.core.pipeline.stage5 import inter_scenario_comparator as intersc
import sierra.core.root_dirpath_generator as rdg
from sierra.core import types, utils, config


class PipelineStage5:
    """Compare controllers within or across scenarios.

    This can be either:

    #. Compare a set of controllers within the same scenario using performance
       measures specified in YAML configuration.

    #. Compare a single controller across a set of scenarios using performance
       measures specified in YAML configuration.

    This stage is idempotent.

    Attributes:

        cmdopts: Dictionary of parsed cmdline parameters.

        controllers: List of controllers to compare. Empty if
                     ``controllers_list`` was not given.

        scenarios: List of scenarios to compare across. Empty if
                   ``scenarios_list`` was not given.

        main_config: Dictionary of parsed main YAML configuration.

        stage5_config: Dictionary of parsed stage5 YAML configuration.

        output_roots: Dictionary containing output directories for intra- and
                      inter-scenario graph generation. Empty if neither
                      ``controllers_list`` nor ``scenarios_list`` was given.

        project_root: ``<sierra_root>/<project>``; the directory under which
                      all stage 5 output directories are placed.
    """

    def __init__(self,
                 main_config: types.YAMLDict,
                 cmdopts: types.Cmdopts) -> None:
        """Load the stage 5 YAML configuration and compute output directories.

        Args:
            main_config: Parsed main YAML configuration; stored as-is.

            cmdopts: Parsed cmdline parameters. The keys read here are
                     ``project_config_root``, ``sierra_root``, ``project``,
                     ``controllers_list`` and ``scenarios_list``.
        """
        self.cmdopts = cmdopts
        self.main_config = main_config

        path = pathlib.Path(self.cmdopts['project_config_root'],
                            config.kYAML.stage5)

        # NOTE(review): yaml.FullLoader is used here; this presumes the project
        # configuration file is trusted input -- confirm before pointing this
        # at externally supplied files.
        with utils.utf8open(path) as f:
            self.stage5_config = yaml.load(f, yaml.FullLoader)

        self.logger = logging.getLogger(__name__)

        self.project_root = pathlib.Path(self.cmdopts['sierra_root'],
                                         self.cmdopts['project'])

        # Always define the attribute so that run() does not fail with an
        # AttributeError when neither a controller list nor a scenario list was
        # passed on the cmdline.
        self.output_roots = {}

        def _root(names, suffix):
            # <project_root>/<name1>+<name2>+...<suffix>
            return self.project_root / ('+'.join(names) + suffix)

        if self.cmdopts['controllers_list'] is not None:
            self.controllers = self.cmdopts['controllers_list'].split(',')

            # We add the controller list to the directory path for the .csv
            # and graph directories so that multiple runs of stage5 with
            # different controller sets do not overwrite each other
            # (i.e. make stage5 idempotent).
            self.output_roots = {
                'graphs': _root(self.controllers, "-cc-graphs"),
                'csvs': _root(self.controllers, "-cc-csvs"),
            }
        else:
            self.controllers = []

        if self.cmdopts['scenarios_list'] is not None:
            self.scenarios = self.cmdopts['scenarios_list'].split(',')

            # We add the scenario list to the directory path for the .csv
            # and graph directories so that multiple runs of stage5 with
            # different scenario sets do not overwrite each other (i.e. make
            # stage5 idempotent).
            #
            # NOTE: if a controller list was also given, these roots replace
            # the controller comparison roots computed above.
            self.output_roots = {
                'graphs': _root(self.scenarios, "-sc-graphs"),
                'csvs': _root(self.scenarios, "-sc-csvs"),
                'models': _root(self.scenarios, "-sc-models"),
            }
        else:
            self.scenarios = []

    def run(self, cli_args) -> None:
        """Run stage 5 of the experimental pipeline.

        If ``--controller-comparison`` was passed:

        #. :class:`~sierra.core.pipeline.stage5.intra_scenario_comparator.UnivarIntraScenarioComparator`
           or
           :class:`~sierra.core.pipeline.stage5.intra_scenario_comparator.BivarIntraScenarioComparator`
           as appropriate, depending on which type of
           :class:`~sierra.core.variables.batch_criteria.BatchCriteria` was
           selected on the cmdline.

        If ``--scenario-comparison`` was passed:

        #. :class:`~sierra.core.pipeline.stage5.inter_scenario_comparator.UnivarInterScenarioComparator`
           (only valid for univariate batch criteria currently).

        Args:
            cli_args: Parsed cmdline arguments object; forwarded to the
                      comparators. ``bc_univar`` and ``batch_criteria`` are
                      read from it by this class.
        """
        # Create directories for .csv files and graphs
        for v in self.output_roots.values():
            utils.dir_create_checked(v, True)

        # Controller comparison takes precedence if both flags are set.
        if self.cmdopts['controller_comparison']:
            self._run_cc(cli_args)
        elif self.cmdopts['scenario_comparison']:
            self._run_sc(cli_args)

    def _run_cc(self, cli_args):
        """Compare ``self.controllers`` against each other within scenarios.

        Selects the univariate or bivariate intra-scenario comparator based on
        ``cli_args.bc_univar`` and invokes it with the ``intra_scenario``
        graphs from the stage 5 YAML configuration.
        """
        # Use nice controller names on graph legends if configured
        if self.cmdopts['controllers_legend'] is not None:
            legend = self.cmdopts['controllers_legend'].split(',')
        else:
            legend = self.controllers

        self._verify_comparability(self.controllers, cli_args)

        self.logger.info("Inter-batch controller comparison of %s...",
                         self.controllers)

        # Both comparators take the same constructor and call arguments, so
        # only the class differs between the univariate and bivariate cases.
        if cli_args.bc_univar:
            comparator_cls = intrasc.UnivarIntraScenarioComparator
        else:
            comparator_cls = intrasc.BivarIntraScenarioComparator

        comparator = comparator_cls(self.controllers,
                                    self.output_roots['csvs'],
                                    self.output_roots['graphs'],
                                    self.cmdopts,
                                    cli_args,
                                    self.main_config)
        comparator(graphs=self.stage5_config['intra_scenario']['graphs'],
                   legend=legend,
                   comp_type=self.cmdopts['comparison_type'])

        self.logger.info("Inter-batch controller comparison complete")

    def _run_sc(self, cli_args):
        """Compare ``cmdopts['controller']`` across ``self.scenarios``.

        Invokes the univariate inter-scenario comparator with the
        ``inter_scenario`` graphs from the stage 5 YAML configuration.

        Raises:
            AssertionError: If ``cli_args.bc_univar`` is false.
        """
        # Use nice scenario names on graph legends if configured
        if self.cmdopts['scenarios_legend'] is not None:
            legend = self.cmdopts['scenarios_legend'].split(',')
        else:
            legend = self.scenarios

        self.logger.info("Inter-batch comparison of %s across %s...",
                         self.cmdopts['controller'],
                         self.scenarios)

        assert cli_args.bc_univar,\
            "inter-scenario controller comparison only valid for univariate batch criteria"

        roots = {k: self.output_roots[k] for k in ('csvs', 'graphs', 'models')}

        comparator = intersc.UnivarInterScenarioComparator(self.cmdopts['controller'],
                                                           self.scenarios,
                                                           roots,
                                                           self.cmdopts,
                                                           cli_args,
                                                           self.main_config)
        comparator(graphs=self.stage5_config['inter_scenario']['graphs'],
                   legend=legend)

        self.logger.info("Inter-batch comparison of %s across %s complete",
                         self.cmdopts['controller'],
                         self.scenarios)

    def _verify_comparability(self, controllers, cli_args):
        """Check if the specified controllers can be compared.

        Comparable controllers have all been run on the same set of batch
        experiments. If they have not, it is not `necessarily` an error, but
        probably should be looked at, so it is only a warning, not fatal.

        For every entry under ``<project_root>/<controller>``, the batch leaf
        is regenerated for each pair of controllers, and a warning is logged
        if the scenario name appears in one controller's
        ``batch_stat_collate_root`` path string but not in the other's.

        NOTE(review): both paths are regenerated from the same batch leaf, so
        presumably both always contain the scenario name -- confirm whether a
        check for existence on the filesystem was intended here.
        """
        for t1 in controllers:
            for item in (self.project_root / t1).iterdir():
                template_stem, scenario, _ = rdg.parse_batch_leaf(item.name)
                batch_leaf = rdg.gen_batch_leaf(cli_args.batch_criteria,
                                                template_stem,
                                                scenario)

                # Does not depend on t2, so compute it once per batch leaf
                # rather than once per controller pair.
                opts1 = rdg.regen_from_exp(sierra_rpath=self.cmdopts['sierra_root'],
                                           project=self.cmdopts['project'],
                                           batch_leaf=batch_leaf,
                                           controller=t1)
                collate_root1 = opts1['batch_stat_collate_root']

                for t2 in controllers:
                    opts2 = rdg.regen_from_exp(sierra_rpath=self.cmdopts['sierra_root'],
                                               project=self.cmdopts['project'],
                                               batch_leaf=batch_leaf,
                                               controller=t2)
                    collate_root2 = opts2['batch_stat_collate_root']

                    if scenario in str(collate_root1) and scenario not in str(collate_root2):
                        self.logger.warning("%s does not exist in %s",
                                            scenario,
                                            collate_root2)

                    if scenario in str(collate_root2) and scenario not in str(collate_root1):
                        self.logger.warning("%s does not exist in %s",
                                            scenario,
                                            collate_root1)
# Names this module lists in ``__api__`` (presumably consumed by the project's
# documentation tooling -- confirm).
__api__ = [
    'PipelineStage5',
]