# Source code for sierra.core.pipeline.stage5.pipeline_stage5

# Copyright 2018 John Harwell, All rights reserved.
#
#  SPDX-License-Identifier: MIT

"""Stage 5 of the experimental pipeline: comparing deliverables.

"""

# Core packages
import logging
import pathlib

# 3rd party packages
import yaml

# Project packages
from sierra.core.pipeline.stage5 import intra_scenario_comparator as intrasc
from sierra.core.pipeline.stage5 import inter_scenario_comparator as intersc
import sierra.core.root_dirpath_generator as rdg
from sierra.core import types, utils, config


class PipelineStage5:
    """Compare controllers within or across scenarios.

    This can be either:

    #. Compare a set of controllers within the same scenario using performance
       measures specified in YAML configuration.

    #. Compare a single controller across a set of scenarios using performance
       measures specified in YAML configuration.

    This stage is idempotent.

    Attributes:

        cmdopts: Dictionary of parsed cmdline parameters.

        controllers: List of controllers to compare. Empty if
                     ``cmdopts['controllers_list']`` is ``None``.

        scenarios: List of scenarios to compare across. Empty if
                   ``cmdopts['scenarios_list']`` is ``None``.

        main_config: Dictionary of parsed main YAML configuration.

        stage5_config: Dictionary of parsed stage5 YAML configuration.

        output_roots: Dictionary containing output directories for intra- and
                      inter-scenario graph generation. Empty if neither a
                      controller list nor a scenario list was given. If both
                      were given, the scenario directories take precedence.

        project_root: ``<sierra_root>/<project>``; the parent of all output
                      roots and of the per-controller directories.

        logger: Module-level logger used for progress/warning messages.

    """

    def __init__(self,
                 main_config: types.YAMLDict,
                 cmdopts: types.Cmdopts) -> None:
        """Load the stage 5 YAML configuration and compute output directories.

        Args:
            main_config: Parsed main YAML configuration.

            cmdopts: Parsed cmdline parameters. The keys read here are
                     ``project_config_root``, ``sierra_root``, ``project``,
                     ``controllers_list`` and ``scenarios_list``.
        """
        self.cmdopts = cmdopts
        self.main_config = main_config
        self.logger = logging.getLogger(__name__)

        path = pathlib.Path(self.cmdopts['project_config_root'],
                            config.kYAML.stage5)

        # NOTE(review): yaml.FullLoader resolves more than plain YAML data
        # types. That is fine for trusted project configuration; if this file
        # can ever come from an untrusted source, consider yaml.safe_load().
        with utils.utf8open(path) as f:
            self.stage5_config = yaml.load(f, yaml.FullLoader)

        self.project_root = pathlib.Path(self.cmdopts['sierra_root'],
                                         self.cmdopts['project'])

        # Always defined, so that run() does not fail with AttributeError when
        # neither a controller list nor a scenario list was passed.
        self.output_roots = {}

        if self.cmdopts['controllers_list'] is not None:
            self.controllers = self.cmdopts['controllers_list'].split(',')
            self.output_roots = self._gen_output_roots(self.controllers,
                                                       'cc',
                                                       ('graphs', 'csvs'))
        else:
            self.controllers = []

        if self.cmdopts['scenarios_list'] is not None:
            self.scenarios = self.cmdopts['scenarios_list'].split(',')
            # Replaces (rather than merges with) any controller comparison
            # roots computed above.
            self.output_roots = self._gen_output_roots(self.scenarios,
                                                       'sc',
                                                       ('graphs',
                                                        'csvs',
                                                        'models'))
        else:
            self.scenarios = []

    def _gen_output_roots(self, names, tag, kinds):
        """Build the ``{kind: directory}`` mapping for one comparison type.

        The compared names are embedded in each directory name so that multiple
        runs of stage5 with different controller/scenario sets do not overwrite
        each other (i.e., make stage5 idempotent).

        Args:
            names: The controller or scenario names being compared.

            tag: Short comparison type tag placed in the directory name
                 (``cc`` or ``sc``).

            kinds: The output kinds to generate directories for, in the order
                   they should appear in the returned dictionary.

        Returns:
            Dictionary mapping each kind to
            ``<project_root>/<name1>+<name2>+...-<tag>-<kind>``.
        """
        prefix = '+'.join(names) + '-' + tag + '-'
        return {kind: self.project_root / (prefix + kind) for kind in kinds}

    def run(self, cli_args) -> None:
        """Run stage 5 of the experimental pipeline.

        If ``--controller-comparison`` was passed:

        #. :class:`~sierra.core.pipeline.stage5.intra_scenario_comparator.UnivarIntraScenarioComparator`
            or
            :class:`~sierra.core.pipeline.stage5.intra_scenario_comparator.BivarIntraScenarioComparator`
            as appropriate, depending on which type of
            :class:`~sierra.core.variables.batch_criteria.BatchCriteria` was
            selected on the cmdline.

        If ``--scenario-comparison`` was passed:

        #. :class:`~sierra.core.pipeline.stage5.inter_scenario_comparator.UnivarInterScenarioComparator`
            (only valid for univariate batch criteria currently).

        If neither a controller list nor a scenario list was given there is
        nothing to compare; a warning is logged and nothing is done.

        Args:
            cli_args: Parsed cmdline arguments object; forwarded to the
                      comparators.
        """
        if not self.output_roots:
            self.logger.warning("No controllers or scenarios to compare "
                                "were specified: skipping stage 5")
            return

        # Create directories for .csv files and graphs
        for root in self.output_roots.values():
            utils.dir_create_checked(root, True)

        if self.cmdopts['controller_comparison']:
            self._run_cc(cli_args)
        elif self.cmdopts['scenario_comparison']:
            self._run_sc(cli_args)

    def _run_cc(self, cli_args):
        """Compare the configured controllers within the same scenario.

        Args:
            cli_args: Parsed cmdline arguments object; ``bc_univar`` selects
                      between the univariate and bivariate comparator.
        """
        # Use nice controller names on graph legends if configured
        if self.cmdopts['controllers_legend'] is not None:
            legend = self.cmdopts['controllers_legend'].split(',')
        else:
            legend = self.controllers

        self._verify_comparability(self.controllers, cli_args)

        self.logger.info(
            "Inter-batch controller comparison of %s...", self.controllers)

        # Both comparators share the same construction and call signatures, so
        # only the class differs between the univariate and bivariate cases.
        if cli_args.bc_univar:
            comparator_cls = intrasc.UnivarIntraScenarioComparator
        else:
            comparator_cls = intrasc.BivarIntraScenarioComparator

        comparator = comparator_cls(self.controllers,
                                    self.output_roots['csvs'],
                                    self.output_roots['graphs'],
                                    self.cmdopts,
                                    cli_args,
                                    self.main_config)
        comparator(graphs=self.stage5_config['intra_scenario']['graphs'],
                   legend=legend,
                   comp_type=self.cmdopts['comparison_type'])

        self.logger.info("Inter-batch controller comparison complete")

    def _run_sc(self, cli_args):
        """Compare a single controller across the configured scenarios.

        Args:
            cli_args: Parsed cmdline arguments object; ``bc_univar`` must be
                      truthy.

        Raises:
            AssertionError: If the batch criteria is not univariate.
        """
        # Use nice scenario names on graph legends if configured
        if self.cmdopts['scenarios_legend'] is not None:
            legend = self.cmdopts['scenarios_legend'].split(',')
        else:
            legend = self.scenarios

        self.logger.info("Inter-batch  comparison of %s across %s...",
                         self.cmdopts['controller'],
                         self.scenarios)

        # Raised explicitly rather than via ``assert`` so the check is not
        # stripped when Python runs with -O; the exception type is unchanged.
        if not cli_args.bc_univar:
            raise AssertionError(
                "inter-scenario controller comparison only valid for univariate batch criteria")

        roots = {k: self.output_roots[k] for k in ('csvs', 'graphs', 'models')}
        comparator = intersc.UnivarInterScenarioComparator(self.cmdopts['controller'],
                                                           self.scenarios,
                                                           roots,
                                                           self.cmdopts,
                                                           cli_args,
                                                           self.main_config)

        comparator(graphs=self.stage5_config['inter_scenario']['graphs'],
                   legend=legend)

        self.logger.info("Inter-batch  comparison of %s across %s complete",
                         self.cmdopts['controller'],
                         self.scenarios)

    def _collate_root(self, batch_leaf, controller):
        """Return the ``batch_stat_collate_root`` path for a controller."""
        opts = rdg.regen_from_exp(sierra_rpath=self.cmdopts['sierra_root'],
                                  project=self.cmdopts['project'],
                                  batch_leaf=batch_leaf,
                                  controller=controller)
        return opts['batch_stat_collate_root']

    def _verify_comparability(self, controllers, cli_args):
        """Check if the specified controllers can be compared.

        Comparable controllers have all been run on the same set of batch
        experiments. If they have not, it is not `necessarily` an error, but
        probably should be looked at, so it is only a warning, not fatal.

        For each entry in each controller's directory under the project root,
        the scenario parsed from the entry name is checked for (as a substring)
        in the regenerated collate root path of every other controller.

        Args:
            controllers: Names of the controllers to check against each other.

            cli_args: Parsed cmdline arguments object; ``batch_criteria`` is
                      used to regenerate batch leaf names.
        """
        for t1 in controllers:
            for item in (self.project_root / t1).iterdir():
                template_stem, scenario, _ = rdg.parse_batch_leaf(item.name)
                batch_leaf = rdg.gen_batch_leaf(cli_args.batch_criteria,
                                                template_stem,
                                                scenario)

                # Does not depend on t2, so compute it once per batch leaf.
                collate_root1 = self._collate_root(batch_leaf, t1)
                in_root1 = scenario in str(collate_root1)

                for t2 in controllers:
                    # A controller is trivially comparable with itself.
                    if t2 == t1:
                        continue

                    collate_root2 = self._collate_root(batch_leaf, t2)
                    in_root2 = scenario in str(collate_root2)

                    if in_root1 and not in_root2:
                        self.logger.warning("%s does not exist in %s",
                                            scenario,
                                            collate_root2)
                    if in_root2 and not in_root1:
                        self.logger.warning("%s does not exist in %s",
                                            scenario,
                                            collate_root1)


# Names this module designates as its API (here, only the stage 5 pipeline
# class). NOTE(review): ``__api__`` is a project convention rather than a
# Python built-in such as ``__all__`` -- confirm which tooling consumes it.
__api__ = [
    'PipelineStage5'
]