# Source code for sierra.core.generators.exp_creator

# Copyright 2018 John Harwell, All rights reserved.
#
#  SPDX-License-Identifier: MIT
"""Experiment creation classes.

Experiment creation takes an experiment definition generated by classes in
``exp_generators.py`` and writes the experiment to the filesystem.

"""

# Core packages
import os
import random
import copy
import logging
import time
import pickle
import pathlib

# 3rd party packages

# Project packages
from sierra.core.variables import batch_criteria as bc
from sierra.core import config, utils, types, platform
import sierra.core.plugin_manager as pm
from sierra.core.generators.exp_generators import BatchExpDefGenerator
from sierra.core.experiment import bindings, definition


class ExpCreator:
    """Instantiate a generated experiment from an experiment definition.

    Takes generated :term:`Experiment` definitions and writes them to the
    filesystem.

    Attributes:

        template_ipath: Absolute path to the template XML configuration file.

        exp_input_root: Absolute path to experiment directory where generated
                        XML input files for this experiment should be written.

        exp_output_root: Absolute path to root directory for run outputs for
                         this experiment.

        cmdopts: Dictionary containing parsed cmdline options.
    """

    def __init__(self,
                 cmdopts: types.Cmdopts,
                 criteria: bc.BatchCriteria,
                 template_ipath: pathlib.Path,
                 exp_input_root: pathlib.Path,
                 exp_output_root: pathlib.Path,
                 exp_num: int) -> None:
        # Filename of template file, sans extension and parent directory path.
        self.template_stem = template_ipath.resolve().stem

        # Where the generated config and command files should be stored.
        self.exp_input_root = exp_input_root

        # Where experimental outputs should be stored.
        self.exp_output_root = exp_output_root

        self.cmdopts = cmdopts
        self.criteria = criteria
        self.exp_num = exp_num
        self.logger = logging.getLogger(__name__)

        # If random seeds were previously generated, use them if configured.
        self.seeds_fpath = self.exp_input_root / config.kRandomSeedsLeaf
        self.preserve_seeds = self.cmdopts['preserve_seeds']
        self.random_seeds = None

        if self.preserve_seeds and utils.path_exists(self.seeds_fpath):
            with open(self.seeds_fpath, 'rb') as f:
                self.random_seeds = pickle.load(f)

        if self.random_seeds is not None:
            if len(self.random_seeds) == self.cmdopts['n_runs']:
                self.logger.debug("Using existing random seeds for experiment")
            else:
                # OK to overwrite the saved random seeds--they changed the
                # experiment definition.
                self.logger.warning(("Experiment definition changed: # random "
                                     "seeds (%s) != --n-runs (%s): create new "
                                     "seeds"),
                                    len(self.random_seeds),
                                    self.cmdopts['n_runs'])
                self.preserve_seeds = False

        if not self.preserve_seeds or self.random_seeds is None:
            self.logger.debug("Generating new random seeds for experiment")
            self.random_seeds = random.sample(range(0, int(time.time())),
                                              self.cmdopts["n_runs"])

        # Where the commands file will be stored.
        self.commands_fpath = (self.exp_input_root /
                               config.kGNUParallel['cmdfile_stem'])

    def from_def(self, exp_def: definition.XMLExpDef):
        """Create all experimental runs by writing input files to filesystem.

        The passed :class:`~sierra.core.experiment.definition.XMLExpDef` object
        contains all changes that should be made to all runs in the
        experiment. Additional changes to create a set of unique runs from
        which distributions of system behavior can be meaningfully computed
        post-hoc are added.
        """
        # Clear out commands file if it exists.
        configurer = platform.ExpConfigurer(self.cmdopts)
        commands_fpath = self.commands_fpath.with_suffix(
            config.kGNUParallel['cmdfile_ext'])
        if (configurer.cmdfile_paradigm() == 'per-exp' and
                utils.path_exists(commands_fpath)):
            commands_fpath.unlink()

        n_robots = utils.get_n_robots(self.criteria.main_config,
                                      self.cmdopts,
                                      self.exp_input_root,
                                      exp_def)

        generator = platform.ExpRunShellCmdsGenerator(self.cmdopts,
                                                      self.criteria,
                                                      n_robots,
                                                      self.exp_num)

        # Create all experimental runs, each from its own deep copy of the
        # experiment definition so per-run changes don't leak between runs.
        for run_num in range(self.cmdopts['n_runs']):
            per_run = copy.deepcopy(exp_def)
            self._create_exp_run(per_run, generator, run_num)

        # Perform experiment level configuration AFTER all runs have been
        # generated in the experiment, in case the configuration depends on the
        # generated launch files.
        platform.ExpConfigurer(self.cmdopts).for_exp(self.exp_input_root)

        # Save seeds. Opening with 'wb' truncates any stale file, so no
        # explicit remove-then-append dance is needed.
        if not utils.path_exists(self.seeds_fpath) or not self.preserve_seeds:
            with open(self.seeds_fpath, 'wb') as f:
                utils.pickle_dump(self.random_seeds, f)

    def _create_exp_run(self,
                        run_exp_def: definition.XMLExpDef,
                        cmds_generator,
                        run_num: int) -> None:
        """Create a single experimental run: launch file + cmdfile entries."""
        run_output_dir = "{0}_{1}_output".format(self.template_stem, run_num)

        # If the project defined per-run configuration, apply it. Otherwise,
        # just apply the configuration in the SIERRA core.
        per_run = pm.module_load_tiered(project=self.cmdopts['project'],
                                        path='generators.exp_generators')

        run_output_root = self.exp_output_root / run_output_dir
        stem_path = self._get_launch_file_stempath(run_num)

        per_run.ExpRunDefUniqueGenerator(run_num,
                                         run_output_root,
                                         stem_path,
                                         self.random_seeds[run_num],
                                         self.cmdopts).generate(run_exp_def)

        # Write out the experimental run launch file.
        run_exp_def.write(stem_path)

        # Perform any necessary programmatic (i.e., stuff you can do in python
        # and don't need a shell for) per-run configuration.
        configurer = platform.ExpConfigurer(self.cmdopts)
        configurer.for_exp_run(self.exp_input_root, run_output_root)

        ext = config.kGNUParallel['cmdfile_ext']
        if configurer.cmdfile_paradigm() == 'per-exp':
            # Update GNU Parallel commands file with the command for the
            # configured experimental run.
            fpath = f"{self.commands_fpath}{ext}"
            with utils.utf8open(fpath, 'a') as cmds_file:
                self._update_cmds_file(cmds_file,
                                       cmds_generator,
                                       'per-exp',
                                       run_num,
                                       stem_path,
                                       'slave')
        elif configurer.cmdfile_paradigm() == 'per-run':
            # Write new GNU Parallel commands files with the commands for the
            # experimental run.
            master_fpath = f"{self.commands_fpath}_run{run_num}_master{ext}"
            slave_fpath = f"{self.commands_fpath}_run{run_num}_slave{ext}"

            self.logger.trace("Updating slave cmdfile %s",   # type: ignore
                              slave_fpath)
            with utils.utf8open(slave_fpath, 'w') as cmds_file:
                self._update_cmds_file(cmds_file,
                                       cmds_generator,
                                       'per-run',
                                       run_num,
                                       stem_path,
                                       'slave')

            self.logger.trace("Updating master cmdfile %s",  # type: ignore
                              master_fpath)
            with utils.utf8open(master_fpath, 'w') as cmds_file:
                self._update_cmds_file(cmds_file,
                                       cmds_generator,
                                       'per-run',
                                       run_num,
                                       stem_path,
                                       'master')

    def _get_launch_file_stempath(self, run_num: int) -> pathlib.Path:
        """File is named as ``<template input file stem>_run<run_num>``."""
        leaf = "{0}_run{1}".format(self.template_stem, run_num)
        return self.exp_input_root / leaf

    def _update_cmds_file(self,
                          cmds_file,
                          cmds_generator: bindings.IExpRunShellCmdsGenerator,
                          paradigm: str,
                          run_num: int,
                          launch_stem_path: pathlib.Path,
                          for_host: str) -> None:
        """Add command to launch a given experimental run to the command file."""
        pre_specs = cmds_generator.pre_run_cmds(for_host,
                                                launch_stem_path,
                                                run_num)
        assert all(spec.shell for spec in pre_specs),\
            "All pre-exp commands are run in a shell"
        pre_cmds = [spec.cmd for spec in pre_specs]
        self.logger.trace("Pre-experiment cmds: %s",   # type: ignore
                          pre_cmds)

        exec_specs = cmds_generator.exec_run_cmds(for_host,
                                                  launch_stem_path,
                                                  run_num)
        assert all(spec.shell for spec in exec_specs),\
            "All exec-exp commands are run in a shell"
        exec_cmds = [spec.cmd for spec in exec_specs]
        self.logger.trace("Exec-experiment cmds: %s",  # type: ignore
                          exec_cmds)

        post_specs = cmds_generator.post_run_cmds(for_host)
        assert all(spec.shell for spec in post_specs),\
            "All post-exp commands are run in a shell"
        post_cmds = [spec.cmd for spec in post_specs]
        self.logger.trace("Post-experiment cmds: %s",  # type: ignore
                          post_cmds)

        if len(pre_cmds + exec_cmds + post_cmds) == 0:
            self.logger.debug("Skipping writing %s cmds file: no cmds",
                              for_host)
            return

        # If there is 1 cmdfile per experiment, then the pre- and post-exec
        # cmds need to be prepended and appended to the exec cmds on a per-line
        # basis. If there is 1 cmdfile per experimental run, then its the same
        # thing, BUT we need to break the exec cmds over multiple lines in the
        # cmdfile.
        if paradigm == 'per-exp':
            line = ' '.join(pre_cmds + exec_cmds + post_cmds) + '\n'
            cmds_file.write(line)
        elif paradigm == 'per-run':
            for e in exec_cmds:
                line = ' '.join(pre_cmds + [e] + post_cmds) + '\n'
                cmds_file.write(line)
        else:
            raise ValueError(f"Bad paradigm {paradigm}")
class BatchExpCreator:
    """Instantiate a :term:`Batch Experiment`.

    Calls :class:`~sierra.core.generators.exp_creator.ExpCreator` on each
    experimental definition in the batch.

    Attributes:

        batch_config_template: Absolute path to the root template XML
                               configuration file.

        batch_input_root: Root directory for all generated XML input files all
                          experiments should be stored (relative to current dir
                          or absolute). Each experiment will get a directory
                          within this root to store the xml input files for the
                          experimental runs comprising an experiment; directory
                          name determined by the batch criteria used.

        batch_output_root: Root directory for all experiment outputs. Each
                           experiment will get a directory 'exp<n>' in this
                           directory for its outputs.

        criteria: :class:`~sierra.core.variables.batch_criteria.BatchCriteria`
                  derived object instance created from cmdline definition.
    """

    def __init__(self,
                 criteria: bc.BatchCriteria,
                 cmdopts: types.Cmdopts) -> None:
        self.batch_config_template = pathlib.Path(
            cmdopts['template_input_file'])
        self.batch_input_root = pathlib.Path(cmdopts['batch_input_root'])
        self.batch_output_root = pathlib.Path(cmdopts['batch_output_root'])
        self.criteria = criteria
        self.cmdopts = cmdopts
        self.logger = logging.getLogger(__name__)

    def create(self, generator: BatchExpDefGenerator) -> None:
        """Create the batch experiment from the passed generator.

        Scaffolds the batch experiment on the filesystem, pickles the
        experiment definitions, then instantiates each experiment in the batch
        via :class:`ExpCreator`.
        """
        utils.dir_create_checked(self.batch_input_root,
                                 self.cmdopts['exp_overwrite'])

        # Scaffold the batch experiment, creating experiment directories and
        # writing template XML input files for each experiment in the batch
        # with changes from the batch criteria added.
        exp_def = definition.XMLExpDef(input_fpath=self.batch_config_template,
                                       write_config=None)

        self.criteria.scaffold_exps(exp_def, self.cmdopts)

        # Pickle experiment definitions in the actual batch experiment
        # directory for later retrieval.
        self.criteria.pickle_exp_defs(self.cmdopts)

        # Run batch experiment generator (must be after scaffolding so the
        # per-experiment template files are in place).
        defs = generator.generate_defs()

        assert len(defs) > 0, "No XML modifications generated?"

        # Loop-invariant: generate the experiment names once, rather than
        # regenerating the full list on every iteration.
        exp_names = self.criteria.gen_exp_names(self.cmdopts)

        for i, defi in enumerate(defs):
            self.logger.debug(
                "Applying generated scenario+controller changes to exp%s",
                i)
            expi = exp_names[i]
            exp_output_root = self.batch_output_root / expi
            exp_input_root = self.batch_input_root / expi

            ExpCreator(self.cmdopts,
                       self.criteria,
                       self.batch_config_template,
                       exp_input_root,
                       exp_output_root,
                       i).from_def(defi)
# Names considered part of this module's public API.
__api__ = [ 'ExpCreator', 'BatchExpCreator', ]