# Source code for sierra.core.generators.exp_creator

# Copyright 2018 John Harwell, All rights reserved.
#
#  SPDX-License-Identifier: MIT
"""Experiment creation classes.

Experiment creation takes an experiment definition generated by classes in
``exp_generators.py`` and writes the experiment to the filesystem.

"""

# Core packages
import os
import random
import copy
import logging
import time
import pickle
import pathlib

# 3rd party packages

# Project packages
from sierra.core.variables import batch_criteria as bc
from sierra.core import config, utils, types, platform
import sierra.core.plugin_manager as pm
from sierra.core.generators.exp_generators import BatchExpDefGenerator
from sierra.core.experiment import bindings, definition


class ExpCreator:
    """Instantiate a generated experiment from an experiment definition.

    Takes generated :term:`Experiment` definitions and writes them to the
    filesystem.

    Attributes:

        template_ipath: Absolute path to the template XML configuration file.

        exp_input_root: Absolute path to experiment directory where generated
                        XML input files for this experiment should be written.

        exp_output_root: Absolute path to root directory for run outputs for
                         this experiment.

        cmdopts: Dictionary containing parsed cmdline options.
    """

    def __init__(self,
                 cmdopts: types.Cmdopts,
                 criteria: bc.BatchCriteria,
                 template_ipath: pathlib.Path,
                 exp_input_root: pathlib.Path,
                 exp_output_root: pathlib.Path,
                 exp_num: int) -> None:
        # Filename of template file, sans extension and parent directory path.
        self.template_stem = template_ipath.resolve().stem

        # Where the generated config and command files should be stored.
        self.exp_input_root = exp_input_root

        # Where experimental outputs should be stored.
        self.exp_output_root = exp_output_root

        self.cmdopts = cmdopts
        self.criteria = criteria
        self.exp_num = exp_num
        self.logger = logging.getLogger(__name__)

        # If random seeds were previously generated, use them if configured.
        self.seeds_fpath = self.exp_input_root / config.kRandomSeedsLeaf
        self.preserve_seeds = self.cmdopts['preserve_seeds']
        self.random_seeds = None

        if self.preserve_seeds and utils.path_exists(self.seeds_fpath):
            with open(self.seeds_fpath, 'rb') as f:
                self.random_seeds = pickle.load(f)

        if self.random_seeds is not None:
            if len(self.random_seeds) == self.cmdopts['n_runs']:
                self.logger.debug("Using existing random seeds for experiment")
            else:
                # OK to overwrite the saved random seeds--they changed the
                # experiment definition.
                self.logger.warning(("Experiment definition changed: # random "
                                     "seeds (%s) != --n-runs (%s): create new "
                                     "seeds"),
                                    len(self.random_seeds),
                                    self.cmdopts['n_runs'])
                self.preserve_seeds = False

        if not self.preserve_seeds or self.random_seeds is None:
            self.logger.debug("Generating new random seeds for experiment")
            self.random_seeds = random.sample(range(0, int(time.time())),
                                              self.cmdopts["n_runs"])

        # Where the commands file will be stored.
        self.commands_fpath = (self.exp_input_root /
                               config.kGNUParallel['cmdfile_stem'])

    def from_def(self, exp_def: definition.XMLExpDef):
        """Create all experimental runs by writing input files to filesystem.

        The passed :class:`~sierra.core.experiment.definition.XMLExpDef` object
        contains all changes that should be made to all runs in the
        experiment. Additional changes to create a set of unique runs from
        which distributions of system behavior can be meaningfully computed
        post-hoc are added.
        """
        # Clear out commands file if it exists.
        configurer = platform.ExpConfigurer(self.cmdopts)
        commands_fpath = self.commands_fpath.with_suffix(
            config.kGNUParallel['cmdfile_ext'])
        if (configurer.cmdfile_paradigm() == 'per-exp' and
                utils.path_exists(commands_fpath)):
            commands_fpath.unlink()

        n_robots = utils.get_n_robots(self.criteria.main_config,
                                      self.cmdopts,
                                      self.exp_input_root,
                                      exp_def)

        generator = platform.ExpRunShellCmdsGenerator(self.cmdopts,
                                                      self.criteria,
                                                      n_robots,
                                                      self.exp_num)

        # Create all experimental runs, each from its own deep copy of the
        # experiment definition so per-run changes don't leak between runs.
        for run_num in range(self.cmdopts['n_runs']):
            per_run = copy.deepcopy(exp_def)
            self._create_exp_run(per_run, generator, run_num)

        # Perform experiment level configuration AFTER all runs have been
        # generated in the experiment, in case the configuration depends on the
        # generated launch files.
        platform.ExpConfigurer(self.cmdopts).for_exp(self.exp_input_root)

        # Save seeds. Opening with 'wb' truncates any stale file, so no
        # explicit remove-then-append dance is needed.
        if not utils.path_exists(self.seeds_fpath) or not self.preserve_seeds:
            with open(self.seeds_fpath, 'wb') as f:
                utils.pickle_dump(self.random_seeds, f)

    def _create_exp_run(self,
                        run_exp_def: definition.XMLExpDef,
                        cmds_generator,
                        run_num: int) -> None:
        """Create a single experimental run: launch file + cmdfile entries."""
        run_output_dir = "{0}_{1}_output".format(self.template_stem, run_num)

        # If the project defined per-run configuration, apply it. Otherwise,
        # just apply the configuration in the SIERRA core.
        per_run = pm.module_load_tiered(project=self.cmdopts['project'],
                                        path='generators.exp_generators')

        run_output_root = self.exp_output_root / run_output_dir
        stem_path = self._get_launch_file_stempath(run_num)

        per_run.ExpRunDefUniqueGenerator(run_num,
                                         run_output_root,
                                         stem_path,
                                         self.random_seeds[run_num],
                                         self.cmdopts).generate(run_exp_def)

        # Write out the experimental run launch file.
        run_exp_def.write(stem_path)

        # Perform any necessary programmatic (i.e., stuff you can do in python
        # and don't need a shell for) per-run configuration.
        configurer = platform.ExpConfigurer(self.cmdopts)
        configurer.for_exp_run(self.exp_input_root, run_output_root)

        ext = config.kGNUParallel['cmdfile_ext']
        if configurer.cmdfile_paradigm() == 'per-exp':
            # Update GNU Parallel commands file with the command for the
            # configured experimental run.
            fpath = f"{self.commands_fpath}{ext}"
            with utils.utf8open(fpath, 'a') as cmds_file:
                self._update_cmds_file(cmds_file,
                                       cmds_generator,
                                       'per-exp',
                                       run_num,
                                       stem_path,
                                       'slave')
        elif configurer.cmdfile_paradigm() == 'per-run':
            # Write new GNU Parallel commands files with the commands for the
            # experimental run.
            master_fpath = f"{self.commands_fpath}_run{run_num}_master{ext}"
            slave_fpath = f"{self.commands_fpath}_run{run_num}_slave{ext}"

            self.logger.trace("Updating slave cmdfile %s",   # type: ignore
                              slave_fpath)
            with utils.utf8open(slave_fpath, 'w') as cmds_file:
                self._update_cmds_file(cmds_file,
                                       cmds_generator,
                                       'per-run',
                                       run_num,
                                       stem_path,
                                       'slave')

            self.logger.trace("Updating master cmdfile %s",  # type: ignore
                              master_fpath)
            with utils.utf8open(master_fpath, 'w') as cmds_file:
                self._update_cmds_file(cmds_file,
                                       cmds_generator,
                                       'per-run',
                                       run_num,
                                       stem_path,
                                       'master')

    def _get_launch_file_stempath(self, run_num: int) -> pathlib.Path:
        """File is named as ``<template input file stem>_run<run_num>``."""
        leaf = "{0}_run{1}".format(self.template_stem, run_num)
        return self.exp_input_root / leaf

    def _update_cmds_file(self,
                          cmds_file,
                          cmds_generator: bindings.IExpRunShellCmdsGenerator,
                          paradigm: str,
                          run_num: int,
                          launch_stem_path: pathlib.Path,
                          for_host: str) -> None:
        """Add command to launch a given experimental run to the command file."""
        pre_specs = cmds_generator.pre_run_cmds(for_host,
                                                launch_stem_path,
                                                run_num)
        assert all(spec.shell for spec in pre_specs),\
            "All pre-exp commands are run in a shell"
        pre_cmds = [spec.cmd for spec in pre_specs]
        self.logger.trace("Pre-experiment cmds: %s",   # type: ignore
                          pre_cmds)

        exec_specs = cmds_generator.exec_run_cmds(for_host,
                                                  launch_stem_path,
                                                  run_num)
        assert all(spec.shell for spec in exec_specs),\
            "All exec-exp commands are run in a shell"
        exec_cmds = [spec.cmd for spec in exec_specs]
        self.logger.trace("Exec-experiment cmds: %s",  # type: ignore
                          exec_cmds)

        post_specs = cmds_generator.post_run_cmds(for_host)
        assert all(spec.shell for spec in post_specs),\
            "All post-exp commands are run in a shell"
        post_cmds = [spec.cmd for spec in post_specs]
        self.logger.trace("Post-experiment cmds: %s",  # type: ignore
                          post_cmds)

        if len(pre_cmds + exec_cmds + post_cmds) == 0:
            self.logger.debug("Skipping writing %s cmds file: no cmds",
                              for_host)
            return

        # If there is 1 cmdfile per experiment, then the pre- and post-exec
        # cmds need to be prepended and appended to the exec cmds on a per-line
        # basis. If there is 1 cmdfile per experimental run, then its the same
        # thing, BUT we need to break the exec cmds over multiple lines in the
        # cmdfile.
        if paradigm == 'per-exp':
            line = ' '.join(pre_cmds + exec_cmds + post_cmds) + '\n'
            cmds_file.write(line)
        elif paradigm == 'per-run':
            for e in exec_cmds:
                line = ' '.join(pre_cmds + [e] + post_cmds) + '\n'
                cmds_file.write(line)
        else:
            raise ValueError(f"Bad paradigm {paradigm}")
class BatchExpCreator:
    """Instantiate a :term:`Batch Experiment`.

    Calls :class:`~sierra.core.generators.exp_creator.ExpCreator` on each
    experimental definition in the batch.

    Attributes:

        batch_config_template: Absolute path to the root template XML
                               configuration file.

        batch_input_root: Root directory for all generated XML input files all
                          experiments should be stored (relative to current dir
                          or absolute). Each experiment will get a directory
                          within this root to store the xml input files for the
                          experimental runs comprising an experiment; directory
                          name determined by the batch criteria used.

        batch_output_root: Root directory for all experiment outputs. Each
                           experiment will get a directory 'exp<n>' in this
                           directory for its outputs.

        criteria: :class:`~sierra.core.variables.batch_criteria.BatchCriteria`
                  derived object instance created from cmdline definition.
    """

    def __init__(self,
                 criteria: bc.BatchCriteria,
                 cmdopts: types.Cmdopts) -> None:
        self.batch_config_template = pathlib.Path(
            cmdopts['template_input_file'])
        self.batch_input_root = pathlib.Path(cmdopts['batch_input_root'])
        self.batch_output_root = pathlib.Path(cmdopts['batch_output_root'])
        self.criteria = criteria
        self.cmdopts = cmdopts
        self.logger = logging.getLogger(__name__)

    def create(self, generator: BatchExpDefGenerator) -> None:
        """Create the batch experiment from the passed generator.

        Scaffolds the batch experiment on the filesystem, pickles the
        experiment definitions, then instantiates each experiment in the batch
        via :class:`ExpCreator`.
        """
        utils.dir_create_checked(self.batch_input_root,
                                 self.cmdopts['exp_overwrite'])

        # Scaffold the batch experiment, creating experiment directories and
        # writing template XML input files for each experiment in the batch
        # with changes from the batch criteria added.
        exp_def = definition.XMLExpDef(input_fpath=self.batch_config_template,
                                       write_config=None)

        self.criteria.scaffold_exps(exp_def, self.cmdopts)

        # Pickle experiment definitions in the actual batch experiment
        # directory for later retrieval.
        self.criteria.pickle_exp_defs(self.cmdopts)

        # Run batch experiment generator (must be after scaffolding so the
        # per-experiment template files are in place).
        defs = generator.generate_defs()

        assert len(defs) > 0, "No XML modifications generated?"

        # Loop-invariant: generate the experiment names once, rather than
        # regenerating the full list on every iteration.
        exp_names = self.criteria.gen_exp_names(self.cmdopts)

        for i, defi in enumerate(defs):
            self.logger.debug(
                "Applying generated scenario+controller changes to exp%s",
                i)
            expi = exp_names[i]
            exp_output_root = self.batch_output_root / expi
            exp_input_root = self.batch_input_root / expi

            ExpCreator(self.cmdopts,
                       self.criteria,
                       self.batch_config_template,
                       exp_input_root,
                       exp_output_root,
                       i).from_def(defi)
# Names considered part of this module's public API.
__api__ = [ 'ExpCreator', 'BatchExpCreator', ]