Skip to content
Snippets Groups Projects
Commit e3349459 authored by Rob Moss
Browse files

Iterate over scenarios without any initialisation

The intention is to replace the current config, sweep, and context
modules with a simpler workflow that divides this process into several
discrete stages. This first stage supports iterating over combinations
of observation model parameters, with each "instance" being returned as
a type that can be pickled and sent to another Python process for
construction and validation (i.e., to be turned into a Context object).
parent d66b97b6
Branches
Tags
No related merge requests found
Pipeline #17469 passed
......@@ -32,6 +32,7 @@ table), while others are likely of no use outside of pypfilt_ (see the
:mod:`pypfilt` Provides model-fitting and forecasting functions
:mod:`pypfilt.config` Reads forecast scenarios from TOML_ files
:mod:`pypfilt.sweep` Iterates over forecast scenarios
:mod:`pypfilt.scenario` Reads forecast scenarios from TOML_ files
:mod:`pypfilt.model` Defines the simulation model base class
:class:`~pypfilt.model.Model`
:mod:`pypfilt.obs` Defines the observation model base class
......@@ -75,6 +76,7 @@ table), while others are likely of no use outside of pypfilt_ (see the
pypfilt
config
sweep
scenario
model
obs
params
......
pypfilt.scenario
================
.. py:module:: pypfilt.scenario
The :mod:`pypfilt.scenario` module reads simulation scenarios from plain-text TOML_ inputs.
The purpose of this module is to allow users to define and run simulations **without writing any Python code**, and instead define all of the necessary settings in TOML_ files.
.. note:: A scenario will have a separate :py:class:`Instance` for each combination of observation model parameter values.
Loading scenarios
-----------------
.. autofunction:: load_instances
.. autoclass:: Instance
Internal types
--------------
.. autoclass:: Specification
.. autoclass:: Scenario
.. autoclass:: ObsModelParams
Internal functions
------------------
.. autofunction:: load_toml
.. autofunction:: load_specifications
.. autofunction:: scenarios
.. autofunction:: instances
.. autofunction:: observation_model_parameter_combinations
.. autofunction:: scenario_observation_model_combinations
.. autofunction:: scenario_observation_model_parameters
.. autofunction:: override_dict
.. autofunction:: as_list
"""
Provides a declarative means of defining estimation and forecasting scenarios.
The purpose of this module is to allow users to define and run simulations
**without writing any Python code**, and instead define all of the necessary
settings in `TOML`_ files.
"""
import copy
import itertools
import tomli
from typing import Any, Dict, NamedTuple, Optional
class Specification(NamedTuple):
    """
    The content of a single TOML source, which may define many scenarios.

    :param global_settings: The default settings, which are shared by all of
        the scenarios.
    :type global_settings: Dict[str, Any]
    :param scenario_settings: The scenario-specific settings.
        This dictionary maps each scenario ID to the settings that only apply
        to that scenario.
    :type scenario_settings: Dict[str, Any]
    :param source: The (optional) TOML input for this specification.
    :type source: Optional[str]
    """

    global_settings: Dict[str, Any]
    scenario_settings: Dict[str, Any]
    source: Optional[str]
class Scenario(NamedTuple):
    """
    A single scenario, identified by a unique scenario ID.

    :param scenario_id: The unique identifier for this scenario.
    :type scenario_id: str
    :param settings: The settings dictionary for this scenario, which defines
        all of the simulation components and parameters.
    :type settings: Dict[str, Any]
    :param source: The (optional) TOML input from which this scenario was
        obtained.
    :type source: Optional[str]
    """

    scenario_id: str
    settings: Dict[str, Any]
    source: Optional[str]
class Instance(NamedTuple):
    """
    One instance of a scenario.

    :param scenario_id: The identifier of the scenario to which this instance
        belongs.
    :type scenario_id: str
    :param settings: The settings dictionary, which defines all of the
        simulation components and parameters, including those that are
        specific to this instance.
    :type settings: Dict[str, Any]
    :param descriptor: The instance descriptor, which describes the
        observation model parameter values for this specific instance.
    :type descriptor: str
    :param source: The (optional) TOML input from which this instance was
        obtained.
    :type source: Optional[str]
    """

    scenario_id: str
    settings: Dict[str, Any]
    descriptor: str
    source: Optional[str]

    def __str__(self):
        """
        Return a readable summary that comprises only the scenario ID and
        the instance descriptor (the settings and source are omitted).
        """
        template = 'Instance(scenario_id="{}", descriptor="{}")'
        (ident, descr) = (self.scenario_id, self.descriptor)
        return template.format(ident, descr)

    def __repr__(self):
        """
        Return the same output as ``__str__``.

        While ``__repr__`` should be *unambiguous* and ``__str__`` should be
        *readable*, the scenario ID and instance descriptor together uniquely
        identify a specific instance of a specific scenario, and so a single
        representation serves both purposes.
        """
        return self.__str__()
class ObsModelParams(NamedTuple):
    """
    The parameter values for a single observation model, and the details of
    how to format the parameter names and values in an instance descriptor.

    :param unit: The observation unit, a unique identifier for this
        observation model and for the observations to which it pertains.
    :type unit: str
    :param values: The value(s) for each observation model parameter.
    :type values: Dict[str, Any]
    :param value_format: The format string for each parameter, used to
        convert the parameter values into strings.
    :type value_format: Dict[str, str]
    :param display_names: The string for each parameter that is used to
        represent the parameter in instance descriptors.
    :type display_names: Dict[str, str]
    """

    unit: str
    values: Dict[str, Any]
    value_format: Dict[str, str]
    display_names: Dict[str, str]
def load_instances(sources):
    """
    Iterate over every scenario instance that is defined in one or more
    `TOML`_ sources.

    The instances are generated lazily: each source yields specifications,
    each specification yields scenarios, and each scenario yields instances.

    :param sources: A list of file-like objects and/or file paths.
        If ``sources`` is not a list, it will be treated as the only item of a
        list.
    :rtype: Iterator[Instance]
    """
    for specification in load_specifications(sources):
        for scenario in scenarios(specification):
            # NOTE: this is where the job of this module ends, and the job
            # of Context begins.
            yield from instances(scenario)
def load_toml(source):
    """
    Read `TOML`_ content from ``source`` and return the parsed dictionary and
    the `TOML`_ input.

    :param source: A file-like object (opened in text mode or in binary mode)
        or a file path.
    :return: A ``(dict, str)`` tuple.
    """
    if hasattr(source, 'read'):
        toml_string = source.read()
    else:
        with open(source, encoding='utf-8') as f:
            toml_string = f.read()

    # NOTE: file-like objects opened in binary mode return bytes, which
    # ``tomli.loads()`` does not accept. TOML documents are always UTF-8
    # encoded, so decode them here and always return the input as a string.
    if isinstance(toml_string, (bytes, bytearray)):
        toml_string = bytes(toml_string).decode('utf-8')

    parsed_dict = tomli.loads(toml_string)
    return (parsed_dict, toml_string)
def load_specifications(sources):
    """
    Iterate over the scenario specifications, one for each item in
    ``sources``.

    The top-level ``scenario`` table of each source provides the
    scenario-specific settings, and all of the remaining top-level content
    provides the global settings.

    :param sources: A list of file-like objects and/or file paths.
        If ``sources`` is not a list, it will be treated as a list containing
        one item.
    :rtype: Iterator[Specification]
    :raises ValueError: if a source does not define any scenarios.
    """
    for source in as_list(sources):
        (settings, toml_text) = load_toml(source)

        if 'scenario' not in settings:
            raise ValueError('No scenarios defined in {}'.format(source))

        # Remove the scenarios table, so that only the global settings remain.
        scenario_tables = settings.pop('scenario')

        yield Specification(
            global_settings=settings,
            scenario_settings=scenario_tables,
            source=toml_text,
        )
def scenarios(spec):
    """
    Iterate over each scenario that is defined in the specification ``spec``.

    :param spec: The scenario specifications.
    :type spec: Specification
    :rtype: Iterator[Scenario]
    """
    for (scenario_id, overrides) in spec.scenario_settings.items():
        # Apply the scenario-specific settings on top of the global settings.
        # Both dictionaries are copied, so that neither the specification nor
        # any other scenario shares mutable state with this scenario.
        merged = override_dict(
            copy.deepcopy(spec.global_settings),
            copy.deepcopy(overrides),
        )
        yield Scenario(
            scenario_id=scenario_id,
            settings=merged,
            source=spec.source,
        )
def instances(scenario):
    """
    Iterate over each instance of a single scenario; there is one instance
    for each combination of observation model parameter values.

    :param scenario: The scenario definition.
    :type scenario: Scenario
    :rtype: Iterator[Instance]
    :raises ValueError: if two instances have the same descriptor.
    """
    seen = set()
    combinations = scenario_observation_model_combinations(scenario)

    for (value_dicts, descriptor) in combinations:
        # Descriptors must uniquely identify each instance of a scenario.
        if descriptor in seen:
            msg_fmt = 'Scenario "{}" has a duplicate descriptor "{}"'
            raise ValueError(msg_fmt.format(scenario.scenario_id, descriptor))
        seen.add(descriptor)

        # Each instance receives its own copy of the scenario settings, in
        # which the parameters table for each observation model is replaced
        # by the values for this specific combination.
        settings = copy.deepcopy(scenario.settings)
        for obs_unit in value_dicts:
            obs_table = settings['observations'][obs_unit]
            obs_table['parameters'] = value_dicts[obs_unit]

        yield Instance(
            scenario_id=scenario.scenario_id,
            settings=settings,
            descriptor=descriptor,
            source=scenario.source,
        )
def observation_model_parameter_combinations(obs_params):
    """
    Iterate over every combination of parameter values for a single
    observation model.

    Each combination is returned as a ``(unit, values, descriptor)`` tuple.
    The descriptor comprises one ``"{display_name}-{formatted_value}"`` field
    for each parameter, and these fields are joined by hyphens.
    For example, if the ``bg_obs`` parameter has the display name ``"bg"``,
    the format string ``"03.0f"``, and the value ``1``, its field will be
    ``"bg-001"``.

    :param obs_params: The observation model parameters definition.
    :type obs_params: ObsModelParams
    :rtype: Iterator[tuple[str, Dict[str, float | int], str]]
    :raises KeyError: if a parameter has no display name or no format string.
    :raises ValueError: if a format string is not valid for a parameter value.
    """
    # NOTE: sort parameters by name to ensure a consistent ordering.
    names = sorted(obs_params.values.keys())

    # Retrieve the display name for each parameter, in the same order.
    labels = [obs_params.display_names[name] for name in names]

    # NOTE: the parameters must be scanned in their listed order, so that the
    # order of the values in each combination matches that of ``names``.
    scan = [as_list(obs_params.values[name]) for name in names]

    for parameter_values in itertools.product(*scan):
        values_dict = dict(zip(names, parameter_values))

        # NOTE: format each value directly, rather than inserting display
        # names and parameter names into a ``str.format()`` template.
        # Names that contain braces or brackets, and parameter names that
        # consist solely of digits, would otherwise corrupt the template.
        fields = [
            '{}-{}'.format(
                label, format(value, str(obs_params.value_format[name])))
            for (name, label, value)
            in zip(names, labels, parameter_values)
        ]
        descriptor = '-'.join(fields)

        yield (obs_params.unit, values_dict, descriptor)
def as_list(values):
    """
    Return ``values`` as a list.

    Only ``list`` instances are returned as-is (the same object, not a copy);
    any other value, including tuples and strings, is wrapped in a new list.

    :param values: A list of values, or a single value that will be the only
        item of the returned list.
    :type values: Union[list[Any], Any]
    :rtype: list[Any]
    """
    return values if isinstance(values, list) else [values]
def scenario_observation_model_combinations(scenario):
    """
    Iterate over every combination of parameter values across all of the
    observation models in a scenario.

    Each combination is returned as a ``(values, descriptor)`` tuple, where
    ``values`` is a dictionary that maps each observation model (identified by
    observation unit) to the parameter values for that observation model, and
    ``descriptor`` joins the descriptors of each observation model with
    hyphens.

    :param scenario: The scenario definition.
    :type scenario: Scenario
    :rtype: Iterator[tuple[Dict[str, Any], str]]
    """
    # NOTE: a scenario with no observation models has a single combination,
    # with an empty configuration dictionary and an empty descriptor string.
    if 'observations' not in scenario.settings:
        yield ({}, "")
        return

    model_params = scenario_observation_model_parameters(scenario)
    per_model = [
        observation_model_parameter_combinations(params)
        for params in model_params.values()
    ]

    for combination in itertools.product(*per_model):
        obs_config = {}
        parts = []
        # NOTE: each element is a (unit, values_dict, descriptor) tuple.
        for (unit, values, descr) in combination:
            obs_config[unit] = values
            parts.append(descr)
        yield (obs_config, '-'.join(parts))
def scenario_observation_model_parameters(scenario):
    """
    Return the parameter values for each observation model in a scenario,
    where each observation model is identified by its observation unit.

    Each observation model table must contain ``parameters``, ``format``, and
    ``name`` tables.

    :param scenario: The scenario definition.
    :type scenario: Scenario
    :rtype: Dict[str, ObsModelParams]
    :raises ValueError: if the parameter names are not consistent across the
        parameter values, the value format strings, and the parameter display
        names.
    """
    # Collect the parameter definitions for every observation model before
    # validating any of them.
    obs_models = {}
    for (unit, om_dict) in scenario.settings['observations'].items():
        obs_models[unit] = ObsModelParams(
            unit=unit,
            values=om_dict['parameters'],
            value_format=om_dict['format'],
            display_names=om_dict['name'],
        )

    # The parameter values, format strings, and display names must all be
    # defined for exactly the same set of parameters.
    for om_params in obs_models.values():
        value_keys = set(om_params.values.keys())
        format_keys = set(om_params.value_format.keys())
        names_keys = set(om_params.display_names.keys())
        if value_keys == format_keys == names_keys:
            continue
        msg_fmt = 'Invalid "{}" observation model'
        raise ValueError(msg_fmt.format(om_params.unit))

    return obs_models
def override_dict(defaults, overrides):
    """
    Apply the values in ``overrides`` on top of the values in ``defaults``,
    recursively descending into matching nested dictionaries.

    Nested values are only merged when **both** the default value and the
    override value are dictionaries.
    In all other cases the override value simply replaces the default value
    (or is added, if there is no default value).

    :param dict defaults: The original values; note that this dictionary
        **will be modified**.
    :param dict overrides: The overriding values.
    :return: The modified ``defaults`` dictionary.
    :rtype: Dict[Any, Any]
    """
    for key in overrides:
        new_value = overrides[key]
        can_merge = (
            isinstance(new_value, dict)
            and key in defaults
            and isinstance(defaults[key], dict)
        )
        if can_merge:
            # Both values are dictionaries: override the nested defaults.
            defaults[key] = override_dict(defaults[key], new_value)
        else:
            # Replace (or define) the value for this key.
            defaults[key] = new_value
    return defaults
"""
Test that scenario instances are generated as expected from TOML content.
"""
import io
import pypfilt.scenario
import pytest
def test_scenario_empty_toml():
    """
    Test that loading instances from TOML input that defines no scenarios
    raises a ValueError.
    """
    toml_input = """
"""
    empty_source = io.StringIO(toml_input)
    with pytest.raises(ValueError):
        # NOTE: the instances are generated lazily, so the iterator must be
        # consumed in order for the input to be read.
        list(pypfilt.scenario.load_instances(empty_source))
def test_scenario_single_instance():
    """
    Test that we obtain a single instance from this minimal TOML input, and
    that scenario-specific settings override the global settings.
    """
    toml_input = """
global = true
hello = "world"
[scenario.test]
global = false
local = true
"""
    source = io.StringIO(toml_input)
    instances = list(pypfilt.scenario.load_instances(source))
    assert len(instances) == 1

    # Check that global and scenario-specific parameters have been applied.
    instance = instances[0]
    assert instance.scenario_id == 'test'
    assert instance.descriptor == ''
    assert len(instance.settings) == 3

    # NOTE: use identity checks for the booleans, so that truthy or falsy
    # values of other types (such as 1 and 0) are not accepted.
    assert 'global' in instance.settings
    assert instance.settings['global'] is False
    assert 'hello' in instance.settings
    assert instance.settings['hello'] == 'world'
    assert 'local' in instance.settings
    assert instance.settings['local'] is True
def test_scenario_many_observation_models():
    """
    Test that we obtain multiple instances for a single scenario, one for
    each combination of observation model parameter values.
    """
    toml_input = """
[scenario.test]
[scenario.test.observations.x]
parameters.bg_obs = 1
parameters.pr_obs = [0.1, 0.2, 0.5]
parameters.disp = 10
format = { bg_obs = "03.0f", pr_obs = "0.1f", disp = "03.0f" }
name = { bg_obs = "bg", pr_obs = "pr", disp = "disp" }
[scenario.test.observations.y]
parameters.bg_obs = 2
parameters.pr_obs = 0.8
parameters.disp = [100, 1000]
format = { bg_obs = "03.0f", pr_obs = "0.1f", disp = "04.0f" }
name = { bg_obs = "bg", pr_obs = "pr", disp = "disp" }
"""
    source = io.StringIO(toml_input)
    instances = list(pypfilt.scenario.load_instances(source))

    def count_matching(unit, param, value):
        """
        Return the number of instances where the observation model ``unit``
        has the specified value for the parameter ``param``.
        """
        matches = [
            inst for inst in instances
            if inst.settings['observations'][unit]['parameters'][param]
            == value
        ]
        return len(matches)

    # There are 3 values of x.pr_obs and 2 values of y.disp.
    num_instances = len(instances)
    assert num_instances == 6

    # Check that each instance has the correct scenario ID.
    for inst in instances:
        assert inst.scenario_id == 'test'

    # Check that each instance descriptor is unique.
    unique_descriptors = {inst.descriptor for inst in instances}
    assert len(unique_descriptors) == num_instances

    # Check the 'bg_obs' values are constant for each observation model.
    for inst in instances:
        obs_tables = inst.settings['observations']
        assert obs_tables['x']['parameters']['bg_obs'] == 1
    for inst in instances:
        obs_tables = inst.settings['observations']
        assert obs_tables['y']['parameters']['bg_obs'] == 2

    # Check the 'disp' values vary as expected.
    assert count_matching('x', 'disp', 10) == num_instances
    assert count_matching('y', 'disp', 100) == 3
    assert count_matching('y', 'disp', 1000) == 3

    # Check the 'pr_obs' values vary as expected.
    assert count_matching('y', 'pr_obs', 0.8) == 6
    assert count_matching('x', 'pr_obs', 0.1) == 2
    assert count_matching('x', 'pr_obs', 0.2) == 2
    assert count_matching('x', 'pr_obs', 0.5) == 2
def test_scenario_multiple_scenarios():
    """
    Test that we obtain a single instance for each scenario, and that the
    scenario-specific settings are applied on top of the global settings.
    """
    toml_input = """
global = { foo = "hello", extra = "hi" }
default = 1
hello = "world"
[scenario.foo]
global = { foo = "goodbye" }
default = { a = "yes", b = "no" }
local = true
[scenario.bar]
global = { bar = "world" }
local = true
"""
    source = io.StringIO(toml_input)
    instances = list(pypfilt.scenario.load_instances(source))
    assert len(instances) == 2

    foos = [inst for inst in instances if inst.scenario_id == 'foo']
    bars = [inst for inst in instances if inst.scenario_id == 'bar']
    assert len(foos) == 1
    assert len(bars) == 1
    foo = foos[0]
    bar = bars[0]

    # Check that global and scenario-specific parameters have been applied.
    assert foo.descriptor == ''
    assert bar.descriptor == ''
    assert len(foo.settings) == 4
    assert len(bar.settings) == 4
    assert foo.settings['hello'] == 'world'
    assert bar.settings['hello'] == 'world'

    # A dictionary replaces a non-dictionary default value.
    assert foo.settings['default'] == {'a': 'yes', 'b': 'no'}
    assert bar.settings['default'] == 1

    # NOTE: use identity checks for the booleans, so that truthy values of
    # other types (such as 1) are not accepted.
    assert foo.settings['local'] is True
    assert bar.settings['local'] is True

    # Nested dictionaries are merged with the nested default values.
    assert foo.settings['global'] == {'foo': 'goodbye',
                                      'extra': 'hi'}
    assert bar.settings['global'] == {'foo': 'hello',
                                      'bar': 'world',
                                      'extra': 'hi'}
def test_scenario_multiple_sources():
    """
    Test that we obtain instances from each source, when multiple sources
    are provided as a list.
    """
    toml_input_a = """
[scenario.foo]
local = true
"""
    toml_input_b = """
[scenario.bar]
local = true
"""
    source_a = io.StringIO(toml_input_a)
    source_b = io.StringIO(toml_input_b)
    sources = [source_a, source_b]
    instances = list(pypfilt.scenario.load_instances(sources))
    assert len(instances) == 2

    foos = [inst for inst in instances if inst.scenario_id == 'foo']
    bars = [inst for inst in instances if inst.scenario_id == 'bar']
    assert len(foos) == 1
    assert len(bars) == 1
    foo = foos[0]
    bar = bars[0]

    # Check that the scenario-specific parameters have been applied.
    assert foo.descriptor == ''
    assert bar.descriptor == ''
    assert len(foo.settings) == 1
    assert len(bar.settings) == 1

    # NOTE: use identity checks for the booleans, so that truthy values of
    # other types (such as 1) are not accepted.
    assert foo.settings['local'] is True
    assert bar.settings['local'] is True
def test_scenario_many_scenarios_instances():
    """
    Test that we obtain multiple instances for each scenario, where one
    observation model is defined globally and the other is defined for a
    single scenario.
    """
    toml_input = """
[observations.x]
parameters.bg_obs = 1
parameters.pr_obs = [0.1, 0.2, 0.5]
parameters.disp = 10
format = { bg_obs = "03.0f", pr_obs = "0.1f", disp = "03.0f" }
name = { bg_obs = "bg", pr_obs = "pr", disp = "disp" }
[scenario.single]
[scenario.multi]
[scenario.multi.observations.y]
parameters.bg_obs = 2
parameters.pr_obs = 0.8
parameters.disp = [100, 1000]
format = { bg_obs = "03.0f", pr_obs = "0.1f", disp = "04.0f" }
name = { bg_obs = "bg", pr_obs = "pr", disp = "disp" }
"""
    source = io.StringIO(toml_input)
    instances = list(pypfilt.scenario.load_instances(source))

    def count_matching(insts, unit, param, value):
        """
        Return the number of instances in ``insts`` where the observation
        model ``unit`` has the specified value for the parameter ``param``.
        """
        matches = [
            inst for inst in insts
            if inst.settings['observations'][unit]['parameters'][param]
            == value
        ]
        return len(matches)

    # The 'single' scenario has 3 instances (3 values of x.pr_obs), and the
    # 'multi' scenario has 6 instances (each of these, for 2 values of
    # y.disp).
    assert len(instances) == 9
    singles = [inst for inst in instances if inst.scenario_id == 'single']
    multis = [inst for inst in instances if inst.scenario_id == 'multi']
    assert len(singles) == 3
    assert len(multis) == 6

    # The 'single' scenario only has the global observation model.
    for inst in singles:
        obs_tables = inst.settings['observations']
        assert 'x' in obs_tables
        assert len(obs_tables) == 1

    # The 'multi' scenario has both observation models.
    for inst in multis:
        obs_tables = inst.settings['observations']
        assert 'x' in obs_tables
        assert 'y' in obs_tables
        assert len(obs_tables) == 2

    # Check the 'pr_obs' values for 'x' vary as expected.
    for x_pr in [0.1, 0.2, 0.5]:
        assert count_matching(singles, 'x', 'pr_obs', x_pr) == 1
        assert count_matching(multis, 'x', 'pr_obs', x_pr) == 2

    # Check the 'disp' values for 'y' vary as expected.
    for y_disp in [100, 1000]:
        assert count_matching(multis, 'y', 'disp', y_disp) == 3
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment