Merge pull request #1825 from InstituteforDiseaseModeling/1810-fix
1810 fix
shchen-idmod committed Feb 21, 2023
2 parents 35727a1 + 557b1fe commit 95e6c97
Showing 63 changed files with 958 additions and 182 deletions.
8 changes: 4 additions & 4 deletions dev_scripts/test_root.mk
@@ -46,12 +46,12 @@ reports-exist:
test-all: reports-exist ## Run all our tests
ifneq (1, $(PARALLEL_TESTING)) # Only run these tests if Parallel Only Testing is disabled
-echo "Running Serial Tests"
- $(TEST_COMMAND) -m "serial"
+ $(TEST_COMMAND) -m "serial and not performance"
$(MAKE) mv-serial-reports
endif
ifneq (1, $(SERIAL_TESTING)) # Only run these tests if Serial Only Testing is disabled
-echo "Running Parallel Tests"
- $(TEST_COMMAND) -n $(PARALLEL_TEST_COUNT) -m "not serial"
+ $(TEST_COMMAND) -n $(PARALLEL_TEST_COUNT) -m "not serial and not performance"
endif
ifneq (1, $(PARALLEL_TESTING))
ifneq (1, $(SERIAL_TESTING))
@@ -99,12 +99,12 @@ test-ssmt: reports-exist ## Run our ssmt tests
test-smoke: reports-exist ## Run our smoke tests
ifneq (1, $(PARALLEL_TESTING)) # Only run these tests if Parallel Only Testing is disabled
-echo "Running Serial Tests"
- $(TEST_COMMAND) -m "smoke and serial"
+ $(TEST_COMMAND) -m "smoke and serial and not performance"
$(MAKE) mv-serial-reports
endif
ifneq (1, $(SERIAL_TESTING)) # Only run these tests if Serial Only Testing is disabled
-echo "Running Parallel Tests"
- $(TEST_COMMAND) -n $(PARALLEL_TEST_COUNT) -m "smoke and not serial"
+ $(TEST_COMMAND) -n $(PARALLEL_TEST_COUNT) -m "smoke and not serial and not performance"
endif
ifneq (1, $(PARALLEL_TESTING))
ifneq (1, $(SERIAL_TESTING))
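
The updated marker expressions exclude any test tagged with a performance marker from both the serial and parallel runs. As a minimal illustration (a hypothetical test module, not from this repository; the marker itself would normally be registered in pytest.ini or a conftest.py):

    import pytest

    @pytest.mark.performance
    def test_large_sweep_runtime():
        # Deselected by -m "not performance"; runs only when the marker
        # expression includes "performance".
        assert sum(range(10_000)) == 49_995_000
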
20 changes: 20 additions & 0 deletions docs/cookbook/plugins/idmtools_item_sequence_example.ini
@@ -0,0 +1,20 @@
[COMMON]
# Number of threads idmtools will use for analysis and other multi-threaded activities
max_threads = 16

# How many simulations per thread during simulation creation
sims_per_thread = 20

# Maximum number of LOCAL simulations run simultaneously
max_local_sims = 6

# Maximum number of workers processing in parallel
max_workers = 16

# Maximum batch size to retrieve simulations
batch_size = 10

id_generator = item_sequence

[item_sequence]
id_format_str = {{ item_name }}{{ '%%07d' | format(data[item_name] | int) }}
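
The id_format_str above is a Jinja2 template; the doubled %% is most likely configparser's escape for a literal %. A small sketch (not part of this commit) of how the template renders once parsed:

    from jinja2 import Template

    # After configparser collapses '%%' to '%', the template body becomes:
    template = Template("{{ item_name }}{{ '%07d' | format(data[item_name] | int) }}")
    data = {"Simulation": 2}  # example counter value from the sequence file
    print(template.render(item_name="Simulation", data=data))  # Simulation0000002
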
50 changes: 50 additions & 0 deletions docs/cookbook/plugins/sequential_id_example.py
@@ -0,0 +1,50 @@
import os
import sys
from functools import partial
from typing import Any, Dict
from pathlib import Path
from idmtools import IdmConfigParser
from idmtools.builders import SimulationBuilder
from idmtools.core.platform_factory import Platform
from idmtools.entities.experiment import Experiment
from idmtools.entities.simulation import Simulation
from idmtools.entities.templated_simulation import TemplatedSimulations
from idmtools_models.python.json_python_task import JSONConfiguredPythonTask
from idmtools_test.utils.utils import clear_id_cache

# NOTE TO USER
# You need to define your own SlurmNative configuration block before running this example
# Please update 'idmtools_item_sequence_example.ini' accordingly

platform = Platform('SlurmNative')
clear_id_cache()
parser = IdmConfigParser()
parser._load_config_file(file_name='idmtools_item_sequence_example.ini')
parser.ensure_init(file_name='idmtools_item_sequence_example.ini', force=True)
sequence_file = Path(IdmConfigParser.get_option("item_sequence", "sequence_file",
                                                Path().home().joinpath(".idmtools", "itemsequence", "index.json")))
if sequence_file.exists():
    sequence_file.unlink()

task = JSONConfiguredPythonTask(script_path=os.path.join("..", "..", "..", "examples", "python_model", "inputs", "python_model_with_deps", "Assets", "model.py"),
                                parameters=(dict(c=0)))

ts = TemplatedSimulations(base_task=task)
experiment = Experiment.from_template(ts)
builder = SimulationBuilder()

def param_update(simulation: Simulation, param: str, value: Any) -> Dict[str, Any]:
    simulation.task.set_parameter(param, value)
    simulation.tags['id'] = simulation.id
    return {param: value}

builder.add_sweep_definition(partial(param_update, param="a"), range(2))
builder.add_sweep_definition(partial(param_update, param="b"), range(2))
experiment.simulations.add_builder(builder)

experiment.tags['id'] = experiment.id
experiment.simulations = list(experiment.simulations)

with platform:
    experiment.run(wait_until_done=True)
    sys.exit(0 if experiment.succeeded else -1)
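
After the example completes, you can check what the item_sequence plugin recorded. The sketch below is an optional follow-up (not part of the example) and assumes the default sequence file location described in the plugin documentation:

    import json
    from pathlib import Path

    sequence_file = Path.home() / ".idmtools" / "itemsequence" / "index.json"
    if sequence_file.exists():
        # Per-item-type counters, e.g. {"Simulation": ..., "Experiment": ...}
        print(json.loads(sequence_file.read_text()))
    else:
        print("No sequence file yet; run the example first.")
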
4 changes: 4 additions & 0 deletions docs/idmtools.ini
@@ -16,6 +16,10 @@ max_local_sims = 6
# Maximum number of workers processing in parallel
max_workers = 16

+ # What type of ids should idmtools use internally
+ # use idmtools info plugins id_generators
+ id_generator = uuid
+
# You can also set number of workers per CPU
# If you had 16 cpus and set to 2, 32 workers would be created
# workers_per_cpu = 2
66 changes: 66 additions & 0 deletions docs/plugin_documentation/id-generator-plugins.rst
@@ -0,0 +1,66 @@
=====================
ID Generation Plugins
=====================

**1. Create a file to host the plugin callback for the generator (under idmtools_core/idmtools/plugins). The plugin must have the following format**::

    from idmtools.core.interfaces.ientity import IEntity
    from idmtools.registry.hook_specs import function_hook_impl


    @function_hook_impl
    def idmtools_generate_id(item: IEntity) -> str:
        """
        Args:
            item: Item for which the ID is being generated

        Returns:
            The generated ID as a string
        """
        return <your id implementation here>


The key things in this file are::

    @function_hook_impl
    def idmtools_generate_id(item: 'IEntity') -> str:

This registers the plugin type with idmtools. By using the name idmtools_generate_id, idmtools knows you are defining a callback for ids.
The callback must match the expected signature.


**2. Modify setup.py 'idmtools_hooks' to include the new id generation plugin**::

    entry_points=dict(
        idmtools_hooks=[
            "idmtools_id_generate_<name> = <path to plugin>"
        ]
    ),

The *label* of the id plugin must start with **idmtools_id_generate_**.
The letters after **idmtools_id_generate_** are used to select the generator in the config.

**3. Modify .ini config file to specify the desired id generator.**

In the .ini configuration file under the 'COMMON' section, use the 'id_generator' option to specify the desired id plugin.

For example, if you want to use the uuid generation plugin ('idmtools_id_generate_uuid'), in the .ini file, you would set the following::

    [COMMON]
    id_generator = uuid

Similarly, if you want to use the item_sequence plugin ('idmtools_id_generate_item_sequence'), you would specify the following in the .ini file::

    [COMMON]
    id_generator = item_sequence

The item_sequence plugin allows you to use sequential ids for items in your experiments (both the experiments themselves and their simulations, etc.).
You can customize this plugin by defining an 'item_sequence' section in the .ini file with the following options:

* *sequence_file*: JSON file used to store the last-used numbers for item ids. For example, if we have one experiment defined with two simulations, this file would track the most recently used ids as: {"Simulation": 2, "Experiment": 1}. Note that the sequences start at 0. The default value for this filename (if it is not defined by the user) is index.json, which is created in the user's home directory (at '.idmtools/itemsequence/index.json'). If a sequence_file IS specified, it is stored in the current working directory unless a full path is given. If an item is generated that does not have the item_type attribute (e.g. Platform), its sequence is stored under the 'Unknown' key in this JSON file. After an experiment is run, a backup of this sequence file is generated at the same location ({sequence_file_name}.json.bak); this is done by a post_run hook (specified under 'idmtools_platform_post_run' in item_sequence.py).
* *id_format_str*: Defines the desired format of the item ids (using the sequential id numbers stored in the sequence_file). In this string, you can access the sequential ids via 'data[item_name]' (which resolves to the next id number) as well as the 'item_name' (e.g. 'Simulation', 'Experiment'). The default is '{item_name}{data[item_name]:07d}', which yields ids such as 'Simulation0000000', 'Simulation0000001', etc.

Configuration format::

    [item_sequence]
    sequence_file = <custom file name>.json
    id_format_str = '<custom string format>'

The configuration string format should be a Jinja2 template; see https://jinja.palletsprojects.com/ for the syntax.
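
Putting the three steps together, a complete plugin module might look like the following sketch (the module name, generator label, and id scheme are hypothetical placeholders, not part of idmtools)::

    # my_id_plugin.py -- hypothetical module implementing the step 1 callback
    import uuid

    from idmtools.core.interfaces.ientity import IEntity
    from idmtools.registry.hook_specs import function_hook_impl


    @function_hook_impl
    def idmtools_generate_id(item: IEntity) -> str:
        """Return a short, prefixed id such as 'Simulation-1a2b3c4d'."""
        item_type = getattr(item, "item_type", None)
        prefix = item_type.name.capitalize() if item_type else "Unknown"
        return f"{prefix}-{uuid.uuid4().hex[:8]}"

    # Step 2: register the module in setup.py, for example
    #   idmtools_hooks=["idmtools_id_generate_myplugin = my_id_plugin"]
    # Step 3: select it in the .ini file with:  id_generator = myplugin
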
9 changes: 9 additions & 0 deletions docs/plugin_documentation/index.rst
@@ -0,0 +1,9 @@
=========================
Plugin Documentation
=========================

.. toctree::
:maxdepth: 3
:titlesonly:

id-generator-plugins
4 changes: 0 additions & 4 deletions docs/reference.rst
@@ -60,10 +60,6 @@ API class specifications

.. uml:: /diagrams/apis-emod.puml

- .. toctree::
-    :maxdepth: 2
-    :titlesonly:



api/idmtools_index
3 changes: 3 additions & 0 deletions examples/builders/manual_building.py
@@ -39,6 +39,9 @@
for i in range(5):
# first copy the simulation
sim = copy.deepcopy(base_simulation)
+ # For now, you have to reset the uid manually when copying here. In future, you should only need to do a
+ # copy method here
+ sim._uid = None
# configure it
sim.task.set_parameter("a", i)
sim.task.set_parameter("b", i + 10)
3 changes: 2 additions & 1 deletion idmtools_core/build_requirements.txt
@@ -3,4 +3,5 @@ coverage>=5.3,<6.6
flake8>=3.9.1
flake8-docstrings>=1.6.0
idm-buildtools>=1.0.3
- twine>=3.4.1
+ twine>=3.4.1
+ jinja2~=3.1.2
1 change: 1 addition & 0 deletions idmtools_core/dev_requirements.txt
@@ -12,3 +12,4 @@ pytest-xdist~=3.1
pytest~=7.2.0
xmlrunner~=1.7.7
pytest-lazy-fixture
+ jinja2~=3.1.2
21 changes: 10 additions & 11 deletions idmtools_core/idmtools/analysis/analyze_manager.py
@@ -9,8 +9,7 @@
import time
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
from logging import getLogger, DEBUG
- from typing import NoReturn, List, Dict, Tuple, Optional, Union, TYPE_CHECKING
- from uuid import UUID
+ from typing import NoReturn, List, Dict, Tuple, Optional, TYPE_CHECKING
from tqdm import tqdm
from idmtools import IdmConfigParser
from idmtools.analysis.map_worker_entry import map_item
@@ -70,26 +69,26 @@ class ItemsNotReady(Exception):
pass

def __init__(self, platform: 'IPlatform' = None, configuration: dict = None,
- ids: List[Tuple[Union[str, UUID], ItemType]] = None,
+ ids: List[Tuple[str, ItemType]] = None,
analyzers: List[IAnalyzer] = None, working_dir: str = None,
partial_analyze_ok: bool = False, max_items: Optional[int] = None, verbose: bool = True,
force_manager_working_directory: bool = False,
- exclude_ids: List[Union[str, UUID]] = None, analyze_failed_items: bool = False,
+ exclude_ids: List[str] = None, analyze_failed_items: bool = False,
max_workers: Optional[int] = None, executor_type: str = 'process'):
"""
Initialize the AnalyzeManager.
Args:
platform (IPlatform): Platform
configuration (dict, optional): Initial Configuration. Defaults to None.
- ids (Tuple[UUID, ItemType], optional): List of ids as pair of Tuple and ItemType. Defaults to None.
+ ids (Tuple[str, ItemType], optional): List of ids as pair of Tuple and ItemType. Defaults to None.
analyzers (List[IAnalyzer], optional): List of Analyzers. Defaults to None.
working_dir (str, optional): The working directory. Defaults to os.getcwd().
partial_analyze_ok (bool, optional): Whether partial analysis is ok. When this is True, Experiments in progress or Failed can be analyzed. Defaults to False.
max_items (int, optional): Max Items to analyze. Useful when developing and testing an Analyzer. Defaults to None.
verbose (bool, optional): Print extra information about analysis. Defaults to True.
force_manager_working_directory (bool, optional): [description]. Defaults to False.
- exclude_ids (List[UUID], optional): [description]. Defaults to None.
+ exclude_ids (List[str], optional): [description]. Defaults to None.
analyze_failed_items (bool, optional): Allows analyzing of failed items. Useful when you are trying to aggregate items that have failed. Defaults to False.
max_workers (int, optional): Set the max workers. If not provided, falls back to the configuration item *max_threads*. If max_workers is not set in configuration, defaults to CPU count
executor_type: (str): Whether to use process or thread pooling. Process pooling is more efficient but threading might be required in some environments
@@ -146,7 +145,7 @@ def __init__(self, platform: 'IPlatform' = None, configuration: dict = None,
for oid, otype in ids:
logger.debug(f'Getting metadata for {oid} and {otype}')
item = self.platform.get_item(oid, otype, force=True, raw=True)
- item.uid = item.id if isinstance(item.id, UUID) else UUID(item.id)
+ item.uid = str(item.id)
item.platform = self.platform
items.append(item)
self.potential_items: List[IEntity] = []
@@ -155,10 +154,10 @@ def __init__(self, platform: 'IPlatform' = None, configuration: dict = None,
logger.debug(f'Flattening items for {i.uid}')
self.potential_items.extend(self.platform.flatten_item(item=i, raw=True))

- # These are leaf items to be ignored in analysis. Make sure they are UUID and then prune them from analysis.
+ # These are leaf items to be ignored in analysis. Prune them from analysis.
self.exclude_ids = exclude_ids or []
for index, oid in enumerate(self.exclude_ids):
- self.exclude_ids[index] = oid if isinstance(oid, UUID) else UUID(oid)
+ self.exclude_ids[index] = str(oid)
self.potential_items = [item for item in self.potential_items if item.uid not in self.exclude_ids]
for item in self.potential_items:
item.platform = self.platform
@@ -204,7 +203,7 @@ def add_item(self, item: IEntity) -> NoReturn:
"""
self.potential_items.extend(self.platform.flatten_item(item=item, raw=True))

- def _get_items_to_analyze(self) -> Dict[UUID, IEntity]:
+ def _get_items_to_analyze(self) -> Dict[str, IEntity]:
"""
Get a list of items derived from :meth:`self._items` that are available to analyze.
@@ -415,7 +414,7 @@ def analyze(self) -> bool:
user_logger.error('No items were provided; cannot run analysis.')
return False
# trim processing to those items that are ready and match requested limits
- self._items: Dict[UUID, IEntity] = self._get_items_to_analyze()
+ self._items: Dict[str, IEntity] = self._get_items_to_analyze()

if len(self._items) == 0:
user_logger.error('No items are ready; cannot run analysis.')
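
The net effect of these changes is that item ids are handled as plain strings, so callers pass string ids directly. A hedged usage sketch (the experiment id, the platform block, and the empty analyzer list are placeholders):

    from idmtools.analysis.analyze_manager import AnalyzeManager
    from idmtools.core import ItemType
    from idmtools.core.platform_factory import Platform

    platform = Platform("SlurmNative")  # placeholder configuration block
    am = AnalyzeManager(
        platform=platform,
        ids=[("my-experiment-id", ItemType.EXPERIMENT)],  # plain string id, no uuid.UUID wrapping
        analyzers=[],  # add IAnalyzer implementations here
    )
    am.analyze()
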
11 changes: 5 additions & 6 deletions idmtools_core/idmtools/analysis/map_worker_entry.py
@@ -7,10 +7,9 @@
"""
import itertools
from logging import getLogger, DEBUG
- from uuid import UUID
from idmtools.core.interfaces.ientity import IEntity
from idmtools.utils.file_parser import FileParser
- from typing import TYPE_CHECKING, Union, Dict
+ from typing import TYPE_CHECKING, Dict
from idmtools.core.interfaces.iitem import IItem
from idmtools.entities.ianalyzer import TAnalyzerList

@@ -20,15 +19,15 @@
logger = getLogger(__name__)


- def map_item(item: IItem) -> Dict[Union[str, UUID], Dict]:
+ def map_item(item: IItem) -> Dict[str, Dict]:
"""
Initialize some worker-global values; a worker process entry point for analyzer item-mapping.
Args:
item: The item (often simulation) to process.
Returns:
- Dict[Union[str, UUID], Dict]
+ Dict[str, Dict]
"""
# Retrieve the global variables coming from the pool initialization

@@ -42,7 +41,7 @@ def map_item(item: IItem) -> Dict[Union[str, UUID], Dict]:
return _get_mapped_data_for_item(item, analyzers, platform)


- def _get_mapped_data_for_item(item: IEntity, analyzers: TAnalyzerList, platform: 'IPlatform') -> Dict[Union[str, UUID], Dict]:
+ def _get_mapped_data_for_item(item: IEntity, analyzers: TAnalyzerList, platform: 'IPlatform') -> Dict[str, Dict]:
"""
Get mapped data from an item.
@@ -54,7 +53,7 @@ def _get_mapped_data_for_item(item: IEntity, analyzers: TAnalyzerList, platform:
platform: A platform object to query for information.
Returns:
- Dict[Union[str, UUID], Dict] - Array mapping file data to from UUID/string to contents
+ Dict[str, Dict] - Array mapping file data to from str to contents
"""
try:
3 changes: 1 addition & 2 deletions idmtools_core/idmtools/assets/asset_collection.py
@@ -9,7 +9,6 @@
from logging import getLogger
from os import PathLike
from typing import List, NoReturn, TypeVar, Union, Any, Dict, TYPE_CHECKING
- from uuid import UUID
from idmtools.assets import Asset, TAssetList
from idmtools.assets import TAssetFilterList
from idmtools.assets.errors import DuplicatedAssetError
@@ -63,7 +62,7 @@ def __init__(self, assets: Union[List[str], TAssetList, 'AssetCollection'] = Non
self.tags = self.tags or tags

@classmethod
- def from_id(cls, item_id: Union[str, UUID], platform: 'IPlatform' = None, as_copy: bool = False,  # noqa E821
+ def from_id(cls, item_id: str, platform: 'IPlatform' = None, as_copy: bool = False,  # noqa E821
**kwargs) -> 'AssetCollection':
"""
Loads a AssetCollection from id.
4 changes: 4 additions & 0 deletions idmtools_core/idmtools/core/enums.py
@@ -3,9 +3,13 @@
Copyright 2021, Bill & Melinda Gates Foundation. All rights reserved.
"""
+ from pathlib import Path
+
from enum import Enum

TRUTHY_VALUES = ['1', 'y', 'yes', 'on', 'true', 't', 1, True]
+ # Used to store idmtools user specific config/data
+ IDMTOOLS_USER_HOME = Path().home().joinpath(".idmtools")


class EntityStatus(Enum):
