Merge pull request #1825 from InstituteforDiseaseModeling/1810-fix
1810 fix
shchen-idmod committed Feb 21, 2023
2 parents 35727a1 + 557b1fe commit 95e6c97
Showing 63 changed files with 958 additions and 182 deletions.
8 changes: 4 additions & 4 deletions dev_scripts/test_root.mk
@@ -46,12 +46,12 @@ reports-exist:
test-all: reports-exist ## Run all our tests
ifneq (1, $(PARALLEL_TESTING)) # Only run these tests if Parallel Only Testing is disabled
-echo "Running Serial Tests"
- $(TEST_COMMAND) -m "serial"
+ $(TEST_COMMAND) -m "serial and not performance"
$(MAKE) mv-serial-reports
endif
ifneq (1, $(SERIAL_TESTING)) # Only run these tests if Serial Only Testing is disabled
-echo "Running Parallel Tests"
- $(TEST_COMMAND) -n $(PARALLEL_TEST_COUNT) -m "not serial"
+ $(TEST_COMMAND) -n $(PARALLEL_TEST_COUNT) -m "not serial and not performance"
endif
ifneq (1, $(PARALLEL_TESTING))
ifneq (1, $(SERIAL_TESTING))
@@ -99,12 +99,12 @@ test-ssmt: reports-exist ## Run our ssmt tests
test-smoke: reports-exist ## Run our smoke tests
ifneq (1, $(PARALLEL_TESTING)) # Only run these tests if Parallel Only Testing is disabled
-echo "Running Serial Tests"
- $(TEST_COMMAND) -m "smoke and serial"
+ $(TEST_COMMAND) -m "smoke and serial and not performance"
$(MAKE) mv-serial-reports
endif
ifneq (1, $(SERIAL_TESTING)) # Only run these tests if Serial Only Testing is disabled
-echo "Running Parallel Tests"
- $(TEST_COMMAND) -n $(PARALLEL_TEST_COUNT) -m "smoke and not serial"
+ $(TEST_COMMAND) -n $(PARALLEL_TEST_COUNT) -m "smoke and not serial and not performance"
endif
ifneq (1, $(PARALLEL_TESTING))
ifneq (1, $(SERIAL_TESTING))
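
The updated marker expressions exclude any test tagged with a performance marker from both the serial and parallel runs. As a minimal illustration (a hypothetical test module, not from this repository; the marker itself would normally be registered in pytest.ini or a conftest.py):

    import pytest

    @pytest.mark.performance
    def test_large_sweep_runtime():
        # Deselected by -m "not performance"; runs only when the marker
        # expression includes "performance".
        assert sum(range(10_000)) == 49_995_000
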
20 changes: 20 additions & 0 deletions docs/cookbook/plugins/idmtools_item_sequence_example.ini
@@ -0,0 +1,20 @@
[COMMON]
# Number of threads idmtools will use for analysis and other multi-threaded activities
max_threads = 16

# How many simulations per thread during simulation creation
sims_per_thread = 20

# Maximum number of LOCAL simulations run simultaneously
max_local_sims = 6

# Maximum number of workers processing in parallel
max_workers = 16

# Maximum batch size to retrieve simulations
batch_size = 10

id_generator = item_sequence

[item_sequence]
id_format_str = {{ item_name }}{{ '%%07d' | format(data[item_name] | int) }}
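
The id_format_str above is a Jinja2 template; the doubled %% is most likely configparser's escape for a literal %. A small sketch (not part of this commit) of how the template renders once parsed:

    from jinja2 import Template

    # After configparser collapses '%%' to '%', the template body becomes:
    template = Template("{{ item_name }}{{ '%07d' | format(data[item_name] | int) }}")
    data = {"Simulation": 2}  # example counter value from the sequence file
    print(template.render(item_name="Simulation", data=data))  # Simulation0000002
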
50 changes: 50 additions & 0 deletions docs/cookbook/plugins/sequential_id_example.py
@@ -0,0 +1,50 @@
import os
import sys
from functools import partial
from typing import Any, Dict
from pathlib import Path
from idmtools import IdmConfigParser
from idmtools.builders import SimulationBuilder
from idmtools.core.platform_factory import Platform
from idmtools.entities.experiment import Experiment
from idmtools.entities.simulation import Simulation
from idmtools.entities.templated_simulation import TemplatedSimulations
from idmtools_models.python.json_python_task import JSONConfiguredPythonTask
from idmtools_test.utils.utils import clear_id_cache

# NOTE TO USER
# You need to define your own SlurmNative configuration block before running this example
# Please update 'idmtools_item_sequence_example.ini' accordingly

platform = Platform('SlurmNative')
clear_id_cache()
parser = IdmConfigParser()
parser._load_config_file(file_name='idmtools_item_sequence_example.ini')
parser.ensure_init(file_name='idmtools_item_sequence_example.ini', force=True)
sequence_file = Path(IdmConfigParser.get_option("item_sequence", "sequence_file",
                                                Path().home().joinpath(".idmtools", "itemsequence", "index.json")))
if sequence_file.exists():
    sequence_file.unlink()

task = JSONConfiguredPythonTask(script_path=os.path.join("..", "..", "..", "examples", "python_model", "inputs", "python_model_with_deps", "Assets", "model.py"),
                                parameters=(dict(c=0)))

ts = TemplatedSimulations(base_task=task)
experiment = Experiment.from_template(ts)
builder = SimulationBuilder()

def param_update(simulation: Simulation, param: str, value: Any) -> Dict[str, Any]:
    simulation.task.set_parameter(param, value)
    simulation.tags['id'] = simulation.id
    return {param: value}

builder.add_sweep_definition(partial(param_update, param="a"), range(2))
builder.add_sweep_definition(partial(param_update, param="b"), range(2))
experiment.simulations.add_builder(builder)

experiment.tags['id'] = experiment.id
experiment.simulations = list(experiment.simulations)

with platform:
    experiment.run(wait_until_done=True)
    sys.exit(0 if experiment.succeeded else -1)
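
After the example completes, you can check what the item_sequence plugin recorded. The sketch below is an optional follow-up (not part of the example) and assumes the default sequence file location described in the plugin documentation:

    import json
    from pathlib import Path

    sequence_file = Path.home() / ".idmtools" / "itemsequence" / "index.json"
    if sequence_file.exists():
        # Per-item-type counters, e.g. {"Simulation": ..., "Experiment": ...}
        print(json.loads(sequence_file.read_text()))
    else:
        print("No sequence file yet; run the example first.")
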
4 changes: 4 additions & 0 deletions docs/idmtools.ini
@@ -16,6 +16,10 @@ max_local_sims = 6
# Maximum number of workers processing in parallel
max_workers = 16

+ # What type of ids should idmtools use internally
+ # use idmtools info plugins id_generators
+ id_generator = uuid
+
# You can also set number of workers per CPU
# If you had 16 cpus and set to 2, 32 workers would be created
# workers_per_cpu = 2
66 changes: 66 additions & 0 deletions docs/plugin_documentation/id-generator-plugins.rst
@@ -0,0 +1,66 @@
=====================
ID Generation Plugins
=====================

**1. Create a file to host the plugin callback for the generator (under idmtools_core/idmtools/plugins). The plugin must have the following format**::

    from idmtools.core.interfaces.ientity import IEntity
    from idmtools.registry.hook_specs import function_hook_impl


    @function_hook_impl
    def idmtools_generate_id(item: IEntity) -> str:
        """
        Args:
            item: Item for which the ID is being generated

        Returns:
            The generated ID as a string
        """
        return <your id implementation here>


The key things in this file are::

    @function_hook_impl
    def idmtools_generate_id(item: 'IEntity') -> str:

This registers the plugin type with idmtools. By using the name idmtools_generate_id, idmtools knows you are defining a callback for ids.
The callback must match the expected signature.


**2. Modify setup.py 'idmtools_hooks' to include the new id generation plugin**::

    entry_points=dict(
        idmtools_hooks=[
            "idmtools_id_generate_<name> = <path to plugin>"
        ]
    ),

The *label* of the id plugin must start with **idmtools_id_generate_**.
The letters after **idmtools_id_generate_** are used to select the generator in the config.

**3. Modify .ini config file to specify the desired id generator.**

In the .ini configuration file under the 'COMMON' section, use the 'id_generator' option to specify the desired id plugin.

For example, if you want to use the uuid generation plugin ('idmtools_id_generate_uuid'), in the .ini file, you would set the following::

    [COMMON]
    id_generator = uuid

Similarly, if you want to use the item_sequence plugin ('idmtools_id_generate_item_sequence'), you would specify the following in the .ini file::

    [COMMON]
    id_generator = item_sequence

The item_sequence plugin allows you to use sequential ids for items in your experiments (both the experiments themselves and their simulations, etc.).
You can customize this plugin by defining an 'item_sequence' section in the .ini file with the following options:

* *sequence_file*: JSON file used to store the last-used numbers for item ids. For example, if we have one experiment defined with two simulations, this file would track the most recently used ids as: {"Simulation": 2, "Experiment": 1}. Note that the sequences start at 0. The default value for this filename (if it is not defined by the user) is index.json, which is created in the user's home directory (at '.idmtools/itemsequence/index.json'). If a sequence_file IS specified, it is stored in the current working directory unless a full path is given. If an item is generated that does not have the item_type attribute (e.g. Platform), its sequence is stored under the 'Unknown' key in this JSON file. After an experiment is run, a backup of this sequence file is generated at the same location ({sequence_file_name}.json.bak); this is done by a post_run hook (specified under 'idmtools_platform_post_run' in item_sequence.py).
* *id_format_str*: Defines the desired format of the item ids (using the sequential id numbers stored in the sequence_file). In this string, you can access the sequential ids via 'data[item_name]' (which resolves to the next id number) as well as the 'item_name' (e.g. 'Simulation', 'Experiment'). The default is '{item_name}{data[item_name]:07d}', which yields ids such as 'Simulation0000000', 'Simulation0000001', etc.

Configuration format::

    [item_sequence]
    sequence_file = <custom file name>.json
    id_format_str = '<custom string format>'

The configuration string format should be a Jinja2 template; see https://jinja.palletsprojects.com/ for the syntax.
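
Putting the three steps together, a complete plugin module might look like the following sketch (the module name, generator label, and id scheme are hypothetical placeholders, not part of idmtools)::

    # my_id_plugin.py -- hypothetical module implementing the step 1 callback
    import uuid

    from idmtools.core.interfaces.ientity import IEntity
    from idmtools.registry.hook_specs import function_hook_impl


    @function_hook_impl
    def idmtools_generate_id(item: IEntity) -> str:
        """Return a short, prefixed id such as 'Simulation-1a2b3c4d'."""
        item_type = getattr(item, "item_type", None)
        prefix = item_type.name.capitalize() if item_type else "Unknown"
        return f"{prefix}-{uuid.uuid4().hex[:8]}"

    # Step 2: register the module in setup.py, for example
    #   idmtools_hooks=["idmtools_id_generate_myplugin = my_id_plugin"]
    # Step 3: select it in the .ini file with:  id_generator = myplugin
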
9 changes: 9 additions & 0 deletions docs/plugin_documentation/index.rst
@@ -0,0 +1,9 @@
=========================
Plugin Documentation
=========================

.. toctree::
:maxdepth: 3
:titlesonly:

id-generator-plugins
4 changes: 0 additions & 4 deletions docs/reference.rst
@@ -60,10 +60,6 @@ API class specifications

.. uml:: /diagrams/apis-emod.puml

- .. toctree::
-    :maxdepth: 2
-    :titlesonly:



api/idmtools_index
3 changes: 3 additions & 0 deletions examples/builders/manual_building.py
@@ -39,6 +39,9 @@
for i in range(5):
# first copy the simulation
sim = copy.deepcopy(base_simulation)
+ # For now, you have to reset the uid manually when copying here. In future, you should only need to do a
+ # copy method here
+ sim._uid = None
# configure it
sim.task.set_parameter("a", i)
sim.task.set_parameter("b", i + 10)
3 changes: 2 additions & 1 deletion idmtools_core/build_requirements.txt
@@ -3,4 +3,5 @@ coverage>=5.3,<6.6
flake8>=3.9.1
flake8-docstrings>=1.6.0
idm-buildtools>=1.0.3
- twine>=3.4.1
+ twine>=3.4.1
+ jinja2~=3.1.2
1 change: 1 addition & 0 deletions idmtools_core/dev_requirements.txt
@@ -12,3 +12,4 @@ pytest-xdist~=3.1
pytest~=7.2.0
xmlrunner~=1.7.7
pytest-lazy-fixture
+ jinja2~=3.1.2
21 changes: 10 additions & 11 deletions idmtools_core/idmtools/analysis/analyze_manager.py
@@ -9,8 +9,7 @@
import time
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
from logging import getLogger, DEBUG
- from typing import NoReturn, List, Dict, Tuple, Optional, Union, TYPE_CHECKING
- from uuid import UUID
+ from typing import NoReturn, List, Dict, Tuple, Optional, TYPE_CHECKING
from tqdm import tqdm
from idmtools import IdmConfigParser
from idmtools.analysis.map_worker_entry import map_item
@@ -70,26 +69,26 @@ class ItemsNotReady(Exception):
pass

def __init__(self, platform: 'IPlatform' = None, configuration: dict = None,
- ids: List[Tuple[Union[str, UUID], ItemType]] = None,
+ ids: List[Tuple[str, ItemType]] = None,
analyzers: List[IAnalyzer] = None, working_dir: str = None,
partial_analyze_ok: bool = False, max_items: Optional[int] = None, verbose: bool = True,
force_manager_working_directory: bool = False,
- exclude_ids: List[Union[str, UUID]] = None, analyze_failed_items: bool = False,
+ exclude_ids: List[str] = None, analyze_failed_items: bool = False,
max_workers: Optional[int] = None, executor_type: str = 'process'):
"""
Initialize the AnalyzeManager.
Args:
platform (IPlatform): Platform
configuration (dict, optional): Initial Configuration. Defaults to None.
- ids (Tuple[UUID, ItemType], optional): List of ids as pair of Tuple and ItemType. Defaults to None.
+ ids (Tuple[str, ItemType], optional): List of ids as pair of Tuple and ItemType. Defaults to None.
analyzers (List[IAnalyzer], optional): List of Analyzers. Defaults to None.
working_dir (str, optional): The working directory. Defaults to os.getcwd().
partial_analyze_ok (bool, optional): Whether partial analysis is ok. When this is True, Experiments in progress or Failed can be analyzed. Defaults to False.
max_items (int, optional): Max Items to analyze. Useful when developing and testing an Analyzer. Defaults to None.
verbose (bool, optional): Print extra information about analysis. Defaults to True.
force_manager_working_directory (bool, optional): [description]. Defaults to False.
- exclude_ids (List[UUID], optional): [description]. Defaults to None.
+ exclude_ids (List[str], optional): [description]. Defaults to None.
analyze_failed_items (bool, optional): Allows analyzing of failed items. Useful when you are trying to aggregate items that have failed. Defaults to False.
max_workers (int, optional): Set the max workers. If not provided, falls back to the configuration item *max_threads*. If max_workers is not set in configuration, defaults to CPU count
executor_type: (str): Whether to use process or thread pooling. Process pooling is more efficient but threading might be required in some environments
@@ -146,7 +145,7 @@ def __init__(self, platform: 'IPlatform' = None, configuration: dict = None,
for oid, otype in ids:
logger.debug(f'Getting metadata for {oid} and {otype}')
item = self.platform.get_item(oid, otype, force=True, raw=True)
- item.uid = item.id if isinstance(item.id, UUID) else UUID(item.id)
+ item.uid = str(item.id)
item.platform = self.platform
items.append(item)
self.potential_items: List[IEntity] = []
@@ -155,10 +154,10 @@ def __init__(self, platform: 'IPlatform' = None, configuration: dict = None,
logger.debug(f'Flattening items for {i.uid}')
self.potential_items.extend(self.platform.flatten_item(item=i, raw=True))

- # These are leaf items to be ignored in analysis. Make sure they are UUID and then prune them from analysis.
+ # These are leaf items to be ignored in analysis. Prune them from analysis.
self.exclude_ids = exclude_ids or []
for index, oid in enumerate(self.exclude_ids):
- self.exclude_ids[index] = oid if isinstance(oid, UUID) else UUID(oid)
+ self.exclude_ids[index] = str(oid)
self.potential_items = [item for item in self.potential_items if item.uid not in self.exclude_ids]
for item in self.potential_items:
item.platform = self.platform
@@ -204,7 +203,7 @@ def add_item(self, item: IEntity) -> NoReturn:
"""
self.potential_items.extend(self.platform.flatten_item(item=item, raw=True))

- def _get_items_to_analyze(self) -> Dict[UUID, IEntity]:
+ def _get_items_to_analyze(self) -> Dict[str, IEntity]:
"""
Get a list of items derived from :meth:`self._items` that are available to analyze.
@@ -415,7 +414,7 @@ def analyze(self) -> bool:
user_logger.error('No items were provided; cannot run analysis.')
return False
# trim processing to those items that are ready and match requested limits
- self._items: Dict[UUID, IEntity] = self._get_items_to_analyze()
+ self._items: Dict[str, IEntity] = self._get_items_to_analyze()

if len(self._items) == 0:
user_logger.error('No items are ready; cannot run analysis.')
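
The net effect of these changes is that item ids are handled as plain strings, so callers pass string ids directly. A hedged usage sketch (the experiment id, the platform block, and the empty analyzer list are placeholders):

    from idmtools.analysis.analyze_manager import AnalyzeManager
    from idmtools.core import ItemType
    from idmtools.core.platform_factory import Platform

    platform = Platform("SlurmNative")  # placeholder configuration block
    am = AnalyzeManager(
        platform=platform,
        ids=[("my-experiment-id", ItemType.EXPERIMENT)],  # plain string id, no uuid.UUID wrapping
        analyzers=[],  # add IAnalyzer implementations here
    )
    am.analyze()
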
11 changes: 5 additions & 6 deletions idmtools_core/idmtools/analysis/map_worker_entry.py
@@ -7,10 +7,9 @@
"""
import itertools
from logging import getLogger, DEBUG
- from uuid import UUID
from idmtools.core.interfaces.ientity import IEntity
from idmtools.utils.file_parser import FileParser
- from typing import TYPE_CHECKING, Union, Dict
+ from typing import TYPE_CHECKING, Dict
from idmtools.core.interfaces.iitem import IItem
from idmtools.entities.ianalyzer import TAnalyzerList

@@ -20,15 +19,15 @@
logger = getLogger(__name__)


- def map_item(item: IItem) -> Dict[Union[str, UUID], Dict]:
+ def map_item(item: IItem) -> Dict[str, Dict]:
"""
Initialize some worker-global values; a worker process entry point for analyzer item-mapping.
Args:
item: The item (often simulation) to process.
Returns:
- Dict[Union[str, UUID], Dict]
+ Dict[str, Dict]
"""
# Retrieve the global variables coming from the pool initialization

@@ -42,7 +41,7 @@ def map_item(item: IItem) -> Dict[Union[str, UUID], Dict]:
return _get_mapped_data_for_item(item, analyzers, platform)


- def _get_mapped_data_for_item(item: IEntity, analyzers: TAnalyzerList, platform: 'IPlatform') -> Dict[Union[str, UUID], Dict]:
+ def _get_mapped_data_for_item(item: IEntity, analyzers: TAnalyzerList, platform: 'IPlatform') -> Dict[str, Dict]:
"""
Get mapped data from an item.
@@ -54,7 +53,7 @@ def _get_mapped_data_for_item(item: IEntity, analyzers: TAnalyzerList, platform:
platform: A platform object to query for information.
Returns:
- Dict[Union[str, UUID], Dict] - Array mapping file data to from UUID/string to contents
+ Dict[str, Dict] - Array mapping file data to from str to contents
"""
try:
3 changes: 1 addition & 2 deletions idmtools_core/idmtools/assets/asset_collection.py
@@ -9,7 +9,6 @@
from logging import getLogger
from os import PathLike
from typing import List, NoReturn, TypeVar, Union, Any, Dict, TYPE_CHECKING
- from uuid import UUID
from idmtools.assets import Asset, TAssetList
from idmtools.assets import TAssetFilterList
from idmtools.assets.errors import DuplicatedAssetError
@@ -63,7 +62,7 @@ def __init__(self, assets: Union[List[str], TAssetList, 'AssetCollection'] = Non
self.tags = self.tags or tags

@classmethod
- def from_id(cls, item_id: Union[str, UUID], platform: 'IPlatform' = None, as_copy: bool = False,  # noqa E821
+ def from_id(cls, item_id: str, platform: 'IPlatform' = None, as_copy: bool = False,  # noqa E821
**kwargs) -> 'AssetCollection':
"""
Loads a AssetCollection from id.
4 changes: 4 additions & 0 deletions idmtools_core/idmtools/core/enums.py
@@ -3,9 +3,13 @@
Copyright 2021, Bill & Melinda Gates Foundation. All rights reserved.
"""
+ from pathlib import Path
+
from enum import Enum

TRUTHY_VALUES = ['1', 'y', 'yes', 'on', 'true', 't', 1, True]
+ # Used to store idmtools user specific config/data
+ IDMTOOLS_USER_HOME = Path().home().joinpath(".idmtools")


class EntityStatus(Enum):
