From 30494e475abb29bb9699671dd2f575bed906402e Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Wed, 18 Mar 2020 18:39:41 -0400 Subject: [PATCH 01/94] Work in progress Signed-off-by: Andy Neff --- terra/compute/base.py | 19 +++++++++++++++++- terra/core/settings.py | 11 +++++++++++ terra/core/utils.py | 6 ++++++ terra/executor/celery/executor.py | 17 ---------------- terra/task.py | 33 +++++++++++++++++++++++++++++++ 5 files changed, 68 insertions(+), 18 deletions(-) create mode 100644 terra/task.py diff --git a/terra/compute/base.py b/terra/compute/base.py index 9197d192..b6911b11 100644 --- a/terra/compute/base.py +++ b/terra/compute/base.py @@ -1,5 +1,8 @@ import os +import json +from terra import settings +from terra.core.settings import TerraJSONEncoder import terra.compute.utils from terra.executor import Executor from terra.logger import getLogger @@ -75,7 +78,21 @@ def pre_run(self): :class:`terra.compute.base.BaseService` is mainly responsible for handling Executors that need a separate volume translation ''' - self.executor_configuration_map = Executor.configuration_map(self) + executor_volume_map = Executor.configuration_map(self) + + # If there is a non-empty mapping, then create a custom executor settings + if executor_volume_map: + executor_settings = terra.compute.utils.translate_settings_paths( + TerraJSONEncoder.serializableSettings(settings), + executor_volume_map) + + with open(os.path.join(self.temp_dir.name, + 'executor_config.json'), 'w') as fid: + json.dump(executor_settings, fid) + + # Tell the executor, you have your own settings file. + self.env['TERRA_EXECUTOR_SETTINGS_FILE'] = \ + '/tmp_settings/executor_config.json' def post_run(self): pass diff --git a/terra/core/settings.py b/terra/core/settings.py index f7fec15f..b4f644e4 100644 --- a/terra/core/settings.py +++ b/terra/core/settings.py @@ -428,6 +428,17 @@ def _setup(self, name=None): self.configure(json.load(fid)) self._wrapped.config_file = os.environ.get(ENVIRONMENT_VARIABLE) + def __getstate__(self): + if self._wrapped is None: + self._setup() + return {'_wrapped': self._wrapped} + + def __setstate__(self, state): + self._wrapped = state['_wrapped'] + + from terra.core.signals import post_settings_configured + post_settings_configured.send(sender=self) + def __repr__(self): # Hardcode the class name as otherwise it yields 'Settings'. if self._wrapped is None: diff --git a/terra/core/utils.py b/terra/core/utils.py index 7aae9eee..57781fff 100644 --- a/terra/core/utils.py +++ b/terra/core/utils.py @@ -193,3 +193,9 @@ def _connect_backend(self, *args, **kwargs): def __call__(self, *args, **kwargs): return self._connection(*args, **kwargs) + +import threading + +class ThreadedHandler(Handler): + def _connection(self): + return self._connect_backend() \ No newline at end of file diff --git a/terra/executor/celery/executor.py b/terra/executor/celery/executor.py index 0f796194..e0648dbb 100644 --- a/terra/executor/celery/executor.py +++ b/terra/executor/celery/executor.py @@ -231,21 +231,4 @@ def __init__(self, service_info): volume_map = compute.get_volume_map(config, service_clone) - # # In the case of docker, the config has /tmp_settings in there, this - # # should be removed, as it is not in the celery worker. I don't think it - # # would cause any problems, but it's inaccurate. 
- # volume_map = [v for v in volume_map if v[1] != '/tmp_settings'] - return volume_map - - # optional_args = {} - # optional_args['justfile'] = justfile - - # args = ["--wrap", "Just-docker-compose"] + \ - # sum([['-f', cf] for cf in compose_files], []) + \ - # ['config'] - - # pid = just(*args, stdout=PIPE, - # **optional_args, - # env=service_info.env) - # return pid.communicate()[0] diff --git a/terra/task.py b/terra/task.py new file mode 100644 index 00000000..0d97bce8 --- /dev/null +++ b/terra/task.py @@ -0,0 +1,33 @@ +from os import environ as env + +from celery import Task, shared_task as original_shared_task + +__all__ = ['TerraTask', 'shared_task'] + +def shared_task(*args, **kwargs): + kwargs['bind'] = True + kwargs['base'] = TerraTask + return original_shared_task(*args, **kwargs) + +class TerraTask(Task): + @staticmethod + def _patch_settings(args, kwargs): + if 'TERRA_EXECUTOR_SETTINGS_FILE' in env: + # TODO: Cache loads for efficiency? + settings = json.load(env['TERRA_EXECUTOR_SETTINGS_FILE']) + + # If args is not empty, the first arg was settings + if args: + args[0] = settings + else: + kwargs['settings'] = settings + + def apply_async(self, args=None, kwargs=None, task_id=None, user=None, + *args2, **kwargs2): + TerraTask._patch_settings(args, kwargs) + return super().apply_async(args=args, kwargs=kwargs, + task_id=task_id, *args2, **kwargs2) + + def apply(self, *args, **kwargs): + TerraTask._patch_settings(args, kwargs) + return super().apply(*args, **kwargs) From 9096ce94a7209813f5bac21047dff16b6f9d3434 Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Thu, 19 Mar 2020 13:47:28 -0400 Subject: [PATCH 02/94] Update vsi_common Signed-off-by: Andy Neff --- external/vsi_common | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/vsi_common b/external/vsi_common index c98eb284..1e039570 160000 --- a/external/vsi_common +++ b/external/vsi_common @@ -1 +1 @@ -Subproject commit c98eb284c89311e5b1d89a949660a0fa24df3818 +Subproject commit 1e039570b5a5e61e4eb02707d150e6a3480b7e03 From 04d951c94a408cb484e36830e6e2b8364d883eb7 Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Sun, 5 Apr 2020 17:34:01 -0400 Subject: [PATCH 03/94] POC of celery task with settings runs Signed-off-by: Andy Neff --- Justfile | 7 +++++++ README.md | 10 ++++++++++ external/vsi_common | 2 +- terra/core/settings.py | 7 +++++-- terra/workflow.py | 8 ++++++++ 5 files changed, 31 insertions(+), 3 deletions(-) diff --git a/Justfile b/Justfile index 511fbd9a..d537042d 100755 --- a/Justfile +++ b/Justfile @@ -130,6 +130,13 @@ function terra_caseify() Terra_Pipenv run celery -A terra.executor.celery.app worker --loglevel="${TERRA_CELLER_LOG_LEVEL-INFO}" -n "${node_name}" ;; + run_flower) # Start the flower server + Terra_Pipenv run celery -A terra.executor.celery.app flower + ;; + shutdown_celery) # Shuts down all celery works on all nodes + Terra_Pipenv run python -c "from terra.executor.celery import app; app.control.broadcast('shutdown')" + ;; + ### Run Debugging containers ### generate-redis-commander-hash) # Generate a redis commander hash touch "${TERRA_REDIS_COMMANDER_SECRET_FILE}" diff --git a/README.md b/README.md index 255a3c3a..2167ed72 100644 --- a/README.md +++ b/README.md @@ -17,3 +17,13 @@ source 'setup.env' just terra sync just terra run ``` + +## Running an app in celery + +1. `just terra up` - To start redis queue (only once) +2. `just run celery` - To start a celery worker (run on each worker node) +3. `just run dsm ...` - To start processing job + +When done +4. 
`just shutdown celery` - To shutdown _all_ celery workers on _all_ nodes +5. `just terra down` - To shutdown redis. \ No newline at end of file diff --git a/external/vsi_common b/external/vsi_common index 1e039570..f6f04189 160000 --- a/external/vsi_common +++ b/external/vsi_common @@ -1 +1 @@ -Subproject commit 1e039570b5a5e61e4eb02707d150e6a3480b7e03 +Subproject commit f6f0418971e44a4b5ffa3688f49668af8ecff276 diff --git a/terra/core/settings.py b/terra/core/settings.py index b4f644e4..5e6c04c6 100644 --- a/terra/core/settings.py +++ b/terra/core/settings.py @@ -436,8 +436,11 @@ def __getstate__(self): def __setstate__(self, state): self._wrapped = state['_wrapped'] - from terra.core.signals import post_settings_configured - post_settings_configured.send(sender=self) + # This should NOT be done on a pre instance basis, this if only for + # the global terra.settings. So maybe this should be done in context + # manager?? + # from terra.core.signals import post_settings_configured + # post_settings_configured.send(sender=self) def __repr__(self): # Hardcode the class name as otherwise it yields 'Settings'. diff --git a/terra/workflow.py b/terra/workflow.py index ae4bb9cd..1f8fd060 100644 --- a/terra/workflow.py +++ b/terra/workflow.py @@ -1,3 +1,6 @@ +from uuid import uuid4 +from datetime import datetime + from terra import settings from terra.logger import getLogger logger = getLogger(__name__) @@ -8,6 +11,10 @@ class BaseWorkflow: The base class for all Terra Workflows ''' + def __init__(self): + self.uuid = uuid4() + self.start_time = datetime.now() + def run(self): pass @@ -21,6 +28,7 @@ class PipelineWorkflow: def __init__(self): self.pipeline = list() + super().__init__() # locate index of service name in workflow pipeline def service_index(self, service_name=None, default_index=0): From cd1a85a8c1cd602f369780f8a0f4c56b3704b0a0 Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Sun, 5 Apr 2020 19:29:47 -0400 Subject: [PATCH 04/94] Logging can now be reconfigured - Even supports the setting context manager - Added FORKED_BY_MULTIPROCESSING in hopes of Windows support? 
- Added TERRA_IS_CELERY_WORKER so I can know if I'm a worker - The main logger is now setup by the Executor, so in the future ProcessPool and celery can support their own logging schemes Signed-off-by: Andy Neff --- Justfile | 9 +++- docker-compose-main.yml | 3 +- terra/core/settings.py | 8 ++- terra/core/signals.py | 2 + terra/executor/celery/__init__.py | 6 +++ terra/executor/utils.py | 37 +++++++++++++- terra/logger.py | 81 ++++++++++++++++++------------- 7 files changed, 109 insertions(+), 37 deletions(-) diff --git a/Justfile b/Justfile index d537042d..e24961bb 100755 --- a/Justfile +++ b/Justfile @@ -127,7 +127,14 @@ function terra_caseify() node_name="docker@%h" fi - Terra_Pipenv run celery -A terra.executor.celery.app worker --loglevel="${TERRA_CELLER_LOG_LEVEL-INFO}" -n "${node_name}" + # Untested + if [ "${OS-}" = "Windows_NT" ]; then + # https://www.distributedpython.com/2018/08/21/celery-4-windows/ + local FORKED_BY_MULTIPROCESSING + export FORKED_BY_MULTIPROCESSING=1 + fi + + TERRA_IS_CELERY_WORKER=1 Terra_Pipenv run celery -A terra.executor.celery.app worker --loglevel="${TERRA_CELLER_LOG_LEVEL-INFO}" -n "${node_name}" ;; run_flower) # Start the flower server diff --git a/docker-compose-main.yml b/docker-compose-main.yml index ea86c742..49eca801 100644 --- a/docker-compose-main.yml +++ b/docker-compose-main.yml @@ -6,7 +6,7 @@ services: dockerfile: docker/terra.Dockerfile # prevent different users from clobbering each others images image: ${TERRA_DOCKER_REPO}:terra_${TERRA_USERNAME} - environment: + environment: &terra_environment # Variables for docker_entrypoint.bsh - DOCKER_UID=${TERRA_UID} - DOCKER_GIDS=${TERRA_GIDS} @@ -17,6 +17,7 @@ services: - JUST_SETTINGS=${TERRA_TERRA_DIR_DOCKER}/terra.env - PYTHONPATH=${TERRA_PYTHONPATH-} - TZ + - TERRA_IS_CELERY_WORKER=${TERRA_IS_CELERY_WORKER-0} cap_add: - SYS_PTRACE # Useful for gdb volumes: diff --git a/terra/core/settings.py b/terra/core/settings.py index 5e6c04c6..2bb6b51a 100644 --- a/terra/core/settings.py +++ b/terra/core/settings.py @@ -533,7 +533,13 @@ def __enter__(self): return self._wrapped.__enter__() def __exit__(self, exc_type=None, exc_value=None, traceback=None): - return self._wrapped.__exit__(exc_type, exc_value, traceback) + return_value = self._wrapped.__exit__(exc_type, exc_value, traceback) + + # Incase the logger was messed with in the context, reset it. + from terra.core.signals import post_settings_context + post_settings_context.send(sender=self) + + return return_value class ObjectDict(dict): diff --git a/terra/core/signals.py b/terra/core/signals.py index 20f2a12f..00699321 100644 --- a/terra/core/signals.py +++ b/terra/core/signals.py @@ -353,6 +353,8 @@ def _decorator(func): manual call to :func:`terra.core.settings.LazySettings.configure`. ''' +post_settings_context = Signal() + from terra.logger import getLogger # noqa logger = getLogger(__name__) # Must be after post_settings_configured to prevent circular import errors. 
diff --git a/terra/executor/celery/__init__.py b/terra/executor/celery/__init__.py index 96420497..20c7a96c 100644 --- a/terra/executor/celery/__init__.py +++ b/terra/executor/celery/__init__.py @@ -13,6 +13,12 @@ app = Celery(env['TERRA_CELERY_MAIN_NAME']) app.config_from_object(env['TERRA_CELERY_CONF']) +# app.running = False +# from celery.signals import worker_process_init +# @worker_process_init.connect +# def set_running(*args, **kwargs): +# app.running = True + # import traceback # traceback.print_stack() diff --git a/terra/executor/utils.py b/terra/executor/utils.py index d7432a22..38739561 100644 --- a/terra/executor/utils.py +++ b/terra/executor/utils.py @@ -1,7 +1,11 @@ +import os +import logging import concurrent.futures +from importlib import import_module + from terra import settings from terra.core.utils import ClassHandler -from importlib import import_module +import terra.logger class ExecutorHandler(ClassHandler): @@ -50,6 +54,37 @@ def configuration_map(self, service_info): return {} return self._connection.configuration_map(service_info) + def reconfigure_logger(self, logging_handler): + # The default logging handler is a StreamHandler. This will reconfigure the + # Stream handler, should + log_file = os.path.join(settings.processing_dir, + terra.logger._logs.default_log_prefix) + + if not os.path.samefile(log_file, self._log_file.name): + os.makedirs(settings.processing_dir, exist_ok=True) + log_file = open(log_file, 'a') + + def configure_logger(self): + # ThreadPoolExecutor will work just fine with a normal StreamHandler + + try: + self._configure_logger() + # In CeleryPoolExecutor, use the Celery logger. + # Use this to determine if main process or just a worker? + # https://stackoverflow.com/a/45022530/4166604 + # Use JUST_IS_CELERY_WORKER + except AttributeError: + # Setup log file for use in configure + self._log_file = os.path.join(settings.processing_dir, + terra.logger._logs.default_log_prefix) + os.makedirs(settings.processing_dir, exist_ok=True) + self._log_file = open(self._log_file, 'a') + + self._logging_handler = logging.StreamHandler(stream=self._log_file) + return self._logging_handler + + # TODO: ProcessPool - Log server + Executor = ExecutorHandler() '''ExecutorHandler: The executor handler that all services will be interfacing diff --git a/terra/logger.py b/terra/logger.py index 7357d0b3..901b8f53 100644 --- a/terra/logger.py +++ b/terra/logger.py @@ -156,11 +156,11 @@ def __init__(self): self.preconfig_stderr_handler.setFormatter(self.default_formatter) self.root_logger.addHandler(self.preconfig_stderr_handler) - self.preconfig_file_handler = \ + self.preconfig_main_log_handler = \ logging.handlers.MemoryHandler(capacity=1000) - self.preconfig_file_handler.setLevel(0) - self.preconfig_file_handler.setFormatter(self.default_formatter) - self.root_logger.addHandler(self.preconfig_file_handler) + self.preconfig_main_log_handler.setLevel(0) + self.preconfig_main_log_handler.setFormatter(self.default_formatter) + self.root_logger.addHandler(self.preconfig_main_log_handler) # Replace the exception hook with our exception handler self.setup_logging_exception_hook() @@ -230,48 +230,60 @@ def handle_traceback(*args, **kwargs): # pragma: no cover except ImportError: # pragma: no cover pass - def configure_logger(self, sender, **kwargs): - ''' - Call back function to configure the logger after settings have been - configured - ''' + def reconfigure_logger(self, sender=None, **kwargs): + if not self._configured: + self.root_logger.error("It is unexpected for 
reconfigure_logger to be " + "called, without first calling configure_logger. " + "This is not critical, but should not happen.") - from terra import settings - from terra.core.settings import TerraJSONEncoder + self.set_level_and_formatter() - if self._configured: - self.root_logger.error("Configure logger called twice, this is " - "unexpected") - raise ImproperlyConfigured() + # Must be imported after settings configed + from terra.executor import Executor + Executor.reconfigure_logger(self.main_log_handler) + def set_level_and_formatter(self): + from terra import settings formatter = logging.Formatter(fmt=settings.logging.format, datefmt=settings.logging.date_format, style=settings.logging.style) - # Setup log file for use in configure - self.log_file = os.path.join(settings.processing_dir, - self.default_log_prefix) - os.makedirs(settings.processing_dir, exist_ok=True) - self.log_file = open(self.log_file, 'a') - - self.file_handler = logging.StreamHandler(stream=self.log_file) - # Configure log level level = settings.logging.level if isinstance(level, str): # make level case insensitive level = level.upper() self.stderr_handler.setLevel(level) - self.file_handler.setLevel(level) + self.main_log_handler.setLevel(level) # Configure format - self.file_handler.setFormatter(formatter) + self.main_log_handler.setFormatter(formatter) self.stderr_handler.setFormatter(formatter) + def configure_logger(self, sender, **kwargs): + ''' + Call back function to configure the logger after settings have been + configured + ''' + + from terra import settings + from terra.core.settings import TerraJSONEncoder + + if self._configured: + self.root_logger.error("Configure logger called twice, this is " + "unexpected") + raise ImproperlyConfigured() + + # Must be imported after settings configed + from terra.executor import Executor + self.main_log_handler = Executor.configure_logger() + + self.set_level_and_formatter() + # Swap some handlers - self.root_logger.addHandler(self.file_handler) + self.root_logger.addHandler(self.main_log_handler) self.root_logger.removeHandler(self.preconfig_stderr_handler) - self.root_logger.removeHandler(self.preconfig_file_handler) + self.root_logger.removeHandler(self.preconfig_main_log_handler) self.root_logger.removeHandler(self.tmp_handler) settings_dump = os.path.join(settings.processing_dir, @@ -294,17 +306,17 @@ def configure_logger(self, sender, **kwargs): # Filter file buffer. 
Never remove default_stderr_handler_level message, # they won't be in the new output file - self.preconfig_file_handler.buffer = \ - [x for x in self.preconfig_file_handler.buffer - if (x.levelno >= self.file_handler.level)] + self.preconfig_main_log_handler.buffer = \ + [x for x in self.preconfig_main_log_handler.buffer + if (x.levelno >= self.main_log_handler.level)] # Flush the buffers self.preconfig_stderr_handler.setTarget(self.stderr_handler) self.preconfig_stderr_handler.flush() self.preconfig_stderr_handler = None - self.preconfig_file_handler.setTarget(self.file_handler) - self.preconfig_file_handler.flush() - self.preconfig_file_handler = None + self.preconfig_main_log_handler.setTarget(self.main_log_handler) + self.preconfig_main_log_handler.flush() + self.preconfig_main_log_handler = None self.tmp_handler = None # Remove the temporary file now that you are done with it @@ -455,3 +467,6 @@ def handle_warning(message, category, filename, lineno, file=None, line=None): # register post_configure with settings terra.core.signals.post_settings_configured.connect(_logs.configure_logger) + + # Handle a "with" settings context manager + terra.core.signals.post_settings_context.connect(_logs.reconfigure_logger) From c474330aa6af3fe05399db33c3a4da9f63ad7631 Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Tue, 7 Apr 2020 18:30:19 -0400 Subject: [PATCH 05/94] Set result serializer Signed-off-by: Andy Neff --- terra/executor/celery/celeryconfig.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/terra/executor/celery/celeryconfig.py b/terra/executor/celery/celeryconfig.py index c53e01d6..a254cfc2 100644 --- a/terra/executor/celery/celeryconfig.py +++ b/terra/executor/celery/celeryconfig.py @@ -21,7 +21,9 @@ result_backend = broker_url task_serializer = 'pickle' +result_serializer = 'pickle' accept_content = ['json', 'pickle'] +result_accept_content = ['json', 'pickle'] result_expires = 3600 # App needs to define include From 2662b62c0df45a9bb2f5f9fac5efba570a04b79d Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Wed, 8 Apr 2020 17:33:08 -0400 Subject: [PATCH 06/94] Almost working Signed-off-by: Andy Neff --- terra/compute/base.py | 20 ++++-------- terra/executor/utils.py | 6 ++-- terra/logger.py | 3 +- terra/task.py | 71 +++++++++++++++++++++++++++++++---------- 4 files changed, 67 insertions(+), 33 deletions(-) diff --git a/terra/compute/base.py b/terra/compute/base.py index b6911b11..1664ea06 100644 --- a/terra/compute/base.py +++ b/terra/compute/base.py @@ -2,7 +2,6 @@ import json from terra import settings -from terra.core.settings import TerraJSONEncoder import terra.compute.utils from terra.executor import Executor from terra.logger import getLogger @@ -78,21 +77,14 @@ def pre_run(self): :class:`terra.compute.base.BaseService` is mainly responsible for handling Executors that need a separate volume translation ''' - executor_volume_map = Executor.configuration_map(self) - # If there is a non-empty mapping, then create a custom executor settings - if executor_volume_map: - executor_settings = terra.compute.utils.translate_settings_paths( - TerraJSONEncoder.serializableSettings(settings), - executor_volume_map) + # The executor volume map is calculated on the host side, where all the + # information is available. For example if using docker and celery, then + # docker config need to be run to get the container volumes, and that has + # to be run on the host machine. So this is calculated here. 
+ settings.executor_volume_map = Executor.configuration_map(self) + logger.critical(settings.executor_volume_map) - with open(os.path.join(self.temp_dir.name, - 'executor_config.json'), 'w') as fid: - json.dump(executor_settings, fid) - - # Tell the executor, you have your own settings file. - self.env['TERRA_EXECUTOR_SETTINGS_FILE'] = \ - '/tmp_settings/executor_config.json' def post_run(self): pass diff --git a/terra/executor/utils.py b/terra/executor/utils.py index 38739561..eba711bf 100644 --- a/terra/executor/utils.py +++ b/terra/executor/utils.py @@ -60,9 +60,11 @@ def reconfigure_logger(self, logging_handler): log_file = os.path.join(settings.processing_dir, terra.logger._logs.default_log_prefix) - if not os.path.samefile(log_file, self._log_file.name): + # if not os.path.samefile(log_file, self._log_file.name): + if log_file != self._log_file.name: os.makedirs(settings.processing_dir, exist_ok=True) - log_file = open(log_file, 'a') + self._log_file.close() + self._log_file = open(log_file, 'a') def configure_logger(self): # ThreadPoolExecutor will work just fine with a normal StreamHandler diff --git a/terra/logger.py b/terra/logger.py index 901b8f53..99117fa2 100644 --- a/terra/logger.py +++ b/terra/logger.py @@ -321,7 +321,8 @@ def configure_logger(self, sender, **kwargs): # Remove the temporary file now that you are done with it self.tmp_file.close() - os.unlink(self.tmp_file.name) + if os.path.exists(self.tmp_file.name): + os.unlink(self.tmp_file.name) self.tmp_file = None self._configured = True diff --git a/terra/task.py b/terra/task.py index 0d97bce8..160f1581 100644 --- a/terra/task.py +++ b/terra/task.py @@ -1,7 +1,14 @@ from os import environ as env +from tempfile import gettempdir from celery import Task, shared_task as original_shared_task +from terra import settings +from terra.core.settings import TerraJSONEncoder +from terra.executor import Executor +import terra.logger +import terra.compute.utils + __all__ = ['TerraTask', 'shared_task'] def shared_task(*args, **kwargs): @@ -10,24 +17,56 @@ def shared_task(*args, **kwargs): return original_shared_task(*args, **kwargs) class TerraTask(Task): - @staticmethod - def _patch_settings(args, kwargs): - if 'TERRA_EXECUTOR_SETTINGS_FILE' in env: - # TODO: Cache loads for efficiency? - settings = json.load(env['TERRA_EXECUTOR_SETTINGS_FILE']) - - # If args is not empty, the first arg was settings - if args: - args[0] = settings - else: - kwargs['settings'] = settings + settings = None + # @staticmethod + # def _patch_settings(args, kwargs): + # if 'TERRA_EXECUTOR_SETTINGS_FILE' in env: + # # TODO: Cache loads for efficiency? 
+ # settings = json.load(env['TERRA_EXECUTOR_SETTINGS_FILE']) + + # # If args is not empty, the first arg was settings + # if args: + # args[0] = settings + # else: + # kwargs['settings'] = settings + + def serialize_settings(self): + # If there is a non-empty mapping, then create a custom executor settings + executor_volume_map = self.request.settings.pop('executor_volume_map', + None) + if executor_volume_map: + return terra.compute.utils.translate_settings_paths( + TerraJSONEncoder.serializableSettings(self.request.settings), + executor_volume_map) + return self.request.settings def apply_async(self, args=None, kwargs=None, task_id=None, user=None, *args2, **kwargs2): - TerraTask._patch_settings(args, kwargs) return super().apply_async(args=args, kwargs=kwargs, - task_id=task_id, *args2, **kwargs2) + task_id=task_id, *args2, headers={'settings': TerraJSONEncoder.serializableSettings(settings)}, + **kwargs2) + + # def apply(self, *args, **kwargs): + # # TerraTask._patch_settings(args, kwargs) + # return super().apply(*args, settings={'X': 15}, **kwargs) - def apply(self, *args, **kwargs): - TerraTask._patch_settings(args, kwargs) - return super().apply(*args, **kwargs) + def __call__(self, *args, **kwargs): + print('111') + if getattr(self.request, 'settings', None): + print('222') + if not settings.configured: + print('333') + settings.configure({'processing_dir': gettempdir()}) + with settings: + print('444') + print(settings) + settings._wrapped.clear() + settings._wrapped.update(self.serialize_settings()) + print(settings) + settings.processing_dir=gettempdir() + terra.logger._logs.reconfigure_logger() + return_value = self.run(*args, **kwargs) + else: + return_value = self.run(*args, **kwargs) + self.settings = None + return return_value \ No newline at end of file From 5212227e5c09ea8ddd9c492fbf78b73f36e7c57d Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Tue, 14 Apr 2020 16:08:09 -0400 Subject: [PATCH 07/94] Task map translation working for POC Signed-off-by: Andy Neff --- Justfile | 5 +++++ docs/terra/settings.rst | 13 +++++++++++++ terra/compute/base.py | 3 +-- terra/compute/container.py | 9 ++++++++- terra/core/settings.py | 20 ++++++++++++++++++-- terra/logger.py | 2 ++ terra/task.py | 24 ++++++++++++++++-------- 7 files changed, 63 insertions(+), 13 deletions(-) diff --git a/Justfile b/Justfile index e24961bb..af081528 100755 --- a/Justfile +++ b/Justfile @@ -28,6 +28,11 @@ JUST_DEFAULTIFY_FUNCTIONS+=(terra_caseify) function Terra_Pipenv() { if [[ ${TERRA_LOCAL-} == 1 ]]; then + if [ -n "${VIRTUAL_ENV+set}" ]; then + echo "Warning: You appear to be in a virtual env" >&2 + echo "Deactivate external virtual envs before running just" >&2 + ask_question "Continue?" n + fi PIPENV_PIPFILE="${TERRA_CWD}/Pipfile" pipenv ${@+"${@}"} || return $? else Just-docker-compose -f "${TERRA_CWD}/docker-compose-main.yml" run ${TERRA_PIPENV_IMAGE-terra} pipenv ${@+"${@}"} || return $? diff --git a/docs/terra/settings.rst b/docs/terra/settings.rst index 2ad9e400..21bc38dc 100644 --- a/docs/terra/settings.rst +++ b/docs/terra/settings.rst @@ -1,6 +1,19 @@ .. _settings: +Terra Settings +-------------- + +.. option:: terra.zone + + Terra can be running in one of three areas of execution, or "zones": the master controller (``controller``), a service runner (``runner``), or a task (``task``). The different zones could all be running on the main host, or other containers or computers, depending on the compute and executor. 
+ + The master controller includes: the CLI, workflow, stage and service definitions layers. + + This variable is automatically updated, and should only be read. + + Default: ``controller`` + Workflow Settings ----------------- diff --git a/terra/compute/base.py b/terra/compute/base.py index 1664ea06..974e51bb 100644 --- a/terra/compute/base.py +++ b/terra/compute/base.py @@ -83,8 +83,7 @@ def pre_run(self): # docker config need to be run to get the container volumes, and that has # to be run on the host machine. So this is calculated here. settings.executor_volume_map = Executor.configuration_map(self) - logger.critical(settings.executor_volume_map) - + logger.debug3("Executor Volume map: %s", settings.executor_volume_map) def post_run(self): pass diff --git a/terra/compute/container.py b/terra/compute/container.py index 1477f795..63842239 100644 --- a/terra/compute/container.py +++ b/terra/compute/container.py @@ -29,6 +29,10 @@ def __init__(self): self.extra_compose_files = [] def pre_run(self): + # Need to run Base's pre_run first, so it has a change to update settings + # for special exectutors, etc... + super().pre_run() + self.temp_dir = TemporaryDirectory() temp_dir = pathlib.Path(self.temp_dir.name) @@ -78,7 +82,10 @@ def pre_run(self): with open(temp_dir / 'config.json', 'w') as fid: json.dump(container_config, fid) - super().pre_run() + # Dump the original setting too, incase an executor needs to perform map + # translation too + with open(temp_dir / 'config.json.orig', 'w') as fid: + json.dump(TerraJSONEncoder.serializableSettings(settings), fid) def post_run(self): super().post_run() diff --git a/terra/core/settings.py b/terra/core/settings.py index 2bb6b51a..1bd687bb 100644 --- a/terra/core/settings.py +++ b/terra/core/settings.py @@ -149,12 +149,14 @@ import os from inspect import isfunction from functools import wraps +from json import JSONEncoder from terra.core.exceptions import ImproperlyConfigured +# Do not import terra.logger or terra.signals here, or any module that +# imports them from vsi.tools.python import ( nested_patch_inplace, nested_patch, nested_update, nested_in_dict ) -from json import JSONEncoder try: import jstyleson as json @@ -266,6 +268,9 @@ def unittest(self): "compute": { "arch": "terra.compute.dummy" }, + 'terra': { + 'zone': 'controller', + }, 'status_file': status_file, 'processing_dir': processing_dir, 'unittest': unittest, @@ -675,11 +680,22 @@ def serializableSettings(obj): if isinstance(obj, LazySettings): obj = obj._wrapped - return nested_patch( + # I do not os.path.expandvars(val) here, because the Just-docker-compose + # takes care of that for me, so I can still use the envvar names in the + # containers + + obj = nested_patch( obj, lambda k, v: isfunction(v) and hasattr(v, 'settings_property'), lambda k, v: v(obj)) + obj = nested_patch( + obj, + lambda k, v: any(v is not None and k.endswith(pattern) for pattern in filename_suffixes), + lambda k, v: os.path.expanduser(v)) + + return obj + @staticmethod def dumps(obj, **kwargs): ''' diff --git a/terra/logger.py b/terra/logger.py index 99117fa2..b45f612d 100644 --- a/terra/logger.py +++ b/terra/logger.py @@ -69,6 +69,8 @@ from datetime import datetime, timezone from terra.core.exceptions import ImproperlyConfigured +# Do not import terra.settings or terra.signals here, or any module that +# imports them from logging import ( CRITICAL, ERROR, INFO, FATAL, WARN, WARNING, NOTSET, diff --git a/terra/task.py b/terra/task.py index 160f1581..994279fc 100644 --- a/terra/task.py +++ b/terra/task.py 
@@ -1,4 +1,6 @@ +import json from os import environ as env +import os from tempfile import gettempdir from celery import Task, shared_task as original_shared_task @@ -8,6 +10,8 @@ from terra.executor import Executor import terra.logger import terra.compute.utils +from terra.logger import getLogger +logger = getLogger(__name__) __all__ = ['TerraTask', 'shared_task'] @@ -42,8 +46,10 @@ def serialize_settings(self): def apply_async(self, args=None, kwargs=None, task_id=None, user=None, *args2, **kwargs2): + with open(f'{env["TERRA_SETTINGS_FILE"]}.orig', 'r') as fid: + original_settings = json.load(fid) return super().apply_async(args=args, kwargs=kwargs, - task_id=task_id, *args2, headers={'settings': TerraJSONEncoder.serializableSettings(settings)}, + task_id=task_id, *args2, headers={'settings': original_settings}, **kwargs2) # def apply(self, *args, **kwargs): @@ -51,19 +57,21 @@ def apply_async(self, args=None, kwargs=None, task_id=None, user=None, # return super().apply(*args, settings={'X': 15}, **kwargs) def __call__(self, *args, **kwargs): - print('111') if getattr(self.request, 'settings', None): - print('222') if not settings.configured: - print('333') settings.configure({'processing_dir': gettempdir()}) with settings: - print('444') - print(settings) + logger.critical(settings) settings._wrapped.clear() settings._wrapped.update(self.serialize_settings()) - print(settings) - settings.processing_dir=gettempdir() + if not os.path.exists(settings.processing_dir): + logger.critical(f'Dir "{settings.processing_dir}" is not accessible ' + 'by the executor, please make sure the worker has ' + 'access to this directory') + settings.processing_dir = gettempdir() + logger.warning('Using temporary directory: ' + f'"{settings.processing_dir}" for the processing dir') + logger.critical(settings) terra.logger._logs.reconfigure_logger() return_value = self.run(*args, **kwargs) else: From 57789cfa14910169902178f42a12947d7c3abf95 Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Wed, 15 Apr 2020 16:24:21 -0400 Subject: [PATCH 08/94] Added zone to logger Signed-off-by: Andy Neff --- terra/core/settings.py | 2 +- terra/logger.py | 44 +++++++++++++++++++++++++++++++++++------- 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/terra/core/settings.py b/terra/core/settings.py index 1bd687bb..80d00b25 100644 --- a/terra/core/settings.py +++ b/terra/core/settings.py @@ -258,7 +258,7 @@ def unittest(self): { "logging": { "level": "ERROR", - "format": f"%(asctime)s (%(hostname)s): %(levelname)s - %(message)s", + "format": f"%(asctime)s (%(hostname)s:%(zone)s): %(levelname)s - %(message)s", "date_format": None, "style": "%" }, diff --git a/terra/logger.py b/terra/logger.py index b45f612d..ff8d5fe1 100644 --- a/terra/logger.py +++ b/terra/logger.py @@ -73,14 +73,15 @@ # imports them from logging import ( - CRITICAL, ERROR, INFO, FATAL, WARN, WARNING, NOTSET, + CRITICAL, ERROR, INFO, FATAL, WARN, WARNING, NOTSET, Filter, getLogger, _acquireLock, _releaseLock, currentframe, _srcfile as logging_srcfile, Logger as Logger_original ) __all__ = ['getLogger', 'CRITICAL', 'ERROR', 'INFO', 'FATAL', 'WARN', - 'WARNING', 'NOTSET', 'DEBUG1', 'DEBUG2', 'DEBUG3', 'Logger'] + 'WARNING', 'NOTSET', 'DEBUG1', 'DEBUG2', 'DEBUG3', 'DEBUG4', + 'Logger'] class HandlerLoggingContext(object): @@ -125,8 +126,8 @@ class _SetupTerraLogger(): A simple logger class used internally to configure the logger before and after :data:`terra.settings` is configured ''' - default_formatter = logging.Formatter('%(asctime)s (preconfig) : ' 
- '%(levelname)s - %(message)s') + default_formatter = logging.Formatter('%(asctime)s (%(hostname)s:%(zone)s) :' + ' %(levelname)s - %(message)s') default_stderr_handler_level = logging.WARNING default_tmp_prefix = "terra_initial_tmp_" default_log_prefix = "terra_log" @@ -330,11 +331,24 @@ def configure_logger(self, sender, **kwargs): self._configured = True -extra_logger_variables = {'hostname': platform.node()} -'''dict: Extra logger variables that can be reference in log messages''' +class TerraFilter(logging.Filter): + def filter(self, record): + record.hostname = platform.node() + if terra.settings.configured: + record.zone = terra.settings.terra.zone + else: + record.zone = 'preconfig' + return True class Logger(Logger_original): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + # I like https://stackoverflow.com/a/17558764/4166604 better than + # https://stackoverflow.com/a/28050837/4166604, it has the ability to add + # logic/function calls, if I so desire + self.addFilter(TerraFilter()) + def findCaller(self, stack_info=False, stacklevel=1): """ Find the stack frame of the caller so that we can note the source @@ -375,7 +389,7 @@ def findCaller(self, stack_info=False, stacklevel=1): # Define _log instead of logger adapter, this works better (setLoggerClass) # https://stackoverflow.com/a/28050837/4166604 def _log(self, *args, **kwargs): - kwargs['extra'] = extra_logger_variables + # kwargs['extra'] = extra_logger_variables return super()._log(*args, **kwargs) def debug1(self, msg, *args, **kwargs): @@ -399,6 +413,13 @@ def debug3(self, msg, *args, **kwargs): ''' self.log(DEBUG3, msg, *args, **kwargs) + def debug4(self, msg, *args, **kwargs): + ''' + Logs a message with level :data:`DEBUG4` on this logger. The arguments are + interpreted as for :func:`logging.debug` + ''' + self.log(DEBUG4, msg, *args, **kwargs) + fatal = logging.LoggerAdapter.critical @@ -449,9 +470,18 @@ def handle_warning(message, category, filename, lineno, file=None, line=None): output used to debug algorithms ''' +DEBUG4 = 7 +''' +Debug level four, even more verbose. 
+ +Should be used for spamming the screen +''' + + logging.addLevelName(DEBUG1, "DEBUG1") logging.addLevelName(DEBUG2, "DEBUG2") logging.addLevelName(DEBUG3, "DEBUG3") +logging.addLevelName(DEBUG4, "DEBUG4") logging.setLoggerClass(Logger) From 4c5385877c39e0c470bb7a2058e5988d52775e4b Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Wed, 15 Apr 2020 18:23:00 -0400 Subject: [PATCH 09/94] terra.zone set to task in dummy, sync, and celery executor Signed-off-by: Andy Neff --- terra/compute/container.py | 1 + terra/core/settings.py | 2 +- terra/executor/celery/executor.py | 2 +- terra/executor/dummy.py | 18 +++++++++++------- terra/executor/sync.py | 22 ++++++++++++---------- terra/logger.py | 9 ++++++++- terra/task.py | 1 + 7 files changed, 35 insertions(+), 20 deletions(-) diff --git a/terra/compute/container.py b/terra/compute/container.py index 63842239..21223788 100644 --- a/terra/compute/container.py +++ b/terra/compute/container.py @@ -79,6 +79,7 @@ def pre_run(self): + '|TERRA_SETTINGS_FILE' # Dump the settings + container_config['terra']['zone'] = 'runner' with open(temp_dir / 'config.json', 'w') as fid: json.dump(container_config, fid) diff --git a/terra/core/settings.py b/terra/core/settings.py index 80d00b25..f10db76f 100644 --- a/terra/core/settings.py +++ b/terra/core/settings.py @@ -258,7 +258,7 @@ def unittest(self): { "logging": { "level": "ERROR", - "format": f"%(asctime)s (%(hostname)s:%(zone)s): %(levelname)s - %(message)s", + "format": f"%(asctime)s (%(hostname)s:%(zone)s): %(levelname)s - %(filename)s - %(message)s", "date_format": None, "style": "%" }, diff --git a/terra/executor/celery/executor.py b/terra/executor/celery/executor.py index e0648dbb..2a77d78f 100644 --- a/terra/executor/celery/executor.py +++ b/terra/executor/celery/executor.py @@ -191,7 +191,7 @@ def submit(self, fn, *args, **kwargs): def shutdown(self, wait=True): with self._shutdown_lock: self._shutdown = True - for fut in self._futures: + for fut in tuple(self._futures): fut.cancel() if wait: diff --git a/terra/executor/dummy.py b/terra/executor/dummy.py index 7450ba11..90fb822d 100644 --- a/terra/executor/dummy.py +++ b/terra/executor/dummy.py @@ -7,7 +7,7 @@ class DummyExecutor(Executor): """ - Executor that does the nothin, just logs what would happen. + Executor that does nothing, just logs what would happen. """ def __init__(self, *arg, **kwargs): @@ -21,12 +21,16 @@ def submit(self, fn, *args, **kwargs): if self._shutdown: raise RuntimeError('cannot schedule new futures after shutdown') - f = Future() - logger.info(f'Run function: {fn}') - logger.info(f'With args: {args}') - logger.info(f'With kwargs: {kwargs}') - f.set_result(None) - return f + from terra import settings + + with settings: + settings.terra.zone = 'task' + f = Future() + logger.info(f'Run function: {fn}') + logger.info(f'With args: {args}') + logger.info(f'With kwargs: {kwargs}') + f.set_result(None) + return f def shutdown(self, wait=True): with self._shutdown_lock: diff --git a/terra/executor/sync.py b/terra/executor/sync.py index 2eba10cb..35cd333c 100644 --- a/terra/executor/sync.py +++ b/terra/executor/sync.py @@ -3,8 +3,6 @@ # No need for a global shutdown lock here, not multi-threaded/process - - class SyncExecutor(Executor): """ Executor that does the job synchronously. 
@@ -27,15 +25,19 @@ def submit(self, fn, *args, **kwargs): if self._shutdown: raise RuntimeError('cannot schedule new futures after shutdown') - f = Future() - try: - result = fn(*args, **kwargs) - except BaseException as e: - f.set_exception(e) - else: - f.set_result(result) + from terra import settings + with settings: + settings.terra.zone = 'task' + + f = Future() + try: + result = fn(*args, **kwargs) + except BaseException as e: + f.set_exception(e) + else: + f.set_result(result) - return f + return f def shutdown(self, wait=True): with self._shutdown_lock: diff --git a/terra/logger.py b/terra/logger.py index ff8d5fe1..f2e4a6ab 100644 --- a/terra/logger.py +++ b/terra/logger.py @@ -127,7 +127,8 @@ class _SetupTerraLogger(): after :data:`terra.settings` is configured ''' default_formatter = logging.Formatter('%(asctime)s (%(hostname)s:%(zone)s) :' - ' %(levelname)s - %(message)s') + ' %(levelname)s - %(filename)s -' + ' %(message)s') default_stderr_handler_level = logging.WARNING default_tmp_prefix = "terra_initial_tmp_" default_log_prefix = "terra_log" @@ -178,6 +179,12 @@ def __init__(self): # Enable warnings to default warnings.simplefilter('default') + warnings.filterwarnings("ignore", + category=DeprecationWarning, module='yaml', + message="ABCs from 'collections' instead of from 'collections.abc'") + warnings.filterwarnings("ignore", + category=DeprecationWarning, module='osgeo', + message="the imp module is deprecated") def setup_logging_exception_hook(self): ''' diff --git a/terra/task.py b/terra/task.py index 994279fc..261679db 100644 --- a/terra/task.py +++ b/terra/task.py @@ -72,6 +72,7 @@ def __call__(self, *args, **kwargs): logger.warning('Using temporary directory: ' f'"{settings.processing_dir}" for the processing dir') logger.critical(settings) + settings.terra.zone = 'task' terra.logger._logs.reconfigure_logger() return_value = self.run(*args, **kwargs) else: From 4672189cf2e1e99377155a1e801621a846ce0ba7 Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Thu, 16 Apr 2020 11:24:05 -0400 Subject: [PATCH 10/94] Set terra.zone for tasks universally Signed-off-by: Andy Neff --- terra/executor/dummy.py | 16 ++++++---------- terra/executor/sync.py | 22 +++++++++------------- terra/task.py | 9 +++++++-- 3 files changed, 22 insertions(+), 25 deletions(-) diff --git a/terra/executor/dummy.py b/terra/executor/dummy.py index 90fb822d..febcf156 100644 --- a/terra/executor/dummy.py +++ b/terra/executor/dummy.py @@ -21,16 +21,12 @@ def submit(self, fn, *args, **kwargs): if self._shutdown: raise RuntimeError('cannot schedule new futures after shutdown') - from terra import settings - - with settings: - settings.terra.zone = 'task' - f = Future() - logger.info(f'Run function: {fn}') - logger.info(f'With args: {args}') - logger.info(f'With kwargs: {kwargs}') - f.set_result(None) - return f + f = Future() + logger.info(f'Run function: {fn}') + logger.info(f'With args: {args}') + logger.info(f'With kwargs: {kwargs}') + f.set_result(None) + return f def shutdown(self, wait=True): with self._shutdown_lock: diff --git a/terra/executor/sync.py b/terra/executor/sync.py index 35cd333c..c9c67cef 100644 --- a/terra/executor/sync.py +++ b/terra/executor/sync.py @@ -25,19 +25,15 @@ def submit(self, fn, *args, **kwargs): if self._shutdown: raise RuntimeError('cannot schedule new futures after shutdown') - from terra import settings - with settings: - settings.terra.zone = 'task' - - f = Future() - try: - result = fn(*args, **kwargs) - except BaseException as e: - f.set_exception(e) - else: - 
f.set_result(result) - - return f + f = Future() + try: + result = fn(*args, **kwargs) + except BaseException as e: + f.set_exception(e) + else: + f.set_result(result) + + return f def shutdown(self, wait=True): with self._shutdown_lock: diff --git a/terra/task.py b/terra/task.py index 261679db..e942efa4 100644 --- a/terra/task.py +++ b/terra/task.py @@ -15,11 +15,13 @@ __all__ = ['TerraTask', 'shared_task'] + def shared_task(*args, **kwargs): - kwargs['bind'] = True - kwargs['base'] = TerraTask + kwargs['bind'] = kwargs.pop('bin', True) + kwargs['base'] = kwargs.pop('base', TerraTask) return original_shared_task(*args, **kwargs) + class TerraTask(Task): settings = None # @staticmethod @@ -76,6 +78,9 @@ def __call__(self, *args, **kwargs): terra.logger._logs.reconfigure_logger() return_value = self.run(*args, **kwargs) else: + original_zone = settings.terra.zone + settings.terra.zone = 'task' return_value = self.run(*args, **kwargs) + settings.terra.zone = original_zone self.settings = None return return_value \ No newline at end of file From c61d107c449d7d6c18772aea4eda1a0adce7e898 Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Thu, 16 Apr 2020 11:28:46 -0400 Subject: [PATCH 11/94] Fix typo [ci-skip] Signed-off-by: Andy Neff --- terra/task.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terra/task.py b/terra/task.py index e942efa4..31cd66a7 100644 --- a/terra/task.py +++ b/terra/task.py @@ -17,7 +17,7 @@ def shared_task(*args, **kwargs): - kwargs['bind'] = kwargs.pop('bin', True) + kwargs['bind'] = kwargs.pop('bind', True) kwargs['base'] = kwargs.pop('base', TerraTask) return original_shared_task(*args, **kwargs) From e4ac27bb02707fa8070a818971280a99ddededbf Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Thu, 16 Apr 2020 18:46:41 -0400 Subject: [PATCH 12/94] Auto translate arguments to task functions - Now executor settings are auto translated from runner to exectutor, instead of from master controller to executor. This clears the path to more consistent behavior when it comes to passing in arguments. - Task args and kwargs that match the same suffix patterns settings so, will now be auto translated from runner to task. This should make for a more seemless transaction to task, as long as the variable names are right. - Executor and compute both have a volume map store in the settings now. --- terra/compute/base.py | 4 +-- terra/compute/container.py | 7 ++-- terra/core/settings.py | 8 +++-- terra/executor/dummy.py | 15 ++++---- terra/task.py | 71 +++++++++++++++++++++++++++++--------- 5 files changed, 73 insertions(+), 32 deletions(-) diff --git a/terra/compute/base.py b/terra/compute/base.py index 974e51bb..6aa1f86b 100644 --- a/terra/compute/base.py +++ b/terra/compute/base.py @@ -82,8 +82,8 @@ def pre_run(self): # information is available. For example if using docker and celery, then # docker config need to be run to get the container volumes, and that has # to be run on the host machine. So this is calculated here. 
- settings.executor_volume_map = Executor.configuration_map(self) - logger.debug3("Executor Volume map: %s", settings.executor_volume_map) + settings.executor.volume_map = Executor.configuration_map(self) + logger.debug4("Executor Volume map: %s", settings.executor.volume_map) def post_run(self): pass diff --git a/terra/compute/container.py b/terra/compute/container.py index 21223788..a3df6a8f 100644 --- a/terra/compute/container.py +++ b/terra/compute/container.py @@ -58,9 +58,8 @@ def pre_run(self): volume_str env_volume_index += 1 - volume_map = compute.configuration_map(self) - - logger.debug3("Compute Volume map: %s", volume_map) + settings.compute.volume_map = compute.configuration_map(self) + logger.debug4("Compute Volume map: %s", settings.compute.volume_map) # Setup config file for container @@ -68,7 +67,7 @@ def pre_run(self): container_config = translate_settings_paths( TerraJSONEncoder.serializableSettings(settings), - volume_map, + settings.compute.volume_map, self.container_platform) if os.name == "nt": # pragma: no linux cover diff --git a/terra/core/settings.py b/terra/core/settings.py index f10db76f..dd5e9bea 100644 --- a/terra/core/settings.py +++ b/terra/core/settings.py @@ -263,13 +263,15 @@ def unittest(self): "style": "%" }, "executor": { - "type": "ThreadPoolExecutor" + "type": "ThreadPoolExecutor", + 'volume_map': [] }, "compute": { - "arch": "terra.compute.dummy" + "arch": "terra.compute.dummy", + 'volume_map': [] }, 'terra': { - 'zone': 'controller', + 'zone': 'controller' }, 'status_file': status_file, 'processing_dir': processing_dir, diff --git a/terra/executor/dummy.py b/terra/executor/dummy.py index febcf156..49207135 100644 --- a/terra/executor/dummy.py +++ b/terra/executor/dummy.py @@ -1,6 +1,7 @@ from concurrent.futures import Future, Executor from threading import Lock +from terra import settings from terra.logger import getLogger logger = getLogger(__name__) @@ -21,12 +22,14 @@ def submit(self, fn, *args, **kwargs): if self._shutdown: raise RuntimeError('cannot schedule new futures after shutdown') - f = Future() - logger.info(f'Run function: {fn}') - logger.info(f'With args: {args}') - logger.info(f'With kwargs: {kwargs}') - f.set_result(None) - return f + with settings: + settings.terra.zone = 'task' + f = Future() + logger.info(f'Run function: {fn}') + logger.info(f'With args: {args}') + logger.info(f'With kwargs: {kwargs}') + f.set_result(None) + return f def shutdown(self, wait=True): with self._shutdown_lock: diff --git a/terra/task.py b/terra/task.py index 31cd66a7..21cbc0c5 100644 --- a/terra/task.py +++ b/terra/task.py @@ -5,6 +5,8 @@ from celery import Task, shared_task as original_shared_task +from vsi.tools.python import args_to_kwargs, ARGS, KWARGS + from terra import settings from terra.core.settings import TerraJSONEncoder from terra.executor import Executor @@ -36,36 +38,67 @@ class TerraTask(Task): # else: # kwargs['settings'] = settings - def serialize_settings(self): - # If there is a non-empty mapping, then create a custom executor settings - executor_volume_map = self.request.settings.pop('executor_volume_map', - None) + def _get_volume_mappings(self): + executor_volume_map = self.request.settings['executor']['volume_map'] + if executor_volume_map: - return terra.compute.utils.translate_settings_paths( - TerraJSONEncoder.serializableSettings(self.request.settings), - executor_volume_map) - return self.request.settings + reverse_compute_volume_map = \ + self.request.settings['compute']['volume_map'] + # Flip each mount point, so it 
goes from runner to controller + reverse_compute_volume_map = [[x[1], x[0]] + for x in reverse_compute_volume_map] + # Revere order. This will be important in case one mount point mounts + # inside another + reverse_compute_volume_map.reverse() + else: + reverse_compute_volume_map = [] - def apply_async(self, args=None, kwargs=None, task_id=None, user=None, + return (reverse_compute_volume_map, executor_volume_map) + + def translate_paths(self, payload, reverse_compute_volume_map, + executor_volume_map): + if reverse_compute_volume_map or executor_volume_map: + # If either translation is needed, start by applying the ~ home dir + # expansion and settings_property(which wouldn't have made it through + # pure json conversion, but the ~ will + payload = TerraJSONEncoder.serializableSettings(payload) + # Go from compute runner to master controller + if reverse_compute_volume_map: + payload = terra.compute.utils.translate_settings_paths( + payload, reverse_compute_volume_map) + # Go from master controller to exector + if executor_volume_map: + payload = terra.compute.utils.translate_settings_paths( + payload, executor_volume_map) + return payload + + def apply_async(self, args=None, kwargs=None, task_id=None, *args2, **kwargs2): - with open(f'{env["TERRA_SETTINGS_FILE"]}.orig', 'r') as fid: - original_settings = json.load(fid) + with open(env["TERRA_SETTINGS_FILE"], 'r') as fid: + current_settings = json.load(fid) return super().apply_async(args=args, kwargs=kwargs, - task_id=task_id, *args2, headers={'settings': original_settings}, - **kwargs2) + headers={'settings': current_settings}, + task_id=task_id, *args2, **kwargs2) + # Don't need to apply translations for apply, it runs locally # def apply(self, *args, **kwargs): # # TerraTask._patch_settings(args, kwargs) # return super().apply(*args, settings={'X': 15}, **kwargs) def __call__(self, *args, **kwargs): + # this is only set when apply_async was called. 
if getattr(self.request, 'settings', None): if not settings.configured: + # Cover a potential (unlikely) corner case where setting might not be + # configured yet settings.configure({'processing_dir': gettempdir()}) with settings: - logger.critical(settings) + reverse_compute_volume_map, executor_volume_map = \ + self._get_volume_mappings() + settings._wrapped.clear() - settings._wrapped.update(self.serialize_settings()) + settings._wrapped.update(self.translate_paths(self.request.settings, + reverse_compute_volume_map, executor_volume_map)) if not os.path.exists(settings.processing_dir): logger.critical(f'Dir "{settings.processing_dir}" is not accessible ' 'by the executor, please make sure the worker has ' @@ -73,10 +106,14 @@ def __call__(self, *args, **kwargs): settings.processing_dir = gettempdir() logger.warning('Using temporary directory: ' f'"{settings.processing_dir}" for the processing dir') - logger.critical(settings) settings.terra.zone = 'task' + kwargs = args_to_kwargs(self.run, args, kwargs) + args_only = kwargs.pop(ARGS, ()) + kwargs.update(kwargs.pop(KWARGS, ())) + kwargs = self.translate_paths(kwargs, + reverse_compute_volume_map, executor_volume_map) terra.logger._logs.reconfigure_logger() - return_value = self.run(*args, **kwargs) + return_value = self.run(*args_only, **kwargs) else: original_zone = settings.terra.zone settings.terra.zone = 'task' From e87e77f6162fa9ecaee5376ba3b7bb4c01c225a8 Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Thu, 16 Apr 2020 20:31:32 -0400 Subject: [PATCH 13/94] Misc changed Signed-off-by: Andy Neff --- terra/compute/utils.py | 2 +- terra/compute/virtualenv.py | 2 +- terra/executor/dummy.py | 18 ++++++++++-------- terra/task.py | 1 + 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/terra/compute/utils.py b/terra/compute/utils.py index 7a9e4dd4..8d155dd1 100644 --- a/terra/compute/utils.py +++ b/terra/compute/utils.py @@ -189,7 +189,7 @@ def just(*args, **kwargs): if logger.getEffectiveLevel() <= DEBUG1: dd = dict_diff(env, just_env)[3] if dd: - logger.debug1('Environment Modification:\n' + '\n'.join(dd)) + logger.debug4('Environment Modification:\n' + '\n'.join(dd)) # Get bash path for windows compatibility. 
I can't explain this error, but # while the PATH is set right, I can't call "bash" because the WSL bash is diff --git a/terra/compute/virtualenv.py b/terra/compute/virtualenv.py index c65c6253..8d8edb0c 100644 --- a/terra/compute/virtualenv.py +++ b/terra/compute/virtualenv.py @@ -56,7 +56,7 @@ def run_service(self, service_info): if logger.getEffectiveLevel() <= DEBUG1: dd = dict_diff(os.environ, env)[3] if dd: - logger.debug1('Environment Modification:\n' + '\n'.join(dd)) + logger.debug4('Environment Modification:\n' + '\n'.join(dd)) # Similar (but different) to a bug in docker compute, the right python # executable is not found on the path, possibly because Popen doesn't diff --git a/terra/executor/dummy.py b/terra/executor/dummy.py index 49207135..9bef1d99 100644 --- a/terra/executor/dummy.py +++ b/terra/executor/dummy.py @@ -22,14 +22,16 @@ def submit(self, fn, *args, **kwargs): if self._shutdown: raise RuntimeError('cannot schedule new futures after shutdown') - with settings: - settings.terra.zone = 'task' - f = Future() - logger.info(f'Run function: {fn}') - logger.info(f'With args: {args}') - logger.info(f'With kwargs: {kwargs}') - f.set_result(None) - return f + original_zone = settings.terra.zone + # Fake the zone for the log messages + settings.terra.zone = 'task' + f = Future() + logger.info(f'Run function: {fn}') + logger.info(f'With args: {args}') + logger.info(f'With kwargs: {kwargs}') + f.set_result(None) + settings.terra.zone = original_zone + return f def shutdown(self, wait=True): with self._shutdown_lock: diff --git a/terra/task.py b/terra/task.py index 21cbc0c5..4422365c 100644 --- a/terra/task.py +++ b/terra/task.py @@ -77,6 +77,7 @@ def apply_async(self, args=None, kwargs=None, task_id=None, with open(env["TERRA_SETTINGS_FILE"], 'r') as fid: current_settings = json.load(fid) return super().apply_async(args=args, kwargs=kwargs, + # use settings._wrapped instead of current_settings? 
headers={'settings': current_settings}, task_id=task_id, *args2, **kwargs2) From 5545d6e81483beaa65e319c4d5bc98aed260ae27 Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Mon, 20 Apr 2020 10:32:35 -0400 Subject: [PATCH 14/94] Fix resetting zone on exception [skipci] Signed-off-by: Andy Neff --- terra/task.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/terra/task.py b/terra/task.py index 31cd66a7..28976e74 100644 --- a/terra/task.py +++ b/terra/task.py @@ -80,7 +80,9 @@ def __call__(self, *args, **kwargs): else: original_zone = settings.terra.zone settings.terra.zone = 'task' - return_value = self.run(*args, **kwargs) - settings.terra.zone = original_zone + try: + return_value = self.run(*args, **kwargs) + finally: + settings.terra.zone = original_zone self.settings = None return return_value \ No newline at end of file From 1944ae6b374f5053001f27289f660a302049e0a0 Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Tue, 21 Apr 2020 07:54:25 -0400 Subject: [PATCH 15/94] Return values in celery get volume translations - Temporary directors have a meaningful suffix - TERRA_KEEP_TEMP_DIR added for debuging - TERRA_CELERY_MAIN_NAME is automatic now - config.json.orig removed, as no longer needed - Fixed a duplicate volume_map bug Signed-off-by: Andy Neff --- terra.env | 8 ++++++-- terra/compute/container.py | 12 +++++------- terra/compute/docker.py | 4 +++- terra/compute/singularity.py | 3 ++- terra/compute/virtualenv.py | 7 +++++-- terra/executor/celery/__init__.py | 11 ++++++++++- terra/task.py | 26 +++++++++++++++++++------- 7 files changed, 50 insertions(+), 21 deletions(-) diff --git a/terra.env b/terra.env index 784ab0df..c23cd76d 100644 --- a/terra.env +++ b/terra.env @@ -79,9 +79,13 @@ fi #** # .. envvar:: TERRA_CELERY_MAIN_NAME # -# Name of the main module if running as __main__. This is used as the prefix for auto-generated task names. +# (Optional) Name of the main module if running as __main__. This is used as the prefix for auto-generated task names that are defined in the same module as ``__main__`` (Usually caused by ``python -m``). At first, python will try ``sys.modules['__main__'].__spec__.name``, before using this value, when that fails. +# +# .. envvar:: TERRA_KEEP_TEMP_DIR +# +# Optional environment variable, that when set to ``1`` will keep the temporary config files generated for containers. For debug use. #** -: ${TERRA_CELERY_MAIN_NAME=terra} + #** # .. envvar:: TERRA_CELERY_CONF # diff --git a/terra/compute/container.py b/terra/compute/container.py index a3df6a8f..ca01f13b 100644 --- a/terra/compute/container.py +++ b/terra/compute/container.py @@ -33,7 +33,9 @@ def pre_run(self): # for special exectutors, etc... 
super().pre_run() - self.temp_dir = TemporaryDirectory() + self.temp_dir = TemporaryDirectory(suffix=f"_{type(self).__name__}") + if env.get('TERRA_KEEP_TEMP_DIR', None) == "1": + self.temp_dir._finalizer.detach() temp_dir = pathlib.Path(self.temp_dir.name) # Check to see if and are already defined, this will play nicely with @@ -82,15 +84,11 @@ def pre_run(self): with open(temp_dir / 'config.json', 'w') as fid: json.dump(container_config, fid) - # Dump the original setting too, incase an executor needs to perform map - # translation too - with open(temp_dir / 'config.json.orig', 'w') as fid: - json.dump(TerraJSONEncoder.serializableSettings(settings), fid) - def post_run(self): super().post_run() # Delete temp_dir - self.temp_dir.cleanup() + if env.get('TERRA_KEEP_TEMP_DIR', None) != "1": + self.temp_dir.cleanup() # self.temp_dir = None # Causes a warning, hopefully there wasn't a reason # I did it this way. diff --git a/terra/compute/docker.py b/terra/compute/docker.py index bce7d367..932e6852 100644 --- a/terra/compute/docker.py +++ b/terra/compute/docker.py @@ -96,7 +96,9 @@ def get_volume_map(self, config, service_info): ans = re.match(docker_volume_re, volume).groups() volume_map.append((ans[0], ans[2])) - volume_map = volume_map + service_info.volumes + # This is not needed, because service_info.volumes are already in + # service_info.env, added by terra.compute.base.BaseService.pre_run + # volume_map = volume_map + service_info.volumes slashes = '/' if os.name == 'nt': diff --git a/terra/compute/singularity.py b/terra/compute/singularity.py index 561fdf93..7deb1228 100644 --- a/terra/compute/singularity.py +++ b/terra/compute/singularity.py @@ -72,7 +72,8 @@ def get_volume_map(self, config, service_info): volume = volume.split(':') volume_map.append((volume[0], volume[1])) - volume_map = volume_map + service_info.volumes + # I think this causes duplicates, just like in the docker + # volume_map = volume_map + service_info.volumes slashes = '/' if os.name == 'nt': diff --git a/terra/compute/virtualenv.py b/terra/compute/virtualenv.py index 8d8edb0c..26970d0b 100644 --- a/terra/compute/virtualenv.py +++ b/terra/compute/virtualenv.py @@ -105,7 +105,9 @@ def pre_run(self): super().pre_run() # Create a temp directory, store it in this instance - self.temp_dir = TemporaryDirectory() + self.temp_dir = TemporaryDirectory(suffix=f"_{type(self).__name__}") + if env.get('TERRA_KEEP_TEMP_DIR', None) == "1": + self.temp_dir._finalizer.detach() # Use a config.json file to store settings within that temp directory temp_config_file = os.path.join(self.temp_dir.name, 'config.json') @@ -124,4 +126,5 @@ def pre_run(self): def post_run(self): super().post_run() # Delete temp_dir - self.temp_dir.cleanup() + if env.get('TERRA_KEEP_TEMP_DIR', None) != "1": + self.temp_dir.cleanup() diff --git a/terra/executor/celery/__init__.py b/terra/executor/celery/__init__.py index 20c7a96c..9a9deaf9 100644 --- a/terra/executor/celery/__init__.py +++ b/terra/executor/celery/__init__.py @@ -1,5 +1,6 @@ #!/usr/bin/env python +import sys from os import environ as env from celery import Celery @@ -10,7 +11,15 @@ __all__ = ['CeleryExecutor'] -app = Celery(env['TERRA_CELERY_MAIN_NAME']) + +main_name = env.get('TERRA_CELERY_MAIN_NAME', None) +if main_name is None: + try: + main_name = sys.modules['__main__'].__spec__.name + except: + main_name = "main_name_unset_Set_TERRA_CELERY_MAIN_NAME" +app = Celery(main_name) + app.config_from_object(env['TERRA_CELERY_CONF']) # app.running = False diff --git a/terra/task.py 
b/terra/task.py index 8b141799..d3af7c16 100644 --- a/terra/task.py +++ b/terra/task.py @@ -42,18 +42,26 @@ def _get_volume_mappings(self): executor_volume_map = self.request.settings['executor']['volume_map'] if executor_volume_map: - reverse_compute_volume_map = \ + compute_volume_map = \ self.request.settings['compute']['volume_map'] # Flip each mount point, so it goes from runner to controller reverse_compute_volume_map = [[x[1], x[0]] - for x in reverse_compute_volume_map] + for x in compute_volume_map] # Revere order. This will be important in case one mount point mounts # inside another reverse_compute_volume_map.reverse() + + reverse_executor_volume_map = [[x[1], x[0]] + for x in executor_volume_map] + reverse_executor_volume_map.reverse() + else: reverse_compute_volume_map = [] + compute_volume_map = [] + reverse_executor_volume_map = [] - return (reverse_compute_volume_map, executor_volume_map) + return (compute_volume_map, reverse_compute_volume_map, + executor_volume_map, reverse_executor_volume_map) def translate_paths(self, payload, reverse_compute_volume_map, executor_volume_map): @@ -74,10 +82,8 @@ def translate_paths(self, payload, reverse_compute_volume_map, def apply_async(self, args=None, kwargs=None, task_id=None, *args2, **kwargs2): - with open(env["TERRA_SETTINGS_FILE"], 'r') as fid: - current_settings = json.load(fid) + current_settings = TerraJSONEncoder.serializableSettings(settings) return super().apply_async(args=args, kwargs=kwargs, - # use settings._wrapped instead of current_settings? headers={'settings': current_settings}, task_id=task_id, *args2, **kwargs2) @@ -94,7 +100,8 @@ def __call__(self, *args, **kwargs): # configured yet settings.configure({'processing_dir': gettempdir()}) with settings: - reverse_compute_volume_map, executor_volume_map = \ + compute_volume_map, reverse_compute_volume_map, \ + executor_volume_map, reverse_executor_volume_map = \ self._get_volume_mappings() settings._wrapped.clear() @@ -115,7 +122,12 @@ def __call__(self, *args, **kwargs): reverse_compute_volume_map, executor_volume_map) terra.logger._logs.reconfigure_logger() return_value = self.run(*args_only, **kwargs) + + return_value = self.translate_paths(return_value, + reverse_executor_volume_map, compute_volume_map) else: + # Must by just apply (synchronous), or a normal call with no volumes + # mapping original_zone = settings.terra.zone settings.terra.zone = 'task' try: From baa16a158914652d661821f58e302605ab37b37d Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Tue, 21 Apr 2020 08:12:13 -0400 Subject: [PATCH 16/94] Change celery executor log message levels - Once celery task logs are received, sucess will just be a spam message - The rest of the message are updated to reflect their severity Signed-off-by: Andy Neff --- terra/executor/celery/executor.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/terra/executor/celery/executor.py b/terra/executor/celery/executor.py index 2a77d78f..cb6ead97 100644 --- a/terra/executor/celery/executor.py +++ b/terra/executor/celery/executor.py @@ -134,7 +134,7 @@ def _update_futures(self): ar.ready() # Just trigger the AsyncResult state update check if ar.state == 'REVOKED': - logger.debug1('Celery task "%s" canceled.', ar.id) + logger.warning('Celery task "%s" canceled.', ar.id) if not fut.cancelled(): if not fut.cancel(): # pragma: no cover logger.error('Future was not running but failed to be cancelled') @@ -142,18 +142,18 @@ def _update_futures(self): # Future is CANCELLED -> CANCELLED_AND_NOTIFIED elif 
ar.state in ('RUNNING', 'RETRY'): - logger.debug1('Celery task "%s" running.', ar.id) + logger.debug4('Celery task "%s" running.', ar.id) if not fut.running(): fut.set_running_or_notify_cancel() # Future is RUNNING elif ar.state == 'SUCCESS': - logger.debug1('Celery task "%s" resolved.', ar.id) + logger.debug4('Celery task "%s" resolved.', ar.id) fut.set_result(ar.get(disable_sync_subtasks=False)) # Future is FINISHED elif ar.state == 'FAILURE': - logger.debug1('Celery task "%s" resolved with error.', ar.id) + logger.info('Celery task "%s" resolved with error.', ar.id) fut.set_exception(ar.result) # Future is FINISHED From ed1be5ee24a251aa276a72a251a24f9e59fe2579 Mon Sep 17 00:00:00 2001 From: Scott Richardson Date: Tue, 21 Apr 2020 15:12:14 -0400 Subject: [PATCH 17/94] create a simple __main__.py to start customizing celery --- Justfile | 3 ++- terra/executor/celery/__init__.py | 7 +------ terra/executor/celery/__main__.py | 10 ++++++++++ terra/executor/dummy.py | 2 ++ 4 files changed, 15 insertions(+), 7 deletions(-) create mode 100644 terra/executor/celery/__main__.py diff --git a/Justfile b/Justfile index af081528..b47b33f2 100755 --- a/Justfile +++ b/Justfile @@ -139,7 +139,8 @@ function terra_caseify() export FORKED_BY_MULTIPROCESSING=1 fi - TERRA_IS_CELERY_WORKER=1 Terra_Pipenv run celery -A terra.executor.celery.app worker --loglevel="${TERRA_CELLER_LOG_LEVEL-INFO}" -n "${node_name}" + # We might be able to use CELERY_LOADER to avoid the -A argument + TERRA_IS_CELERY_WORKER=1 Terra_Pipenv run python -m terra.executor.celery -A terra.executor.celery.app worker --loglevel="${TERRA_CELLER_LOG_LEVEL-INFO}" -n "${node_name}" ;; run_flower) # Start the flower server diff --git a/terra/executor/celery/__init__.py b/terra/executor/celery/__init__.py index 9a9deaf9..3c49a3bb 100644 --- a/terra/executor/celery/__init__.py +++ b/terra/executor/celery/__init__.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python - import sys from os import environ as env @@ -17,7 +15,7 @@ try: main_name = sys.modules['__main__'].__spec__.name except: - main_name = "main_name_unset_Set_TERRA_CELERY_MAIN_NAME" + main_name = "main_name_unset__set_TERRA_CELERY_MAIN_NAME" app = Celery(main_name) app.config_from_object(env['TERRA_CELERY_CONF']) @@ -33,6 +31,3 @@ # Running on windows. # https://stackoverflow.com/questions/37255548/how-to-run-celery-on-windows - -if __name__ == '__main__': # pragma: no cover - app.start() diff --git a/terra/executor/celery/__main__.py b/terra/executor/celery/__main__.py new file mode 100644 index 00000000..c8a66d34 --- /dev/null +++ b/terra/executor/celery/__main__.py @@ -0,0 +1,10 @@ +#!/usr/bin/env python + +from os import environ as env +from . import app + +def main(): + app.start() + +if __name__ == '__main__': # pragma: no cover + main() diff --git a/terra/executor/dummy.py b/terra/executor/dummy.py index 9bef1d99..efdfb35b 100644 --- a/terra/executor/dummy.py +++ b/terra/executor/dummy.py @@ -9,6 +9,8 @@ class DummyExecutor(Executor): """ Executor that does nothing, just logs what would happen. 
+ + Note: Don't base new executors off of this example """ def __init__(self, *arg, **kwargs): From 2372d1dd5df50505ac14ca2fa30b4fdd9f0b0a89 Mon Sep 17 00:00:00 2001 From: Scott Richardson Date: Tue, 21 Apr 2020 15:45:24 -0400 Subject: [PATCH 18/94] disallow a few more characters in the randomly generated passwords --- terra.env | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/terra.env b/terra.env index c23cd76d..c8ddbac5 100644 --- a/terra.env +++ b/terra.env @@ -72,8 +72,8 @@ fi if [[ ! -f /.dockerenv && ! -s "${TERRA_REDIS_SECRET_FILE}" ]]; then source "${VSI_COMMON_DIR}/linux/random.bsh" - # No quotes allowed - urandom_password 20 '\x21\x23-\x26\x28-\x7E' > "${TERRA_REDIS_SECRET_FILE}" + # Allow printable ascii characters excpet quotes, ';' (for an unknown redis/celery parsing reason), ':' or '@' (for redis url reasons) + urandom_password 20 '\x21\x23-\x26\x28-\x39\x3c-\x3f\x41-\x7E' > "${TERRA_REDIS_SECRET_FILE}" fi #** From 9fdad33cbb41b922695ac2d90bf3f9661e1ad711 Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Tue, 21 Apr 2020 16:18:42 -0400 Subject: [PATCH 19/94] Handle non-string case of dict patching Signed-off-by: Andy Neff --- external/vsi_common | 2 +- terra/core/settings.py | 2 +- terra/executor/celery/executor.py | 2 ++ 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/external/vsi_common b/external/vsi_common index f6f04189..b3e7684a 160000 --- a/external/vsi_common +++ b/external/vsi_common @@ -1 +1 @@ -Subproject commit f6f0418971e44a4b5ffa3688f49668af8ecff276 +Subproject commit b3e7684ad4ff093945087a33205fbefaad3b3dc1 diff --git a/terra/core/settings.py b/terra/core/settings.py index dd5e9bea..3c233511 100644 --- a/terra/core/settings.py +++ b/terra/core/settings.py @@ -693,7 +693,7 @@ def serializableSettings(obj): obj = nested_patch( obj, - lambda k, v: any(v is not None and k.endswith(pattern) for pattern in filename_suffixes), + lambda k, v: any(v is not None and isinstance(k, str) and k.endswith(pattern) for pattern in filename_suffixes), lambda k, v: os.path.expanduser(v)) return obj diff --git a/terra/executor/celery/executor.py b/terra/executor/celery/executor.py index cb6ead97..cb991942 100644 --- a/terra/executor/celery/executor.py +++ b/terra/executor/celery/executor.py @@ -41,6 +41,7 @@ def cancel(self): Returns True if the future was cancelled, False otherwise. A future cannot be cancelled if it is running or has already completed. 
""" + logger.info(f'Canceling task {self._ar.id}') with self._condition: if self._state in [RUNNING, FINISHED, CANCELLED, CANCELLED_AND_NOTIFIED]: return super().cancel() @@ -189,6 +190,7 @@ def submit(self, fn, *args, **kwargs): return future def shutdown(self, wait=True): + logger.info('Shutting down celery tasks...') with self._shutdown_lock: self._shutdown = True for fut in tuple(self._futures): From a0eec501cf0c816db60ffcc9f389fb4292691e7b Mon Sep 17 00:00:00 2001 From: Scott Richardson Date: Fri, 1 May 2020 11:12:00 -0400 Subject: [PATCH 20/94] fix hostname and zone logging identifiers --- terra/logger.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/terra/logger.py b/terra/logger.py index f2e4a6ab..8ff37991 100644 --- a/terra/logger.py +++ b/terra/logger.py @@ -340,11 +340,13 @@ def configure_logger(self, sender, **kwargs): class TerraFilter(logging.Filter): def filter(self, record): - record.hostname = platform.node() - if terra.settings.configured: - record.zone = terra.settings.terra.zone - else: - record.zone = 'preconfig' + if not hasattr(record, 'hostname'): + record.hostname = platform.node() + if not hasattr(record, 'zone'): + if terra.settings.configured: + record.zone = terra.settings.terra.zone + else: + record.zone = 'preconfig' return True From 1c5b5327c1dc1b9f70fccf6038c607f0ce03c4b2 Mon Sep 17 00:00:00 2001 From: Scott Richardson Date: Fri, 1 May 2020 15:13:32 -0400 Subject: [PATCH 21/94] fix ask_question usage --- Justfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Justfile b/Justfile index b47b33f2..4b45b094 100755 --- a/Justfile +++ b/Justfile @@ -31,7 +31,8 @@ function Terra_Pipenv() if [ -n "${VIRTUAL_ENV+set}" ]; then echo "Warning: You appear to be in a virtual env" >&2 echo "Deactivate external virtual envs before running just" >&2 - ask_question "Continue?" n + ask_question "Continue?" answer_continue n + [ "$answer_continue" == "0" ] && return 1 fi PIPENV_PIPFILE="${TERRA_CWD}/Pipfile" pipenv ${@+"${@}"} || return $? else From 90f743f00f6a2df6a23b43f9630cfa4677f8d332 Mon Sep 17 00:00:00 2001 From: Scott Richardson Date: Fri, 1 May 2020 16:57:00 -0400 Subject: [PATCH 22/94] Disable docker-compoes's tty allocation Add the -T flag to `docker-compose` which disables its tty allocation [1] (the inverse of docker's -t flag). This prevents the staircase formatting we were observing when printing (not just logging) messages received by the master controler's Listner (which is running in a background thread). Because this container runs in the forground, it can change the tty's settings. This is a problem because it looks like the container disables opost (which postprocess output) in the tty, which screws up the formatting done with onlcr [2]. Also, we don't need a tty because logging to stdout/stderr directly in these processes is not threadsafe. A simple example of this error can be seen with the 'Sending and receiving logging events across a network' example in the python logging cookbook, which sets up a handler on the server to listen for messages and a handler on the client to send them. 
Start the server with $ python3 server.py & and the client with: $ docker run -i --rm -v /home/sgrichar/terra/terra_dsm/external/terra:/src python:3 python3 /src/client.py which looks as expected; compared to this, which doesn't: $ docker run -it --rm -v /home/sgrichar/terra/terra_dsm/external/terra:/src python:3 python3 /src/client.py [1] https://github.com/moby/moby/issues/8513 [2] https://unix.stackexchange.com/a/242814/332869 [3] https://docs.python.org/3/howto/logging-cookbook.html#sending-and-receiving-logging-events-across-a-network --- terra/compute/docker.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/terra/compute/docker.py b/terra/compute/docker.py index 932e6852..9b782558 100644 --- a/terra/compute/docker.py +++ b/terra/compute/docker.py @@ -42,7 +42,7 @@ def run_service(self, service_info): just --wrap Just-docker-compose \\ -f {service_info.compose_files} ... \\ - run {service_info.compose_service_name} \\ + run -T {service_info.compose_service_name} \\ {service_info.command} ''' optional_args = {} @@ -50,7 +50,7 @@ def run_service(self, service_info): pid = just("--wrap", "Just-docker-compose", *sum([['-f', cf] for cf in service_info.compose_files], []), - 'run', service_info.compose_service_name, + 'run', '-T', service_info.compose_service_name, *service_info.command + extra_arguments, **optional_args, env=service_info.env) From ddb29df7caa57abbbe02e69d838e62126e821d37 Mon Sep 17 00:00:00 2001 From: Scott Richardson Date: Fri, 1 May 2020 17:05:14 -0400 Subject: [PATCH 23/94] WIP logging from tasks and runners is sent over a TCP socket to the controller. Currently the process hangs and does not exit once the tasks are done --- Justfile | 3 +- terra/core/settings.py | 2 + terra/core/signals.py | 3 +- terra/executor/celery/__init__.py | 2 + terra/executor/celery/__main__.py | 20 +++++ terra/executor/utils.py | 145 ++++++++++++++++++++++++++++-- terra/logger.py | 36 ++++++++ terra/task.py | 15 +++- 8 files changed, 216 insertions(+), 10 deletions(-) diff --git a/Justfile b/Justfile index 4b45b094..7fc157f7 100755 --- a/Justfile +++ b/Justfile @@ -141,7 +141,8 @@ function terra_caseify() fi # We might be able to use CELERY_LOADER to avoid the -A argument - TERRA_IS_CELERY_WORKER=1 Terra_Pipenv run python -m terra.executor.celery -A terra.executor.celery.app worker --loglevel="${TERRA_CELLER_LOG_LEVEL-INFO}" -n "${node_name}" + TERRA_IS_CELERY_WORKER=1 Terra_Pipenv run python -m terra.executor.celery -A terra.executor.celery.app worker --loglevel="${TERRA_CELERY_LOG_LEVEL-INFO}" -n "${node_name}" + # REVIEW do you need extra args here ;; run_flower) # Start the flower server diff --git a/terra/core/settings.py b/terra/core/settings.py index 3c233511..9b4faca5 100644 --- a/terra/core/settings.py +++ b/terra/core/settings.py @@ -504,6 +504,8 @@ def read_json(json_file): for pattern in json_include_suffixes)), lambda key, value: read_json(value)) + #TODO : Load compute and executor class and call _connect_backend + post_settings_configured.send(sender=self) logger.debug2('Post settings configure') diff --git a/terra/core/signals.py b/terra/core/signals.py index 00699321..af77a95b 100644 --- a/terra/core/signals.py +++ b/terra/core/signals.py @@ -342,7 +342,8 @@ def _decorator(func): return _decorator -__all__ = ['Signal', 'receiver', 'post_settings_configured'] +__all__ = ['Signal', 'receiver', 'post_settings_configured', + 'post_settings_context'] # a signal for settings done being loaded post_settings_configured = Signal() diff --git 
a/terra/executor/celery/__init__.py b/terra/executor/celery/__init__.py index 3c49a3bb..b3bcc191 100644 --- a/terra/executor/celery/__init__.py +++ b/terra/executor/celery/__init__.py @@ -13,7 +13,9 @@ main_name = env.get('TERRA_CELERY_MAIN_NAME', None) if main_name is None: try: + # FIXME __spec__ is None (__main__ is builtin) main_name = sys.modules['__main__'].__spec__.name + # REVIEW can we catch a specific exception here, like AttributeError except: main_name = "main_name_unset__set_TERRA_CELERY_MAIN_NAME" app = Celery(main_name) diff --git a/terra/executor/celery/__main__.py b/terra/executor/celery/__main__.py index c8a66d34..88e19977 100644 --- a/terra/executor/celery/__main__.py +++ b/terra/executor/celery/__main__.py @@ -3,7 +3,27 @@ from os import environ as env from . import app +# Terra +from terra import settings + + def main(): + if env.get('TERRA_SETTINGS_FILE', '') == '': + print('SGR - default settings') + + settings.configure( + { + 'executor': {'type': 'CeleryExecutor'}, + 'terra': {'zone': 'task'}, + #'terra': {'zone': 'task_controller'}, + 'logging': {'level': 'NOTSET'} + } + ) + print('SGR - celery.__main__.py') + + # REVIEW are settings setup at this point; they must be setup before the + # celery tasks start + app.start() if __name__ == '__main__': # pragma: no cover diff --git a/terra/executor/utils.py b/terra/executor/utils.py index eba711bf..565bed49 100644 --- a/terra/executor/utils.py +++ b/terra/executor/utils.py @@ -7,6 +7,12 @@ from terra.core.utils import ClassHandler import terra.logger +import logging.handlers +import pickle +import socketserver +import struct +import threading + class ExecutorHandler(ClassHandler): ''' @@ -26,6 +32,8 @@ def _connect_backend(self): If not ``None``, override the name of the backend to load. ''' + print('SGR - _connect_backend') + backend_name = self._override_type if backend_name is None: @@ -51,12 +59,18 @@ def _connect_backend(self): def configuration_map(self, service_info): if not hasattr(self._connection, 'configuration_map'): - return {} + # Default behavior + return [] + # else call the class specific implementation return self._connection.configuration_map(service_info) def reconfigure_logger(self, logging_handler): - # The default logging handler is a StreamHandler. This will reconfigure the - # Stream handler, should + # The default logging handler is a StreamHandler. This will reconfigure its + # output stream + + print("SGR - reconfigure logging") + return + log_file = os.path.join(settings.processing_dir, terra.logger._logs.default_log_prefix) @@ -70,11 +84,13 @@ def configure_logger(self): # ThreadPoolExecutor will work just fine with a normal StreamHandler try: - self._configure_logger() + return self._configure_logger() + # REVIEW this may not be needed anymore. it also is in the + # Justfile and docker-compose.yml # In CeleryPoolExecutor, use the Celery logger. # Use this to determine if main process or just a worker? 
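    # (Editorial note, not part of the patch: the self._configure_logger()
    # call above either returns a zone-appropriate handler or raises
    # AttributeError for zones that should simply log to a file; the except
    # branch below then opens settings.processing_dir/<default_log_prefix>
    # and wraps it in a plain StreamHandler.)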
# https://stackoverflow.com/a/45022530/4166604 - # Use JUST_IS_CELERY_WORKER + # Use TERRA_IS_CELERY_WORKER except AttributeError: # Setup log file for use in configure self._log_file = os.path.join(settings.processing_dir, @@ -87,6 +103,125 @@ def configure_logger(self): # TODO: ProcessPool - Log server + def _configure_logger(self): + # FIXME don't hardcode hostname/port + self._hostname = 'kanade' # settings.terra.celery.hostname + self._port = logging.handlers.DEFAULT_TCP_LOGGING_PORT # settings.terra.celery.logging_port + + if settings.terra.zone == 'controller': + print("SGR - setting up controller logging") + + # setup the listener + self.tcp_logging_server = LogRecordSocketReceiver(self._hostname, self._port) + print('About to start TCP server...') + + lp = threading.Thread(target=self.tcp_logging_server.serve_until_stopped) + lp.setDaemon(True) + # FIXME can't actually handle a log message until logging is done configuring + lp.start() + # TODO do we need to join + #lp.join() + + raise AttributeError + elif settings.terra.zone == 'runner' or settings.terra.zone == 'task': + print("SGR - setting up runner/task logging") + + self._socket_handler = logging.handlers.SocketHandler(self._hostname, + self._port) + + # TODO would probably be good to also setup another handler to log to disk + + # TODO don't bother with a formatter, since a socket handler sends the event + # as an unformatted pickle + + return self._socket_handler + elif settings.terra.zone == 'task_controller': + # TODO log to disk + pass + else: + assert False, 'unknown zone: ' + settings.terra.zone + + def reconfigure_logger(self, logging_handler): + # setup the logging when a task is reconfigured; e.g., changing logging + # level or hostname + pass + + #if settings.hostname != self._hostname: + # # FIXME no idea how to reset this + # pass + +# from https://docs.python.org/3/howto/logging-cookbook.html +class LogRecordStreamHandler(socketserver.StreamRequestHandler): + """Handler for a streaming logging request. + + This basically logs the record using whatever logging policy is + configured locally. + """ + + def handle(self): + """ + Handle multiple requests - each expected to be a 4-byte length, + followed by the LogRecord in pickle format. Logs the record + according to whatever policy is configured locally. + """ + while True: + chunk = self.connection.recv(4) + if len(chunk) < 4: + break + slen = struct.unpack('>L', chunk)[0] + chunk = self.connection.recv(slen) + while len(chunk) < slen: + chunk = chunk + self.connection.recv(slen - len(chunk)) + obj = self.unPickle(chunk) + record = logging.makeLogRecord(obj) + self.handleLogRecord(record) + + def unPickle(self, data): + return pickle.loads(data) + + def handleLogRecord(self, record): + # if a name is specified, we use the named logger rather than the one + # implied by the record. + if self.server.logname is not None: + name = self.server.logname + else: + name = record.name + logger = terra.logger.getLogger(name) + # N.B. EVERY record gets logged. This is because Logger.handle + # is normally called AFTER logger-level filtering. If you want + # to do filtering, do it at the client end to save wasting + # cycles and network bandwidth! + logger.handle(record) + +class LogRecordSocketReceiver(socketserver.ThreadingTCPServer): + """ + Simple TCP socket-based logging receiver suitable for testing. 
+ """ + + allow_reuse_address = True + + def __init__(self, host='localhost', + port=logging.handlers.DEFAULT_TCP_LOGGING_PORT, + handler=LogRecordStreamHandler): + socketserver.ThreadingTCPServer.__init__(self, (host, port), handler) + self.abort = 0 + self.timeout = 1 + self.logname = None + #self.request_queue_size = 20 + + def serve_until_stopped(self): + import select + abort = 0 + print('SGR - STARTING LISTNER') + while not abort: + rd, wr, ex = select.select([self.socket.fileno()], + [], [], + self.timeout) + if rd: + print('SGR - RD') + self.handle_request() + abort = self.abort + Executor = ExecutorHandler() '''ExecutorHandler: The executor handler that all services will be interfacing diff --git a/terra/logger.py b/terra/logger.py index 8ff37991..f30a821a 100644 --- a/terra/logger.py +++ b/terra/logger.py @@ -249,6 +249,7 @@ def reconfigure_logger(self, sender=None, **kwargs): self.set_level_and_formatter() # Must be imported after settings configed + # TODO: Replace these two lines with reconfigure_signal from terra.executor import Executor Executor.reconfigure_logger(self.main_log_handler) @@ -263,6 +264,7 @@ def set_level_and_formatter(self): if isinstance(level, str): # make level case insensitive level = level.upper() + print('SGR - log level ' + level) self.stderr_handler.setLevel(level) self.main_log_handler.setLevel(level) @@ -285,6 +287,7 @@ def configure_logger(self, sender, **kwargs): raise ImproperlyConfigured() # Must be imported after settings configed + # TODO: Replace these two lines with configure_signal below from terra.executor import Executor self.main_log_handler = Executor.configure_logger() @@ -335,8 +338,20 @@ def configure_logger(self, sender, **kwargs): os.unlink(self.tmp_file.name) self.tmp_file = None + print('SGR - logging configured for zone ' + settings.terra.zone) + #show_logs_and_handlers() + if settings.terra.zone == 'runner' or settings.terra.zone == 'task': + self.root_logger.removeHandler(self.stderr_handler) + self._configured = True + # TODO: Send logging configured signal + +from celery.signals import setup_logging + +@setup_logging.connect +def setup_loggers(*args, **kwargs): + print("SGR - celery logger") class TerraFilter(logging.Filter): def filter(self, record): @@ -350,6 +365,27 @@ def filter(self, record): return True +def show_log(k, v): + def show_dict_fields(prefix, dict1): + for fld,val in dict1.items(): + print('%s%s=%s' %(prefix, fld,val) ) + + if not isinstance(v, logging.PlaceHolder): + print('+ [%s] {%s} (%s) ' % (str.ljust( k, 20), str(v.__class__)[8:-2], logging.getLevelName(v.level)) ) + print(str.ljust( '-------------------------',20) ) + show_dict_fields(' -', v.__dict__) + + for h in v.handlers: + print(' +++%s (%s)' %(str(h.__class__)[8:-2], logging.getLevelName(h.level) )) + show_dict_fields(' -', h.__dict__) + +# from https://github.com/mickeyperlstein/logging_debugger/blob/master/__init__.py +def show_logs_and_handlers(): + show_log('root', logging.getLogger('')) + for k,v in logging.Logger.manager.loggerDict.items(): + show_log(k,v) + + class Logger(Logger_original): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) diff --git a/terra/task.py b/terra/task.py index d3af7c16..16a449b8 100644 --- a/terra/task.py +++ b/terra/task.py @@ -100,13 +100,16 @@ def __call__(self, *args, **kwargs): # configured yet settings.configure({'processing_dir': gettempdir()}) with settings: + # Calculate the exector's mapped version of the runner's settings compute_volume_map, reverse_compute_volume_map, \ 
executor_volume_map, reverse_executor_volume_map = \ self._get_volume_mappings() + # Load the executor version of the runner's settings settings._wrapped.clear() settings._wrapped.update(self.translate_paths(self.request.settings, reverse_compute_volume_map, executor_volume_map)) + # Just in case processing dir doesn't exists if not os.path.exists(settings.processing_dir): logger.critical(f'Dir "{settings.processing_dir}" is not accessible ' 'by the executor, please make sure the worker has ' @@ -114,20 +117,26 @@ def __call__(self, *args, **kwargs): settings.processing_dir = gettempdir() logger.warning('Using temporary directory: ' f'"{settings.processing_dir}" for the processing dir') - settings.terra.zone = 'task' + + logger.error('SGR - TERRA ZONE ' + str(settings.terra.zone)) + + settings.terra.zone = 'task' # was runner + # Calculate the exector's mapped version of the arguments kwargs = args_to_kwargs(self.run, args, kwargs) args_only = kwargs.pop(ARGS, ()) kwargs.update(kwargs.pop(KWARGS, ())) kwargs = self.translate_paths(kwargs, reverse_compute_volume_map, executor_volume_map) + # Set up logger to talk to master controller terra.logger._logs.reconfigure_logger() return_value = self.run(*args_only, **kwargs) + # Calculate the runner mapped version of the executor's return value return_value = self.translate_paths(return_value, reverse_executor_volume_map, compute_volume_map) else: - # Must by just apply (synchronous), or a normal call with no volumes - # mapping + # Must call (synchronous) apply or python __call__ with no volume + # mappings original_zone = settings.terra.zone settings.terra.zone = 'task' try: From ba358f2cf8c24880f7095f6d58a5241e78bd1273 Mon Sep 17 00:00:00 2001 From: Scott Richardson Date: Fri, 1 May 2020 17:43:03 -0400 Subject: [PATCH 24/94] minor modifications --- terra/executor/utils.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/terra/executor/utils.py b/terra/executor/utils.py index 565bed49..14455e98 100644 --- a/terra/executor/utils.py +++ b/terra/executor/utils.py @@ -80,6 +80,20 @@ def reconfigure_logger(self, logging_handler): self._log_file.close() self._log_file = open(log_file, 'a') + #self._reconfigure_logger(logging_handler) + + def _reconfigure_logger(self, logging_handler): + # FIXME no idea how to reset this + # setup the logging when a task is reconfigured; e.g., changing logging + # level or hostname + + if settings.terra.zone == 'runner' or settings.terra.zone == 'task': + print("SGR - reconfigure runner/task logging") + + # when the celery task is done, its logger is automatically reconfigured; + # use that opportunity to close the stream + #self._socket_handler.close() + def configure_logger(self): # ThreadPoolExecutor will work just fine with a normal StreamHandler @@ -136,8 +150,7 @@ def _configure_logger(self): return self._socket_handler elif settings.terra.zone == 'task_controller': - # TODO log to disk - pass + raise AttributeError else: assert False, 'unknown zone: ' + settings.terra.zone @@ -207,7 +220,6 @@ def __init__(self, host='localhost', self.abort = 0 self.timeout = 1 self.logname = None - #self.request_queue_size = 20 def serve_until_stopped(self): import select From dd628da851069c59333f4967542ff7c159a6223b Mon Sep 17 00:00:00 2001 From: Scott Richardson Date: Mon, 4 May 2020 12:58:11 -0400 Subject: [PATCH 25/94] should still be equiv --- terra/executor/utils.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/terra/executor/utils.py b/terra/executor/utils.py index 
14455e98..76344681 100644 --- a/terra/executor/utils.py +++ b/terra/executor/utils.py @@ -154,15 +154,6 @@ def _configure_logger(self): else: assert False, 'unknown zone: ' + settings.terra.zone - def reconfigure_logger(self, logging_handler): - # setup the logging when a task is reconfigured; e.g., changing logging - # level or hostname - pass - - #if settings.hostname != self._hostname: - # # FIXME no idea how to reset this - # pass - # from https://docs.python.org/3/howto/logging-cookbook.html class LogRecordStreamHandler(socketserver.StreamRequestHandler): """Handler for a streaming logging request. From 9f9450881de1c213883241dba989ccde4cc5423b Mon Sep 17 00:00:00 2001 From: Scott Richardson Date: Mon, 4 May 2020 17:26:03 -0400 Subject: [PATCH 26/94] add FIXME --- terra/executor/celery/__main__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/terra/executor/celery/__main__.py b/terra/executor/celery/__main__.py index 88e19977..4c69667e 100644 --- a/terra/executor/celery/__main__.py +++ b/terra/executor/celery/__main__.py @@ -14,6 +14,7 @@ def main(): settings.configure( { 'executor': {'type': 'CeleryExecutor'}, + # FIXME 'terra': {'zone': 'task'}, #'terra': {'zone': 'task_controller'}, 'logging': {'level': 'NOTSET'} From ea09e5fd3e4d39e6b60c241e0669b45766111120 Mon Sep 17 00:00:00 2001 From: Scott Richardson Date: Mon, 4 May 2020 17:32:41 -0400 Subject: [PATCH 27/94] update some comments; only close the socket if it exists; but things are not working because the task inherits the task_controlers configured logger so leave out the task_controler zone for now" --- terra/executor/utils.py | 25 +++++++++++++++---------- terra/task.py | 12 ++++++++++-- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/terra/executor/utils.py b/terra/executor/utils.py index 76344681..b9c921ca 100644 --- a/terra/executor/utils.py +++ b/terra/executor/utils.py @@ -69,18 +69,18 @@ def reconfigure_logger(self, logging_handler): # output stream print("SGR - reconfigure logging") - return - log_file = os.path.join(settings.processing_dir, - terra.logger._logs.default_log_prefix) + if settings.terra.zone == 'controller' or settings.terra.zone == 'task_controller': + log_file = os.path.join(settings.processing_dir, + terra.logger._logs.default_log_prefix) - # if not os.path.samefile(log_file, self._log_file.name): - if log_file != self._log_file.name: - os.makedirs(settings.processing_dir, exist_ok=True) - self._log_file.close() - self._log_file = open(log_file, 'a') + # if not os.path.samefile(log_file, self._log_file.name): + if log_file != self._log_file.name: + os.makedirs(settings.processing_dir, exist_ok=True) + self._log_file.close() + self._log_file = open(log_file, 'a') - #self._reconfigure_logger(logging_handler) + self._reconfigure_logger(logging_handler) def _reconfigure_logger(self, logging_handler): # FIXME no idea how to reset this @@ -92,11 +92,14 @@ def _reconfigure_logger(self, logging_handler): # when the celery task is done, its logger is automatically reconfigured; # use that opportunity to close the stream - #self._socket_handler.close() + if hasattr(self, '_socket_handler'): + self._socket_handler.close() def configure_logger(self): # ThreadPoolExecutor will work just fine with a normal StreamHandler + print('SGR - configure logging ' + settings.terra.zone) + try: return self._configure_logger() # REVIEW this may not be needed anymore. 
it also is in the @@ -150,6 +153,8 @@ def _configure_logger(self): return self._socket_handler elif settings.terra.zone == 'task_controller': + print("SGR - setting up task_controller logging") + raise AttributeError else: assert False, 'unknown zone: ' + settings.terra.zone diff --git a/terra/task.py b/terra/task.py index 16a449b8..74171b27 100644 --- a/terra/task.py +++ b/terra/task.py @@ -118,7 +118,7 @@ def __call__(self, *args, **kwargs): logger.warning('Using temporary directory: ' f'"{settings.processing_dir}" for the processing dir') - logger.error('SGR - TERRA ZONE ' + str(settings.terra.zone)) + logger.error('SGR - TerraTask ZONE ' + str(settings.terra.zone)) settings.terra.zone = 'task' # was runner # Calculate the exector's mapped version of the arguments @@ -130,6 +130,8 @@ def __call__(self, *args, **kwargs): # Set up logger to talk to master controller terra.logger._logs.reconfigure_logger() return_value = self.run(*args_only, **kwargs) + # REVIEW the problem is the zone changes when this gets called on scope __exit__ + terra.logger._logs.reconfigure_logger() # Calculate the runner mapped version of the executor's return value return_value = self.translate_paths(return_value, @@ -144,4 +146,10 @@ def __call__(self, *args, **kwargs): finally: settings.terra.zone = original_zone self.settings = None - return return_value \ No newline at end of file + return return_value + +class LogErrorsTask(TerraTask): + # from https://stackoverflow.com/a/45333231/1771778 + def on_failure(self, exc, task_id, args, kwargs, einfo): + logger.exception('Celery task failure!!!1', exc_info=exc) + super(LogErrorsTask, self).on_failure(exc, task_id, args, kwargs, einfo) From 639c5b72ec5ee0c1703e42c3365c1460a92d9eaa Mon Sep 17 00:00:00 2001 From: Scott Richardson Date: Tue, 5 May 2020 16:57:26 -0400 Subject: [PATCH 28/94] use signals to coordinate setting up of the logger between logger.py and executor.py --- terra/core/settings.py | 5 +- terra/core/signals.py | 18 ++- terra/executor/celery/executor.py | 193 +++++++++++++++++++++++++++++ terra/executor/utils.py | 195 ++++-------------------------- terra/logger.py | 38 +++--- 5 files changed, 258 insertions(+), 191 deletions(-) diff --git a/terra/core/settings.py b/terra/core/settings.py index 9b4faca5..da8dcb95 100644 --- a/terra/core/settings.py +++ b/terra/core/settings.py @@ -504,7 +504,10 @@ def read_json(json_file): for pattern in json_include_suffixes)), lambda key, value: read_json(value)) - #TODO : Load compute and executor class and call _connect_backend + # Importing these here is intentional + from terra.executor import Executor + from terra.compute import compute + # compute._connection # call a cached property post_settings_configured.send(sender=self) logger.debug2('Post settings configure') diff --git a/terra/core/signals.py b/terra/core/signals.py index af77a95b..7b637b24 100644 --- a/terra/core/signals.py +++ b/terra/core/signals.py @@ -343,7 +343,7 @@ def _decorator(func): __all__ = ['Signal', 'receiver', 'post_settings_configured', - 'post_settings_context'] + 'post_settings_context', 'logger_configure'] # a signal for settings done being loaded post_settings_configured = Signal() @@ -355,6 +355,22 @@ def _decorator(func): ''' post_settings_context = Signal() +'''Signal: +Sent after scope __exit__ from a settings context (i.e., with statement). +''' + +# REVIEW should this be called post_logger_configure +logger_configure = Signal() +'''Signal: +Sent to the executor after the logger has been configured. 
This will happen +after the post_settings_configured signal. +''' + +logger_reconfigure = Signal() +'''Signal: +Sent to the executor after the logger has been reconfigured. This will happen +after the logger_configure signal. +''' from terra.logger import getLogger # noqa logger = getLogger(__name__) diff --git a/terra/executor/celery/executor.py b/terra/executor/celery/executor.py index cb991942..dbe61325 100644 --- a/terra/executor/celery/executor.py +++ b/terra/executor/celery/executor.py @@ -16,17 +16,31 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os from os import environ as env from concurrent.futures import Future, Executor, as_completed from concurrent.futures._base import (RUNNING, FINISHED, CANCELLED, CANCELLED_AND_NOTIFIED) from threading import Lock, Thread import time +import logging.handlers +import pickle +import socketserver +import struct +import threading +from celery.signals import setup_logging + +import terra +from terra import settings from terra.logger import getLogger logger = getLogger(__name__) +@setup_logging.connect +def setup_loggers(*args, **kwargs): + print("SGR - celery logger") + class CeleryExecutorFuture(Future): def __init__(self, asyncresult): self._ar = asyncresult @@ -234,3 +248,182 @@ def __init__(self, service_info): volume_map = compute.get_volume_map(config, service_clone) return volume_map + + @staticmethod + def reconfigure_logger(sender, **kwargs): + # sender is logger in this case + # + # The default logging handler is a StreamHandler. This will reconfigure its + # output stream + + print("SGR - reconfigure logging") + + if settings.terra.zone == 'controller' or settings.terra.zone == 'task_controller': + log_file = os.path.join(settings.processing_dir, + terra.logger._logs.default_log_prefix) + + # if not os.path.samefile(log_file, sender._log_file.name): + if log_file != sender._log_file.name: + os.makedirs(settings.processing_dir, exist_ok=True) + sender._log_file.close() + sender._log_file = open(log_file, 'a') + + CeleryExecutor._reconfigure_logger(sender, **kwargs) + + # TODO move into a base executor class; mirror compute/base.py + @staticmethod + def configure_logger(sender, **kwargs): + # sender is logger in this case + + # ThreadPoolExecutor will work just fine with a normal StreamHandler + + print('SGR - configure logging ' + settings.terra.zone) + + try: + handler = CeleryExecutor._configure_logger(sender, **kwargs) + # REVIEW this may not be needed anymore. it also is in the + # Justfile and docker-compose.yml + # In CeleryPoolExecutor, use the Celery logger. + # Use this to determine if main process or just a worker? + # https://stackoverflow.com/a/45022530/4166604 + # Use TERRA_IS_CELERY_WORKER + except AttributeError: + # Setup log file for use in configure + sender._log_file = os.path.join(settings.processing_dir, + terra.logger._logs.default_log_prefix) + os.makedirs(settings.processing_dir, exist_ok=True) + sender._log_file = open(sender._log_file, 'a') + + sender._logging_handler = logging.StreamHandler(stream=sender._log_file) + handler = sender._logging_handler + + # TODO: ProcessPool - Log server + + # FIXME this is hacky. 
it requires the executor know it is responsible for + # creating this variable on the logger + terra.logger._logs.main_log_handler = handler + + @staticmethod + def _reconfigure_logger(sender, **kwargs): + # FIXME no idea how to reset this + # setup the logging when a task is reconfigured; e.g., changing logging + # level or hostname + + if settings.terra.zone == 'runner' or settings.terra.zone == 'task': + print("SGR - reconfigure runner/task logging") + + # when the celery task is done, its logger is automatically reconfigured; + # use that opportunity to close the stream + if hasattr(sender, '_socket_handler'): + sender._socket_handler.close() + + @staticmethod + def _configure_logger(sender, **kwargs): + # FIXME don't hardcode hostname/port + sender._hostname = 'kanade' # settings.terra.celery.hostname + sender._port = logging.handlers.DEFAULT_TCP_LOGGING_PORT # settings.terra.celery.logging_port + + if settings.terra.zone == 'controller': + print("SGR - setting up controller logging") + + # setup the listener + sender.tcp_logging_server = LogRecordSocketReceiver(sender._hostname, sender._port) + print('About to start TCP server...') + + lp = threading.Thread(target=sender.tcp_logging_server.serve_until_stopped) + lp.setDaemon(True) + # FIXME can't actually handle a log message until logging is done configuring + lp.start() + # TODO do we need to join + #lp.join() + + raise AttributeError + elif settings.terra.zone == 'runner' or settings.terra.zone == 'task': + print("SGR - setting up runner/task logging") + + sender._socket_handler = logging.handlers.SocketHandler(sender._hostname, + sender._port) + + # TODO would probably be good to also setup another handler to log to disk + + # TODO don't bother with a formatter, since a socket handler sends the event + # as an unformatted pickle + + return sender._socket_handler + elif settings.terra.zone == 'task_controller': + print("SGR - setting up task_controller logging") + + raise AttributeError + else: + assert False, 'unknown zone: ' + settings.terra.zone + +# from https://docs.python.org/3/howto/logging-cookbook.html +class LogRecordStreamHandler(socketserver.StreamRequestHandler): + """Handler for a streaming logging request. + + This basically logs the record using whatever logging policy is + configured locally. + """ + + def handle(self): + """ + Handle multiple requests - each expected to be a 4-byte length, + followed by the LogRecord in pickle format. Logs the record + according to whatever policy is configured locally. + """ + while True: + chunk = self.connection.recv(4) + if len(chunk) < 4: + break + slen = struct.unpack('>L', chunk)[0] + chunk = self.connection.recv(slen) + while len(chunk) < slen: + chunk = chunk + self.connection.recv(slen - len(chunk)) + obj = self.unPickle(chunk) + record = logging.makeLogRecord(obj) + self.handleLogRecord(record) + + def unPickle(self, data): + return pickle.loads(data) + + def handleLogRecord(self, record): + # if a name is specified, we use the named logger rather than the one + # implied by the record. + if self.server.logname is not None: + name = self.server.logname + else: + name = record.name + logger = terra.logger.getLogger(name) + # N.B. EVERY record gets logged. This is because Logger.handle + # is normally called AFTER logger-level filtering. If you want + # to do filtering, do it at the client end to save wasting + # cycles and network bandwidth! 
+ logger.handle(record) + +class LogRecordSocketReceiver(socketserver.ThreadingTCPServer): + """ + Simple TCP socket-based logging receiver suitable for testing. + """ + + allow_reuse_address = True + + def __init__(self, host='localhost', + port=logging.handlers.DEFAULT_TCP_LOGGING_PORT, + handler=LogRecordStreamHandler): + socketserver.ThreadingTCPServer.__init__(self, (host, port), handler) + self.abort = 0 + self.timeout = 1 + self.logname = None + + def serve_until_stopped(self): + import select + abort = 0 + print('SGR - STARTING LISTNER') + while not abort: + rd, wr, ex = select.select([self.socket.fileno()], + [], [], + self.timeout) + if rd: + print('SGR - RD') + self.handle_request() + abort = self.abort diff --git a/terra/executor/utils.py b/terra/executor/utils.py index b9c921ca..947f5d01 100644 --- a/terra/executor/utils.py +++ b/terra/executor/utils.py @@ -4,15 +4,10 @@ from importlib import import_module from terra import settings +import terra.core.signals from terra.core.utils import ClassHandler import terra.logger -import logging.handlers -import pickle -import socketserver -import struct -import threading - class ExecutorHandler(ClassHandler): ''' @@ -20,6 +15,28 @@ class ExecutorHandler(ClassHandler): the ``concurrent.futures`` executor class. ''' + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + # REVIEW could be moved out of the class and _configure_logger could be + # a staticmethod once we can remove the hasattr check + terra.core.signals.logger_configure.connect(self._configure_logger) + terra.core.signals.logger_reconfigure.connect(self._reconfigure_logger) + + def _configure_logger(self, sender, **kwargs): + print('SGR - connect configure_logger signals') + + # Register the Executor-specific configure_logger with the logger + if hasattr(self, 'configure_logger'): + self.configure_logger(sender, **kwargs) + + def _reconfigure_logger(self, sender, **kwargs): + print('SGR - connect reconfigure_logger signals') + + # Register the Executor-specific configure_logger with the logger + if hasattr(self, 'reconfigure_logger'): + self.reconfigure_logger(sender, **kwargs) + def _connect_backend(self): ''' Loads the executor backend's base module, given either a fully qualified @@ -64,172 +81,6 @@ def configuration_map(self, service_info): # else call the class specific implementation return self._connection.configuration_map(service_info) - def reconfigure_logger(self, logging_handler): - # The default logging handler is a StreamHandler. 
This will reconfigure its - # output stream - - print("SGR - reconfigure logging") - - if settings.terra.zone == 'controller' or settings.terra.zone == 'task_controller': - log_file = os.path.join(settings.processing_dir, - terra.logger._logs.default_log_prefix) - - # if not os.path.samefile(log_file, self._log_file.name): - if log_file != self._log_file.name: - os.makedirs(settings.processing_dir, exist_ok=True) - self._log_file.close() - self._log_file = open(log_file, 'a') - - self._reconfigure_logger(logging_handler) - - def _reconfigure_logger(self, logging_handler): - # FIXME no idea how to reset this - # setup the logging when a task is reconfigured; e.g., changing logging - # level or hostname - - if settings.terra.zone == 'runner' or settings.terra.zone == 'task': - print("SGR - reconfigure runner/task logging") - - # when the celery task is done, its logger is automatically reconfigured; - # use that opportunity to close the stream - if hasattr(self, '_socket_handler'): - self._socket_handler.close() - - def configure_logger(self): - # ThreadPoolExecutor will work just fine with a normal StreamHandler - - print('SGR - configure logging ' + settings.terra.zone) - - try: - return self._configure_logger() - # REVIEW this may not be needed anymore. it also is in the - # Justfile and docker-compose.yml - # In CeleryPoolExecutor, use the Celery logger. - # Use this to determine if main process or just a worker? - # https://stackoverflow.com/a/45022530/4166604 - # Use TERRA_IS_CELERY_WORKER - except AttributeError: - # Setup log file for use in configure - self._log_file = os.path.join(settings.processing_dir, - terra.logger._logs.default_log_prefix) - os.makedirs(settings.processing_dir, exist_ok=True) - self._log_file = open(self._log_file, 'a') - - self._logging_handler = logging.StreamHandler(stream=self._log_file) - return self._logging_handler - - # TODO: ProcessPool - Log server - - def _configure_logger(self): - # FIXME don't hardcode hostname/port - self._hostname = 'kanade' # settings.terra.celery.hostname - self._port = logging.handlers.DEFAULT_TCP_LOGGING_PORT # settings.terra.celery.logging_port - - if settings.terra.zone == 'controller': - print("SGR - setting up controller logging") - - # setup the listener - self.tcp_logging_server = LogRecordSocketReceiver(self._hostname, self._port) - print('About to start TCP server...') - - lp = threading.Thread(target=self.tcp_logging_server.serve_until_stopped) - lp.setDaemon(True) - # FIXME can't actually handle a log message until logging is done configuring - lp.start() - # TODO do we need to join - #lp.join() - - raise AttributeError - elif settings.terra.zone == 'runner' or settings.terra.zone == 'task': - print("SGR - setting up runner/task logging") - - self._socket_handler = logging.handlers.SocketHandler(self._hostname, - self._port) - - # TODO would probably be good to also setup another handler to log to disk - - # TODO don't bother with a formatter, since a socket handler sends the event - # as an unformatted pickle - - return self._socket_handler - elif settings.terra.zone == 'task_controller': - print("SGR - setting up task_controller logging") - - raise AttributeError - else: - assert False, 'unknown zone: ' + settings.terra.zone - -# from https://docs.python.org/3/howto/logging-cookbook.html -class LogRecordStreamHandler(socketserver.StreamRequestHandler): - """Handler for a streaming logging request. - - This basically logs the record using whatever logging policy is - configured locally. 
- """ - - def handle(self): - """ - Handle multiple requests - each expected to be a 4-byte length, - followed by the LogRecord in pickle format. Logs the record - according to whatever policy is configured locally. - """ - while True: - chunk = self.connection.recv(4) - if len(chunk) < 4: - break - slen = struct.unpack('>L', chunk)[0] - chunk = self.connection.recv(slen) - while len(chunk) < slen: - chunk = chunk + self.connection.recv(slen - len(chunk)) - obj = self.unPickle(chunk) - record = logging.makeLogRecord(obj) - self.handleLogRecord(record) - - def unPickle(self, data): - return pickle.loads(data) - - def handleLogRecord(self, record): - # if a name is specified, we use the named logger rather than the one - # implied by the record. - if self.server.logname is not None: - name = self.server.logname - else: - name = record.name - logger = terra.logger.getLogger(name) - # N.B. EVERY record gets logged. This is because Logger.handle - # is normally called AFTER logger-level filtering. If you want - # to do filtering, do it at the client end to save wasting - # cycles and network bandwidth! - logger.handle(record) - -class LogRecordSocketReceiver(socketserver.ThreadingTCPServer): - """ - Simple TCP socket-based logging receiver suitable for testing. - """ - - allow_reuse_address = True - - def __init__(self, host='localhost', - port=logging.handlers.DEFAULT_TCP_LOGGING_PORT, - handler=LogRecordStreamHandler): - socketserver.ThreadingTCPServer.__init__(self, (host, port), handler) - self.abort = 0 - self.timeout = 1 - self.logname = None - - def serve_until_stopped(self): - import select - abort = 0 - print('SGR - STARTING LISTNER') - while not abort: - rd, wr, ex = select.select([self.socket.fileno()], - [], [], - self.timeout) - if rd: - print('SGR - RD') - self.handle_request() - abort = self.abort - Executor = ExecutorHandler() '''ExecutorHandler: The executor handler that all services will be interfacing diff --git a/terra/logger.py b/terra/logger.py index f30a821a..39aa405f 100644 --- a/terra/logger.py +++ b/terra/logger.py @@ -248,10 +248,15 @@ def reconfigure_logger(self, sender=None, **kwargs): self.set_level_and_formatter() - # Must be imported after settings configed - # TODO: Replace these two lines with reconfigure_signal - from terra.executor import Executor - Executor.reconfigure_logger(self.main_log_handler) + # This sends a signal to the current Executor type, a module level variable + # which has already been imported at the end of LasySettings.configure. + # Executor is setup automatically, via + # Handler.__getattr__ => Handler._connection => Executor._connect_backend, + # when the signal is sent to Executor._reconfigure_logger. + # We import Executor in LasySettings.configure instead of here to reduce + # the concerns of this module + import terra.core.signals + terra.core.signals.logger_reconfigure.send(sender=self) def set_level_and_formatter(self): from terra import settings @@ -286,10 +291,17 @@ def configure_logger(self, sender, **kwargs): "unexpected") raise ImproperlyConfigured() - # Must be imported after settings configed - # TODO: Replace these two lines with configure_signal below - from terra.executor import Executor - self.main_log_handler = Executor.configure_logger() + print('SGR - sending logger_configure signal') + + # This sends a signal to the current Executor type, a module level variable + # which has already been imported at the end of LasySettings.configure. 
+ # Executor is setup automatically, via + # Handler.__getattr__ => Handler._connection => Executor._connect_backend, + # when the signal is sent to Executor._configure_logger. + # We import Executor in LasySettings.configure instead of here to reduce + # the concerns of this module + import terra.core.signals + terra.core.signals.logger_configure.send(sender=self) self.set_level_and_formatter() @@ -345,14 +357,6 @@ def configure_logger(self, sender, **kwargs): self._configured = True - # TODO: Send logging configured signal - -from celery.signals import setup_logging - -@setup_logging.connect -def setup_loggers(*args, **kwargs): - print("SGR - celery logger") - class TerraFilter(logging.Filter): def filter(self, record): if not hasattr(record, 'hostname'): @@ -543,7 +547,7 @@ def handle_warning(message, category, filename, lineno, file=None, line=None): # Configure logging (pre configure) _logs = _SetupTerraLogger() - # register post_configure with settings + # Register post_configure with settings terra.core.signals.post_settings_configured.connect(_logs.configure_logger) # Handle a "with" settings context manager From 03dd663e8aad0a7fa697c9ed226fecf8da78339c Mon Sep 17 00:00:00 2001 From: Scott Richardson Date: Tue, 5 May 2020 20:33:23 -0400 Subject: [PATCH 29/94] add base executor --- terra/executor/base.py | 61 ++++++++++++++++++++++++++ terra/executor/celery/executor.py | 73 +++++-------------------------- terra/executor/utils.py | 12 +++-- terra/logger.py | 20 +++------ 4 files changed, 86 insertions(+), 80 deletions(-) create mode 100644 terra/executor/base.py diff --git a/terra/executor/base.py b/terra/executor/base.py new file mode 100644 index 00000000..dfafff25 --- /dev/null +++ b/terra/executor/base.py @@ -0,0 +1,61 @@ +import os +import logging.handlers +from concurrent.futures import Future, Executor, as_completed + +import terra +from terra import settings +from terra.logger import getLogger +logger = getLogger(__name__) + + +class BaseExecutor(Executor): + @staticmethod + def reconfigure_logger(sender, **kwargs): + # sender is logger in this case + # + # The default logging handler is a StreamHandler. This will reconfigure its + # output stream + + print("SGR - reconfigure logging") + + if settings.terra.zone == 'controller' or settings.terra.zone == 'task_controller': + log_file = os.path.join(settings.processing_dir, + terra.logger._logs.default_log_prefix) + + # if not os.path.samefile(log_file, sender._log_file.name): + if log_file != sender._log_file.name: + os.makedirs(settings.processing_dir, exist_ok=True) + sender._log_file.close() + sender._log_file = open(log_file, 'a') + + CeleryExecutor._reconfigure_logger(sender, **kwargs) + + @staticmethod + def configure_logger(sender, **kwargs): + # sender is logger in this case + + # ThreadPoolExecutor will work just fine with a normal StreamHandler + + print('SGR - configure logging ' + settings.terra.zone) + + # REVIEW TERRA_IS_CELERY_WORKER may not be needed anymore, now that we have + # zones. it is in the Justfile and docker-compose.yml + + # Setup log file for use in configure + sender._log_file = os.path.join(settings.processing_dir, + terra.logger._logs.default_log_prefix) + os.makedirs(settings.processing_dir, exist_ok=True) + sender._log_file = open(sender._log_file, 'a') + + sender._logging_handler = logging.StreamHandler(stream=sender._log_file) + handler = sender._logging_handler + + # TODO: ProcessPool - Log server + + # FIXME this is hacky. 
it requires the executor know it is responsible for + # creating this variable on the logger + terra.logger._logs.main_log_handler = handler + + +class BaseFuture(Future): + pass diff --git a/terra/executor/celery/executor.py b/terra/executor/celery/executor.py index dbe61325..d9e73972 100644 --- a/terra/executor/celery/executor.py +++ b/terra/executor/celery/executor.py @@ -18,7 +18,8 @@ import os from os import environ as env -from concurrent.futures import Future, Executor, as_completed +from terra.executor.base import BaseFuture, BaseExecutor +from concurrent.futures import as_completed from concurrent.futures._base import (RUNNING, FINISHED, CANCELLED, CANCELLED_AND_NOTIFIED) from threading import Lock, Thread @@ -41,7 +42,7 @@ def setup_loggers(*args, **kwargs): print("SGR - celery logger") -class CeleryExecutorFuture(Future): +class CeleryExecutorFuture(BaseFuture): def __init__(self, asyncresult): self._ar = asyncresult super().__init__() @@ -90,7 +91,7 @@ def cancel(self): return result -class CeleryExecutor(Executor): +class CeleryExecutor(BaseExecutor): """ Executor implementation using celery tasks. @@ -251,60 +252,6 @@ def __init__(self, service_info): @staticmethod def reconfigure_logger(sender, **kwargs): - # sender is logger in this case - # - # The default logging handler is a StreamHandler. This will reconfigure its - # output stream - - print("SGR - reconfigure logging") - - if settings.terra.zone == 'controller' or settings.terra.zone == 'task_controller': - log_file = os.path.join(settings.processing_dir, - terra.logger._logs.default_log_prefix) - - # if not os.path.samefile(log_file, sender._log_file.name): - if log_file != sender._log_file.name: - os.makedirs(settings.processing_dir, exist_ok=True) - sender._log_file.close() - sender._log_file = open(log_file, 'a') - - CeleryExecutor._reconfigure_logger(sender, **kwargs) - - # TODO move into a base executor class; mirror compute/base.py - @staticmethod - def configure_logger(sender, **kwargs): - # sender is logger in this case - - # ThreadPoolExecutor will work just fine with a normal StreamHandler - - print('SGR - configure logging ' + settings.terra.zone) - - try: - handler = CeleryExecutor._configure_logger(sender, **kwargs) - # REVIEW this may not be needed anymore. it also is in the - # Justfile and docker-compose.yml - # In CeleryPoolExecutor, use the Celery logger. - # Use this to determine if main process or just a worker? - # https://stackoverflow.com/a/45022530/4166604 - # Use TERRA_IS_CELERY_WORKER - except AttributeError: - # Setup log file for use in configure - sender._log_file = os.path.join(settings.processing_dir, - terra.logger._logs.default_log_prefix) - os.makedirs(settings.processing_dir, exist_ok=True) - sender._log_file = open(sender._log_file, 'a') - - sender._logging_handler = logging.StreamHandler(stream=sender._log_file) - handler = sender._logging_handler - - # TODO: ProcessPool - Log server - - # FIXME this is hacky. 
it requires the executor know it is responsible for - # creating this variable on the logger - terra.logger._logs.main_log_handler = handler - - @staticmethod - def _reconfigure_logger(sender, **kwargs): # FIXME no idea how to reset this # setup the logging when a task is reconfigured; e.g., changing logging # level or hostname @@ -318,7 +265,7 @@ def _reconfigure_logger(sender, **kwargs): sender._socket_handler.close() @staticmethod - def _configure_logger(sender, **kwargs): + def configure_logger(sender, **kwargs): # FIXME don't hardcode hostname/port sender._hostname = 'kanade' # settings.terra.celery.hostname sender._port = logging.handlers.DEFAULT_TCP_LOGGING_PORT # settings.terra.celery.logging_port @@ -326,6 +273,8 @@ def _configure_logger(sender, **kwargs): if settings.terra.zone == 'controller': print("SGR - setting up controller logging") + super(CeleryExecutor, CeleryExecutor).configure_logger(sender, **kwargs) + # setup the listener sender.tcp_logging_server = LogRecordSocketReceiver(sender._hostname, sender._port) print('About to start TCP server...') @@ -336,8 +285,6 @@ def _configure_logger(sender, **kwargs): lp.start() # TODO do we need to join #lp.join() - - raise AttributeError elif settings.terra.zone == 'runner' or settings.terra.zone == 'task': print("SGR - setting up runner/task logging") @@ -349,11 +296,13 @@ def _configure_logger(sender, **kwargs): # TODO don't bother with a formatter, since a socket handler sends the event # as an unformatted pickle - return sender._socket_handler + # FIXME this is hacky. it requires the executor know it is responsible for + # creating this variable on the logger + terra.logger._logs.main_log_handler = sender._socket_handler elif settings.terra.zone == 'task_controller': print("SGR - setting up task_controller logging") - raise AttributeError + super(CeleryExecutor, CeleryExecutor).configure_logger(sender, **kwargs) else: assert False, 'unknown zone: ' + settings.terra.zone diff --git a/terra/executor/utils.py b/terra/executor/utils.py index 947f5d01..19859e96 100644 --- a/terra/executor/utils.py +++ b/terra/executor/utils.py @@ -27,15 +27,13 @@ def _configure_logger(self, sender, **kwargs): print('SGR - connect configure_logger signals') # Register the Executor-specific configure_logger with the logger - if hasattr(self, 'configure_logger'): - self.configure_logger(sender, **kwargs) + self.configure_logger(sender, **kwargs) def _reconfigure_logger(self, sender, **kwargs): print('SGR - connect reconfigure_logger signals') # Register the Executor-specific configure_logger with the logger - if hasattr(self, 'reconfigure_logger'): - self.reconfigure_logger(sender, **kwargs) + self.reconfigure_logger(sender, **kwargs) def _connect_backend(self): ''' @@ -86,3 +84,9 @@ def configuration_map(self, service_info): '''ExecutorHandler: The executor handler that all services will be interfacing with when running parallel computation tasks. ''' + +# This Executor type is setup automatically, via +# Handler.__getattr__ => Handler._connection => Executor._connect_backend, +# when the signal is sent. 
+#terra.core.signals.logger_configure.connect(lambda _: Executor._configure_logger) +#terra.core.signals.logger_reconfigure.connect(lambda _: Executor._reconfigure_logger) diff --git a/terra/logger.py b/terra/logger.py index 39aa405f..5247727f 100644 --- a/terra/logger.py +++ b/terra/logger.py @@ -248,13 +248,9 @@ def reconfigure_logger(self, sender=None, **kwargs): self.set_level_and_formatter() - # This sends a signal to the current Executor type, a module level variable - # which has already been imported at the end of LasySettings.configure. - # Executor is setup automatically, via - # Handler.__getattr__ => Handler._connection => Executor._connect_backend, - # when the signal is sent to Executor._reconfigure_logger. - # We import Executor in LasySettings.configure instead of here to reduce - # the concerns of this module + # This sends a signal to the current Executor type, which has already been + # imported at the end of LasySettings.configure. We don't import Executor + # here to reduce the concerns of this module import terra.core.signals terra.core.signals.logger_reconfigure.send(sender=self) @@ -293,13 +289,9 @@ def configure_logger(self, sender, **kwargs): print('SGR - sending logger_configure signal') - # This sends a signal to the current Executor type, a module level variable - # which has already been imported at the end of LasySettings.configure. - # Executor is setup automatically, via - # Handler.__getattr__ => Handler._connection => Executor._connect_backend, - # when the signal is sent to Executor._configure_logger. - # We import Executor in LasySettings.configure instead of here to reduce - # the concerns of this module + # This sends a signal to the current Executor type, which has already been + # imported at the end of LasySettings.configure. We don't import Executor + # here to reduce the concerns of this module import terra.core.signals terra.core.signals.logger_configure.send(sender=self) From 618a5fdb682544682bfba15fb673971e3d432e41 Mon Sep 17 00:00:00 2001 From: Scott Richardson Date: Wed, 6 May 2020 09:01:57 -0400 Subject: [PATCH 30/94] update comments --- terra/core/signals.py | 2 +- terra/executor/utils.py | 13 +++++-------- terra/logger.py | 2 ++ 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/terra/core/signals.py b/terra/core/signals.py index 7b637b24..e6745f69 100644 --- a/terra/core/signals.py +++ b/terra/core/signals.py @@ -343,7 +343,7 @@ def _decorator(func): __all__ = ['Signal', 'receiver', 'post_settings_configured', - 'post_settings_context', 'logger_configure'] + 'post_settings_context', 'logger_configure', 'logger_reconfigure'] # a signal for settings done being loaded post_settings_configured = Signal() diff --git a/terra/executor/utils.py b/terra/executor/utils.py index 19859e96..1eaaa86e 100644 --- a/terra/executor/utils.py +++ b/terra/executor/utils.py @@ -18,11 +18,14 @@ class ExecutorHandler(ClassHandler): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - # REVIEW could be moved out of the class and _configure_logger could be - # a staticmethod once we can remove the hasattr check + # This Executor type is setup automatically, via + # Handler.__getattr__ => Handler._connection => Executor._connect_backend, + # when the signal is sent. 
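    # A minimal sketch of the lazy-connect pattern described above
    # (hypothetical stand-in names, not the real terra classes): attribute
    # access resolves the backend once, so the signal receivers registered
    # just below only reach the concrete executor after settings exist.
    #
    #   class LazyHandler:
    #       def __init__(self):
    #           self._backend = None
    #
    #       def __getattr__(self, name):
    #           if self._backend is None:
    #               self._backend = self._connect_backend()
    #           return getattr(self._backend, name)
    #
    #       def _connect_backend(self):
    #           # e.g. picked from settings.executor.type at first use
    #           from concurrent.futures import ThreadPoolExecutor
    #           return ThreadPoolExecutor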
terra.core.signals.logger_configure.connect(self._configure_logger) terra.core.signals.logger_reconfigure.connect(self._reconfigure_logger) + # These methods are necessary because the Executor actually behaves as a + # specific BaseExecutor type, so calls to methods must pass through this type def _configure_logger(self, sender, **kwargs): print('SGR - connect configure_logger signals') @@ -84,9 +87,3 @@ def configuration_map(self, service_info): '''ExecutorHandler: The executor handler that all services will be interfacing with when running parallel computation tasks. ''' - -# This Executor type is setup automatically, via -# Handler.__getattr__ => Handler._connection => Executor._connect_backend, -# when the signal is sent. -#terra.core.signals.logger_configure.connect(lambda _: Executor._configure_logger) -#terra.core.signals.logger_reconfigure.connect(lambda _: Executor._reconfigure_logger) diff --git a/terra/logger.py b/terra/logger.py index 5247727f..2456a298 100644 --- a/terra/logger.py +++ b/terra/logger.py @@ -292,6 +292,7 @@ def configure_logger(self, sender, **kwargs): # This sends a signal to the current Executor type, which has already been # imported at the end of LasySettings.configure. We don't import Executor # here to reduce the concerns of this module + # REVIEW can this be imported at the top? import terra.core.signals terra.core.signals.logger_configure.send(sender=self) @@ -344,6 +345,7 @@ def configure_logger(self, sender, **kwargs): print('SGR - logging configured for zone ' + settings.terra.zone) #show_logs_and_handlers() + # REVIEW this is odd if settings.terra.zone == 'runner' or settings.terra.zone == 'task': self.root_logger.removeHandler(self.stderr_handler) From 14dce8bf58e672e08dc8e35a8619a3b17f250ea2 Mon Sep 17 00:00:00 2001 From: Scott Richardson Date: Wed, 6 May 2020 15:34:16 -0400 Subject: [PATCH 31/94] update comments --- Justfile | 1 - terra/executor/base.py | 2 -- terra/executor/celery/executor.py | 1 + 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/Justfile b/Justfile index 7fc157f7..c10d832e 100755 --- a/Justfile +++ b/Justfile @@ -142,7 +142,6 @@ function terra_caseify() # We might be able to use CELERY_LOADER to avoid the -A argument TERRA_IS_CELERY_WORKER=1 Terra_Pipenv run python -m terra.executor.celery -A terra.executor.celery.app worker --loglevel="${TERRA_CELERY_LOG_LEVEL-INFO}" -n "${node_name}" - # REVIEW do you need extra args here ;; run_flower) # Start the flower server diff --git a/terra/executor/base.py b/terra/executor/base.py index dfafff25..890b136d 100644 --- a/terra/executor/base.py +++ b/terra/executor/base.py @@ -28,8 +28,6 @@ def reconfigure_logger(sender, **kwargs): sender._log_file.close() sender._log_file = open(log_file, 'a') - CeleryExecutor._reconfigure_logger(sender, **kwargs) - @staticmethod def configure_logger(sender, **kwargs): # sender is logger in this case diff --git a/terra/executor/celery/executor.py b/terra/executor/celery/executor.py index d9e73972..274cf326 100644 --- a/terra/executor/celery/executor.py +++ b/terra/executor/celery/executor.py @@ -38,6 +38,7 @@ logger = getLogger(__name__) +# stop celery from hijacking the logger @setup_logging.connect def setup_loggers(*args, **kwargs): print("SGR - celery logger") From 2c44cbc7d6653e373d49f57ced50731d6ef4f7ef Mon Sep 17 00:00:00 2001 From: Scott Richardson Date: Wed, 6 May 2020 15:35:00 -0400 Subject: [PATCH 32/94] get a stack trace when an exception happens in a celery worker --- terra/task.py | 3 +-- 1 file changed, 1 insertion(+), 2 
deletions(-) diff --git a/terra/task.py b/terra/task.py index 74171b27..c51718a4 100644 --- a/terra/task.py +++ b/terra/task.py @@ -148,8 +148,7 @@ def __call__(self, *args, **kwargs): self.settings = None return return_value -class LogErrorsTask(TerraTask): # from https://stackoverflow.com/a/45333231/1771778 def on_failure(self, exc, task_id, args, kwargs, einfo): logger.exception('Celery task failure!!!1', exc_info=exc) - super(LogErrorsTask, self).on_failure(exc, task_id, args, kwargs, einfo) + super(TerraTask, self).on_failure(exc, task_id, args, kwargs, einfo) From 555187ff532ae8ed0952e0ae17686a4eea2beb2b Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Wed, 6 May 2020 16:49:35 -0400 Subject: [PATCH 33/94] Add celery as a proper dependency Signed-off-by: Andy Neff --- setup.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index d03058cc..3b2f8cff 100644 --- a/setup.py +++ b/setup.py @@ -11,6 +11,8 @@ extra_requires=extra_requires, install_requires=[ "jstyleson", - "envcontext" + "envcontext", + # I use signal and task from celery, no matter what + "celery" ] ) From d329eedcaec021d9ae32f64042609b8693d24be8 Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Wed, 6 May 2020 18:57:29 -0400 Subject: [PATCH 34/94] Unhardcode logging server Signed-off-by: Andy Neff --- terra/core/settings.py | 24 +++++++++++++++---- terra/executor/celery/executor.py | 9 +++----- terra/logger.py | 38 +++++++++++++++---------------- 3 files changed, 42 insertions(+), 29 deletions(-) diff --git a/terra/core/settings.py b/terra/core/settings.py index da8dcb95..0c59c0f4 100644 --- a/terra/core/settings.py +++ b/terra/core/settings.py @@ -147,9 +147,11 @@ # POSSIBILITY OF SUCH DAMAGE. import os +from logging.handlers import DEFAULT_TCP_LOGGING_PORT from inspect import isfunction from functools import wraps from json import JSONEncoder +import platform from terra.core.exceptions import ImproperlyConfigured # Do not import terra.logger or terra.signals here, or any module that @@ -249,8 +251,12 @@ def unittest(self): return os.environ.get('TERRA_UNITTEST', None) == "1" +@settings_property +def need_to_set_virtualenv_dir(self): + raise ImproperlyConfigured("You are using the virtualenv compute, and did " + "not set settings.compute.virtualenv_dir in your " + "config file.") -# TODO: come up with a way for apps to extend this themselves global_templates = [ ( # Global Defaults @@ -260,7 +266,15 @@ def unittest(self): "level": "ERROR", "format": f"%(asctime)s (%(hostname)s:%(zone)s): %(levelname)s - %(filename)s - %(message)s", "date_format": None, - "style": "%" + "style": "%", + "server": { + # This is tricky use of a setting, because the master controller will + # be the first to set it, but the runner and task will inherit the + # master controller's values, not their node names, should they be + # different (such as celery and spark) + "hostname": platform.node(), + "port": DEFAULT_TCP_LOGGING_PORT + } }, "executor": { "type": "ThreadPoolExecutor", @@ -271,6 +285,8 @@ def unittest(self): 'volume_map': [] }, 'terra': { + # unlike other settings, this should NOT be overwritten by a + # config.json file, there is currently nothing to prevent that 'zone': 'controller' }, 'status_file': status_file, @@ -281,11 +297,11 @@ def unittest(self): ), ( {"compute": {"arch": "terra.compute.virtualenv"}}, # Pattern - {"compute": {"virtualenv_dir": None}} # Defaults + {"compute": {"virtualenv_dir": need_to_set_virtualenv_dir}} # Defaults ), ( # So much for DRY :( {"compute": {"arch": 
"virtualenv"}}, - {"compute": {"virtualenv_dir": None}} + {"compute": {"virtualenv_dir": need_to_set_virtualenv_dir}} ) ] ''':class:`list` of (:class:`dict`, :class:`dict`): Templates are how we diff --git a/terra/executor/celery/executor.py b/terra/executor/celery/executor.py index 274cf326..ee7c86da 100644 --- a/terra/executor/celery/executor.py +++ b/terra/executor/celery/executor.py @@ -267,9 +267,8 @@ def reconfigure_logger(sender, **kwargs): @staticmethod def configure_logger(sender, **kwargs): - # FIXME don't hardcode hostname/port - sender._hostname = 'kanade' # settings.terra.celery.hostname - sender._port = logging.handlers.DEFAULT_TCP_LOGGING_PORT # settings.terra.celery.logging_port + sender._hostname = settings.logging.server.hostname + sender._port = settings.logging.server.port if settings.terra.zone == 'controller': print("SGR - setting up controller logging") @@ -278,7 +277,7 @@ def configure_logger(sender, **kwargs): # setup the listener sender.tcp_logging_server = LogRecordSocketReceiver(sender._hostname, sender._port) - print('About to start TCP server...') + print('SGR - About to start TCP server...') lp = threading.Thread(target=sender.tcp_logging_server.serve_until_stopped) lp.setDaemon(True) @@ -292,8 +291,6 @@ def configure_logger(sender, **kwargs): sender._socket_handler = logging.handlers.SocketHandler(sender._hostname, sender._port) - # TODO would probably be good to also setup another handler to log to disk - # TODO don't bother with a formatter, since a socket handler sends the event # as an unformatted pickle diff --git a/terra/logger.py b/terra/logger.py index 2456a298..760d5d9c 100644 --- a/terra/logger.py +++ b/terra/logger.py @@ -363,25 +363,25 @@ def filter(self, record): return True -def show_log(k, v): - def show_dict_fields(prefix, dict1): - for fld,val in dict1.items(): - print('%s%s=%s' %(prefix, fld,val) ) - - if not isinstance(v, logging.PlaceHolder): - print('+ [%s] {%s} (%s) ' % (str.ljust( k, 20), str(v.__class__)[8:-2], logging.getLevelName(v.level)) ) - print(str.ljust( '-------------------------',20) ) - show_dict_fields(' -', v.__dict__) - - for h in v.handlers: - print(' +++%s (%s)' %(str(h.__class__)[8:-2], logging.getLevelName(h.level) )) - show_dict_fields(' -', h.__dict__) - -# from https://github.com/mickeyperlstein/logging_debugger/blob/master/__init__.py -def show_logs_and_handlers(): - show_log('root', logging.getLogger('')) - for k,v in logging.Logger.manager.loggerDict.items(): - show_log(k,v) +# def show_log(k, v): +# def show_dict_fields(prefix, dict1): +# for fld,val in dict1.items(): +# print('%s%s=%s' %(prefix, fld,val) ) + +# if not isinstance(v, logging.PlaceHolder): +# print('+ [%s] {%s} (%s) ' % (str.ljust( k, 20), str(v.__class__)[8:-2], logging.getLevelName(v.level)) ) +# print(str.ljust( '-------------------------',20) ) +# show_dict_fields(' -', v.__dict__) + +# for h in v.handlers: +# print(' +++%s (%s)' %(str(h.__class__)[8:-2], logging.getLevelName(h.level) )) +# show_dict_fields(' -', h.__dict__) + +# # from https://github.com/mickeyperlstein/logging_debugger/blob/master/__init__.py +# def show_logs_and_handlers(): +# show_log('root', logging.getLogger('')) +# for k,v in logging.Logger.manager.loggerDict.items(): +# show_log(k,v) class Logger(Logger_original): From a169977258d9f0837ec809d119e492e03a34f53a Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Fri, 8 May 2020 03:35:52 -0400 Subject: [PATCH 35/94] Fix flower and redis_commander Signed-off-by: Andy Neff --- Justfile | 2 +- docker-compose-main.yml | 6 +++--- 
2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Justfile b/Justfile index c10d832e..4fc219bb 100755 --- a/Justfile +++ b/Justfile @@ -145,7 +145,7 @@ function terra_caseify() ;; run_flower) # Start the flower server - Terra_Pipenv run celery -A terra.executor.celery.app flower + Terra_Pipenv run python -m terra.executor.celery -A terra.executor.celery.app flower ;; shutdown_celery) # Shuts down all celery works on all nodes Terra_Pipenv run python -c "from terra.executor.celery import app; app.control.broadcast('shutdown')" diff --git a/docker-compose-main.yml b/docker-compose-main.yml index 49eca801..932cca6a 100644 --- a/docker-compose-main.yml +++ b/docker-compose-main.yml @@ -47,14 +47,14 @@ services: - source: redis_secret target: ${TERRA_REDIS_SECRET_DOCKER} - source: redis_commander_secret - target: ${TERRA_REDIS_COMMANDER_SECRET_FILE} + target: ${TERRA_REDIS_COMMANDER_SECRET} command: | sh -c ' echo -n '"'"'{ "connections":[ { "password": "'"'"' > /redis-commander/config/local-production.json - cat /run/secrets/redis_password | sed '"'"'s|\\|\\\\|g;s|"|\\"|g'"'"' >> /redis-commander/config/local-production.json + cat /run/secrets/${TERRA_REDIS_SECRET_DOCKER} | sed '"'"'s|\\|\\\\|g;s|"|\\"|g'"'"' >> /redis-commander/config/local-production.json echo -n '"'"'", "host": "${TERRA_REDIS_HOSTNAME_DOCKER}", "label": "terra", @@ -69,7 +69,7 @@ services: "httpAuth": { "username": "admin", "passwordHash": "'"'"'>> /redis-commander/config/local-production.json - cat "/run/secrets/${TERRA_REDIS_COMMANDER_SECRET_FILE}" | sed '"'"'s|\\|\\\\|g;s|"|\\"|g'"'"' >> /redis-commander/config/local-production.json + cat "/run/secrets/${TERRA_REDIS_COMMANDER_SECRET}" | sed '"'"'s|\\|\\\\|g;s|"|\\"|g'"'"' >> /redis-commander/config/local-production.json echo '"'"'" } } From 9f613b7bc1bdb9b022e4b7cbf246c74344f9279f Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Fri, 8 May 2020 03:37:36 -0400 Subject: [PATCH 36/94] Remove run redis. 
- This is confusing, and not the right way to start redis Signed-off-by: Andy Neff --- Justfile | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Justfile b/Justfile index 4fc219bb..59196464 100755 --- a/Justfile +++ b/Justfile @@ -121,10 +121,6 @@ function terra_caseify() extra_args=$# ;; - run_redis) # Run redis - Just-docker-compose -f "${TERRA_CWD}/docker-compose.yml" run redis ${@+"${@}"} - extra_args=$# - ;; run_celery) # Starts a celery worker local node_name if [[ ${TERRA_LOCAL-} == 1 ]]; then From 439b30c69f61ae8808f4cb262037ccf1041bc5d5 Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Fri, 8 May 2020 04:22:32 -0400 Subject: [PATCH 37/94] set task vs task_controller zones right Signed-off-by: Andy Neff --- terra/executor/celery/__init__.py | 14 ++++++++++++++ terra/executor/celery/__main__.py | 4 ++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/terra/executor/celery/__init__.py b/terra/executor/celery/__init__.py index b3bcc191..3850ebd2 100644 --- a/terra/executor/celery/__init__.py +++ b/terra/executor/celery/__init__.py @@ -22,6 +22,20 @@ app.config_from_object(env['TERRA_CELERY_CONF']) +from celery.signals import worker_process_init, worker_init +@worker_process_init.connect +def start_worker_child(*args, **kwargs): + from terra import settings + settings.terra.zone = 'task' + print(args) + print(kwargs) + logger.info('hi') + +@worker_init.connect +def start_worker(*args, **kwargs): + logger.info('Hi') + + # app.running = False # from celery.signals import worker_process_init # @worker_process_init.connect diff --git a/terra/executor/celery/__main__.py b/terra/executor/celery/__main__.py index 4c69667e..98d6f0f3 100644 --- a/terra/executor/celery/__main__.py +++ b/terra/executor/celery/__main__.py @@ -15,8 +15,8 @@ def main(): { 'executor': {'type': 'CeleryExecutor'}, # FIXME - 'terra': {'zone': 'task'}, - #'terra': {'zone': 'task_controller'}, + # 'terra': {'zone': 'task'}, + 'terra': {'zone': 'task_controller'}, 'logging': {'level': 'NOTSET'} } ) From ec65c2cb534b949c5e4be59b3bee736ac31e6ce5 Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Fri, 8 May 2020 05:17:37 -0400 Subject: [PATCH 38/94] Refactor some [ci-skip] - Well, it's not completely broken, just not working right Signed-off-by: Andy Neff --- terra/core/settings.py | 2 +- terra/executor/base.py | 45 +++++++++++++++---------------- terra/executor/celery/__init__.py | 12 ++++----- terra/executor/celery/__main__.py | 5 ++-- terra/executor/celery/executor.py | 4 +-- terra/logger.py | 14 ++++++++-- terra/task.py | 6 ++--- 7 files changed, 47 insertions(+), 41 deletions(-) diff --git a/terra/core/settings.py b/terra/core/settings.py index 0c59c0f4..16541309 100644 --- a/terra/core/settings.py +++ b/terra/core/settings.py @@ -565,7 +565,7 @@ def __exit__(self, exc_type=None, exc_value=None, traceback=None): # Incase the logger was messed with in the context, reset it. 
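    # A rough sketch of the context-manager round trip this supports
    # (simplified, hypothetical usage of the real LazySettings):
    #
    #   with settings:
    #       settings.logging.level = 'DEBUG'   # temporary override
    #       ...                                # work with modified settings
    #   # on exit the pre-context state is restored and post_settings_context
    #   # fires, so listeners such as the logger can reset themselves too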
from terra.core.signals import post_settings_context - post_settings_context.send(sender=self) + post_settings_context.send(sender=self, post_settings_context=True) return return_value diff --git a/terra/executor/base.py b/terra/executor/base.py index 890b136d..5ce63e3a 100644 --- a/terra/executor/base.py +++ b/terra/executor/base.py @@ -1,4 +1,5 @@ import os +import logging import logging.handlers from concurrent.futures import Future, Executor, as_completed @@ -9,25 +10,6 @@ class BaseExecutor(Executor): - @staticmethod - def reconfigure_logger(sender, **kwargs): - # sender is logger in this case - # - # The default logging handler is a StreamHandler. This will reconfigure its - # output stream - - print("SGR - reconfigure logging") - - if settings.terra.zone == 'controller' or settings.terra.zone == 'task_controller': - log_file = os.path.join(settings.processing_dir, - terra.logger._logs.default_log_prefix) - - # if not os.path.samefile(log_file, sender._log_file.name): - if log_file != sender._log_file.name: - os.makedirs(settings.processing_dir, exist_ok=True) - sender._log_file.close() - sender._log_file = open(log_file, 'a') - @staticmethod def configure_logger(sender, **kwargs): # sender is logger in this case @@ -41,7 +23,7 @@ def configure_logger(sender, **kwargs): # Setup log file for use in configure sender._log_file = os.path.join(settings.processing_dir, - terra.logger._logs.default_log_prefix) + terra.logger._logs.default_log_prefix) os.makedirs(settings.processing_dir, exist_ok=True) sender._log_file = open(sender._log_file, 'a') @@ -50,10 +32,27 @@ def configure_logger(sender, **kwargs): # TODO: ProcessPool - Log server - # FIXME this is hacky. it requires the executor know it is responsible for - # creating this variable on the logger - terra.logger._logs.main_log_handler = handler + sender.root_logger.addHandler(handler) + sender.main_log_handler = handler + @staticmethod + def reconfigure_logger(sender, **kwargs): + # sender is logger in this case + # + # The default logging handler is a StreamHandler. 
This will reconfigure its + # output stream + + print("SGR - reconfigure logging") + + if settings.terra.zone == 'controller' or settings.terra.zone == 'task_controller': + log_file = os.path.join(settings.processing_dir, + terra.logger._logs.default_log_prefix) + + # if not os.path.samefile(log_file, sender._log_file.name): + if log_file != sender._log_file.name: + os.makedirs(settings.processing_dir, exist_ok=True) + sender._log_file.close() + sender._log_file = open(log_file, 'a') class BaseFuture(Future): pass diff --git a/terra/executor/celery/__init__.py b/terra/executor/celery/__init__.py index 3850ebd2..c103f1d7 100644 --- a/terra/executor/celery/__init__.py +++ b/terra/executor/celery/__init__.py @@ -27,13 +27,13 @@ def start_worker_child(*args, **kwargs): from terra import settings settings.terra.zone = 'task' - print(args) - print(kwargs) - logger.info('hi') +# print(args) +# print(kwargs) +# logger.info('hi') -@worker_init.connect -def start_worker(*args, **kwargs): - logger.info('Hi') +# @worker_init.connect +# def start_worker(*args, **kwargs): +# logger.info('Hi') # app.running = False diff --git a/terra/executor/celery/__main__.py b/terra/executor/celery/__main__.py index 98d6f0f3..1947e2a3 100644 --- a/terra/executor/celery/__main__.py +++ b/terra/executor/celery/__main__.py @@ -14,10 +14,9 @@ def main(): settings.configure( { 'executor': {'type': 'CeleryExecutor'}, - # FIXME - # 'terra': {'zone': 'task'}, 'terra': {'zone': 'task_controller'}, - 'logging': {'level': 'NOTSET'} + 'logging': {'level': 'INFO'} + # 'logging': {'level': 'NOTSET'} } ) print('SGR - celery.__main__.py') diff --git a/terra/executor/celery/executor.py b/terra/executor/celery/executor.py index ee7c86da..f7879d5b 100644 --- a/terra/executor/celery/executor.py +++ b/terra/executor/celery/executor.py @@ -294,9 +294,7 @@ def configure_logger(sender, **kwargs): # TODO don't bother with a formatter, since a socket handler sends the event # as an unformatted pickle - # FIXME this is hacky. it requires the executor know it is responsible for - # creating this variable on the logger - terra.logger._logs.main_log_handler = sender._socket_handler + sender.main_log_handler = sender._socket_handler elif settings.terra.zone == 'task_controller': print("SGR - setting up task_controller logging") diff --git a/terra/logger.py b/terra/logger.py index 760d5d9c..ff6481e3 100644 --- a/terra/logger.py +++ b/terra/logger.py @@ -186,6 +186,18 @@ def __init__(self): category=DeprecationWarning, module='osgeo', message="the imp module is deprecated") + @property + def main_log_handler(self): + try: + return self.__main_log_handler + except AttributeError: + raise AttributeError("'_logs' has no 'main_log_handler'. An executor " + "class' 'configure_logger' method should setup a " + "'main_log_handler'.") + @main_log_handler.setter + def main_log_handler(self, value): + self.__main_log_handler = value + def setup_logging_exception_hook(self): ''' Setup logging of uncaught exceptions @@ -292,14 +304,12 @@ def configure_logger(self, sender, **kwargs): # This sends a signal to the current Executor type, which has already been # imported at the end of LasySettings.configure. We don't import Executor # here to reduce the concerns of this module - # REVIEW can this be imported at the top? 
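    # A small sketch of the dispatch used here (hypothetical receiver,
    # mirroring the Signal connect/send API used elsewhere in terra):
    #
    #   def on_logger_configure(sender, **kwargs):
    #       # sender is the _SetupTerraLogger instance; attach handlers here
    #       sender.main_log_handler = logging.NullHandler()
    #
    #   terra.core.signals.logger_configure.connect(on_logger_configure,
    #                                               weak=False)
    #   # the send() below then calls every connected receiver with
    #   # sender=self, letting the executor/compute backends set up logging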
import terra.core.signals terra.core.signals.logger_configure.send(sender=self) self.set_level_and_formatter() # Swap some handlers - self.root_logger.addHandler(self.main_log_handler) self.root_logger.removeHandler(self.preconfig_stderr_handler) self.root_logger.removeHandler(self.preconfig_main_log_handler) self.root_logger.removeHandler(self.tmp_handler) diff --git a/terra/task.py b/terra/task.py index c51718a4..a96cdb2d 100644 --- a/terra/task.py +++ b/terra/task.py @@ -128,10 +128,10 @@ def __call__(self, *args, **kwargs): kwargs = self.translate_paths(kwargs, reverse_compute_volume_map, executor_volume_map) # Set up logger to talk to master controller - terra.logger._logs.reconfigure_logger() + terra.logger._logs.reconfigure_logger(pre_run_task=True) return_value = self.run(*args_only, **kwargs) - # REVIEW the problem is the zone changes when this gets called on scope __exit__ - terra.logger._logs.reconfigure_logger() + # # REVIEW the problem is the zone changes when this gets called on scope __exit__ + # terra.logger._logs.reconfigure_logger() # Calculate the runner mapped version of the executor's return value return_value = self.translate_paths(return_value, From 6f6e00b63e461bf853b163bc808dcfa5a59020b8 Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Fri, 8 May 2020 18:36:17 -0400 Subject: [PATCH 39/94] Minor cleanups Signed-off-by: Andy Neff --- terra/executor/base.py | 6 ++--- terra/executor/celery/executor.py | 43 ++++++++++++++++++------------ terra/executor/utils.py | 29 +++++++------------- terra/logger.py | 40 +++++++++++++++------------- terra/task.py | 44 +++++++++++++------------------ 5 files changed, 76 insertions(+), 86 deletions(-) diff --git a/terra/executor/base.py b/terra/executor/base.py index 5ce63e3a..f099d99a 100644 --- a/terra/executor/base.py +++ b/terra/executor/base.py @@ -27,13 +27,11 @@ def configure_logger(sender, **kwargs): os.makedirs(settings.processing_dir, exist_ok=True) sender._log_file = open(sender._log_file, 'a') - sender._logging_handler = logging.StreamHandler(stream=sender._log_file) - handler = sender._logging_handler + sender.main_log_handler = logging.StreamHandler(stream=sender._log_file) # TODO: ProcessPool - Log server - sender.root_logger.addHandler(handler) - sender.main_log_handler = handler + sender.root_logger.addHandler(sender.main_log_handler) @staticmethod def reconfigure_logger(sender, **kwargs): diff --git a/terra/executor/celery/executor.py b/terra/executor/celery/executor.py index f7879d5b..3c55499c 100644 --- a/terra/executor/celery/executor.py +++ b/terra/executor/celery/executor.py @@ -17,6 +17,7 @@ # limitations under the License. 
import os +import atexit from os import environ as env from terra.executor.base import BaseFuture, BaseExecutor from concurrent.futures import as_completed @@ -251,20 +252,6 @@ def __init__(self, service_info): return volume_map - @staticmethod - def reconfigure_logger(sender, **kwargs): - # FIXME no idea how to reset this - # setup the logging when a task is reconfigured; e.g., changing logging - # level or hostname - - if settings.terra.zone == 'runner' or settings.terra.zone == 'task': - print("SGR - reconfigure runner/task logging") - - # when the celery task is done, its logger is automatically reconfigured; - # use that opportunity to close the stream - if hasattr(sender, '_socket_handler'): - sender._socket_handler.close() - @staticmethod def configure_logger(sender, **kwargs): sender._hostname = settings.logging.server.hostname @@ -283,10 +270,18 @@ def configure_logger(sender, **kwargs): lp.setDaemon(True) # FIXME can't actually handle a log message until logging is done configuring lp.start() - # TODO do we need to join - #lp.join() + + @atexit.register + def cleanup_thread(): + print("SGR - Sending cease and desist to LogRecordSocketReceiver") + sender.tcp_logging_server.abort = 1 + lp.join(timeout=5) + if lp.is_alive(): + print("SGR - LogRecordSocketReceiver thread did not die") + print("SGR - LogRecordSocketReceiver died!") + elif settings.terra.zone == 'runner' or settings.terra.zone == 'task': - print("SGR - setting up runner/task logging") + print(f"SGR - setting up {settings.terra.zone} logging") sender._socket_handler = logging.handlers.SocketHandler(sender._hostname, sender._port) @@ -302,6 +297,20 @@ def configure_logger(sender, **kwargs): else: assert False, 'unknown zone: ' + settings.terra.zone + @staticmethod + def reconfigure_logger(sender, **kwargs): + # FIXME no idea how to reset this + # setup the logging when a task is reconfigured; e.g., changing logging + # level or hostname + + if settings.terra.zone == 'runner' or settings.terra.zone == 'task': + print("SGR - reconfigure runner/task logging") + + # when the celery task is done, its logger is automatically reconfigured; + # use that opportunity to close the stream + if hasattr(sender, '_socket_handler'): + sender._socket_handler.close() + # from https://docs.python.org/3/howto/logging-cookbook.html class LogRecordStreamHandler(socketserver.StreamRequestHandler): """Handler for a streaming logging request. diff --git a/terra/executor/utils.py b/terra/executor/utils.py index 1eaaa86e..b392070a 100644 --- a/terra/executor/utils.py +++ b/terra/executor/utils.py @@ -18,26 +18,6 @@ class ExecutorHandler(ClassHandler): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - # This Executor type is setup automatically, via - # Handler.__getattr__ => Handler._connection => Executor._connect_backend, - # when the signal is sent. 
- terra.core.signals.logger_configure.connect(self._configure_logger) - terra.core.signals.logger_reconfigure.connect(self._reconfigure_logger) - - # These methods are necessary because the Executor actually behaves as a - # specific BaseExecutor type, so calls to methods must pass through this type - def _configure_logger(self, sender, **kwargs): - print('SGR - connect configure_logger signals') - - # Register the Executor-specific configure_logger with the logger - self.configure_logger(sender, **kwargs) - - def _reconfigure_logger(self, sender, **kwargs): - print('SGR - connect reconfigure_logger signals') - - # Register the Executor-specific configure_logger with the logger - self.reconfigure_logger(sender, **kwargs) - def _connect_backend(self): ''' Loads the executor backend's base module, given either a fully qualified @@ -87,3 +67,12 @@ def configuration_map(self, service_info): '''ExecutorHandler: The executor handler that all services will be interfacing with when running parallel computation tasks. ''' +# This Executor type is setup automatically, via +# Handler.__getattr__ => Handler._connection => Executor._connect_backend, +# when the signal is sent. So use a lambda to delay getattr +terra.core.signals.logger_configure.connect( + lambda *args, **kwargs: Executor.configure_logger(*args, **kwargs), + weak=False) +terra.core.signals.logger_reconfigure.connect( + lambda *args, **kwargs: Executor.reconfigure_logger(*args, **kwargs), + weak=False) diff --git a/terra/logger.py b/terra/logger.py index ff6481e3..1b532e45 100644 --- a/terra/logger.py +++ b/terra/logger.py @@ -252,20 +252,6 @@ def handle_traceback(*args, **kwargs): # pragma: no cover except ImportError: # pragma: no cover pass - def reconfigure_logger(self, sender=None, **kwargs): - if not self._configured: - self.root_logger.error("It is unexpected for reconfigure_logger to be " - "called, without first calling configure_logger. " - "This is not critical, but should not happen.") - - self.set_level_and_formatter() - - # This sends a signal to the current Executor type, which has already been - # imported at the end of LasySettings.configure. We don't import Executor - # here to reduce the concerns of this module - import terra.core.signals - terra.core.signals.logger_reconfigure.send(sender=self) - def set_level_and_formatter(self): from terra import settings formatter = logging.Formatter(fmt=settings.logging.format, @@ -306,10 +292,9 @@ def configure_logger(self, sender, **kwargs): # here to reduce the concerns of this module import terra.core.signals terra.core.signals.logger_configure.send(sender=self) - self.set_level_and_formatter() - # Swap some handlers + # Now that the real logger has been set up, swap some handlers self.root_logger.removeHandler(self.preconfig_stderr_handler) self.root_logger.removeHandler(self.preconfig_main_log_handler) self.root_logger.removeHandler(self.tmp_handler) @@ -328,7 +313,7 @@ def configure_logger(self, sender, **kwargs): # level messages. This is probably not necessary because error/critical # messages before configure should be rare, and are probably worth # repeating. Repeating is the only way to get them formatted right the - # second time anyways. This applys to stderr only, not the log file + # second time anyways. 
This applies to stderr only, not the log file # if (x.levelno >= level)] and # (x.levelno < default_stderr_handler_level)] @@ -354,13 +339,30 @@ def configure_logger(self, sender, **kwargs): self.tmp_file = None print('SGR - logging configured for zone ' + settings.terra.zone) - #show_logs_and_handlers() - # REVIEW this is odd + + # Now in configure_logger, you are able to access settings and determine + # whether there should be a stderr handler or not. If you don't so this, + # both the master controller and service runner will output the same log + # messages, duplicating output on stderr. if settings.terra.zone == 'runner' or settings.terra.zone == 'task': self.root_logger.removeHandler(self.stderr_handler) self._configured = True + def reconfigure_logger(self, sender=None, **kwargs): + if not self._configured: + self.root_logger.error("It is unexpected for reconfigure_logger to be " + "called, without first calling configure_logger. " + "This is not critical, but should not happen.") + + # This sends a signal to the current Executor type, which has already been + # imported at the end of LazySettings.configure. We don't import Executor + # here to reduce the concerns of this module + import terra.core.signals + terra.core.signals.logger_reconfigure.send(sender=self) + + self.set_level_and_formatter() + class TerraFilter(logging.Filter): def filter(self, record): if not hasattr(record, 'hostname'): diff --git a/terra/task.py b/terra/task.py index a96cdb2d..d0185f86 100644 --- a/terra/task.py +++ b/terra/task.py @@ -18,6 +18,9 @@ __all__ = ['TerraTask', 'shared_task'] +# Take the shared task decorate, and add some defaults, so you don't need to +# specify this EVERY task, and gives you a way to add to all tasks +# automagically def shared_task(*args, **kwargs): kwargs['bind'] = kwargs.pop('bind', True) kwargs['base'] = kwargs.pop('base', TerraTask) @@ -25,19 +28,6 @@ def shared_task(*args, **kwargs): class TerraTask(Task): - settings = None - # @staticmethod - # def _patch_settings(args, kwargs): - # if 'TERRA_EXECUTOR_SETTINGS_FILE' in env: - # # TODO: Cache loads for efficiency? - # settings = json.load(env['TERRA_EXECUTOR_SETTINGS_FILE']) - - # # If args is not empty, the first arg was settings - # if args: - # args[0] = settings - # else: - # kwargs['settings'] = settings - def _get_volume_mappings(self): executor_volume_map = self.request.settings['executor']['volume_map'] @@ -80,6 +70,10 @@ def translate_paths(self, payload, reverse_compute_volume_map, payload, executor_volume_map) return payload + # Don't need to apply translations for apply, it runs locally + # def apply(self, *args, **kwargs): + + # apply_async needs to smuggle a copy of the settings to the task def apply_async(self, args=None, kwargs=None, task_id=None, *args2, **kwargs2): current_settings = TerraJSONEncoder.serializableSettings(settings) @@ -87,11 +81,6 @@ def apply_async(self, args=None, kwargs=None, task_id=None, headers={'settings': current_settings}, task_id=task_id, *args2, **kwargs2) - # Don't need to apply translations for apply, it runs locally - # def apply(self, *args, **kwargs): - # # TerraTask._patch_settings(args, kwargs) - # return super().apply(*args, settings={'X': 15}, **kwargs) - def __call__(self, *args, **kwargs): # this is only set when apply_async was called. 
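    # Sketch of the settings round trip (assumes the header set in
    # apply_async above is exposed worker-side as self.request.settings;
    # the task and file names below are illustrative only):
    #
    #   @shared_task
    #   def process_tile(self, tile_path):
    #       # runs in the 'task' zone with the smuggled, path-translated
    #       # settings already swapped in by this __call__ wrapper
    #       return do_work(tile_path, settings.processing_dir)
    #
    #   process_tile.apply_async(args=('/data/tile_001.tif',))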
if getattr(self.request, 'settings', None): @@ -99,6 +88,8 @@ def __call__(self, *args, **kwargs): # Cover a potential (unlikely) corner case where setting might not be # configured yet settings.configure({'processing_dir': gettempdir()}) + + # Create a settings context, so I can replace it with the task's settings with settings: # Calculate the exector's mapped version of the runner's settings compute_volume_map, reverse_compute_volume_map, \ @@ -109,6 +100,7 @@ def __call__(self, *args, **kwargs): settings._wrapped.clear() settings._wrapped.update(self.translate_paths(self.request.settings, reverse_compute_volume_map, executor_volume_map)) + # Just in case processing dir doesn't exists if not os.path.exists(settings.processing_dir): logger.critical(f'Dir "{settings.processing_dir}" is not accessible ' @@ -120,7 +112,10 @@ def __call__(self, *args, **kwargs): logger.error('SGR - TerraTask ZONE ' + str(settings.terra.zone)) - settings.terra.zone = 'task' # was runner + # No longer needed here, all celery worker pool children are set to + # 'task' zone permanently, by worker_process_init + # settings.terra.zone = 'task' # was runner + # Calculate the exector's mapped version of the arguments kwargs = args_to_kwargs(self.run, args, kwargs) args_only = kwargs.pop(ARGS, ()) @@ -130,8 +125,6 @@ def __call__(self, *args, **kwargs): # Set up logger to talk to master controller terra.logger._logs.reconfigure_logger(pre_run_task=True) return_value = self.run(*args_only, **kwargs) - # # REVIEW the problem is the zone changes when this gets called on scope __exit__ - # terra.logger._logs.reconfigure_logger() # Calculate the runner mapped version of the executor's return value return_value = self.translate_paths(return_value, @@ -145,10 +138,9 @@ def __call__(self, *args, **kwargs): return_value = self.run(*args, **kwargs) finally: settings.terra.zone = original_zone - self.settings = None return return_value - # from https://stackoverflow.com/a/45333231/1771778 - def on_failure(self, exc, task_id, args, kwargs, einfo): - logger.exception('Celery task failure!!!1', exc_info=exc) - super(TerraTask, self).on_failure(exc, task_id, args, kwargs, einfo) + # # from https://stackoverflow.com/a/45333231/1771778 + # def on_failure(self, exc, task_id, args, kwargs, einfo): + # logger.exception('Celery task failure!!!', exc_info=exc) + # return super().on_failure(exc, task_id, args, kwargs, einfo) From ff9702774ad23be4d4b65836fb2ff966307e516d Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Fri, 8 May 2020 20:53:46 -0400 Subject: [PATCH 40/94] Mid fixing Signed-off-by: Andy Neff --- Justfile | 2 +- docker-compose-main.yml | 1 - terra/compute/base.py | 91 +++++++++++++++- terra/compute/utils.py | 8 +- terra/executor/base.py | 25 ----- terra/executor/celery/executor.py | 175 ++++++++---------------------- terra/logger.py | 86 +++++++++++++-- 7 files changed, 224 insertions(+), 164 deletions(-) diff --git a/Justfile b/Justfile index 59196464..9f4c2307 100755 --- a/Justfile +++ b/Justfile @@ -137,7 +137,7 @@ function terra_caseify() fi # We might be able to use CELERY_LOADER to avoid the -A argument - TERRA_IS_CELERY_WORKER=1 Terra_Pipenv run python -m terra.executor.celery -A terra.executor.celery.app worker --loglevel="${TERRA_CELERY_LOG_LEVEL-INFO}" -n "${node_name}" + Terra_Pipenv run python -m terra.executor.celery -A terra.executor.celery.app worker --loglevel="${TERRA_CELERY_LOG_LEVEL-INFO}" -n "${node_name}" ;; run_flower) # Start the flower server diff --git a/docker-compose-main.yml b/docker-compose-main.yml 
index 932cca6a..353d13c5 100644 --- a/docker-compose-main.yml +++ b/docker-compose-main.yml @@ -17,7 +17,6 @@ services: - JUST_SETTINGS=${TERRA_TERRA_DIR_DOCKER}/terra.env - PYTHONPATH=${TERRA_PYTHONPATH-} - TZ - - TERRA_IS_CELERY_WORKER=${TERRA_IS_CELERY_WORKER-0} cap_add: - SYS_PTRACE # Useful for gdb volumes: diff --git a/terra/compute/base.py b/terra/compute/base.py index 6aa1f86b..691c4d8b 100644 --- a/terra/compute/base.py +++ b/terra/compute/base.py @@ -1,10 +1,15 @@ import os import json +import time +import atexit +from logging import StreamHandler +from logging.handlers import SocketHandler +import threading from terra import settings import terra.compute.utils from terra.executor import Executor -from terra.logger import getLogger +from terra.logger import getLogger, LogRecordSocketReceiver logger = getLogger(__name__) @@ -186,5 +191,89 @@ def configuration_map_service(self, service_info): return service_info.volumes + @staticmethod + def configure_logger(sender, **kwargs): + if settings.terra.zone == 'controller': + print("SGR - setting up controller logging") + + # Setup log file for use in configure + sender._log_file = os.path.join(settings.processing_dir, + terra.logger._logs.default_log_prefix) + os.makedirs(settings.processing_dir, exist_ok=True) + sender._log_file = open(sender._log_file, 'a') + sender.main_log_handler = StreamHandler(stream=sender._log_file) + sender.root_logger.addHandler(sender.main_log_handler) + + + # setup the TCP socket listener + sender.tcp_logging_server = LogRecordSocketReceiver( + settings.logging.server.hostname, settings.logging.server.port) + print('SGR - About to start TCP server...') + + listener_thread = threading.Thread( + target=sender.tcp_logging_server.serve_until_stopped) + listener_thread.setDaemon(True) + listener_thread.start() + + # Wait up to a second, to make sure the thread started + for _ in range(1000): + if sender.tcp_logging_server.ready: + break + time.sleep(0.001) + + # Auto cleanup + @atexit.register + def cleanup_thread(): + print("SGR - Sending cease and desist to LogRecordSocketReceiver") + sender.tcp_logging_server.abort = 1 + listener_thread.join(timeout=5) + if listener_thread.is_alive(): + print("SGR - LogRecordSocketReceiver thread did not die") + print("SGR - LogRecordSocketReceiver died!") + elif settings.terra.zone == 'runner': + print(f"SGR - setting up BaseCompute:runner logging") + + sender.main_log_handler = SocketHandler( + settings.logging.server.hostname, settings.logging.server.port) + + # Now in configure_logger, you are able to access settings and determine + # whether there should be a stderr handler or not. If you don't so this, + # both the master controller and service runner will output the same log + # messages, duplicating output on stderr. + sender.root_logger.removeHandler(sender.stderr_handler) + # Some executors may need to do this too. + + @staticmethod + def reconfigure_logger(sender, **kwargs): + # sender is logger in this case + # + # The default logging handler is a StreamHandler. 
This will reconfigure its + # output stream + + if settings.terra.zone == 'controller': + print("SGR - BaseComputer:controller reconfigure logging") + log_file = os.path.join(settings.processing_dir, + terra.logger._logs.default_log_prefix) + + # if not os.path.samefile(log_file, sender._log_file.name): + if log_file != sender._log_file.name: + os.makedirs(settings.processing_dir, exist_ok=True) + sender._log_file.close() + sender._log_file = open(log_file, 'a') + elif settings.terra.zone == 'runner': + print("SGR - BaseComputer:runner reconfigure logging") + # Only if it's changed + if settings.logging.server.hostname != sender.main_log_handler.host or \ + settings.logging.server.port != sender.main_log_handler.port: + # Reconnect Socket Handler + sender.main_log_handler.close() + try: + sender.root_logger.removeHandler(sender.main_log_handler) + except ValueError: + print(f"This shouldn't be happening...") + pass + + sender.main_log_handler = SocketHandler( + settings.logging.server.hostname, settings.logging.server.port) services = {} diff --git a/terra/compute/utils.py b/terra/compute/utils.py index 8d155dd1..95e30468 100644 --- a/terra/compute/utils.py +++ b/terra/compute/utils.py @@ -40,6 +40,7 @@ from vsi.tools.python import nested_patch from terra.core.utils import Handler +import terra.core.signals from terra import settings import terra.compute.base from terra.core.settings import filename_suffixes @@ -92,7 +93,12 @@ def _connect_backend(self): For the most part, workflows will be interacting with :data:`compute` to ``run`` services. Easier access via ``terra.compute.compute`` ''' - +terra.core.signals.logger_configure.connect( + lambda *args, **kwargs: compute.configure_logger(*args, **kwargs), + weak=False) +terra.core.signals.logger_reconfigure.connect( + lambda *args, **kwargs: compute.reconfigure_logger(*args, **kwargs), + weak=False) def get_default_service_class(cls): ''' diff --git a/terra/executor/base.py b/terra/executor/base.py index f099d99a..c19d409d 100644 --- a/terra/executor/base.py +++ b/terra/executor/base.py @@ -18,21 +18,6 @@ def configure_logger(sender, **kwargs): print('SGR - configure logging ' + settings.terra.zone) - # REVIEW TERRA_IS_CELERY_WORKER may not be needed anymore, now that we have - # zones. 
it is in the Justfile and docker-compose.yml - - # Setup log file for use in configure - sender._log_file = os.path.join(settings.processing_dir, - terra.logger._logs.default_log_prefix) - os.makedirs(settings.processing_dir, exist_ok=True) - sender._log_file = open(sender._log_file, 'a') - - sender.main_log_handler = logging.StreamHandler(stream=sender._log_file) - - # TODO: ProcessPool - Log server - - sender.root_logger.addHandler(sender.main_log_handler) - @staticmethod def reconfigure_logger(sender, **kwargs): # sender is logger in this case @@ -42,15 +27,5 @@ def reconfigure_logger(sender, **kwargs): print("SGR - reconfigure logging") - if settings.terra.zone == 'controller' or settings.terra.zone == 'task_controller': - log_file = os.path.join(settings.processing_dir, - terra.logger._logs.default_log_prefix) - - # if not os.path.samefile(log_file, sender._log_file.name): - if log_file != sender._log_file.name: - os.makedirs(settings.processing_dir, exist_ok=True) - sender._log_file.close() - sender._log_file = open(log_file, 'a') - class BaseFuture(Future): pass diff --git a/terra/executor/celery/executor.py b/terra/executor/celery/executor.py index 3c55499c..8975ceff 100644 --- a/terra/executor/celery/executor.py +++ b/terra/executor/celery/executor.py @@ -19,20 +19,18 @@ import os import atexit from os import environ as env -from terra.executor.base import BaseFuture, BaseExecutor from concurrent.futures import as_completed from concurrent.futures._base import (RUNNING, FINISHED, CANCELLED, CANCELLED_AND_NOTIFIED) from threading import Lock, Thread import time -import logging.handlers -import pickle -import socketserver -import struct -import threading +from logging import NullHandler, StreamHandler +from logging.handlers import SocketHandler from celery.signals import setup_logging +from terra.executor.base import BaseFuture, BaseExecutor +from terra.core.exceptions import ImproperlyConfigured import terra from terra import settings from terra.logger import getLogger @@ -254,130 +252,51 @@ def __init__(self, service_info): @staticmethod def configure_logger(sender, **kwargs): - sender._hostname = settings.logging.server.hostname - sender._port = settings.logging.server.port - - if settings.terra.zone == 'controller': - print("SGR - setting up controller logging") - - super(CeleryExecutor, CeleryExecutor).configure_logger(sender, **kwargs) - - # setup the listener - sender.tcp_logging_server = LogRecordSocketReceiver(sender._hostname, sender._port) - print('SGR - About to start TCP server...') - - lp = threading.Thread(target=sender.tcp_logging_server.serve_until_stopped) - lp.setDaemon(True) - # FIXME can't actually handle a log message until logging is done configuring - lp.start() - - @atexit.register - def cleanup_thread(): - print("SGR - Sending cease and desist to LogRecordSocketReceiver") - sender.tcp_logging_server.abort = 1 - lp.join(timeout=5) - if lp.is_alive(): - print("SGR - LogRecordSocketReceiver thread did not die") - print("SGR - LogRecordSocketReceiver died!") - - elif settings.terra.zone == 'runner' or settings.terra.zone == 'task': - print(f"SGR - setting up {settings.terra.zone} logging") - - sender._socket_handler = logging.handlers.SocketHandler(sender._hostname, - sender._port) - - # TODO don't bother with a formatter, since a socket handler sends the event - # as an unformatted pickle - - sender.main_log_handler = sender._socket_handler + if settings.terra.zone == 'task': + print(f"SGR - Not setting up CeleryExecutor:task logging") + 
sender.main_log_handler = NullHandler() elif settings.terra.zone == 'task_controller': - print("SGR - setting up task_controller logging") - - super(CeleryExecutor, CeleryExecutor).configure_logger(sender, **kwargs) - else: - assert False, 'unknown zone: ' + settings.terra.zone + print("SGR - setting up CeleryExecutor:task_controller logging") + # TODO: Not dry with BaseComputer configure(controller) + # Setup log file for use in configure + sender._log_file = os.path.join(settings.processing_dir, + terra.logger._logs.default_log_prefix) + os.makedirs(settings.processing_dir, exist_ok=True) + sender._log_file = open(sender._log_file, 'a') + sender.main_log_handler = StreamHandler(stream=sender._log_file) + sender.root_logger.addHandler(sender.main_log_handler) @staticmethod - def reconfigure_logger(sender, **kwargs): - # FIXME no idea how to reset this - # setup the logging when a task is reconfigured; e.g., changing logging - # level or hostname - - if settings.terra.zone == 'runner' or settings.terra.zone == 'task': + def reconfigure_logger(sender, pre_run_task=False, + post_settings_context=False, **kwargs): + # if settings.terra.zone == 'runner' or + if settings.terra.zone == 'task': print("SGR - reconfigure runner/task logging") - # when the celery task is done, its logger is automatically reconfigured; - # use that opportunity to close the stream - if hasattr(sender, '_socket_handler'): - sender._socket_handler.close() - -# from https://docs.python.org/3/howto/logging-cookbook.html -class LogRecordStreamHandler(socketserver.StreamRequestHandler): - """Handler for a streaming logging request. - - This basically logs the record using whatever logging policy is - configured locally. - """ - - def handle(self): - """ - Handle multiple requests - each expected to be a 4-byte length, - followed by the LogRecord in pickle format. Logs the record - according to whatever policy is configured locally. - """ - while True: - chunk = self.connection.recv(4) - if len(chunk) < 4: - break - slen = struct.unpack('>L', chunk)[0] - chunk = self.connection.recv(slen) - while len(chunk) < slen: - chunk = chunk + self.connection.recv(slen - len(chunk)) - obj = self.unPickle(chunk) - record = logging.makeLogRecord(obj) - self.handleLogRecord(record) - - def unPickle(self, data): - return pickle.loads(data) - - def handleLogRecord(self, record): - # if a name is specified, we use the named logger rather than the one - # implied by the record. - if self.server.logname is not None: - name = self.server.logname - else: - name = record.name - logger = terra.logger.getLogger(name) - # N.B. EVERY record gets logged. This is because Logger.handle - # is normally called AFTER logger-level filtering. If you want - # to do filtering, do it at the client end to save wasting - # cycles and network bandwidth! - logger.handle(record) - -class LogRecordSocketReceiver(socketserver.ThreadingTCPServer): - """ - Simple TCP socket-based logging receiver suitable for testing. 
- """ - - allow_reuse_address = True - - def __init__(self, host='localhost', - port=logging.handlers.DEFAULT_TCP_LOGGING_PORT, - handler=LogRecordStreamHandler): - socketserver.ThreadingTCPServer.__init__(self, (host, port), handler) - self.abort = 0 - self.timeout = 1 - self.logname = None - - def serve_until_stopped(self): - import select - abort = 0 - print('SGR - STARTING LISTNER') - while not abort: - rd, wr, ex = select.select([self.socket.fileno()], - [], [], - self.timeout) - if rd: - print('SGR - RD') - self.handle_request() - abort = self.abort + if pre_run_task: + print(f"SGR - Actually setting up CeleryExecutor:task logging") + sender.main_log_handler = SocketHandler( + settings.logging.server.hostname, + settings.logging.server.port) + + if post_settings_context: + print(f"SGR - Actually destroying CeleryExecutor:task logging") + # when the celery task is done, its logger is automatically + # reconfigured; use that opportunity to close the stream + if sender.main_log_handler: + sender.main_log_handler.close() + try: + sender.root_logger.removeHandler(sender.main_log_handler) + except ValueError: + print(f"This shouldn't be happening...") + pass + sender.main_log_handler = NullHandler() + elif settings.terra.zone == 'task_controller': + # TODO: Not dry with BaseComputer reconfigure(controller) + log_file = os.path.join(settings.processing_dir, + terra.logger._logs.default_log_prefix) + + if log_file != sender._log_file.name: + os.makedirs(settings.processing_dir, exist_ok=True) + sender._log_file.close() + sender._log_file = open(log_file, 'a') diff --git a/terra/logger.py b/terra/logger.py index 1b532e45..a41c725c 100644 --- a/terra/logger.py +++ b/terra/logger.py @@ -67,6 +67,10 @@ import io import warnings from datetime import datetime, timezone +import socketserver +import struct +import select +import pickle from terra.core.exceptions import ImproperlyConfigured # Do not import terra.settings or terra.signals here, or any module that @@ -121,6 +125,81 @@ def __exit__(self, et, ev, tb): # implicit return of None => don't swallow exceptions +# from https://docs.python.org/3/howto/logging-cookbook.html +class LogRecordStreamHandler(socketserver.StreamRequestHandler): + """Handler for a streaming logging request. + + This basically logs the record using whatever logging policy is + configured locally. + """ + + def handle(self): + """ + Handle multiple requests - each expected to be a 4-byte length, + followed by the LogRecord in pickle format. Logs the record + according to whatever policy is configured locally. + """ + while True: + chunk = self.connection.recv(4) + if len(chunk) < 4: + break + slen = struct.unpack('>L', chunk)[0] + chunk = self.connection.recv(slen) + while len(chunk) < slen: + chunk = chunk + self.connection.recv(slen - len(chunk)) + obj = self.unPickle(chunk) + record = logging.makeLogRecord(obj) + self.handleLogRecord(record) + + def unPickle(self, data): + return pickle.loads(data) + + def handleLogRecord(self, record): + # if a name is specified, we use the named logger rather than the one + # implied by the record. + if self.server.logname is not None: + name = self.server.logname + else: + name = record.name + logger = getLogger(name) + # N.B. EVERY record gets logged. This is because Logger.handle + # is normally called AFTER logger-level filtering. If you want + # to do filtering, do it at the client end to save wasting + # cycles and network bandwidth! 
+ logger.handle(record) + + +class LogRecordSocketReceiver(socketserver.ThreadingTCPServer): + """ + Simple TCP socket-based logging receiver suitable for testing. + """ + + allow_reuse_address = True + + def __init__(self, host='localhost', + port=logging.handlers.DEFAULT_TCP_LOGGING_PORT, + handler=LogRecordStreamHandler): + socketserver.ThreadingTCPServer.__init__(self, (host, port), handler) + self.abort = False + self.ready = False + self.timeout = 1 + self.logname = None + + def serve_until_stopped(self): + abort = False + ready = True + print('SGR - STARTING LISTENER') + while not abort: + rd, wr, ex = select.select([self.socket.fileno()], + [], [], + self.timeout) + if rd: + print('SGR - RD') + self.handle_request() + abort = self.abort + ready = False + + class _SetupTerraLogger(): ''' A simple logger class used internally to configure the logger before and @@ -340,13 +419,6 @@ def configure_logger(self, sender, **kwargs): print('SGR - logging configured for zone ' + settings.terra.zone) - # Now in configure_logger, you are able to access settings and determine - # whether there should be a stderr handler or not. If you don't so this, - # both the master controller and service runner will output the same log - # messages, duplicating output on stderr. - if settings.terra.zone == 'runner' or settings.terra.zone == 'task': - self.root_logger.removeHandler(self.stderr_handler) - self._configured = True def reconfigure_logger(self, sender=None, **kwargs): From b98b85e623e12d914c0efa4c62f06bcb9ea68a18 Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Sat, 9 May 2020 00:19:45 -0400 Subject: [PATCH 41/94] Logging working! - ProcessPool still needs to be implemented Signed-off-by: Andy Neff --- terra/compute/base.py | 2 ++ terra/core/settings.py | 2 +- terra/executor/base.py | 13 ++----------- terra/executor/celery/executor.py | 7 +++++-- terra/executor/process.py | 8 ++++++++ terra/executor/thread.py | 7 +++++++ terra/executor/utils.py | 16 ++++++++++------ terra/logger.py | 8 ++++---- terra/task.py | 6 ++---- 9 files changed, 41 insertions(+), 28 deletions(-) create mode 100644 terra/executor/process.py create mode 100644 terra/executor/thread.py diff --git a/terra/compute/base.py b/terra/compute/base.py index 691c4d8b..2feb52ca 100644 --- a/terra/compute/base.py +++ b/terra/compute/base.py @@ -235,6 +235,7 @@ def cleanup_thread(): sender.main_log_handler = SocketHandler( settings.logging.server.hostname, settings.logging.server.port) + sender.root_logger.addHandler(sender.main_log_handler) # Now in configure_logger, you are able to access settings and determine # whether there should be a stderr handler or not. 
If you don't so this, @@ -275,5 +276,6 @@ def reconfigure_logger(sender, **kwargs): sender.main_log_handler = SocketHandler( settings.logging.server.hostname, settings.logging.server.port) + sender.root_logger.addHandler(sender.main_log_handler) services = {} diff --git a/terra/core/settings.py b/terra/core/settings.py index 16541309..095ca936 100644 --- a/terra/core/settings.py +++ b/terra/core/settings.py @@ -264,7 +264,7 @@ def need_to_set_virtualenv_dir(self): { "logging": { "level": "ERROR", - "format": f"%(asctime)s (%(hostname)s:%(zone)s): %(levelname)s - %(filename)s - %(message)s", + "format": "%(asctime)s (%(hostname)s:%(zone)s): \x1b[31m%(levelname)s/%(processName)s\x1b[0m - %(filename)s - %(message)s", "date_format": None, "style": "%", "server": { diff --git a/terra/executor/base.py b/terra/executor/base.py index c19d409d..171d689c 100644 --- a/terra/executor/base.py +++ b/terra/executor/base.py @@ -12,20 +12,11 @@ class BaseExecutor(Executor): @staticmethod def configure_logger(sender, **kwargs): - # sender is logger in this case - - # ThreadPoolExecutor will work just fine with a normal StreamHandler - - print('SGR - configure logging ' + settings.terra.zone) + pass @staticmethod def reconfigure_logger(sender, **kwargs): - # sender is logger in this case - # - # The default logging handler is a StreamHandler. This will reconfigure its - # output stream - - print("SGR - reconfigure logging") + pass class BaseFuture(Future): pass diff --git a/terra/executor/celery/executor.py b/terra/executor/celery/executor.py index 8975ceff..99fd55d1 100644 --- a/terra/executor/celery/executor.py +++ b/terra/executor/celery/executor.py @@ -271,14 +271,15 @@ def reconfigure_logger(sender, pre_run_task=False, post_settings_context=False, **kwargs): # if settings.terra.zone == 'runner' or if settings.terra.zone == 'task': - print("SGR - reconfigure runner/task logging") + print("SGR - reconfigure task logging", kwargs) if pre_run_task: print(f"SGR - Actually setting up CeleryExecutor:task logging") + print(f"SGR - {settings.logging.server.hostname}:{settings.logging.server.port}") sender.main_log_handler = SocketHandler( settings.logging.server.hostname, settings.logging.server.port) - + sender.root_logger.addHandler(sender.main_log_handler) if post_settings_context: print(f"SGR - Actually destroying CeleryExecutor:task logging") # when the celery task is done, its logger is automatically @@ -291,6 +292,8 @@ def reconfigure_logger(sender, pre_run_task=False, print(f"This shouldn't be happening...") pass sender.main_log_handler = NullHandler() + sender.root_logger.addHandler(sender.main_log_handler) + print("SGR - reconfigured task logging") elif settings.terra.zone == 'task_controller': # TODO: Not dry with BaseComputer reconfigure(controller) log_file = os.path.join(settings.processing_dir, diff --git a/terra/executor/process.py b/terra/executor/process.py new file mode 100644 index 00000000..2b99cc24 --- /dev/null +++ b/terra/executor/process.py @@ -0,0 +1,8 @@ +import concurrent.futures +import terra.executor.base + +__all__ = ['ProcessPoolExecutor'] + +class ProcessPoolExecutor(concurrent.futures.ProcessPoolExecutor, + terra.executor.base.BaseExecutor): + pass \ No newline at end of file diff --git a/terra/executor/thread.py b/terra/executor/thread.py new file mode 100644 index 00000000..429e19b1 --- /dev/null +++ b/terra/executor/thread.py @@ -0,0 +1,7 @@ +import concurrent.futures +import terra.executor.base + +__all__ = ['ThreadPoolExecutor'] + +class 
ThreadPoolExecutor(concurrent.futures.ThreadPoolExecutor, terra.executor.base.BaseExecutor): + pass \ No newline at end of file diff --git a/terra/executor/utils.py b/terra/executor/utils.py index b392070a..d3463830 100644 --- a/terra/executor/utils.py +++ b/terra/executor/utils.py @@ -43,13 +43,17 @@ def _connect_backend(self): elif backend_name == "SyncExecutor": from terra.executor.sync import SyncExecutor return SyncExecutor - elif backend_name == "ThreadPoolExecutor": - return concurrent.futures.ThreadPoolExecutor - elif backend_name == "ProcessPoolExecutor": - return concurrent.futures.ProcessPoolExecutor + elif backend_name == "ThreadPoolExecutor" or \ + backend_name == "concurrent.futures.ThreadPoolExecutor": + from terra.executor.thread import ThreadPoolExecutor + return terra.executor.thread.ThreadPoolExecutor + elif backend_name == "ProcessPoolExecutor" or \ + backend_name == "concurrent.futures.ProcessPoolExecutor": + from terra.executor.process import ProcessPoolExecutor + return terra.executor.process.ProcessPoolExecutor elif backend_name == "CeleryExecutor": - import terra.executor.celery - return terra.executor.celery.CeleryExecutor + from terra.executor.celery import CeleryExecutor + return CeleryExecutor else: module_name = backend_name.rsplit('.', 1) module = import_module(f'{module_name[0]}') diff --git a/terra/logger.py b/terra/logger.py index a41c725c..eec0e677 100644 --- a/terra/logger.py +++ b/terra/logger.py @@ -350,7 +350,7 @@ def set_level_and_formatter(self): self.main_log_handler.setFormatter(formatter) self.stderr_handler.setFormatter(formatter) - def configure_logger(self, sender, **kwargs): + def configure_logger(self, sender=None, signal=None, **kwargs): ''' Call back function to configure the logger after settings have been configured @@ -370,7 +370,7 @@ def configure_logger(self, sender, **kwargs): # imported at the end of LasySettings.configure. We don't import Executor # here to reduce the concerns of this module import terra.core.signals - terra.core.signals.logger_configure.send(sender=self) + terra.core.signals.logger_configure.send(sender=self, **kwargs) self.set_level_and_formatter() # Now that the real logger has been set up, swap some handlers @@ -421,7 +421,7 @@ def configure_logger(self, sender, **kwargs): self._configured = True - def reconfigure_logger(self, sender=None, **kwargs): + def reconfigure_logger(self, sender=None, signal=None, **kwargs): if not self._configured: self.root_logger.error("It is unexpected for reconfigure_logger to be " "called, without first calling configure_logger. " @@ -431,7 +431,7 @@ def reconfigure_logger(self, sender=None, **kwargs): # imported at the end of LazySettings.configure. We don't import Executor # here to reduce the concerns of this module import terra.core.signals - terra.core.signals.logger_reconfigure.send(sender=self) + terra.core.signals.logger_reconfigure.send(sender=self, **kwargs) self.set_level_and_formatter() diff --git a/terra/task.py b/terra/task.py index d0185f86..d5e2cc29 100644 --- a/terra/task.py +++ b/terra/task.py @@ -100,6 +100,8 @@ def __call__(self, *args, **kwargs): settings._wrapped.clear() settings._wrapped.update(self.translate_paths(self.request.settings, reverse_compute_volume_map, executor_volume_map)) + # This is needed here, because I just loaded settings from a runner! 
+ settings.terra.zone = 'task' # Just in case processing dir doesn't exists if not os.path.exists(settings.processing_dir): @@ -112,10 +114,6 @@ def __call__(self, *args, **kwargs): logger.error('SGR - TerraTask ZONE ' + str(settings.terra.zone)) - # No longer needed here, all celery worker pool children are set to - # 'task' zone permanently, by worker_process_init - # settings.terra.zone = 'task' # was runner - # Calculate the exector's mapped version of the arguments kwargs = args_to_kwargs(self.run, args, kwargs) args_only = kwargs.pop(ARGS, ()) From 9de92cad87479a1ec5ec450292e7d09f8c0c2f9c Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Mon, 11 May 2020 17:20:40 -0400 Subject: [PATCH 42/94] Fixes Signed-off-by: Andy Neff --- terra/compute/base.py | 17 +++++++++++------ terra/compute/utils.py | 8 ++------ terra/core/settings.py | 2 +- terra/executor/celery/executor.py | 14 +++++++++----- terra/task.py | 7 +++---- 5 files changed, 26 insertions(+), 22 deletions(-) diff --git a/terra/compute/base.py b/terra/compute/base.py index 2feb52ca..7c61b87b 100644 --- a/terra/compute/base.py +++ b/terra/compute/base.py @@ -5,6 +5,7 @@ from logging import StreamHandler from logging.handlers import SocketHandler import threading +import warnigns from terra import settings import terra.compute.utils @@ -220,6 +221,10 @@ def configure_logger(sender, **kwargs): if sender.tcp_logging_server.ready: break time.sleep(0.001) + else: # pragma: no cover + warnigns.warn("TCP Logging server thread did not startup. " + "This is probably not a problem, unless logging isn't " + "working.", RuntimeWarning) # Auto cleanup @atexit.register @@ -227,9 +232,10 @@ def cleanup_thread(): print("SGR - Sending cease and desist to LogRecordSocketReceiver") sender.tcp_logging_server.abort = 1 listener_thread.join(timeout=5) - if listener_thread.is_alive(): - print("SGR - LogRecordSocketReceiver thread did not die") - print("SGR - LogRecordSocketReceiver died!") + if listener_thread.is_alive(): # pragma: no cover + warnings.warn("TCP Logger Server Thread did not shut down " + "gracefully. Attempting to exit anyways.", + RuntimeWarning) elif settings.terra.zone == 'runner': print(f"SGR - setting up BaseCompute:runner logging") @@ -240,7 +246,7 @@ def cleanup_thread(): # Now in configure_logger, you are able to access settings and determine # whether there should be a stderr handler or not. If you don't so this, # both the master controller and service runner will output the same log - # messages, duplicating output on stderr. + # messages, duplicating/interleaving output on stderr. sender.root_logger.removeHandler(sender.stderr_handler) # Some executors may need to do this too. @@ -270,8 +276,7 @@ def reconfigure_logger(sender, **kwargs): sender.main_log_handler.close() try: sender.root_logger.removeHandler(sender.main_log_handler) - except ValueError: - print(f"This shouldn't be happening...") + except ValueError: # pragma: no cover pass sender.main_log_handler = SocketHandler( diff --git a/terra/compute/utils.py b/terra/compute/utils.py index 95e30468..8a6ca2df 100644 --- a/terra/compute/utils.py +++ b/terra/compute/utils.py @@ -93,12 +93,8 @@ def _connect_backend(self): For the most part, workflows will be interacting with :data:`compute` to ``run`` services. 
Easier access via ``terra.compute.compute`` ''' -terra.core.signals.logger_configure.connect( - lambda *args, **kwargs: compute.configure_logger(*args, **kwargs), - weak=False) -terra.core.signals.logger_reconfigure.connect( - lambda *args, **kwargs: compute.reconfigure_logger(*args, **kwargs), - weak=False) +terra.core.signals.logger_configure.connect(compute.configure_logger) +terra.core.signals.logger_reconfigure.connect(compute.reconfigure_logger) def get_default_service_class(cls): ''' diff --git a/terra/core/settings.py b/terra/core/settings.py index 095ca936..88f4481a 100644 --- a/terra/core/settings.py +++ b/terra/core/settings.py @@ -264,7 +264,7 @@ def need_to_set_virtualenv_dir(self): { "logging": { "level": "ERROR", - "format": "%(asctime)s (%(hostname)s:%(zone)s): \x1b[31m%(levelname)s/%(processName)s\x1b[0m - %(filename)s - %(message)s", + "format": "%(asctime)s (%(hostname)s:%(zone)s): %(levelname)s/%(processName)s - %(filename)s - %(message)s", "date_format": None, "style": "%", "server": { diff --git a/terra/executor/celery/executor.py b/terra/executor/celery/executor.py index 99fd55d1..7ea183c1 100644 --- a/terra/executor/celery/executor.py +++ b/terra/executor/celery/executor.py @@ -252,12 +252,13 @@ def __init__(self, service_info): @staticmethod def configure_logger(sender, **kwargs): - if settings.terra.zone == 'task': + if settings.terra.zone == 'task': # pragma: no cover + # This will never really be reached, because the task_controller will + # configure the logger, and than fork. print(f"SGR - Not setting up CeleryExecutor:task logging") sender.main_log_handler = NullHandler() elif settings.terra.zone == 'task_controller': print("SGR - setting up CeleryExecutor:task_controller logging") - # TODO: Not dry with BaseComputer configure(controller) # Setup log file for use in configure sender._log_file = os.path.join(settings.processing_dir, terra.logger._logs.default_log_prefix) @@ -269,11 +270,16 @@ def configure_logger(sender, **kwargs): @staticmethod def reconfigure_logger(sender, pre_run_task=False, post_settings_context=False, **kwargs): - # if settings.terra.zone == 'runner' or if settings.terra.zone == 'task': print("SGR - reconfigure task logging", kwargs) if pre_run_task: + if sender.main_log_handler: + sender.main_log_handler.close() + try: + sender.root_logger.removeHandler(sender.main_log_handler) + except ValueError: + pass print(f"SGR - Actually setting up CeleryExecutor:task logging") print(f"SGR - {settings.logging.server.hostname}:{settings.logging.server.port}") sender.main_log_handler = SocketHandler( @@ -289,13 +295,11 @@ def reconfigure_logger(sender, pre_run_task=False, try: sender.root_logger.removeHandler(sender.main_log_handler) except ValueError: - print(f"This shouldn't be happening...") pass sender.main_log_handler = NullHandler() sender.root_logger.addHandler(sender.main_log_handler) print("SGR - reconfigured task logging") elif settings.terra.zone == 'task_controller': - # TODO: Not dry with BaseComputer reconfigure(controller) log_file = os.path.join(settings.processing_dir, terra.logger._logs.default_log_prefix) diff --git a/terra/task.py b/terra/task.py index d5e2cc29..1a9e34b2 100644 --- a/terra/task.py +++ b/terra/task.py @@ -18,9 +18,8 @@ __all__ = ['TerraTask', 'shared_task'] -# Take the shared task decorate, and add some defaults, so you don't need to -# specify this EVERY task, and gives you a way to add to all tasks -# automagically +# Take the shared task decorator, and add some Terra defaults, so you don't +# need to specify 
them EVERY task def shared_task(*args, **kwargs): kwargs['bind'] = kwargs.pop('bind', True) kwargs['base'] = kwargs.pop('base', TerraTask) @@ -100,7 +99,7 @@ def __call__(self, *args, **kwargs): settings._wrapped.clear() settings._wrapped.update(self.translate_paths(self.request.settings, reverse_compute_volume_map, executor_volume_map)) - # This is needed here, because I just loaded settings from a runner! + # This is needed here because I just loaded settings from a runner! settings.terra.zone = 'task' # Just in case processing dir doesn't exists From 3e53e1c3911aeb2484827968bce0db8fd105444d Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Mon, 11 May 2020 17:34:46 -0400 Subject: [PATCH 43/94] Remove debugging prints Signed-off-by: Andy Neff --- terra/compute/base.py | 11 +---------- terra/executor/celery/__main__.py | 4 ---- terra/executor/celery/executor.py | 10 +--------- terra/executor/utils.py | 2 -- terra/logger.py | 7 ------- terra/task.py | 2 -- 6 files changed, 2 insertions(+), 34 deletions(-) diff --git a/terra/compute/base.py b/terra/compute/base.py index 7c61b87b..3bd55636 100644 --- a/terra/compute/base.py +++ b/terra/compute/base.py @@ -5,7 +5,7 @@ from logging import StreamHandler from logging.handlers import SocketHandler import threading -import warnigns +import warnings from terra import settings import terra.compute.utils @@ -195,8 +195,6 @@ def configuration_map_service(self, service_info): @staticmethod def configure_logger(sender, **kwargs): if settings.terra.zone == 'controller': - print("SGR - setting up controller logging") - # Setup log file for use in configure sender._log_file = os.path.join(settings.processing_dir, terra.logger._logs.default_log_prefix) @@ -209,8 +207,6 @@ def configure_logger(sender, **kwargs): # setup the TCP socket listener sender.tcp_logging_server = LogRecordSocketReceiver( settings.logging.server.hostname, settings.logging.server.port) - print('SGR - About to start TCP server...') - listener_thread = threading.Thread( target=sender.tcp_logging_server.serve_until_stopped) listener_thread.setDaemon(True) @@ -229,7 +225,6 @@ def configure_logger(sender, **kwargs): # Auto cleanup @atexit.register def cleanup_thread(): - print("SGR - Sending cease and desist to LogRecordSocketReceiver") sender.tcp_logging_server.abort = 1 listener_thread.join(timeout=5) if listener_thread.is_alive(): # pragma: no cover @@ -237,8 +232,6 @@ def cleanup_thread(): "gracefully. 
Attempting to exit anyways.", RuntimeWarning) elif settings.terra.zone == 'runner': - print(f"SGR - setting up BaseCompute:runner logging") - sender.main_log_handler = SocketHandler( settings.logging.server.hostname, settings.logging.server.port) sender.root_logger.addHandler(sender.main_log_handler) @@ -258,7 +251,6 @@ def reconfigure_logger(sender, **kwargs): # output stream if settings.terra.zone == 'controller': - print("SGR - BaseComputer:controller reconfigure logging") log_file = os.path.join(settings.processing_dir, terra.logger._logs.default_log_prefix) @@ -268,7 +260,6 @@ def reconfigure_logger(sender, **kwargs): sender._log_file.close() sender._log_file = open(log_file, 'a') elif settings.terra.zone == 'runner': - print("SGR - BaseComputer:runner reconfigure logging") # Only if it's changed if settings.logging.server.hostname != sender.main_log_handler.host or \ settings.logging.server.port != sender.main_log_handler.port: diff --git a/terra/executor/celery/__main__.py b/terra/executor/celery/__main__.py index 1947e2a3..127f9f0f 100644 --- a/terra/executor/celery/__main__.py +++ b/terra/executor/celery/__main__.py @@ -9,8 +9,6 @@ def main(): if env.get('TERRA_SETTINGS_FILE', '') == '': - print('SGR - default settings') - settings.configure( { 'executor': {'type': 'CeleryExecutor'}, @@ -19,8 +17,6 @@ def main(): # 'logging': {'level': 'NOTSET'} } ) - print('SGR - celery.__main__.py') - # REVIEW are settings setup at this point; they must be setup before the # celery tasks start diff --git a/terra/executor/celery/executor.py b/terra/executor/celery/executor.py index 7ea183c1..c8231645 100644 --- a/terra/executor/celery/executor.py +++ b/terra/executor/celery/executor.py @@ -40,7 +40,7 @@ # stop celery from hijacking the logger @setup_logging.connect def setup_loggers(*args, **kwargs): - print("SGR - celery logger") + pass class CeleryExecutorFuture(BaseFuture): def __init__(self, asyncresult): @@ -255,10 +255,8 @@ def configure_logger(sender, **kwargs): if settings.terra.zone == 'task': # pragma: no cover # This will never really be reached, because the task_controller will # configure the logger, and than fork. 
- print(f"SGR - Not setting up CeleryExecutor:task logging") sender.main_log_handler = NullHandler() elif settings.terra.zone == 'task_controller': - print("SGR - setting up CeleryExecutor:task_controller logging") # Setup log file for use in configure sender._log_file = os.path.join(settings.processing_dir, terra.logger._logs.default_log_prefix) @@ -271,8 +269,6 @@ def configure_logger(sender, **kwargs): def reconfigure_logger(sender, pre_run_task=False, post_settings_context=False, **kwargs): if settings.terra.zone == 'task': - print("SGR - reconfigure task logging", kwargs) - if pre_run_task: if sender.main_log_handler: sender.main_log_handler.close() @@ -280,14 +276,11 @@ def reconfigure_logger(sender, pre_run_task=False, sender.root_logger.removeHandler(sender.main_log_handler) except ValueError: pass - print(f"SGR - Actually setting up CeleryExecutor:task logging") - print(f"SGR - {settings.logging.server.hostname}:{settings.logging.server.port}") sender.main_log_handler = SocketHandler( settings.logging.server.hostname, settings.logging.server.port) sender.root_logger.addHandler(sender.main_log_handler) if post_settings_context: - print(f"SGR - Actually destroying CeleryExecutor:task logging") # when the celery task is done, its logger is automatically # reconfigured; use that opportunity to close the stream if sender.main_log_handler: @@ -298,7 +291,6 @@ def reconfigure_logger(sender, pre_run_task=False, pass sender.main_log_handler = NullHandler() sender.root_logger.addHandler(sender.main_log_handler) - print("SGR - reconfigured task logging") elif settings.terra.zone == 'task_controller': log_file = os.path.join(settings.processing_dir, terra.logger._logs.default_log_prefix) diff --git a/terra/executor/utils.py b/terra/executor/utils.py index d3463830..01facb3d 100644 --- a/terra/executor/utils.py +++ b/terra/executor/utils.py @@ -30,8 +30,6 @@ def _connect_backend(self): If not ``None``, override the name of the backend to load. ''' - print('SGR - _connect_backend') - backend_name = self._override_type if backend_name is None: diff --git a/terra/logger.py b/terra/logger.py index eec0e677..9b492989 100644 --- a/terra/logger.py +++ b/terra/logger.py @@ -188,13 +188,11 @@ def __init__(self, host='localhost', def serve_until_stopped(self): abort = False ready = True - print('SGR - STARTING LISTENER') while not abort: rd, wr, ex = select.select([self.socket.fileno()], [], [], self.timeout) if rd: - print('SGR - RD') self.handle_request() abort = self.abort ready = False @@ -342,7 +340,6 @@ def set_level_and_formatter(self): if isinstance(level, str): # make level case insensitive level = level.upper() - print('SGR - log level ' + level) self.stderr_handler.setLevel(level) self.main_log_handler.setLevel(level) @@ -364,8 +361,6 @@ def configure_logger(self, sender=None, signal=None, **kwargs): "unexpected") raise ImproperlyConfigured() - print('SGR - sending logger_configure signal') - # This sends a signal to the current Executor type, which has already been # imported at the end of LasySettings.configure. 
We don't import Executor # here to reduce the concerns of this module @@ -417,8 +412,6 @@ def configure_logger(self, sender=None, signal=None, **kwargs): os.unlink(self.tmp_file.name) self.tmp_file = None - print('SGR - logging configured for zone ' + settings.terra.zone) - self._configured = True def reconfigure_logger(self, sender=None, signal=None, **kwargs): diff --git a/terra/task.py b/terra/task.py index 1a9e34b2..d55c392f 100644 --- a/terra/task.py +++ b/terra/task.py @@ -111,8 +111,6 @@ def __call__(self, *args, **kwargs): logger.warning('Using temporary directory: ' f'"{settings.processing_dir}" for the processing dir') - logger.error('SGR - TerraTask ZONE ' + str(settings.terra.zone)) - # Calculate the exector's mapped version of the arguments kwargs = args_to_kwargs(self.run, args, kwargs) args_only = kwargs.pop(ARGS, ()) From 6e0ba4f13a8985fc23f71335d155cf7af1ead9d6 Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Tue, 12 May 2020 15:42:03 -0400 Subject: [PATCH 44/94] Clean up celery main Signed-off-by: Andy Neff --- terra/executor/celery/__main__.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/terra/executor/celery/__main__.py b/terra/executor/celery/__main__.py index 127f9f0f..6f593ef8 100644 --- a/terra/executor/celery/__main__.py +++ b/terra/executor/celery/__main__.py @@ -13,12 +13,9 @@ def main(): { 'executor': {'type': 'CeleryExecutor'}, 'terra': {'zone': 'task_controller'}, - 'logging': {'level': 'INFO'} - # 'logging': {'level': 'NOTSET'} + 'logging': {'level': 'NOTSET'} } ) - # REVIEW are settings setup at this point; they must be setup before the - # celery tasks start app.start() From 95721a6442d9c4f76d5855d740d89cbda213ab82 Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Wed, 13 May 2020 11:04:31 -0400 Subject: [PATCH 45/94] Broken Signed-off-by: Andy Neff --- terra/compute/base.py | 11 +++--- terra/compute/utils.py | 8 +++- terra/core/settings.py | 4 +- terra/logger.py | 4 +- terra/tests/demo/__main__.py | 69 +++++++++++++++++++++++++++++++++++ terra/tests/demo/services.py | 34 +++++++++++++++++ terra/tests/demo/workflows.py | 13 +++++++ 7 files changed, 132 insertions(+), 11 deletions(-) create mode 100644 terra/tests/demo/__main__.py create mode 100644 terra/tests/demo/services.py create mode 100644 terra/tests/demo/workflows.py diff --git a/terra/compute/base.py b/terra/compute/base.py index 3bd55636..781231f0 100644 --- a/terra/compute/base.py +++ b/terra/compute/base.py @@ -218,7 +218,7 @@ def configure_logger(sender, **kwargs): break time.sleep(0.001) else: # pragma: no cover - warnigns.warn("TCP Logging server thread did not startup. " + warnings.warn("TCP Logging server thread did not startup. " "This is probably not a problem, unless logging isn't " "working.", RuntimeWarning) @@ -236,10 +236,11 @@ def cleanup_thread(): settings.logging.server.hostname, settings.logging.server.port) sender.root_logger.addHandler(sender.main_log_handler) - # Now in configure_logger, you are able to access settings and determine - # whether there should be a stderr handler or not. If you don't so this, - # both the master controller and service runner will output the same log - # messages, duplicating/interleaving output on stderr. + # Now in configure_logger, settings are loaded and you are able to + # determine this is the runner and there should not be a stderr handler. + # If you don't do this, both the master controller and service runner + # will output the same log messages, duplicating/interleaving output on + # stderr. 
sender.root_logger.removeHandler(sender.stderr_handler) # Some executors may need to do this too. diff --git a/terra/compute/utils.py b/terra/compute/utils.py index 8a6ca2df..95e30468 100644 --- a/terra/compute/utils.py +++ b/terra/compute/utils.py @@ -93,8 +93,12 @@ def _connect_backend(self): For the most part, workflows will be interacting with :data:`compute` to ``run`` services. Easier access via ``terra.compute.compute`` ''' -terra.core.signals.logger_configure.connect(compute.configure_logger) -terra.core.signals.logger_reconfigure.connect(compute.reconfigure_logger) +terra.core.signals.logger_configure.connect( + lambda *args, **kwargs: compute.configure_logger(*args, **kwargs), + weak=False) +terra.core.signals.logger_reconfigure.connect( + lambda *args, **kwargs: compute.reconfigure_logger(*args, **kwargs), + weak=False) def get_default_service_class(cls): ''' diff --git a/terra/core/settings.py b/terra/core/settings.py index 88f4481a..b8ed67e8 100644 --- a/terra/core/settings.py +++ b/terra/core/settings.py @@ -521,8 +521,8 @@ def read_json(json_file): lambda key, value: read_json(value)) # Importing these here is intentional - from terra.executor import Executor - from terra.compute import compute + import terra.executor + import terra.compute # compute._connection # call a cached property post_settings_configured.send(sender=self) diff --git a/terra/logger.py b/terra/logger.py index 9b492989..5d627868 100644 --- a/terra/logger.py +++ b/terra/logger.py @@ -187,7 +187,7 @@ def __init__(self, host='localhost', def serve_until_stopped(self): abort = False - ready = True + self.ready = True while not abort: rd, wr, ex = select.select([self.socket.fileno()], [], [], @@ -195,7 +195,7 @@ def serve_until_stopped(self): if rd: self.handle_request() abort = self.abort - ready = False + self.ready = False class _SetupTerraLogger(): diff --git a/terra/tests/demo/__main__.py b/terra/tests/demo/__main__.py new file mode 100644 index 00000000..60c8d756 --- /dev/null +++ b/terra/tests/demo/__main__.py @@ -0,0 +1,69 @@ +''' +Demo app that tests if a terra config is working + +*** WARNING *** This will spin up real computers and workers, if you are +configured to do so. May result in a small amount of billing. 
+''' + +import argparse +from os import environ as env +import tempfile +import os +import json +import pydoc + +from terra import settings +from terra.core.settings import ENVIRONMENT_VARIABLE +from terra.utils.cli import FullPaths + + +def get_parser(): + parser = argparse.ArgumentParser(description="View Angle Runner") + aa = parser.add_argument + aa('--loglevel', type=str, help="Log level", default=None) + aa('settings', type=str, help="JSON settings file", + default=os.environ.get(ENVIRONMENT_VARIABLE), action=FullPaths) + return parser + + +def main(processing_dir, args=None): + args = get_parser().parse_args(args) + + # Load settings + with open(args.settings, 'r') as fid: + settings_json = json.load(fid) + + # Patch settings for demo + settings_json['processing_dir'] = processing_dir + if args.loglevel: + try: + settings_json['logging']['level'] = args.loglevel + except KeyError: + settings_json['logging'] = {'level': args.loglevel} + + # Configure settings + settings.configure(settings_json) + + import pprint + pprint.pprint(settings) + + # Run workflow + from .workflows import DemoWorkflow + DemoWorkflow().demonate() + + import terra.logger + print(terra.logger._logs.tcp_logging_server.ready) + + + +if __name__ == '__main__': + processing_dir = tempfile.TemporaryDirectory() + try: + main(processing_dir.name) + with open(os.path.join(processing_dir.name, 'terra_log'), 'r') as fid: + print('-------------------') + print('Paging log messages') + print('-------------------') + pydoc.pager(fid.read()) + finally: + processing_dir.cleanup() diff --git a/terra/tests/demo/services.py b/terra/tests/demo/services.py new file mode 100644 index 00000000..970aaa76 --- /dev/null +++ b/terra/tests/demo/services.py @@ -0,0 +1,34 @@ +from terra import settings +from terra.compute.docker import ( + Service as DockerService, + Compute as DockerCompute +) +from terra.compute.singularity import ( + Service as SingularityService, + Compute as SingularityCompute +) +from terra.compute.base import BaseService +from terra.core.settings import TerraJSONEncoder +from os import environ as env +import json +import os +import posixpath +from terra.logger import getLogger +logger = getLogger(__name__) + +class Demo1(BaseService): + ''' Retrieve the view angles and print them out ''' + + +@DockerCompute.register(ViewAngle) +class ViewAngleRetrieval_docker(DockerService, ViewAngle): + def __init__(self): + super().__init__() + + # self.command = ['python', '-m', 'viewangle.runner_viewangle'] + self.command = ['python', '-m', 'print(12345)'] + + self.compose_files = [os.path.join(env['TERRA_TERRA_DIR'], + 'docker-compose-main.yml')] + + self.compose_service_name = 'terra' \ No newline at end of file diff --git a/terra/tests/demo/workflows.py b/terra/tests/demo/workflows.py new file mode 100644 index 00000000..308e0322 --- /dev/null +++ b/terra/tests/demo/workflows.py @@ -0,0 +1,13 @@ +from terra import settings +from terra.compute import compute + +from terra.logger import getLogger +logger = getLogger(__name__) + +class DemoWorkflow: + def demonate(self): + logger.critical('Starting demo workflow') + # compute.run('terra.tests.demo.services.Demo1') + # compute.run('terra.tests.demo.services.Demo2') + # compute.run('terra.tests.demo.services.Demo3') + logger.critical('Ran demo workflow') \ No newline at end of file From 0ffc409e689e0abd3c5beeac64fb6dd7233d8e18 Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Thu, 14 May 2020 14:26:14 -0400 Subject: [PATCH 46/94] Added more to demo app Signed-off-by: Andy Neff --- 
terra/tests/demo/__init__.py | 0 terra/tests/demo/__main__.py | 4 ++-- terra/tests/demo/runners/__init__.py | 0 terra/tests/demo/runners/demo1.py | 27 +++++++++++++++++++++++++++ terra/tests/demo/services.py | 20 ++++++++++++-------- terra/tests/demo/workflows.py | 2 +- 6 files changed, 42 insertions(+), 11 deletions(-) create mode 100644 terra/tests/demo/__init__.py create mode 100644 terra/tests/demo/runners/__init__.py create mode 100644 terra/tests/demo/runners/demo1.py diff --git a/terra/tests/demo/__init__.py b/terra/tests/demo/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/terra/tests/demo/__main__.py b/terra/tests/demo/__main__.py index 60c8d756..3ee1ad6f 100644 --- a/terra/tests/demo/__main__.py +++ b/terra/tests/demo/__main__.py @@ -44,8 +44,8 @@ def main(processing_dir, args=None): # Configure settings settings.configure(settings_json) - import pprint - pprint.pprint(settings) + # import pprint + # pprint.pprint(settings) # Run workflow from .workflows import DemoWorkflow diff --git a/terra/tests/demo/runners/__init__.py b/terra/tests/demo/runners/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/terra/tests/demo/runners/demo1.py b/terra/tests/demo/runners/demo1.py new file mode 100644 index 00000000..b919111b --- /dev/null +++ b/terra/tests/demo/runners/demo1.py @@ -0,0 +1,27 @@ +''' +Demo app that tests if a terra config is working + +*** WARNING *** This will spin up real computers and workers, if you are +configured to do so. May result in a small amount of billing. +''' + +import argparse +from os import environ as env +import tempfile +import os +import json +import pydoc + +from terra.utils.cli import ArgumentParser +from terra import settings +from terra.logger import getLogger +logger = getLogger(__name__) + + +def main(args=None): + settings.terra.zone + logger.critical('Demo 1') + +if __name__ == '__main__': + ArgumentParser().parse_args() + main() diff --git a/terra/tests/demo/services.py b/terra/tests/demo/services.py index 970aaa76..a54a27cd 100644 --- a/terra/tests/demo/services.py +++ b/terra/tests/demo/services.py @@ -7,6 +7,10 @@ Service as SingularityService, Compute as SingularityCompute ) +from terra.compute.virtualenv import ( + Service as VirtualEnvService, + Compute as VirtualEnvCompute +) from terra.compute.base import BaseService from terra.core.settings import TerraJSONEncoder from os import environ as env @@ -17,18 +21,18 @@ logger = getLogger(__name__) class Demo1(BaseService): - ''' Retrieve the view angles and print them out ''' + ''' Simple Demo Service ''' + command = ['python', '-m', 'terra.tests.demo.runners.demo1'] -@DockerCompute.register(ViewAngle) -class ViewAngleRetrieval_docker(DockerService, ViewAngle): +@DockerCompute.register(Demo1) +class Demo1_docker(DockerService, Demo1): def __init__(self): super().__init__() - - # self.command = ['python', '-m', 'viewangle.runner_viewangle'] - self.command = ['python', '-m', 'print(12345)'] - self.compose_files = [os.path.join(env['TERRA_TERRA_DIR'], 'docker-compose-main.yml')] + self.compose_service_name = 'terra' - self.compose_service_name = 'terra' \ No newline at end of file +@VirtualEnvCompute.register(Demo1) +class Demo1_virtualenv(VirtualEnvService, Demo1): + pass \ No newline at end of file diff --git a/terra/tests/demo/workflows.py b/terra/tests/demo/workflows.py index 308e0322..ab1bf08b 100644 --- a/terra/tests/demo/workflows.py +++ b/terra/tests/demo/workflows.py @@ -7,7 +7,7 @@ class DemoWorkflow: def demonate(self): 
logger.critical('Starting demo workflow') - # compute.run('terra.tests.demo.services.Demo1') + compute.run('terra.tests.demo.services.Demo1') # compute.run('terra.tests.demo.services.Demo2') # compute.run('terra.tests.demo.services.Demo3') logger.critical('Ran demo workflow') \ No newline at end of file From 1fd7748a48d3c81f0fe24b9d8cad61ff7952f533 Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Mon, 18 May 2020 19:56:44 -0400 Subject: [PATCH 47/94] Runner logging messages with new stderr filter Signed-off-by: Andy Neff --- docker-compose-main.yml | 24 ++++++----- terra/compute/base.py | 14 +++---- terra/compute/container.py | 11 +++--- terra/compute/virtualenv.py | 4 +- terra/logger.py | 77 ++++++++++++++++++++++++++++-------- terra/tests/demo/services.py | 11 +++++- 6 files changed, 97 insertions(+), 44 deletions(-) diff --git a/docker-compose-main.yml b/docker-compose-main.yml index 353d13c5..3176dfec 100644 --- a/docker-compose-main.yml +++ b/docker-compose-main.yml @@ -8,15 +8,15 @@ services: image: ${TERRA_DOCKER_REPO}:terra_${TERRA_USERNAME} environment: &terra_environment # Variables for docker_entrypoint.bsh - - DOCKER_UID=${TERRA_UID} - - DOCKER_GIDS=${TERRA_GIDS} - - DOCKER_GROUP_NAMES=${TERRA_GROUP_NAMES} - - DOCKER_USERNAME=user - - DISPLAY - - JUSTFILE=${TERRA_TERRA_DIR_DOCKER}/docker/terra.Justfile - - JUST_SETTINGS=${TERRA_TERRA_DIR_DOCKER}/terra.env - - PYTHONPATH=${TERRA_PYTHONPATH-} - - TZ + DOCKER_UID: ${TERRA_UID} + DOCKER_GIDS: ${TERRA_GIDS} + DOCKER_GROUP_NAMES: ${TERRA_GROUP_NAMES} + DOCKER_USERNAME: user + JUSTFILE: ${TERRA_TERRA_DIR_DOCKER}/docker/terra.Justfile + JUST_SETTINGS: ${TERRA_TERRA_DIR_DOCKER}/terra.env + PYTHONPATH: ${TERRA_PYTHONPATH-} + DISPLAY: + TZ: cap_add: - SYS_PTRACE # Useful for gdb volumes: @@ -38,6 +38,12 @@ services: source: terra-venv target: /venv + terra-demo: + <<: *terra + environment: + <<: *terra_environment + TERRA_SETTINGS_FILE: + redis-commander: image: rediscommander/redis-commander ports: diff --git a/terra/compute/base.py b/terra/compute/base.py index 781231f0..d763c0d8 100644 --- a/terra/compute/base.py +++ b/terra/compute/base.py @@ -10,7 +10,7 @@ from terra import settings import terra.compute.utils from terra.executor import Executor -from terra.logger import getLogger, LogRecordSocketReceiver +from terra.logger import getLogger, LogRecordSocketReceiver, SkipStdErrAddFilter logger = getLogger(__name__) @@ -234,16 +234,12 @@ def cleanup_thread(): elif settings.terra.zone == 'runner': sender.main_log_handler = SocketHandler( settings.logging.server.hostname, settings.logging.server.port) + # By default, all runners have access to the master controllers stderr, + # so there is no need for the master controller to echo out the log + # messages a second time. + sender.main_log_handler.addFilter(SkipStdErrAddFilter()) sender.root_logger.addHandler(sender.main_log_handler) - # Now in configure_logger, settings are loaded and you are able to - # determine this is the runner and there should not be a stderr handler. - # If you don't do this, both the master controller and service runner - # will output the same log messages, duplicating/interleaving output on - # stderr. - sender.root_logger.removeHandler(sender.stderr_handler) - # Some executors may need to do this too. 
- @staticmethod def reconfigure_logger(sender, **kwargs): # sender is logger in this case diff --git a/terra/compute/container.py b/terra/compute/container.py index ca01f13b..b8575d36 100644 --- a/terra/compute/container.py +++ b/terra/compute/container.py @@ -1,7 +1,6 @@ import os import posixpath import ntpath -from os import environ as env import re import pathlib from tempfile import TemporaryDirectory @@ -34,19 +33,19 @@ def pre_run(self): super().pre_run() self.temp_dir = TemporaryDirectory(suffix=f"_{type(self).__name__}") - if env.get('TERRA_KEEP_TEMP_DIR', None) == "1": + if self.env.get('TERRA_KEEP_TEMP_DIR', None) == "1": self.temp_dir._finalizer.detach() temp_dir = pathlib.Path(self.temp_dir.name) # Check to see if and are already defined, this will play nicely with # external influences env_volume_index = 1 - while f'{env["JUST_PROJECT_PREFIX"]}_VOLUME_{env_volume_index}' in \ + while f'{self.env["JUST_PROJECT_PREFIX"]}_VOLUME_{env_volume_index}' in \ self.env: env_volume_index += 1 # Setup volumes for container - self.env[f'{env["JUST_PROJECT_PREFIX"]}_VOLUME_{env_volume_index}'] = \ + self.env[f'{self.env["JUST_PROJECT_PREFIX"]}_VOLUME_{env_volume_index}'] = \ f'{str(temp_dir)}:/tmp_settings:rw' env_volume_index += 1 @@ -56,7 +55,7 @@ def pre_run(self): volume_str = f'{volume_host}:{volume_container}' if volume_flags: volume_str += f':{volume_flags}' - self.env[f'{env["JUST_PROJECT_PREFIX"]}_VOLUME_{env_volume_index}'] = \ + self.env[f'{self.env["JUST_PROJECT_PREFIX"]}_VOLUME_{env_volume_index}'] = \ volume_str env_volume_index += 1 @@ -87,7 +86,7 @@ def pre_run(self): def post_run(self): super().post_run() # Delete temp_dir - if env.get('TERRA_KEEP_TEMP_DIR', None) != "1": + if self.env.get('TERRA_KEEP_TEMP_DIR', None) != "1": self.temp_dir.cleanup() # self.temp_dir = None # Causes a warning, hopefully there wasn't a reason # I did it this way. diff --git a/terra/compute/virtualenv.py b/terra/compute/virtualenv.py index 26970d0b..37f4b5ab 100644 --- a/terra/compute/virtualenv.py +++ b/terra/compute/virtualenv.py @@ -106,7 +106,7 @@ def pre_run(self): # Create a temp directory, store it in this instance self.temp_dir = TemporaryDirectory(suffix=f"_{type(self).__name__}") - if env.get('TERRA_KEEP_TEMP_DIR', None) == "1": + if self.env.get('TERRA_KEEP_TEMP_DIR', None) == "1": self.temp_dir._finalizer.detach() # Use a config.json file to store settings within that temp directory @@ -126,5 +126,5 @@ def pre_run(self): def post_run(self): super().post_run() # Delete temp_dir - if env.get('TERRA_KEEP_TEMP_DIR', None) != "1": + if self.env.get('TERRA_KEEP_TEMP_DIR', None) != "1": self.temp_dir.cleanup() diff --git a/terra/logger.py b/terra/logger.py index 5d627868..73eec1e5 100644 --- a/terra/logger.py +++ b/terra/logger.py @@ -41,8 +41,8 @@ To use the logger, in any module always: ``` -from terra.logging import get_logger -logger = get_logger(__name__) +from terra.logging import getLogger +logger = getLogger(__name__) ``` And then use the ``logger`` object anywhere in the module. 
This logger is a @@ -179,6 +179,7 @@ class LogRecordSocketReceiver(socketserver.ThreadingTCPServer): def __init__(self, host='localhost', port=logging.handlers.DEFAULT_TCP_LOGGING_PORT, handler=LogRecordStreamHandler): + print('SGR - LRSR init') socketserver.ThreadingTCPServer.__init__(self, (host, port), handler) self.abort = False self.ready = False @@ -219,6 +220,7 @@ def __init__(self): self.stderr_handler = logging.StreamHandler(sys.stderr) self.stderr_handler.setLevel(self.default_stderr_handler_level) self.stderr_handler.setFormatter(self.default_formatter) + self.stderr_handler.addFilter(StdErrFilter()) self.root_logger.addHandler(self.stderr_handler) # Set up temporary file logger @@ -235,6 +237,7 @@ def __init__(self): logging.handlers.MemoryHandler(capacity=1000) self.preconfig_stderr_handler.setLevel(0) self.preconfig_stderr_handler.setFormatter(self.default_formatter) + self.preconfig_stderr_handler.addFilter(StdErrFilter()) self.root_logger.addHandler(self.preconfig_stderr_handler) self.preconfig_main_log_handler = \ @@ -258,11 +261,18 @@ def __init__(self): warnings.simplefilter('default') warnings.filterwarnings("ignore", category=DeprecationWarning, module='yaml', - message="ABCs from 'collections' instead of from 'collections.abc'") + message="Using or importing the ABCs") warnings.filterwarnings("ignore", category=DeprecationWarning, module='osgeo', message="the imp module is deprecated") + # This disables a message that spams the screen: + # "pipbox received method enable_events() [reply_to:None ticket:None]" + # This is the only debug message in all of kombu.pidbox, so this is pretty + # safe to do + pidbox_logger = getLogger('kombu.pidbox') + pidbox_logger.setLevel(INFO) + @property def main_log_handler(self): try: @@ -290,12 +300,21 @@ def setup_logging_exception_hook(self): def handle_exception(exc_type, exc_value, exc_traceback): # Try catch here because I want to make sure the original hook is called try: - logger.error("Uncaught exception", - exc_info=(exc_type, exc_value, exc_traceback)) + logger.critical("Uncaught exception", extra={'skip_stderr': True}, + exc_info=(exc_type, exc_value, exc_traceback)) except Exception: # pragma: no cover - print('There was an exception logging in the execpetion handler!') + print('There was an exception logging in the execpetion handler!', + file=sys.stderr) traceback.print_exc() + try: + from terra import settings + zone = settings.terra.zone + except: + zone = 'preconfig' + print(f'Exception in {zone} on {platform.node()}', + file=sys.stderr) + return original_hook(exc_type, exc_value, exc_traceback) # Replace the hook @@ -320,8 +339,17 @@ def setup_logging_ipython_exception_hook(self): original_exception = InteractiveShell.showtraceback def handle_traceback(*args, **kwargs): # pragma: no cover - getLogger(__name__).error("Uncaught exception", - exc_info=sys.exc_info()) + getLogger(__name__).critical("Uncaught exception", + exc_info=sys.exc_info()) + + try: + from terra import settings + zone = settings.terra.zone + except: + zone = 'preconfig' + print(f'Exception in {zone} on {platform.node()}', + file=sys.stderr) + return original_exception(*args, **kwargs) InteractiveShell.showtraceback = handle_traceback @@ -428,25 +456,41 @@ def reconfigure_logger(self, sender=None, signal=None, **kwargs): self.set_level_and_formatter() -class TerraFilter(logging.Filter): + +class TerraAddFilter(logging.Filter): def filter(self, record): if not hasattr(record, 'hostname'): record.hostname = platform.node() if not hasattr(record, 
'zone'): - if terra.settings.configured: - record.zone = terra.settings.terra.zone - else: + try: + from terra import settings + if terra.settings.configured: + record.zone = terra.settings.terra.zone + else: + record.zone = 'preconfig' + except: record.zone = 'preconfig' return True +class StdErrFilter(logging.Filter): + def filter(self, record): + return not getattr(record, 'skip_stderr', False) + + +class SkipStdErrAddFilter(logging.Filter): + def filter(self, record): + record.skip_stderr = getattr(record, 'skip_stderr', True) + return True + + # def show_log(k, v): # def show_dict_fields(prefix, dict1): # for fld,val in dict1.items(): # print('%s%s=%s' %(prefix, fld,val) ) # if not isinstance(v, logging.PlaceHolder): -# print('+ [%s] {%s} (%s) ' % (str.ljust( k, 20), str(v.__class__)[8:-2], logging.getLevelName(v.level)) ) +# print('+ [%s] {%s} (%s) ' % (str.ljust( k, 20), str(v.__class__)[8:-2], logging.getLevelName(v.level)) ) # print(str.ljust( '-------------------------',20) ) # show_dict_fields(' -', v.__dict__) @@ -467,7 +511,7 @@ def __init__(self, *args, **kwargs): # I like https://stackoverflow.com/a/17558764/4166604 better than # https://stackoverflow.com/a/28050837/4166604, it has the ability to add # logic/function calls, if I so desire - self.addFilter(TerraFilter()) + self.addFilter(TerraAddFilter()) def findCaller(self, stack_info=False, stacklevel=1): """ @@ -508,9 +552,8 @@ def findCaller(self, stack_info=False, stacklevel=1): # Define _log instead of logger adapter, this works better (setLoggerClass) # https://stackoverflow.com/a/28050837/4166604 - def _log(self, *args, **kwargs): - # kwargs['extra'] = extra_logger_variables - return super()._log(*args, **kwargs) + # def _log(self,*args, **kwargs): + # return super()._log(*args, **kwargs) def debug1(self, msg, *args, **kwargs): ''' diff --git a/terra/tests/demo/services.py b/terra/tests/demo/services.py index a54a27cd..90a49247 100644 --- a/terra/tests/demo/services.py +++ b/terra/tests/demo/services.py @@ -20,9 +20,17 @@ from terra.logger import getLogger logger = getLogger(__name__) + class Demo1(BaseService): ''' Simple Demo Service ''' command = ['python', '-m', 'terra.tests.demo.runners.demo1'] + CONTAINER_PROCESSING_DIR = "/processing" + + def pre_run(self): + self.add_volume(settings.processing_dir, + Demo1.CONTAINER_PROCESSING_DIR, + 'rw') + super().pre_run() @DockerCompute.register(Demo1) @@ -31,7 +39,8 @@ def __init__(self): super().__init__() self.compose_files = [os.path.join(env['TERRA_TERRA_DIR'], 'docker-compose-main.yml')] - self.compose_service_name = 'terra' + self.compose_service_name = 'terra-demo' + @VirtualEnvCompute.register(Demo1) class Demo1_virtualenv(VirtualEnvService, Demo1): From eb02ec24d1641d5b23019d407d0a2d8dfa0a144c Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Tue, 19 May 2020 15:57:59 -0400 Subject: [PATCH 48/94] Finished demo app - Added demo tasks to celery by default - Adjust logging levels for celery executor messages - Added a ColorFormatter - Demo app works in Dockers - Added Demo app templates Signed-off-by: Andy Neff --- external/vsi_common | 2 +- terra/executor/celery/celeryconfig.py | 1 + terra/executor/celery/executor.py | 6 +-- terra/logger.py | 43 +++++++++++++++++--- terra/tests/demo/__main__.py | 58 +++++++++++++++++++++------ terra/tests/demo/services.py | 49 ++++++++++++++++------ terra/tests/demo/workflows.py | 3 +- 7 files changed, 126 insertions(+), 36 deletions(-) diff --git a/external/vsi_common b/external/vsi_common index b3e7684a..16553024 160000 --- 
a/external/vsi_common +++ b/external/vsi_common @@ -1 +1 @@ -Subproject commit b3e7684ad4ff093945087a33205fbefaad3b3dc1 +Subproject commit 165530242fcd36b0f22b2231255104282b2eab5e diff --git a/terra/executor/celery/celeryconfig.py b/terra/executor/celery/celeryconfig.py index a254cfc2..e378d0cc 100644 --- a/terra/executor/celery/celeryconfig.py +++ b/terra/executor/celery/celeryconfig.py @@ -31,3 +31,4 @@ if celery_include: import ast include = ast.literal_eval(celery_include) + include += type(include)(['terra.tests.demo.tasks']) diff --git a/terra/executor/celery/executor.py b/terra/executor/celery/executor.py index c8231645..0d9ca9ed 100644 --- a/terra/executor/celery/executor.py +++ b/terra/executor/celery/executor.py @@ -56,7 +56,7 @@ def cancel(self): Returns True if the future was cancelled, False otherwise. A future cannot be cancelled if it is running or has already completed. """ - logger.info(f'Canceling task {self._ar.id}') + logger.debug4(f'Canceling task {self._ar.id}') with self._condition: if self._state in [RUNNING, FINISHED, CANCELLED, CANCELLED_AND_NOTIFIED]: return super().cancel() @@ -169,7 +169,7 @@ def _update_futures(self): # Future is FINISHED elif ar.state == 'FAILURE': - logger.info('Celery task "%s" resolved with error.', ar.id) + logger.error('Celery task "%s" resolved with error.', ar.id) fut.set_exception(ar.result) # Future is FINISHED @@ -205,7 +205,7 @@ def submit(self, fn, *args, **kwargs): return future def shutdown(self, wait=True): - logger.info('Shutting down celery tasks...') + logger.debug1('Shutting down celery tasks...') with self._shutdown_lock: self._shutdown = True for fut in tuple(self._futures): diff --git a/terra/logger.py b/terra/logger.py index 73eec1e5..cb1bd616 100644 --- a/terra/logger.py +++ b/terra/logger.py @@ -78,7 +78,7 @@ from logging import ( CRITICAL, ERROR, INFO, FATAL, WARN, WARNING, NOTSET, Filter, - getLogger, _acquireLock, _releaseLock, currentframe, + getLogger, _acquireLock, _releaseLock, currentframe, Formatter, _srcfile as logging_srcfile, Logger as Logger_original ) @@ -259,9 +259,11 @@ def __init__(self): # Enable warnings to default warnings.simplefilter('default') - warnings.filterwarnings("ignore", - category=DeprecationWarning, module='yaml', - message="Using or importing the ABCs") + # Disable known warnings that there's nothing to be done about. 
+ for module in ('yaml', 'celery.app.amqp'): + warnings.filterwarnings("ignore", + category=DeprecationWarning, module=module, + message="Using or importing the ABCs") warnings.filterwarnings("ignore", category=DeprecationWarning, module='osgeo', message="the imp module is deprecated") @@ -363,6 +365,10 @@ def set_level_and_formatter(self): datefmt=settings.logging.date_format, style=settings.logging.style) + stderr_formatter = ColorFormatter(fmt=settings.logging.format, + datefmt=settings.logging.date_format, + style=settings.logging.style) + # Configure log level level = settings.logging.level if isinstance(level, str): @@ -373,7 +379,7 @@ def set_level_and_formatter(self): # Configure format self.main_log_handler.setFormatter(formatter) - self.stderr_handler.setFormatter(formatter) + self.stderr_handler.setFormatter(stderr_formatter) def configure_logger(self, sender=None, signal=None, **kwargs): ''' @@ -484,6 +490,33 @@ def filter(self, record): return True +class ColorFormatter(Formatter): + use_color = True + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + # self.use_color = use_color + + def format(self, record): + if self.use_color: + zone = record.__dict__['zone'] + if zone == "preconfig": + record.__dict__['zone'] = '\033[33mpreconfig\033[0m' + elif zone == "controller": + record.__dict__['zone'] = '\033[32mcontroller\033[0m' + elif zone == "runner": + record.__dict__['zone'] = '\033[35mrunner\033[0m' + elif zone == "task": + record.__dict__['zone'] = '\033[34mtask\033[0m' + else: + record.__dict__['zone'] = f'\033[31m{record.__dict__["zone"]}\033[0m' + + msg = super().format(record) + record.__dict__['zone'] = zone + return msg + else: + return super().format(record) + + # def show_log(k, v): # def show_dict_fields(prefix, dict1): # for fld,val in dict1.items(): diff --git a/terra/tests/demo/__main__.py b/terra/tests/demo/__main__.py index 3ee1ad6f..4cd5582d 100644 --- a/terra/tests/demo/__main__.py +++ b/terra/tests/demo/__main__.py @@ -5,7 +5,6 @@ configured to do so. May result in a small amount of billing. 
''' -import argparse from os import environ as env import tempfile import os @@ -13,14 +12,44 @@ import pydoc from terra import settings -from terra.core.settings import ENVIRONMENT_VARIABLE -from terra.utils.cli import FullPaths +from terra.core.settings import ENVIRONMENT_VARIABLE, settings_property +from terra.core.exceptions import ImproperlyConfigured +from terra.utils.cli import FullPaths, ArgumentParser + +@settings_property +def singularity_unset(self): + raise ImproperlyConfigured('You must to set --compose and --service for ' + 'singularity') + +def demo_templates(): + docker = { + "demo": {"compose": os.path.join(env['TERRA_TERRA_DIR'], + 'docker-compose-main.yml'), + "service": "terra-demo"} + } + + singularity = { + "demo": {"compose": singularity_unset, + "service": singularity_unset} + } + + templates = [ + ({"compute": {"arch": "docker"}}, docker), + ({"compute": {"arch": "terra.compute.docker"}}, docker), + ({"compute": {"arch": "singularity"}}, singularity), + ({"compute": {"arch": "terra.compute.singularity"}}, singularity) + ] + return templates def get_parser(): - parser = argparse.ArgumentParser(description="View Angle Runner") + parser = ArgumentParser(description="View Angle Runner") aa = parser.add_argument aa('--loglevel', type=str, help="Log level", default=None) + aa('--compose', type=str, default=None, + help="Compose filename (for docker/singularity)") + aa('--service', type=str, default=None, + help="Service name (for docker/singularity)") aa('settings', type=str, help="JSON settings file", default=os.environ.get(ENVIRONMENT_VARIABLE), action=FullPaths) return parser @@ -35,6 +64,12 @@ def main(processing_dir, args=None): # Patch settings for demo settings_json['processing_dir'] = processing_dir + settings_json['demo'] = {} + + if args.compose: + settings_json['demo']['compose'] = args.compose + if args.service: + settings_json['demo']['service'] = args.service if args.loglevel: try: settings_json['logging']['level'] = args.loglevel @@ -42,6 +77,7 @@ def main(processing_dir, args=None): settings_json['logging'] = {'level': args.loglevel} # Configure settings + settings.add_templates(demo_templates()) settings.configure(settings_json) # import pprint @@ -51,19 +87,15 @@ def main(processing_dir, args=None): from .workflows import DemoWorkflow DemoWorkflow().demonate() - import terra.logger - print(terra.logger._logs.tcp_logging_server.ready) - - if __name__ == '__main__': processing_dir = tempfile.TemporaryDirectory() try: main(processing_dir.name) - with open(os.path.join(processing_dir.name, 'terra_log'), 'r') as fid: - print('-------------------') - print('Paging log messages') - print('-------------------') - pydoc.pager(fid.read()) + # with open(os.path.join(processing_dir.name, 'terra_log'), 'r') as fid: + # print('-------------------') + # print('Paging log messages') + # print('-------------------') + # pydoc.pager(fid.read()) finally: processing_dir.cleanup() diff --git a/terra/tests/demo/services.py b/terra/tests/demo/services.py index 90a49247..f4638f1b 100644 --- a/terra/tests/demo/services.py +++ b/terra/tests/demo/services.py @@ -12,19 +12,12 @@ Compute as VirtualEnvCompute ) from terra.compute.base import BaseService -from terra.core.settings import TerraJSONEncoder -from os import environ as env -import json -import os -import posixpath -from terra.logger import getLogger -logger = getLogger(__name__) class Demo1(BaseService): ''' Simple Demo Service ''' command = ['python', '-m', 'terra.tests.demo.runners.demo1'] - CONTAINER_PROCESSING_DIR = 
"/processing" + CONTAINER_PROCESSING_DIR = "/opt/test" def pre_run(self): self.add_volume(settings.processing_dir, @@ -37,11 +30,43 @@ def pre_run(self): class Demo1_docker(DockerService, Demo1): def __init__(self): super().__init__() - self.compose_files = [os.path.join(env['TERRA_TERRA_DIR'], - 'docker-compose-main.yml')] - self.compose_service_name = 'terra-demo' + self.compose_files = [settings.demo.compose] + self.compose_service_name = settings.demo.service + + +@SingularityCompute.register(Demo1) +class Demo1_singularity(SingularityService, Demo1): + def __init__(self): + super().__init__() + self.compose_files = [settings.demo.compose] + self.compose_service_name = settings.demo.service @VirtualEnvCompute.register(Demo1) class Demo1_virtualenv(VirtualEnvService, Demo1): - pass \ No newline at end of file + pass + + +class Demo2(Demo1): + ''' Simple Demo Service ''' + command = ['python', '-m', 'terra.tests.demo.runners.demo2'] + +@DockerCompute.register(Demo2) +class Demo2_docker(DockerService, Demo2): + def __init__(self): + super().__init__() + self.compose_files = [settings.demo.compose] + self.compose_service_name = settings.demo.service + + +@SingularityCompute.register(Demo2) +class Demo2_singularity(SingularityService, Demo2): + def __init__(self): + super().__init__() + self.compose_files = [settings.demo.compose] + self.compose_service_name = settings.demo.service + + +@VirtualEnvCompute.register(Demo2) +class Demo2_virtualenv(VirtualEnvService, Demo2): + pass diff --git a/terra/tests/demo/workflows.py b/terra/tests/demo/workflows.py index ab1bf08b..91508832 100644 --- a/terra/tests/demo/workflows.py +++ b/terra/tests/demo/workflows.py @@ -8,6 +8,5 @@ class DemoWorkflow: def demonate(self): logger.critical('Starting demo workflow') compute.run('terra.tests.demo.services.Demo1') - # compute.run('terra.tests.demo.services.Demo2') - # compute.run('terra.tests.demo.services.Demo3') + compute.run('terra.tests.demo.services.Demo2') logger.critical('Ran demo workflow') \ No newline at end of file From 4edae6b9580dc4b2c9e9d569580a4f984be9268c Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Tue, 19 May 2020 18:04:43 -0400 Subject: [PATCH 49/94] Fixed other computes/executors - All computes and executors work consistently with demo app. 
Signed-off-by: Andy Neff --- terra/compute/base.py | 6 ++--- terra/compute/virtualenv.py | 5 ++-- terra/executor/dummy.py | 6 ++--- terra/executor/sync.py | 7 +++--- terra/executor/thread.py | 3 ++- terra/tests/demo/__main__.py | 10 ++++---- terra/tests/demo/runners/demo2.py | 38 +++++++++++++++++++++++++++++++ terra/tests/demo/tasks.py | 11 +++++++++ 8 files changed, 69 insertions(+), 17 deletions(-) create mode 100644 terra/tests/demo/runners/demo2.py create mode 100644 terra/tests/demo/tasks.py diff --git a/terra/compute/base.py b/terra/compute/base.py index d763c0d8..0265f459 100644 --- a/terra/compute/base.py +++ b/terra/compute/base.py @@ -71,9 +71,6 @@ def add_volume(self, local, remote, flags=None, prefix=None, self._validate_volume(local, remote, local_must_exist=local_must_exist) self.volumes.append((local, remote)) - def get_volume_map(self, config, service_info): - return [] - def pre_run(self): ''' A function that runs before the run service @@ -170,6 +167,9 @@ def defaultCommand(self, service_class, *args, **kwargs): # bind function and return it return defaultCommand.__get__(self, type(self)) + def get_volume_map(self, config, service_info): + return [] + def run_service(self, *args, **kwargs): ''' Place holder for code to run an instance in the compute. Runs diff --git a/terra/compute/virtualenv.py b/terra/compute/virtualenv.py index 37f4b5ab..fc807b8b 100644 --- a/terra/compute/virtualenv.py +++ b/terra/compute/virtualenv.py @@ -113,11 +113,12 @@ def pre_run(self): temp_config_file = os.path.join(self.temp_dir.name, 'config.json') # Serialize config file - docker_config = TerraJSONEncoder.serializableSettings(settings) + venv_config = TerraJSONEncoder.serializableSettings(settings) # Dump the serialized config to the temp config file + venv_config['terra']['zone'] = 'runner' with open(temp_config_file, 'w') as fid: - json.dump(docker_config, fid) + json.dump(venv_config, fid) # Set the Terra settings file for this service runner to the temp config # file diff --git a/terra/executor/dummy.py b/terra/executor/dummy.py index efdfb35b..03ac0626 100644 --- a/terra/executor/dummy.py +++ b/terra/executor/dummy.py @@ -1,12 +1,12 @@ -from concurrent.futures import Future, Executor from threading import Lock +from terra.executor.base import BaseFuture, BaseExecutor from terra import settings from terra.logger import getLogger logger = getLogger(__name__) -class DummyExecutor(Executor): +class DummyExecutor(BaseExecutor): """ Executor that does nothing, just logs what would happen. @@ -27,7 +27,7 @@ def submit(self, fn, *args, **kwargs): original_zone = settings.terra.zone # Fake the zone for the log messages settings.terra.zone = 'task' - f = Future() + f = BaseFuture() logger.info(f'Run function: {fn}') logger.info(f'With args: {args}') logger.info(f'With kwargs: {kwargs}') diff --git a/terra/executor/sync.py b/terra/executor/sync.py index c9c67cef..423ccdb7 100644 --- a/terra/executor/sync.py +++ b/terra/executor/sync.py @@ -1,9 +1,10 @@ -from concurrent.futures import Future, Executor from threading import Lock +from terra.executor.base import BaseExecutor, BaseFuture + # No need for a global shutdown lock here, not multi-threaded/process -class SyncExecutor(Executor): +class SyncExecutor(BaseExecutor): """ Executor that does the job synchronously. 
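# A minimal sketch of the contract the synchronous submit() below follows,
# assuming only the standard concurrent.futures.Future API: run the callable
# immediately and record either its result or its exception on the future.
# (run_now is a hypothetical helper name used for illustration.)
from concurrent.futures import Future

def run_now(fn, *args, **kwargs):
    f = Future()
    try:
        f.set_result(fn(*args, **kwargs))
    except BaseException as e:
        f.set_exception(e)
    return f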
@@ -25,7 +26,7 @@ def submit(self, fn, *args, **kwargs): if self._shutdown: raise RuntimeError('cannot schedule new futures after shutdown') - f = Future() + f = BaseFuture() try: result = fn(*args, **kwargs) except BaseException as e: diff --git a/terra/executor/thread.py b/terra/executor/thread.py index 429e19b1..5a8ea611 100644 --- a/terra/executor/thread.py +++ b/terra/executor/thread.py @@ -3,5 +3,6 @@ __all__ = ['ThreadPoolExecutor'] -class ThreadPoolExecutor(concurrent.futures.ThreadPoolExecutor, terra.executor.base.BaseExecutor): +class ThreadPoolExecutor(concurrent.futures.ThreadPoolExecutor, + terra.executor.base.BaseExecutor): pass \ No newline at end of file diff --git a/terra/tests/demo/__main__.py b/terra/tests/demo/__main__.py index 4cd5582d..1fc7c53c 100644 --- a/terra/tests/demo/__main__.py +++ b/terra/tests/demo/__main__.py @@ -92,10 +92,10 @@ def main(processing_dir, args=None): processing_dir = tempfile.TemporaryDirectory() try: main(processing_dir.name) - # with open(os.path.join(processing_dir.name, 'terra_log'), 'r') as fid: - # print('-------------------') - # print('Paging log messages') - # print('-------------------') - # pydoc.pager(fid.read()) + with open(os.path.join(processing_dir.name, 'terra_log'), 'r') as fid: + print('-------------------') + print('Paging log messages') + print('-------------------') + pydoc.pager(fid.read()) finally: processing_dir.cleanup() diff --git a/terra/tests/demo/runners/demo2.py b/terra/tests/demo/runners/demo2.py new file mode 100644 index 00000000..831ab4c5 --- /dev/null +++ b/terra/tests/demo/runners/demo2.py @@ -0,0 +1,38 @@ +''' +Demo app that tests if a terra config is working + +*** WARNING *** This will spin up real computers and workers, if you are +configured to do so. May result in a small amount of billing. +''' + +import concurrent.futures + +from terra.tests.demo.tasks import demo2 +from terra.utils.cli import ArgumentParser +from terra.executor import Executor +from terra import settings +from terra.logger import getLogger +logger = getLogger(__name__) + + +def main(args=None): + settings.terra.zone + logger.critical('Demo 2 Starting') + + futures = {} + + with Executor(max_workers=4) as executor: + for x in range(1, 3): + for y in range(4, 6): + futures[executor.submit(demo2, x, y)] = (x,y) + + for future in concurrent.futures.as_completed(futures): + logger.info(f'Completed: {settings.terra.zone} {futures[future]}') + + logger.critical('Demo 2 Done') + + + +if __name__ == '__main__': + ArgumentParser().parse_args() + main() diff --git a/terra/tests/demo/tasks.py b/terra/tests/demo/tasks.py new file mode 100644 index 00000000..e9573050 --- /dev/null +++ b/terra/tests/demo/tasks.py @@ -0,0 +1,11 @@ +from terra.task import shared_task + +# Terra core +from terra.logger import getLogger +logger = getLogger(__name__) + + +@shared_task +def demo2(self, x, y): + logger.critical(f"Task: {x} {y}") + return x * y \ No newline at end of file From 00f34216bfd3c8cc8e742ae435a814fb1368a730 Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Wed, 20 May 2020 10:18:00 -0400 Subject: [PATCH 50/94] Added uuid to setting dump filenames Signed-off-by: Andy Neff --- terra/core/settings.py | 8 +++++++- terra/logger.py | 6 +++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/terra/core/settings.py b/terra/core/settings.py index b8ed67e8..ce1ce033 100644 --- a/terra/core/settings.py +++ b/terra/core/settings.py @@ -147,6 +147,7 @@ # POSSIBILITY OF SUCH DAMAGE. 
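# A minimal sketch of what the uuid settings property added by this patch
# feeds into: terra/logger.py (later in this patch) embeds settings.terra.uuid
# in the settings dump filename, giving each run its own file. Values below
# are illustrative only.
from uuid import uuid4
from datetime import datetime, timezone

run_uuid = str(uuid4())  # stands in for the lazily evaluated settings.terra.uuid
print(datetime.now(timezone.utc).strftime(
    f'settings_{run_uuid}_%Y_%m_%d_%H_%M_%S_%f.json'))
# -> e.g. settings_<uuid4>_2020_05_20_14_05_07_123456.json, unique per run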
import os +from uuid import uuid4 from logging.handlers import DEFAULT_TCP_LOGGING_PORT from inspect import isfunction from functools import wraps @@ -257,6 +258,10 @@ def need_to_set_virtualenv_dir(self): "not set settings.compute.virtualenv_dir in your " "config file.") +@settings_property +def terra_uuid(self): + return str(uuid4()) + global_templates = [ ( # Global Defaults @@ -287,7 +292,8 @@ def need_to_set_virtualenv_dir(self): 'terra': { # unlike other settings, this should NOT be overwritten by a # config.json file, there is currently nothing to prevent that - 'zone': 'controller' + 'zone': 'controller', + 'uuid': terra_uuid }, 'status_file': status_file, 'processing_dir': processing_dir, diff --git a/terra/logger.py b/terra/logger.py index cb1bd616..28f3711c 100644 --- a/terra/logger.py +++ b/terra/logger.py @@ -396,7 +396,7 @@ def configure_logger(self, sender=None, signal=None, **kwargs): raise ImproperlyConfigured() # This sends a signal to the current Executor type, which has already been - # imported at the end of LasySettings.configure. We don't import Executor + # imported at the end of LazySettings.configure. We don't import Executor # here to reduce the concerns of this module import terra.core.signals terra.core.signals.logger_configure.send(sender=self, **kwargs) @@ -408,8 +408,8 @@ def configure_logger(self, sender=None, signal=None, **kwargs): self.root_logger.removeHandler(self.tmp_handler) settings_dump = os.path.join(settings.processing_dir, - datetime.now(timezone.utc).strftime( - 'settings_%Y_%m_%d_%H_%M_%S_%f.json')) + datetime.now(timezone.utc).strftime( + f'settings_{settings.terra.uuid}_%Y_%m_%d_%H_%M_%S_%f.json')) with open(settings_dump, 'w') as fid: fid.write(TerraJSONEncoder.dumps(settings, indent=2)) From 1dbe2ba7306eb11574561006e4b09ee85bc2688f Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Wed, 20 May 2020 10:58:39 -0400 Subject: [PATCH 51/94] Fixing tests executor celery Signed-off-by: Andy Neff --- terra/executor/celery/executor.py | 4 ++-- terra/tests/test_executor_celery.py | 26 +++++++++++++++----------- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/terra/executor/celery/executor.py b/terra/executor/celery/executor.py index 0d9ca9ed..e07d0904 100644 --- a/terra/executor/celery/executor.py +++ b/terra/executor/celery/executor.py @@ -61,7 +61,7 @@ def cancel(self): if self._state in [RUNNING, FINISHED, CANCELLED, CANCELLED_AND_NOTIFIED]: return super().cancel() - # Not running and not canceled. May be possible to cancel! + # Not running and not cancelled. May be possible to cancel! 
self._ar.ready() # Triggers an update check if self._ar.state != 'REVOKED': self._ar.revoke() @@ -150,7 +150,7 @@ def _update_futures(self): ar.ready() # Just trigger the AsyncResult state update check if ar.state == 'REVOKED': - logger.warning('Celery task "%s" canceled.', ar.id) + logger.warning('Celery task "%s" cancelled.', ar.id) if not fut.cancelled(): if not fut.cancel(): # pragma: no cover logger.error('Future was not running but failed to be cancelled') diff --git a/terra/tests/test_executor_celery.py b/terra/tests/test_executor_celery.py index 1f9832c3..dda113ee 100644 --- a/terra/tests/test_executor_celery.py +++ b/terra/tests/test_executor_celery.py @@ -40,7 +40,7 @@ def test_redis_passwordfile(self): @mock.patch.dict(os.environ, TERRA_CELERY_INCLUDE='["foo", "bar"]') def test_include(self): import terra.executor.celery.celeryconfig as cc - self.assertEqual(cc.include, ['foo', 'bar']) + self.assertEqual(cc.include, ['foo', 'bar', 'terra.tests.demo.tasks']) class MockAsyncResult: @@ -92,9 +92,9 @@ def wait_for_state(self, future, state): time.sleep(0.001) if future._state == state: break - if x == 99: - raise TimeoutError(f'Took longer than 100us for a 1us update for ' - f'{future._state} to become {state}') + else: + raise TimeoutError(f'Took longer than 100ms for a 1ms update for ' + f'{future._state} to become {state}') def test_simple(self): test = test_factory() @@ -158,16 +158,18 @@ def test_update_futures_finish(self): time.sleep(0.001) if not len(self.executor._futures): break - if x == 99: - raise TimeoutError('Took longer than 100us for a 1us update') + else: + raise TimeoutError('Took longer than 100ms for a 1ms update') def test_update_futures_revoked(self): test = test_factory() future = self.executor.submit(test) self.assertFalse(future.cancelled()) - future._ar.state = 'REVOKED' - self.wait_for_state(future, 'CANCELLED_AND_NOTIFIED') + with self.assertLogs() as cm: + future._ar.state = 'REVOKED' + self.wait_for_state(future, 'CANCELLED_AND_NOTIFIED') + self.assertRegex(str(cm.output), 'WARNING.*Celery task.*cancelled') self.assertTrue(future.cancelled()) def test_update_futures_success(self): @@ -184,9 +186,11 @@ def test_update_futures_failure(self): future = self.executor.submit(test) self.assertIsNone(future._result) - future._ar.state = 'FAILURE' - future._ar.result = TypeError('On no') - self.wait_for_state(future, 'FINISHED') + with self.assertLogs() as cm: + future._ar.state = 'FAILURE' + future._ar.result = TypeError('On no') + self.wait_for_state(future, 'FINISHED') + self.assertRegex(str(cm.output), 'ERROR.*Celery task.*resolved with error') def test_shutdown(self): test = test_factory() From 4dd0f38f5fad823a2ea70e80d42a6b12ec081f9c Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Wed, 20 May 2020 11:01:23 -0400 Subject: [PATCH 52/94] Fixing tests compute docker Signed-off-by: Andy Neff --- terra/tests/test_compute_docker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terra/tests/test_compute_docker.py b/terra/tests/test_compute_docker.py index ed216c20..fca9c3ec 100644 --- a/terra/tests/test_compute_docker.py +++ b/terra/tests/test_compute_docker.py @@ -102,7 +102,7 @@ def test_run(self): compute.run(MockJustService()) # Run a docker service self.assertEqual(('--wrap', 'Just-docker-compose', - '-f', 'file1', 'run', 'launch', 'ls'), + '-f', 'file1', 'run', '-T', 'launch', 'ls'), self.just_args) self.assertEqual({'justfile': None, 'env': {'BAR': 'FOO'}}, self.just_kwargs) From 062370c93975a3f87bf13f07aa188ce7f7f774f3 Mon Sep 17 
00:00:00 2001 From: Andy Neff Date: Wed, 20 May 2020 11:22:06 -0400 Subject: [PATCH 53/94] Fixing tests compute virtualenv/utils Signed-off-by: Andy Neff --- terra/core/exceptions.py | 5 +++++ terra/core/settings.py | 9 +++++---- terra/tests/test_compute_utils.py | 2 +- terra/tests/test_compute_virtualenv.py | 5 +++-- 4 files changed, 14 insertions(+), 7 deletions(-) diff --git a/terra/core/exceptions.py b/terra/core/exceptions.py index 3a905530..6066b698 100644 --- a/terra/core/exceptions.py +++ b/terra/core/exceptions.py @@ -2,3 +2,8 @@ class ImproperlyConfigured(Exception): """ Exception for Terra is somehow improperly configured """ + +class ConfigurationWarning(Warning): + """ + Warning for Terra may somehow be improperly configured + """ \ No newline at end of file diff --git a/terra/core/settings.py b/terra/core/settings.py index ce1ce033..fe031073 100644 --- a/terra/core/settings.py +++ b/terra/core/settings.py @@ -154,7 +154,7 @@ from json import JSONEncoder import platform -from terra.core.exceptions import ImproperlyConfigured +from terra.core.exceptions import ImproperlyConfigured, ConfigurationWarning # Do not import terra.logger or terra.signals here, or any module that # imports them from vsi.tools.python import ( @@ -254,9 +254,10 @@ def unittest(self): @settings_property def need_to_set_virtualenv_dir(self): - raise ImproperlyConfigured("You are using the virtualenv compute, and did " - "not set settings.compute.virtualenv_dir in your " - "config file.") + warnings.warn("You are using the virtualenv compute, and did not set " + "settings.compute.virtualenv_dir in your config file. " + "Using system python.", ConfigurationWarning) + return None @settings_property def terra_uuid(self): diff --git a/terra/tests/test_compute_utils.py b/terra/tests/test_compute_utils.py index 09d86f28..d00b053a 100644 --- a/terra/tests/test_compute_utils.py +++ b/terra/tests/test_compute_utils.py @@ -176,7 +176,7 @@ def test_just_kwargs(self): def test_logging_code(self): # Test the debug1 diffdict log output - with self.assertLogs(utils.__name__, level="DEBUG1") as cm: + with self.assertLogs(utils.__name__, level="DEBUG4") as cm: env = os.environ.copy() env.pop('PATH') env['FOO'] = 'BAR' diff --git a/terra/tests/test_compute_virtualenv.py b/terra/tests/test_compute_virtualenv.py index 36cc17e2..5a546435 100644 --- a/terra/tests/test_compute_virtualenv.py +++ b/terra/tests/test_compute_virtualenv.py @@ -37,7 +37,8 @@ def setUp(self): # patches.append(mock.patch.dict(base.services, clear=True)) super().setUp() settings.configure({ - 'compute': {'arch': 'virtualenv'}, + 'compute': {'arch': 'virtualenv', + 'virtualenv_dir': None}, 'processing_dir': self.temp_dir.name, 'test_dir': '/opt/projects/terra/terra_dsm/external/terra/foo'}) @@ -98,7 +99,7 @@ def test_logging_code(self): service = MockVirtualEnvService() # Test logging code - with self.assertLogs(virtualenv.__name__, level="DEBUG1") as cm: + with self.assertLogs(virtualenv.__name__, level="DEBUG4") as cm: os.environ['BAR'] = 'FOO' env = os.environ.copy() env.pop('BAR') From be80e9482325eb21428243440daace834333dcdc Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Wed, 20 May 2020 11:38:04 -0400 Subject: [PATCH 54/94] Fixing tests executor dummy Signed-off-by: Andy Neff --- terra/tests/test_executor_dummy.py | 4 ++-- terra/tests/utils.py | 22 ++++++++++++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/terra/tests/test_executor_dummy.py b/terra/tests/test_executor_dummy.py index b97a0a4d..77c62c87 100644 --- 
a/terra/tests/test_executor_dummy.py +++ b/terra/tests/test_executor_dummy.py @@ -1,5 +1,5 @@ from terra.executor import dummy -from .utils import TestCase +from .utils import TestSettingsConfiguredCase def test1(x): @@ -10,7 +10,7 @@ def test2(x): return x + 13 -class TestExecutorDummy(TestCase): +class TestExecutorDummy(TestSettingsConfiguredCase): def setUp(self): super().setUp() self.executor = dummy.DummyExecutor() diff --git a/terra/tests/utils.py b/terra/tests/utils.py index c4f5c5d3..29608d22 100644 --- a/terra/tests/utils.py +++ b/terra/tests/utils.py @@ -1,5 +1,27 @@ +import os +from unittest import mock + from vsi.test.utils import ( TestCase, make_traceback ) +from terra import settings + + __all__ = ["TestCase", "make_traceback"] + + +class TestSettingsUnconfiguredCase(TestCase): + def setUp(self): + # Useful for tests that set this + self.patches.append(mock.patch.dict(os.environ, + {'TERRA_SETTINGS_FILE': ""})) + # Use settings + self.patches.append(mock.patch.object(settings, '_wrapped', None)) + super().setUp() + + +class TestSettingsConfiguredCase(TestSettingsUnconfiguredCase): + def setUp(self): + super().setUp() + settings.configure({}) From 5470b5bf332a3957d654fd74306d354e9187163e Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Wed, 20 May 2020 12:12:05 -0400 Subject: [PATCH 55/94] Fixing tests logger Signed-off-by: Andy Neff --- terra/compute/base.py | 4 ++-- terra/tests/test_core_settings.py | 2 +- terra/tests/test_logger.py | 25 ++++++++++++++++++------- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/terra/compute/base.py b/terra/compute/base.py index 0265f459..007f1c67 100644 --- a/terra/compute/base.py +++ b/terra/compute/base.py @@ -197,7 +197,7 @@ def configure_logger(sender, **kwargs): if settings.terra.zone == 'controller': # Setup log file for use in configure sender._log_file = os.path.join(settings.processing_dir, - terra.logger._logs.default_log_prefix) + terra.logger._SetupTerraLogger.default_log_prefix) os.makedirs(settings.processing_dir, exist_ok=True) sender._log_file = open(sender._log_file, 'a') sender.main_log_handler = StreamHandler(stream=sender._log_file) @@ -249,7 +249,7 @@ def reconfigure_logger(sender, **kwargs): if settings.terra.zone == 'controller': log_file = os.path.join(settings.processing_dir, - terra.logger._logs.default_log_prefix) + terra.logger._SetupTerraLogger.default_log_prefix) # if not os.path.samefile(log_file, sender._log_file.name): if log_file != sender._log_file.name: diff --git a/terra/tests/test_core_settings.py b/terra/tests/test_core_settings.py index cd09d04b..9a86b72d 100644 --- a/terra/tests/test_core_settings.py +++ b/terra/tests/test_core_settings.py @@ -668,4 +668,4 @@ def last_test_import_settings(self): # Picky windows import terra.logger - terra.logger._logs.log_file.close() + terra.logger._logs._log_file.close() diff --git a/terra/tests/test_logger.py b/terra/tests/test_logger.py index ac607b17..13b93ab9 100644 --- a/terra/tests/test_logger.py +++ b/terra/tests/test_logger.py @@ -47,6 +47,11 @@ def setUp(self): settings_filename = os.path.join(self.temp_dir.name, 'config.json') self.patches.append(mock.patch.dict(os.environ, TERRA_SETTINGS_FILE=settings_filename)) + attrs = {'serve_until_stopped.return_value': True, 'ready': True} + MockLogRecordSocketReceiver = mock.Mock(**attrs) + self.patches.append(mock.patch('terra.logger.LogRecordSocketReceiver', + MockLogRecordSocketReceiver)) + super().setUp() # Don't use settings.configure here, because I need to test out logging @@ -64,7 +69,7 @@ 
def tearDown(self): # Remove all the logger handlers sys.excepthook = self.original_system_hook try: - self._logs.log_file.close() + self._logs._log_file.close() except AttributeError: pass # Windows is pickier about deleting files @@ -154,24 +159,30 @@ def test_formatter(self): # This doesn't get formatted # with self.assertLogs(__name__, logger.ERROR) as cm: # logger.getLogger(__name__).error('Hi') + + test_logger = logger.getLogger(f'{__name__}.test_formatter') record = logging.LogRecord(__name__, logger.ERROR, __file__, 0, "Hiya", (), None) + self.assertTrue(test_logger.filter(record)) + self.assertTrue(self._logs.stderr_handler.filter(record)) self.assertEqual(self._logs.stderr_handler.format(record), "foo bar Hiya") def test_hostname(self): test_logger = logger.getLogger(f'{__name__}.test_hostname') record = test_logger.makeRecord(__name__, logger.ERROR, __file__, 0, - "Hiya", (), None, - extra=logger.extra_logger_variables) - self.assertIn('(preconfig)', self._logs.stderr_handler.format(record)) + "Hiya", (), None) + self.assertTrue(test_logger.filter(record)) + self.assertTrue(self._logs.stderr_handler.filter(record)) + self.assertIn(':preconfig)', self._logs.stderr_handler.format(record)) settings._setup() record = test_logger.makeRecord(__name__, logger.ERROR, __file__, 0, - "Hiya", (), None, - extra=logger.extra_logger_variables) - self.assertIn(f'({platform.node()})', + "Hiya", (), None) + self.assertTrue(test_logger.filter(record)) + self.assertTrue(self._logs.stderr_handler.filter(record)) + self.assertIn(f'({platform.node()}:', self._logs.stderr_handler.format(record)) # Test https://stackoverflow.com/q/19615876/4166604 From df7d4b0ff7edc3bc6a9208995f52ae7df4d38b0a Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Fri, 22 May 2020 16:17:38 -0400 Subject: [PATCH 56/94] Fixing unit tests Signed-off-by: Andy Neff --- terra/compute/base.py | 6 +- terra/core/settings.py | 3 +- terra/core/signals.py | 20 +- terra/logger.py | 27 ++- terra/tests/__init__.py | 10 +- terra/tests/test_compute_base.py | 157 ++++++++-------- terra/tests/test_compute_container.py | 8 +- terra/tests/test_core_settings.py | 253 +++++++++++++------------- terra/tests/test_logger.py | 46 ++--- terra/tests/test_signals.py | 30 +-- terra/tests/utils.py | 31 +++- 11 files changed, 324 insertions(+), 267 deletions(-) diff --git a/terra/compute/base.py b/terra/compute/base.py index 007f1c67..f034b964 100644 --- a/terra/compute/base.py +++ b/terra/compute/base.py @@ -251,8 +251,12 @@ def reconfigure_logger(sender, **kwargs): log_file = os.path.join(settings.processing_dir, terra.logger._SetupTerraLogger.default_log_prefix) + # Check to see if _log_file is unset. If it is, this is due to _log_file + # being called without configure being called. 
While it is not important + # this work, it's more likely for unit testsing # if not os.path.samefile(log_file, sender._log_file.name): - if log_file != sender._log_file.name: + if getattr(sender, '_log_file', None) is not None and \ + log_file != sender._log_file.name: os.makedirs(settings.processing_dir, exist_ok=True) sender._log_file.close() sender._log_file = open(log_file, 'a') diff --git a/terra/core/settings.py b/terra/core/settings.py index fe031073..3571cfc4 100644 --- a/terra/core/settings.py +++ b/terra/core/settings.py @@ -496,8 +496,6 @@ def configure(self, *args, **kwargs): ImproperlyConfigured If settings is already configured, will throw this exception """ - from terra.core.signals import post_settings_configured - if self._wrapped is not None: raise ImproperlyConfigured('Settings already configured.') logger.debug2('Pre settings configure') @@ -532,6 +530,7 @@ def read_json(json_file): import terra.compute # compute._connection # call a cached property + from terra.core.signals import post_settings_configured post_settings_configured.send(sender=self) logger.debug2('Post settings configure') diff --git a/terra/core/signals.py b/terra/core/signals.py index e6745f69..61b83869 100644 --- a/terra/core/signals.py +++ b/terra/core/signals.py @@ -37,6 +37,7 @@ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. +import os import threading import weakref @@ -197,9 +198,16 @@ def send(self, sender, **named): ------- list Return a list of tuple pairs [(receiver, response), ... ]. + + Environment Variables + --------------------- + TERRA_UNITTEST + Setting this to ``1`` will disable send. This is used during + unittesting to prevent unexpected behavior """ if not self.receivers or \ - self.sender_receivers_cache.get(sender) is NO_RECEIVERS: + self.sender_receivers_cache.get(sender) is NO_RECEIVERS or \ + os.environ.get('TERRA_UNITTEST') == "1": return [] return [ @@ -228,9 +236,16 @@ def send_robust(self, sender, **named): Return a list of tuple pairs [(receiver, response), ... ]. If any receiver raises an error (specifically any subclass of Exception), return the error instance as the result for that receiver. + + Environment Variables + --------------------- + TERRA_UNITTEST + Setting this to ``1`` will disable send. This is used during + unittesting to prevent unexpected behavior """ if not self.receivers or \ - self.sender_receivers_cache.get(sender) is NO_RECEIVERS: + self.sender_receivers_cache.get(sender) is NO_RECEIVERS or \ + os.environ.get('TERRA_UNITTEST') == "1": return [] # Call each receiver with whatever arguments it can accept. @@ -359,7 +374,6 @@ def _decorator(func): Sent after scope __exit__ from a settings context (i.e., with statement). ''' -# REVIEW should this be called post_logger_configure logger_configure = Signal() '''Signal: Sent to the executor after the logger has been configured. 
This will happen diff --git a/terra/logger.py b/terra/logger.py index 28f3711c..712cf369 100644 --- a/terra/logger.py +++ b/terra/logger.py @@ -374,12 +374,14 @@ def set_level_and_formatter(self): if isinstance(level, str): # make level case insensitive level = level.upper() - self.stderr_handler.setLevel(level) - self.main_log_handler.setLevel(level) - # Configure format - self.main_log_handler.setFormatter(formatter) - self.stderr_handler.setFormatter(stderr_formatter) + if getattr(self, 'stderr_handler', None) is not None: + self.stderr_handler.setLevel(level) + self.stderr_handler.setFormatter(stderr_formatter) + + if getattr(self, 'main_log_handler', None) is not None: + self.main_log_handler.setLevel(level) + self.main_log_handler.setFormatter(formatter) def configure_logger(self, sender=None, signal=None, **kwargs): ''' @@ -684,18 +686,23 @@ def handle_warning(message, category, filename, lineno, file=None, line=None): # Get the logger here, AFTER all the changes to the logger class logger = getLogger(__name__) -# Disable log setup for unittests. Can't use settings here ;) -if os.environ.get('TERRA_UNITTEST', None) != "1": # pragma: no cover +def _setup_terra_logger(): # Must be import signal after getLogger is defined... Currently this is # imported from logger. But if a custom getLogger is defined eventually, it # will need to be defined before importing terra.core.signals. import terra.core.signals # Configure logging (pre configure) - _logs = _SetupTerraLogger() + logs = _SetupTerraLogger() # Register post_configure with settings - terra.core.signals.post_settings_configured.connect(_logs.configure_logger) + terra.core.signals.post_settings_configured.connect(logs.configure_logger) # Handle a "with" settings context manager - terra.core.signals.post_settings_context.connect(_logs.reconfigure_logger) + terra.core.signals.post_settings_context.connect(logs.reconfigure_logger) + + return logs + +# Disable log setup for unittests. 
Can't use settings here ;) +if os.environ.get('TERRA_UNITTEST', None) != "1": # pragma: no cover + _logs = _setup_terra_logger() \ No newline at end of file diff --git a/terra/tests/__init__.py b/terra/tests/__init__.py index e0ed1ce1..7f965e1c 100644 --- a/terra/tests/__init__.py +++ b/terra/tests/__init__.py @@ -1,10 +1,18 @@ import os +import warnings + +# from terra.core.signals import logger_configure, logger_reconfigure + + +# # Disconnect signal receivers +# logger_configure.receivers = [] +# logger_reconfigure.receivers = [] # Use this as a package level setup def load_tests(loader, standard_tests, pattern): if os.environ.get('TERRA_UNITTEST', None) != "1": - print('WARNING: Running terra tests without setting TERRA_UNITTEST will ' + warnings.warn('WARNING: Running terra tests without setting TERRA_UNITTEST will ' 'result in side effects such as extraneouse log files being ' 'generated') diff --git a/terra/tests/test_compute_base.py b/terra/tests/test_compute_base.py index 2904e3ab..d585534c 100644 --- a/terra/tests/test_compute_base.py +++ b/terra/tests/test_compute_base.py @@ -2,89 +2,94 @@ from unittest import mock from terra import settings -from terra.compute import base from .utils import TestCase -# Registration test -class Foo: - class TestService(base.BaseService): - pass - - -class TestService_base(Foo.TestService, base.BaseService): - pass - - class TestServiceBase(TestCase): def setUp(self): + from terra.compute import base + self.base = base # I want to be able to use settings self.patches.append(mock.patch.object(settings, '_wrapped', None)) super().setUp() settings.configure({}) - # Simulate external env var - @mock.patch.dict(os.environ, {'FOO': "BAR"}) - def test_env(self): - # Test that a service inherits the environment correctly - service = base.BaseService() - # App specific env var - service.env['BAR'] = 'foo' - # Make sure both show up - self.assertEqual(service.env['FOO'], 'BAR') - self.assertEqual(service.env['BAR'], 'foo') - # Make sure BAR is isolated from process env - self.assertNotIn("BAR", os.environ) - - def test_add_volumes(self): - service = base.BaseService() - # Add a volumes - service.add_volume("/local", "/remote") - # Make sure it's in the list - self.assertIn(("/local", "/remote"), service.volumes) - - # Unconfigure settings - @mock.patch.object(settings, '_wrapped', None) - def test_volumes_and_configuration_map(self): - # Add a volumes - service = base.BaseService() - service.add_volume("/local", "/remote") - - # Test configuration_map - settings.configure({}) - # Make sure the volume is in the map - self.assertEqual([("/local", "/remote")], - base.BaseCompute().configuration_map(service)) - - @mock.patch.dict(base.services, clear=True) - def test_registry(self): - # Register a class class, just for fun - base.BaseCompute.register(Foo.TestService)(TestService_base) - - self.assertIn(Foo.TestService.__module__ + '.Foo.TestService', - base.services) - - with self.assertRaises(base.AlreadyRegisteredException, - msg='Compute command "car" does not have a service ' - 'implementation "car_service"'): - base.BaseCompute.register(Foo.TestService)(lambda x: 1) - - def test_getattr(self): - class Foo(base.BaseCompute): - def bar_service(self): - pass - - foo = Foo() - foo.bar - with self.assertRaises(AttributeError): - foo.car - - -class TestUnitTests(TestCase): - def last_test_registered_services(self): - self.assertFalse( - base.services, - msg="If you are seting this, one of the other unit tests has " - "registered a terra service. 
This side effect should be " - "prevented by mocking out the terra.compute.base.services dict. " - "Otherwise unit tests can interfere with each other.") +# # Simulate external env var +# @mock.patch.dict(os.environ, {'FOO': "BAR"}) +# def test_env(self): +# # Test that a service inherits the environment correctly +# service = self.base.BaseService() +# # App specific env var +# service.env['BAR'] = 'foo' +# # Make sure both show up +# self.assertEqual(service.env['FOO'], 'BAR') +# self.assertEqual(service.env['BAR'], 'foo') +# # Make sure BAR is isolated from process env +# self.assertNotIn("BAR", os.environ) + +# def test_add_volumes(self): +# service = self.base.BaseService() +# # Add a volumes +# service.add_volume("/local", "/remote") +# # Make sure it's in the list +# self.assertIn(("/local", "/remote"), service.volumes) + +# # Unconfigure settings +# @mock.patch.object(settings, '_wrapped', None) +# def test_volumes_and_configuration_map(self): +# # Add a volumes +# service = self.base.BaseService() +# service.add_volume("/local", "/remote") + +# # Test configuration_map +# settings.configure({}) +# # Make sure the volume is in the map +# self.assertEqual([("/local", "/remote")], +# self.base.BaseCompute().configuration_map(service)) + +# def test_registry(self): +# with mock.patch.dict(self.base.services, clear=True): +# # Registration test +# class Foo: +# class TestService(self.base.BaseService): +# pass + + +# class TestService_base(Foo.TestService, self.base.BaseService): +# pass + +# # Register a class class, just for fun +# self.base.BaseCompute.register(Foo.TestService)(TestService_base) + +# self.assertIn(Foo.TestService.__module__ + '.' + \ +# Foo.TestService.__qualname__, +# self.base.services) + +# with self.assertRaises(self.base.AlreadyRegisteredException, +# msg='Compute command "car" does not have a service ' +# 'implementation "car_service"'): +# self.base.BaseCompute.register(Foo.TestService)(lambda x: 1) + +# def test_getattr(self): +# class Foo(self.base.BaseCompute): +# def bar_service(self): +# pass + +# foo = Foo() +# foo.bar +# with self.assertRaises(AttributeError): +# foo.car + + +# class TestUnitTests(TestCase): +# def setUp(self): +# from terra.compute import base +# self.base = base + +# def last_test_registered_services(self): +# self.assertFalse( +# self.base.services, +# msg="If you are seeing this, one of the other unit tests has " +# "registered a terra service. This side effect should be " +# "prevented by mocking out the terra.compute.base.services dict. 
" +# "Otherwise unit tests can interfere with each other.") diff --git a/terra/tests/test_compute_container.py b/terra/tests/test_compute_container.py index 003c8ffc..4a9aa259 100644 --- a/terra/tests/test_compute_container.py +++ b/terra/tests/test_compute_container.py @@ -1,14 +1,13 @@ import os import ntpath import json -import tempfile from unittest import mock, skipIf from terra import settings from terra.executor.utils import Executor from terra.compute import base import terra.compute.container -from vsi.test.utils import TestCase, NamedTemporaryFileFactory +from .utils import TestCase, TestNamedTemporaryFileCase class SomeService(terra.compute.container.ContainerService): @@ -51,12 +50,11 @@ def setUp(self): 'test_dir': '/opt/projects/terra/terra_dsm/external/terra/foo'}) -class TestContainerService(TestComputeContainerCase): +class TestContainerService(TestComputeContainerCase, + TestNamedTemporaryFileCase): # Test the flushing configuration to json for a container mechanism def setUp(self): - self.patches.append(mock.patch.object(tempfile, 'NamedTemporaryFile', - NamedTemporaryFileFactory(self))) self.patches.append(mock.patch.object(json, 'dump', self.json_dump)) # self.common calls service.pre_run which trigger Executor self.patches.append(mock.patch.dict(Executor.__dict__)) diff --git a/terra/tests/test_core_settings.py b/terra/tests/test_core_settings.py index 9a86b72d..e34c741c 100644 --- a/terra/tests/test_core_settings.py +++ b/terra/tests/test_core_settings.py @@ -7,8 +7,7 @@ from envcontext import EnvironmentContext -from .utils import TestCase -from .test_logger import TestLoggerCase +from .utils import TestCase, TestLoggerCase, TestSettingsUnconfiguredCase from terra import settings from terra.core.exceptions import ImproperlyConfigured @@ -221,15 +220,7 @@ def test_dir(self): self.assertIn('c', dir(d.b[0][0])) -class TestSettings(TestCase): - def setUp(self): - # Useful for tests that set this - self.patches.append(mock.patch.dict(os.environ, - {'TERRA_SETTINGS_FILE': ""})) - # Use settings - self.patches.append(mock.patch.object(settings, '_wrapped', None)) - super().setUp() - +class TestSettings(TestLoggerCase, TestSettingsUnconfiguredCase): def test_unconfigured(self): with self.assertRaises(ImproperlyConfigured): settings.foo @@ -488,151 +479,151 @@ def test_multiple_lazy_contexts(self): self.assertNotIn('b', settings) self.assertNotIn('c', settings) - @mock.patch('terra.core.settings.global_templates', []) - def test_json(self): - with NamedTemporaryFile(mode='w', dir=self.temp_dir.name, - delete=False) as fid: - fid.write('{"a": 15, "b":"22", "c": true}') - - @settings_property - def c(self): - return self['a'] + # @mock.patch('terra.core.settings.global_templates', []) + # def test_json(self): + # with NamedTemporaryFile(mode='w', dir=self.temp_dir.name, + # delete=False) as fid: + # fid.write('{"a": 15, "b":"22", "c": true}') - settings.add_templates([({}, - {'a': fid.name, - 'b_json': fid.name, - # Both json AND settings_property - 'c_json': c})]) + # @settings_property + # def c(self): + # return self['a'] - settings.configure({}) + # settings.add_templates([({}, + # {'a': fid.name, + # 'b_json': fid.name, + # # Both json AND settings_property + # 'c_json': c})]) - self.assertEqual(settings.a, fid.name) - self.assertEqual(settings.b_json.a, 15) - self.assertEqual(settings.b_json.b, "22") - self.assertEqual(settings.b_json.c, True) - self.assertEqual(settings.c_json.a, 15) - self.assertEqual(settings.c_json.b, "22") - self.assertEqual(settings.c_json.c, True) 
- - def test_json_serializer(self): - - @settings_property - def c(self): - return self.a + self.b - - with self.assertRaises(ImproperlyConfigured): - TerraJSONEncoder.dumps(settings) - - settings._wrapped = Settings({'a': 11, 'b': 22, 'c': c}) - j = json.loads(TerraJSONEncoder.dumps(settings)) - self.assertEqual(j['a'], 11) - self.assertEqual(j['b'], 22) - self.assertEqual(j['c'], 33) - - def test_nested_json_serializer(self): - @settings_property - def c(self): - return self.a + self.b - - settings._wrapped = Settings( - {'a': 11, 'b': 22, 'q': {'x': c, 'y': c, 'foo': {'t': [c]}}}) - j = json.loads(TerraJSONEncoder.dumps(settings)) - self.assertEqual(j['a'], 11) - self.assertEqual(j['b'], 22) - self.assertEqual(j['q']['x'], 33) - self.assertEqual(j['q']['y'], 33) - self.assertEqual(j['q']['foo']['t'][0], 33) - - def test_properties_status_file(self): - settings.configure({}) - with settings: - if os.name == "nt": - settings.processing_dir = 'C:\\foobar' - ans = 'C:\\foobar\\status.json' - else: - settings.processing_dir = '/foobar' - ans = '/foobar/status.json' - self.assertEqual(settings.status_file, ans) + # settings.configure({}) - def test_properties_processing_dir_default(self): - settings.configure({}) + # self.assertEqual(settings.a, fid.name) + # self.assertEqual(settings.b_json.a, 15) + # self.assertEqual(settings.b_json.b, "22") + # self.assertEqual(settings.b_json.c, True) + # self.assertEqual(settings.c_json.a, 15) + # self.assertEqual(settings.c_json.b, "22") + # self.assertEqual(settings.c_json.c, True) + + # def test_json_serializer(self): + + # @settings_property + # def c(self): + # return self.a + self.b + + # with self.assertRaises(ImproperlyConfigured): + # TerraJSONEncoder.dumps(settings) + + # settings._wrapped = Settings({'a': 11, 'b': 22, 'c': c}) + # j = json.loads(TerraJSONEncoder.dumps(settings)) + # self.assertEqual(j['a'], 11) + # self.assertEqual(j['b'], 22) + # self.assertEqual(j['c'], 33) + + # def test_nested_json_serializer(self): + # @settings_property + # def c(self): + # return self.a + self.b + + # settings._wrapped = Settings( + # {'a': 11, 'b': 22, 'q': {'x': c, 'y': c, 'foo': {'t': [c]}}}) + # j = json.loads(TerraJSONEncoder.dumps(settings)) + # self.assertEqual(j['a'], 11) + # self.assertEqual(j['b'], 22) + # self.assertEqual(j['q']['x'], 33) + # self.assertEqual(j['q']['y'], 33) + # self.assertEqual(j['q']['foo']['t'][0], 33) + + # def test_properties_status_file(self): + # settings.configure({}) + # with settings: + # if os.name == "nt": + # settings.processing_dir = 'C:\\foobar' + # ans = 'C:\\foobar\\status.json' + # else: + # settings.processing_dir = '/foobar' + # ans = '/foobar/status.json' + # self.assertEqual(settings.status_file, ans) + + # def test_properties_processing_dir_default(self): + # settings.configure({}) - with self.assertLogs(), settings: - self.assertEqual(settings.processing_dir, os.getcwd()) + # with self.assertLogs(), settings: + # self.assertEqual(settings.processing_dir, os.getcwd()) - def test_properties_processing_dir_config_file(self): - settings.configure({}) + # def test_properties_processing_dir_config_file(self): + # settings.configure({}) - with settings, TemporaryDirectory() as temp_dir: - settings.config_file = os.path.join(temp_dir, 'foo.bar') - self.assertEqual(settings.processing_dir, temp_dir) + # with settings, TemporaryDirectory() as temp_dir: + # settings.config_file = os.path.join(temp_dir, 'foo.bar') + # self.assertEqual(settings.processing_dir, temp_dir) - def 
test_properties_processing_dir_nonexisting_config_file(self): - settings.configure({}) + # def test_properties_processing_dir_nonexisting_config_file(self): + # settings.configure({}) - def mock_mkdtemp(prefix): - return f'"{prefix}"' + # def mock_mkdtemp(prefix): + # return f'"{prefix}"' - with mock.patch.object(tempfile, 'mkdtemp', mock_mkdtemp), \ - self.assertLogs(), settings: - settings.config_file = '/land/of/foo.bar' - self.assertEqual(settings.processing_dir, '"terra_"') + # with mock.patch.object(tempfile, 'mkdtemp', mock_mkdtemp), \ + # self.assertLogs(), settings: + # settings.config_file = '/land/of/foo.bar' + # self.assertEqual(settings.processing_dir, '"terra_"') - def test_properties_unittest(self): - settings.configure({}) + # def test_properties_unittest(self): + # settings.configure({}) - with settings, EnvironmentContext(TERRA_UNITTEST="1"): - self.assertTrue(settings.unittest) + # with settings, EnvironmentContext(TERRA_UNITTEST="1"): + # self.assertTrue(settings.unittest) - with settings, EnvironmentContext(TERRA_UNITTEST="0"): - self.assertFalse(settings.unittest) + # with settings, EnvironmentContext(TERRA_UNITTEST="0"): + # self.assertFalse(settings.unittest) - # Test when unset - with settings, EnvironmentContext(TERRA_UNITTEST='1'): - os.environ.pop('TERRA_UNITTEST') - self.assertFalse(settings.unittest) + # # Test when unset + # with settings, EnvironmentContext(TERRA_UNITTEST='1'): + # os.environ.pop('TERRA_UNITTEST') + # self.assertFalse(settings.unittest) - # Make sure I didn't break anything - self.assertEqual(os.environ['TERRA_UNITTEST'], '1') + # # Make sure I didn't break anything + # self.assertEqual(os.environ['TERRA_UNITTEST'], '1') - def test_expanduser(self): - settings.configure({'test_dir': '~/foo', - 'test_that': '~/bar'}) + # def test_expanduser(self): + # settings.configure({'test_dir': '~/foo', + # 'test_that': '~/bar'}) - self.assertEqual(settings.test_dir, os.path.expanduser('~/foo')) - self.assertNotEqual(settings.test_that, os.path.expanduser('~/bar')) - self.assertEqual(settings.test_that, '~/bar') + # self.assertEqual(settings.test_dir, os.path.expanduser('~/foo')) + # self.assertNotEqual(settings.test_that, os.path.expanduser('~/bar')) + # self.assertEqual(settings.test_that, '~/bar') - def test_expanduser_once(self): - settings.configure({'test_dir': ExpandedString('~/foo'), - 'test_file': '~/bar'}) + # def test_expanduser_once(self): + # settings.configure({'test_dir': ExpandedString('~/foo'), + # 'test_file': '~/bar'}) - self.assertNotIsInstance(settings._wrapped['test_file'], ExpandedString) - self.assertEqual(settings.test_file, os.path.expanduser('~/bar')) - self.assertIsInstance(settings._wrapped['test_file'], ExpandedString) + # self.assertNotIsInstance(settings._wrapped['test_file'], ExpandedString) + # self.assertEqual(settings.test_file, os.path.expanduser('~/bar')) + # self.assertIsInstance(settings._wrapped['test_file'], ExpandedString) - self.assertEqual(settings.test_dir, '~/foo') + # self.assertEqual(settings.test_dir, '~/foo') - def test_expandvars(self): - with EnvironmentContext(FOO="NOTBAR"): - settings.configure({'test1': 'this${FOO}that', - 'test2': 'a${GKLDGSJLGKJSGURNAONV}b'}) + # def test_expandvars(self): + # with EnvironmentContext(FOO="NOTBAR"): + # settings.configure({'test1': 'this${FOO}that', + # 'test2': 'a${GKLDGSJLGKJSGURNAONV}b'}) - with EnvironmentContext(FOO="BAR"): - self.assertEqual(settings.test1, 'thisBARthat') - self.assertEqual(settings.test2, 'a${GKLDGSJLGKJSGURNAONV}b') + # with 
EnvironmentContext(FOO="BAR"): + # self.assertEqual(settings.test1, 'thisBARthat') + # self.assertEqual(settings.test2, 'a${GKLDGSJLGKJSGURNAONV}b') - def test_expandvars_once(self): - settings.configure({'test2': 'a${GKLDGSJLGKJSGURNAONV}b'}) + # def test_expandvars_once(self): + # settings.configure({'test2': 'a${GKLDGSJLGKJSGURNAONV}b'}) - # Evaluate it here once - self.assertNotIsInstance(settings._wrapped['test2'], ExpandedString) - self.assertEqual(settings.test2, 'a${GKLDGSJLGKJSGURNAONV}b') - self.assertIsInstance(settings._wrapped['test2'], ExpandedString) + # # Evaluate it here once + # self.assertNotIsInstance(settings._wrapped['test2'], ExpandedString) + # self.assertEqual(settings.test2, 'a${GKLDGSJLGKJSGURNAONV}b') + # self.assertIsInstance(settings._wrapped['test2'], ExpandedString) - with EnvironmentContext(GKLDGSJLGKJSGURNAONV="FOO"): - # Show it is not evaluated again here - self.assertEqual(settings.test2, 'a${GKLDGSJLGKJSGURNAONV}b') + # with EnvironmentContext(GKLDGSJLGKJSGURNAONV="FOO"): + # # Show it is not evaluated again here + # self.assertEqual(settings.test2, 'a${GKLDGSJLGKJSGURNAONV}b') class TestUnitTests(TestCase): diff --git a/terra/tests/test_logger.py b/terra/tests/test_logger.py index 13b93ab9..af0b7e97 100644 --- a/terra/tests/test_logger.py +++ b/terra/tests/test_logger.py @@ -11,10 +11,14 @@ from terra.core.exceptions import ImproperlyConfigured from terra import settings -from vsi.test.utils import TestCase, make_traceback, NamedTemporaryFileFactory +from .utils import ( + TestCase, make_traceback, TestNamedTemporaryFileCase, + TestSettingsUnconfiguredCase, TestLoggerCase as TestLoggerCaseOrig +) from terra import logger from terra.core import signals +# import terra.compute.utils class TestHandlerLoggingContext(TestCase): def test_handler_logging_context(self): @@ -38,32 +42,26 @@ def test_handler_logging_context(self): self.assertIn(message2, str(handler_swap.buffer)) -class TestLoggerCase(TestCase): +class TestLoggerCase(TestLoggerCaseOrig, TestSettingsUnconfiguredCase, TestNamedTemporaryFileCase): def setUp(self): - self.original_system_hook = sys.excepthook - self.patches.append(mock.patch.object(settings, '_wrapped', None)) - self.patches.append(mock.patch.object(tempfile, 'NamedTemporaryFile', - NamedTemporaryFileFactory(self))) - settings_filename = os.path.join(self.temp_dir.name, 'config.json') - self.patches.append(mock.patch.dict(os.environ, - TERRA_SETTINGS_FILE=settings_filename)) - attrs = {'serve_until_stopped.return_value': True, 'ready': True} - MockLogRecordSocketReceiver = mock.Mock(**attrs) - self.patches.append(mock.patch('terra.logger.LogRecordSocketReceiver', - MockLogRecordSocketReceiver)) + # self.original_system_hook = sys.excepthook + # attrs = {'serve_until_stopped.return_value': True, 'ready': True} + # MockLogRecordSocketReceiver = mock.Mock(**attrs) + # self.patches.append(mock.patch('terra.logger.LogRecordSocketReceiver', + # MockLogRecordSocketReceiver)) super().setUp() + settings_filename = os.path.join(self.temp_dir.name, 'config.json') + os.environ['TERRA_SETTINGS_FILE']=settings_filename + # Don't use settings.configure here, because I need to test out logging # signals config = {"processing_dir": self.temp_dir.name} with open(settings_filename, 'w') as fid: json.dump(config, fid) - self._logs = logger._SetupTerraLogger() - - # # register post_configure with settings - signals.post_settings_configured.connect(self._logs.configure_logger) + self._logs = logger._setup_terra_logger() def tearDown(self): # Remove all 
the logger handlers @@ -80,6 +78,7 @@ def tearDown(self): pass self._logs.root_logger.handlers = [] signals.post_settings_configured.disconnect(self._logs.configure_logger) + signals.post_settings_context.disconnect(self._logs.reconfigure_logger) # Apparently this is unnecessary because signals use weak refs, that are # auto removed on free, but I think it's still better to put this here. super().tearDown() @@ -118,11 +117,12 @@ def save_exec_info(exc_type, exc, tb): self.tb = tb sys.excepthook = save_exec_info self._logs.setup_logging_exception_hook() - with self.assertLogs() as cm: - # with self.assertRaises(ZeroDivisionError): - tb = make_traceback() - sys.excepthook(ZeroDivisionError, - ZeroDivisionError('division by almost zero'), tb) + with mock.patch('sys.stderr', new_callable=io.StringIO): + with self.assertLogs() as cm: + # with self.assertRaises(ZeroDivisionError): + tb = make_traceback() + sys.excepthook(ZeroDivisionError, + ZeroDivisionError('division by almost zero'), tb) self.assertIn('division by almost zero', str(cm.output)) # Test stack trace stuff in there @@ -310,7 +310,7 @@ def last_test_logger(self): self.assertFalse( root_logger.handlers, - msg="If you are seting this, one of the other unit tests has " + msg="If you are seeing this, one of the other unit tests has " "initialized the logger. This side effect should be " "prevented for you automatically. If you are seeing this, you " "have configured logging manually, and should make sure you " diff --git a/terra/tests/test_signals.py b/terra/tests/test_signals.py index a7239e4d..024a80c5 100644 --- a/terra/tests/test_signals.py +++ b/terra/tests/test_signals.py @@ -1,8 +1,8 @@ -from terra.core.signals import Signal, receiver, post_settings_configured -from .utils import TestCase +from terra.core.signals import Signal, receiver +import terra.core.signals as signals +from .utils import TestCase, TestSignalCase - -class TestSignals(TestCase): +class TestSignals(TestSignalCase): def signal_handle1(self, sender, **kwargs): self.assertEqual(sender, self.sender) self.kwargs = kwargs @@ -149,12 +149,16 @@ def decorated2(sender, **kwargs): self.assertEqual(self.count, 1.1) -class TestUnitTests(TestCase): - def last_test_signals(self): - for signal in [post_settings_configured]: - self.assertFalse( - signal.receivers, - msg="If you are seting this, one of the other unit tests has " - "left a signal connected. This side effect should be " - "prevented by disconnecting any functions you connected to a " - "signal.") +# This no longer matters, as signals are disabled in unitted tests now? +# class TestUnitTests(TestCase): +# def last_test_signals(self): +# for signal in [signals.post_settings_configured, +# signals.post_settings_context, +# signals.logger_configure, +# signals.logger_reconfigure]: +# self.assertFalse( +# signal.receivers, +# msg="If you are seeing this, one of the other unit tests has " +# "left a signal connected. 
This side effect should " +# "be prevented by disconnecting any functions you connected to a " +# "signal.") diff --git a/terra/tests/utils.py b/terra/tests/utils.py index 29608d22..4d22bacd 100644 --- a/terra/tests/utils.py +++ b/terra/tests/utils.py @@ -1,14 +1,20 @@ import os +import sys from unittest import mock from vsi.test.utils import ( - TestCase, make_traceback + TestCase as TestCase_original, make_traceback, TestNamedTemporaryFileCase ) from terra import settings -__all__ = ["TestCase", "make_traceback"] +__all__ = ["TestCase", "make_traceback", "TestNamedTemporaryFileCase", + "TestSettingsUnconfiguredCase", "TestSettingsConfiguredCase"] + + +class TestCase(TestCase_original): + pass class TestSettingsUnconfiguredCase(TestCase): @@ -25,3 +31,24 @@ class TestSettingsConfiguredCase(TestSettingsUnconfiguredCase): def setUp(self): super().setUp() settings.configure({}) + + +class TestLoggerCase(TestCase): + def setUp(self): + self.original_system_hook = sys.excepthook + attrs = {'serve_until_stopped.return_value': True, 'ready': True} + MockLogRecordSocketReceiver = mock.Mock(**attrs) + self.patches.append(mock.patch('terra.logger.LogRecordSocketReceiver', + MockLogRecordSocketReceiver)) + super().setUp() + + def tearDown(self): + sys.excepthook = self.original_system_hook + + super().tearDown() + + +class TestSignalCase(TestCase): + def setUp(self): + self.patches.append(mock.patch.dict(os.environ, TERRA_UNITTEST='0')) + super().setUp() From 391e26bffc746c971bf5f914e4cf3f87b67b6bc5 Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Thu, 28 May 2020 07:15:15 -0400 Subject: [PATCH 57/94] Working on Logger case Signed-off-by: Andy Neff --- external/vsi_common | 2 +- terra/tests/utils.py | 31 +++++++++++++++++++++++++++++-- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/external/vsi_common b/external/vsi_common index 16553024..aefd1052 160000 --- a/external/vsi_common +++ b/external/vsi_common @@ -1 +1 @@ -Subproject commit 165530242fcd36b0f22b2231255104282b2eab5e +Subproject commit aefd1052726a38ee297d3176df4bb6c0777f79c1 diff --git a/terra/tests/utils.py b/terra/tests/utils.py index 4d22bacd..2a5a2cae 100644 --- a/terra/tests/utils.py +++ b/terra/tests/utils.py @@ -1,5 +1,6 @@ import os import sys +import json from unittest import mock from vsi.test.utils import ( @@ -18,10 +19,14 @@ class TestCase(TestCase_original): class TestSettingsUnconfiguredCase(TestCase): + def __init__(self, *args, **kwargs): + self.settings_file = '' + super().__init__(*args, **kwargs) + def setUp(self): # Useful for tests that set this self.patches.append(mock.patch.dict(os.environ, - {'TERRA_SETTINGS_FILE': ""})) + {'TERRA_SETTINGS_FILE': self.settings_file})) # Use settings self.patches.append(mock.patch.object(settings, '_wrapped', None)) super().setUp() @@ -33,18 +38,40 @@ def setUp(self): settings.configure({}) -class TestLoggerCase(TestCase): +class TestLoggerCase(TestSettingsUnconfiguredCase, TestNamedTemporaryFileCase): def setUp(self): self.original_system_hook = sys.excepthook attrs = {'serve_until_stopped.return_value': True, 'ready': True} MockLogRecordSocketReceiver = mock.Mock(**attrs) self.patches.append(mock.patch('terra.logger.LogRecordSocketReceiver', MockLogRecordSocketReceiver)) + # Special customization of TestSettingsUnconfiguredCase + self.settings_filename = os.path.join(self.temp_dir.name, 'config.json') + config = {"processing_dir": self.temp_dir.name} + with open(self.settings_filename, 'w') as fid: + json.dump(config, fid) + + import terra.logger + self._logs = 
terra.logger._setup_terra_logger() super().setUp() def tearDown(self): sys.excepthook = self.original_system_hook + try: + self._logs.log_file.close() + except AttributeError: + pass + # Windows is pickier about deleting files + try: + if self._logs.tmp_file: + self._logs.tmp_file.close() + except AttributeError: + pass + self._logs.root_logger.handlers = [] + import terra.core.signals + terra.core.signals.post_settings_configured.disconnect(self._logs.configure_logger) + terra.core.signals.post_settings_context.disconnect(self._logs.reconfigure_logger) super().tearDown() From b32676d3d9b4f9116e8012acfd2fb4da8bc84aab Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Thu, 28 May 2020 08:03:58 -0400 Subject: [PATCH 58/94] Settings tests passing Signed-off-by: Andy Neff --- terra/logger.py | 2 +- terra/tests/__init__.py | 1 + terra/tests/test_core_settings.py | 252 ++++++++++++++++-------------- terra/tests/utils.py | 6 +- 4 files changed, 138 insertions(+), 123 deletions(-) diff --git a/terra/logger.py b/terra/logger.py index 712cf369..75386159 100644 --- a/terra/logger.py +++ b/terra/logger.py @@ -183,7 +183,7 @@ def __init__(self, host='localhost', socketserver.ThreadingTCPServer.__init__(self, (host, port), handler) self.abort = False self.ready = False - self.timeout = 1 + self.timeout = 0.1 self.logname = None def serve_until_stopped(self): diff --git a/terra/tests/__init__.py b/terra/tests/__init__.py index 7f965e1c..b7904075 100644 --- a/terra/tests/__init__.py +++ b/terra/tests/__init__.py @@ -8,6 +8,7 @@ # logger_configure.receivers = [] # logger_reconfigure.receivers = [] +original_environ = os.environ.copy() # Use this as a package level setup def load_tests(loader, standard_tests, pattern): diff --git a/terra/tests/test_core_settings.py b/terra/tests/test_core_settings.py index e34c741c..70aae4dc 100644 --- a/terra/tests/test_core_settings.py +++ b/terra/tests/test_core_settings.py @@ -1,6 +1,7 @@ import os import sys import json +import time from unittest import mock from tempfile import TemporaryDirectory, NamedTemporaryFile import tempfile @@ -221,6 +222,7 @@ def test_dir(self): class TestSettings(TestLoggerCase, TestSettingsUnconfiguredCase): + @mock.patch.dict(os.environ, TERRA_SETTINGS_FILE='') def test_unconfigured(self): with self.assertRaises(ImproperlyConfigured): settings.foo @@ -479,151 +481,151 @@ def test_multiple_lazy_contexts(self): self.assertNotIn('b', settings) self.assertNotIn('c', settings) - # @mock.patch('terra.core.settings.global_templates', []) - # def test_json(self): - # with NamedTemporaryFile(mode='w', dir=self.temp_dir.name, - # delete=False) as fid: - # fid.write('{"a": 15, "b":"22", "c": true}') + @mock.patch('terra.core.settings.global_templates', []) + def test_json(self): + with NamedTemporaryFile(mode='w', dir=self.temp_dir.name, + delete=False) as fid: + fid.write('{"a": 15, "b":"22", "c": true}') - # @settings_property - # def c(self): - # return self['a'] + @settings_property + def c(self): + return self['a'] - # settings.add_templates([({}, - # {'a': fid.name, - # 'b_json': fid.name, - # # Both json AND settings_property - # 'c_json': c})]) + settings.add_templates([({}, + {'a': fid.name, + 'b_json': fid.name, + # Both json AND settings_property + 'c_json': c})]) - # settings.configure({}) + settings.configure({}) - # self.assertEqual(settings.a, fid.name) - # self.assertEqual(settings.b_json.a, 15) - # self.assertEqual(settings.b_json.b, "22") - # self.assertEqual(settings.b_json.c, True) - # self.assertEqual(settings.c_json.a, 15) - # 
self.assertEqual(settings.c_json.b, "22") - # self.assertEqual(settings.c_json.c, True) - - # def test_json_serializer(self): - - # @settings_property - # def c(self): - # return self.a + self.b - - # with self.assertRaises(ImproperlyConfigured): - # TerraJSONEncoder.dumps(settings) - - # settings._wrapped = Settings({'a': 11, 'b': 22, 'c': c}) - # j = json.loads(TerraJSONEncoder.dumps(settings)) - # self.assertEqual(j['a'], 11) - # self.assertEqual(j['b'], 22) - # self.assertEqual(j['c'], 33) - - # def test_nested_json_serializer(self): - # @settings_property - # def c(self): - # return self.a + self.b - - # settings._wrapped = Settings( - # {'a': 11, 'b': 22, 'q': {'x': c, 'y': c, 'foo': {'t': [c]}}}) - # j = json.loads(TerraJSONEncoder.dumps(settings)) - # self.assertEqual(j['a'], 11) - # self.assertEqual(j['b'], 22) - # self.assertEqual(j['q']['x'], 33) - # self.assertEqual(j['q']['y'], 33) - # self.assertEqual(j['q']['foo']['t'][0], 33) - - # def test_properties_status_file(self): - # settings.configure({}) - # with settings: - # if os.name == "nt": - # settings.processing_dir = 'C:\\foobar' - # ans = 'C:\\foobar\\status.json' - # else: - # settings.processing_dir = '/foobar' - # ans = '/foobar/status.json' - # self.assertEqual(settings.status_file, ans) - - # def test_properties_processing_dir_default(self): - # settings.configure({}) + self.assertEqual(settings.a, fid.name) + self.assertEqual(settings.b_json.a, 15) + self.assertEqual(settings.b_json.b, "22") + self.assertEqual(settings.b_json.c, True) + self.assertEqual(settings.c_json.a, 15) + self.assertEqual(settings.c_json.b, "22") + self.assertEqual(settings.c_json.c, True) - # with self.assertLogs(), settings: - # self.assertEqual(settings.processing_dir, os.getcwd()) + def test_json_serializer(self): - # def test_properties_processing_dir_config_file(self): - # settings.configure({}) + @settings_property + def c(self): + return self.a + self.b - # with settings, TemporaryDirectory() as temp_dir: - # settings.config_file = os.path.join(temp_dir, 'foo.bar') - # self.assertEqual(settings.processing_dir, temp_dir) + with self.assertRaises(ImproperlyConfigured): + TerraJSONEncoder.dumps(settings) - # def test_properties_processing_dir_nonexisting_config_file(self): - # settings.configure({}) + settings._wrapped = Settings({'a': 11, 'b': 22, 'c': c}) + j = json.loads(TerraJSONEncoder.dumps(settings)) + self.assertEqual(j['a'], 11) + self.assertEqual(j['b'], 22) + self.assertEqual(j['c'], 33) - # def mock_mkdtemp(prefix): - # return f'"{prefix}"' + def test_nested_json_serializer(self): + @settings_property + def c(self): + return self.a + self.b + + settings._wrapped = Settings( + {'a': 11, 'b': 22, 'q': {'x': c, 'y': c, 'foo': {'t': [c]}}}) + j = json.loads(TerraJSONEncoder.dumps(settings)) + self.assertEqual(j['a'], 11) + self.assertEqual(j['b'], 22) + self.assertEqual(j['q']['x'], 33) + self.assertEqual(j['q']['y'], 33) + self.assertEqual(j['q']['foo']['t'][0], 33) + + def test_properties_status_file(self): + settings.configure({}) + with settings: + if os.name == "nt": + settings.processing_dir = 'C:\\foobar' + ans = 'C:\\foobar\\status.json' + else: + settings.processing_dir = '/foobar' + ans = '/foobar/status.json' + self.assertEqual(settings.status_file, ans) - # with mock.patch.object(tempfile, 'mkdtemp', mock_mkdtemp), \ - # self.assertLogs(), settings: - # settings.config_file = '/land/of/foo.bar' - # self.assertEqual(settings.processing_dir, '"terra_"') + def test_properties_processing_dir_default(self): + 
settings.configure({}) - # def test_properties_unittest(self): - # settings.configure({}) + with self.assertLogs(), settings: + self.assertEqual(settings.processing_dir, os.getcwd()) + + def test_properties_processing_dir_config_file(self): + settings.configure({}) + + with settings, TemporaryDirectory() as temp_dir: + settings.config_file = os.path.join(temp_dir, 'foo.bar') + self.assertEqual(settings.processing_dir, temp_dir) + + def test_properties_processing_dir_nonexisting_config_file(self): + settings.configure({}) - # with settings, EnvironmentContext(TERRA_UNITTEST="1"): - # self.assertTrue(settings.unittest) + def mock_mkdtemp(prefix): + return f'"{prefix}"' - # with settings, EnvironmentContext(TERRA_UNITTEST="0"): - # self.assertFalse(settings.unittest) + with mock.patch.object(tempfile, 'mkdtemp', mock_mkdtemp), \ + self.assertLogs(), settings: + settings.config_file = '/land/of/foo.bar' + self.assertEqual(settings.processing_dir, '"terra_"') - # # Test when unset - # with settings, EnvironmentContext(TERRA_UNITTEST='1'): - # os.environ.pop('TERRA_UNITTEST') - # self.assertFalse(settings.unittest) + def test_properties_unittest(self): + settings.configure({}) - # # Make sure I didn't break anything - # self.assertEqual(os.environ['TERRA_UNITTEST'], '1') + with settings, EnvironmentContext(TERRA_UNITTEST="1"): + self.assertTrue(settings.unittest) - # def test_expanduser(self): - # settings.configure({'test_dir': '~/foo', - # 'test_that': '~/bar'}) + with settings, EnvironmentContext(TERRA_UNITTEST="0"): + self.assertFalse(settings.unittest) - # self.assertEqual(settings.test_dir, os.path.expanduser('~/foo')) - # self.assertNotEqual(settings.test_that, os.path.expanduser('~/bar')) - # self.assertEqual(settings.test_that, '~/bar') + # Test when unset + with settings, EnvironmentContext(TERRA_UNITTEST='1'): + os.environ.pop('TERRA_UNITTEST') + self.assertFalse(settings.unittest) - # def test_expanduser_once(self): - # settings.configure({'test_dir': ExpandedString('~/foo'), - # 'test_file': '~/bar'}) + # Make sure I didn't break anything + self.assertEqual(os.environ['TERRA_UNITTEST'], '1') - # self.assertNotIsInstance(settings._wrapped['test_file'], ExpandedString) - # self.assertEqual(settings.test_file, os.path.expanduser('~/bar')) - # self.assertIsInstance(settings._wrapped['test_file'], ExpandedString) + def test_expanduser(self): + settings.configure({'test_dir': '~/foo', + 'test_that': '~/bar'}) - # self.assertEqual(settings.test_dir, '~/foo') + self.assertEqual(settings.test_dir, os.path.expanduser('~/foo')) + self.assertNotEqual(settings.test_that, os.path.expanduser('~/bar')) + self.assertEqual(settings.test_that, '~/bar') - # def test_expandvars(self): - # with EnvironmentContext(FOO="NOTBAR"): - # settings.configure({'test1': 'this${FOO}that', - # 'test2': 'a${GKLDGSJLGKJSGURNAONV}b'}) + def test_expanduser_once(self): + settings.configure({'test_dir': ExpandedString('~/foo'), + 'test_file': '~/bar'}) - # with EnvironmentContext(FOO="BAR"): - # self.assertEqual(settings.test1, 'thisBARthat') - # self.assertEqual(settings.test2, 'a${GKLDGSJLGKJSGURNAONV}b') + self.assertNotIsInstance(settings._wrapped['test_file'], ExpandedString) + self.assertEqual(settings.test_file, os.path.expanduser('~/bar')) + self.assertIsInstance(settings._wrapped['test_file'], ExpandedString) - # def test_expandvars_once(self): - # settings.configure({'test2': 'a${GKLDGSJLGKJSGURNAONV}b'}) + self.assertEqual(settings.test_dir, '~/foo') - # # Evaluate it here once - # 
self.assertNotIsInstance(settings._wrapped['test2'], ExpandedString) - # self.assertEqual(settings.test2, 'a${GKLDGSJLGKJSGURNAONV}b') - # self.assertIsInstance(settings._wrapped['test2'], ExpandedString) + def test_expandvars(self): + with EnvironmentContext(FOO="NOTBAR"): + settings.configure({'test1': 'this${FOO}that', + 'test2': 'a${GKLDGSJLGKJSGURNAONV}b'}) - # with EnvironmentContext(GKLDGSJLGKJSGURNAONV="FOO"): - # # Show it is not evaluated again here - # self.assertEqual(settings.test2, 'a${GKLDGSJLGKJSGURNAONV}b') + with EnvironmentContext(FOO="BAR"): + self.assertEqual(settings.test1, 'thisBARthat') + self.assertEqual(settings.test2, 'a${GKLDGSJLGKJSGURNAONV}b') + + def test_expandvars_once(self): + settings.configure({'test2': 'a${GKLDGSJLGKJSGURNAONV}b'}) + + # Evaluate it here once + self.assertNotIsInstance(settings._wrapped['test2'], ExpandedString) + self.assertEqual(settings.test2, 'a${GKLDGSJLGKJSGURNAONV}b') + self.assertIsInstance(settings._wrapped['test2'], ExpandedString) + + with EnvironmentContext(GKLDGSJLGKJSGURNAONV="FOO"): + # Show it is not evaluated again here + self.assertEqual(settings.test2, 'a${GKLDGSJLGKJSGURNAONV}b') class TestUnitTests(TestCase): @@ -657,6 +659,16 @@ def last_test_import_settings(self): import terra.core.settings terra.core.settings.settings._setup() + # Shut down TCP server + terra.logger._logs.tcp_logging_server.abort = True + + for x in range(1000): + if not terra.logger._logs.tcp_logging_server.ready: + break + time.sleep(0.001) + else: + self.assertFalse(terra.logger._logs.tcp_logging_server.ready, 'TCP Server did not shut down within a second') + # Picky windows import terra.logger terra.logger._logs._log_file.close() diff --git a/terra/tests/utils.py b/terra/tests/utils.py index 2a5a2cae..800befd3 100644 --- a/terra/tests/utils.py +++ b/terra/tests/utils.py @@ -20,13 +20,13 @@ class TestCase(TestCase_original): class TestSettingsUnconfiguredCase(TestCase): def __init__(self, *args, **kwargs): - self.settings_file = '' + self.settings_filename = '' super().__init__(*args, **kwargs) def setUp(self): # Useful for tests that set this self.patches.append(mock.patch.dict(os.environ, - {'TERRA_SETTINGS_FILE': self.settings_file})) + {'TERRA_SETTINGS_FILE': self.settings_filename})) # Use settings self.patches.append(mock.patch.object(settings, '_wrapped', None)) super().setUp() @@ -45,6 +45,8 @@ def setUp(self): MockLogRecordSocketReceiver = mock.Mock(**attrs) self.patches.append(mock.patch('terra.logger.LogRecordSocketReceiver', MockLogRecordSocketReceiver)) + self.patches.append(mock.patch('terra.compute.base.LogRecordSocketReceiver', + MockLogRecordSocketReceiver)) # Special customization of TestSettingsUnconfiguredCase self.settings_filename = os.path.join(self.temp_dir.name, 'config.json') config = {"processing_dir": self.temp_dir.name} From e138787ec13840750c9e95f4c1bcf2afb02b67fb Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Thu, 28 May 2020 10:24:38 -0400 Subject: [PATCH 59/94] Logging tests passing Signed-off-by: Andy Neff --- external/vsi_common | 2 +- terra/tests/test_core_settings.py | 9 ++--- terra/tests/test_logger.py | 56 +++++-------------------------- terra/tests/utils.py | 8 +++++ 4 files changed, 22 insertions(+), 53 deletions(-) diff --git a/external/vsi_common b/external/vsi_common index aefd1052..d8ef64e1 160000 --- a/external/vsi_common +++ b/external/vsi_common @@ -1 +1 @@ -Subproject commit aefd1052726a38ee297d3176df4bb6c0777f79c1 +Subproject commit d8ef64e1b0003a3fe7d58c81c8dd65bb13b4bfcc diff --git 
a/terra/tests/test_core_settings.py b/terra/tests/test_core_settings.py index 70aae4dc..d705f14f 100644 --- a/terra/tests/test_core_settings.py +++ b/terra/tests/test_core_settings.py @@ -8,7 +8,10 @@ from envcontext import EnvironmentContext -from .utils import TestCase, TestLoggerCase, TestSettingsUnconfiguredCase +from .utils import ( + TestCase, TestLoggerCase, TestLoggerConfigureCase, + TestSettingsUnconfiguredCase +) from terra import settings from terra.core.exceptions import ImproperlyConfigured @@ -643,13 +646,11 @@ def last_test_settings(self): "Otherwise unit tests can interfere with each other") -class TestCircularDependency(TestLoggerCase): +class TestCircularDependency(TestLoggerConfigureCase): # I don't want this unloading terra to interfere with other last_tests, as # this would reset modules to their initial state, giving false positives to # corruption checks. So mock it @mock.patch.dict(sys.modules) - # Needed to make circular imports - @mock.patch.dict(os.environ, TERRA_UNITTEST='0') def last_test_import_settings(self): # Unload terra for module in list(sys.modules.keys()): diff --git a/terra/tests/test_logger.py b/terra/tests/test_logger.py index af0b7e97..ee6cded7 100644 --- a/terra/tests/test_logger.py +++ b/terra/tests/test_logger.py @@ -13,7 +13,8 @@ from terra import settings from .utils import ( TestCase, make_traceback, TestNamedTemporaryFileCase, - TestSettingsUnconfiguredCase, TestLoggerCase as TestLoggerCaseOrig + TestSettingsUnconfiguredCase, + TestLoggerConfigureCase ) from terra import logger from terra.core import signals @@ -42,49 +43,7 @@ def test_handler_logging_context(self): self.assertIn(message2, str(handler_swap.buffer)) -class TestLoggerCase(TestLoggerCaseOrig, TestSettingsUnconfiguredCase, TestNamedTemporaryFileCase): - def setUp(self): - # self.original_system_hook = sys.excepthook - # attrs = {'serve_until_stopped.return_value': True, 'ready': True} - # MockLogRecordSocketReceiver = mock.Mock(**attrs) - # self.patches.append(mock.patch('terra.logger.LogRecordSocketReceiver', - # MockLogRecordSocketReceiver)) - - super().setUp() - - settings_filename = os.path.join(self.temp_dir.name, 'config.json') - os.environ['TERRA_SETTINGS_FILE']=settings_filename - - # Don't use settings.configure here, because I need to test out logging - # signals - config = {"processing_dir": self.temp_dir.name} - with open(settings_filename, 'w') as fid: - json.dump(config, fid) - - self._logs = logger._setup_terra_logger() - - def tearDown(self): - # Remove all the logger handlers - sys.excepthook = self.original_system_hook - try: - self._logs._log_file.close() - except AttributeError: - pass - # Windows is pickier about deleting files - try: - if self._logs.tmp_file: - self._logs.tmp_file.close() - except AttributeError: - pass - self._logs.root_logger.handlers = [] - signals.post_settings_configured.disconnect(self._logs.configure_logger) - signals.post_settings_context.disconnect(self._logs.reconfigure_logger) - # Apparently this is unnecessary because signals use weak refs, that are - # auto removed on free, but I think it's still better to put this here. 
- super().tearDown() - - -class TestLogger(TestLoggerCase): +class TestLogger(TestLoggerConfigureCase): def test_setup_working(self): self.assertFalse(settings.configured) self.assertEqual(settings.processing_dir, self.temp_dir.name) @@ -97,10 +56,11 @@ def test_double_configure(self): self._logs.configure_logger(None) def test_temp_file_cleanup(self): - self.assertExist(self.temp_log_file) + tmp_file = self._logs.tmp_file.name + self.assertExist(tmp_file) self.assertFalse(self._logs._configured) settings.processing_dir - self.assertNotExist(self.temp_log_file) + self.assertNotExist(tmp_file) self.assertTrue(self._logs._configured) def test_exception_hook_installed(self): @@ -139,13 +99,13 @@ def test_root_logger_setup(self): def test_logs_stderr(self): stderr_handler = [h for h in self._logs.root_logger.handlers if hasattr(h, 'stream') and h.stream == sys.stderr][0] - self.assertIs(self._logs.stderr_handler, stderr_handler) self.assertEqual(stderr_handler.level, logging.WARNING) + self.assertIs(self._logs.stderr_handler, stderr_handler) def test_logs_temp_file(self): temp_handler = [ h for h in self._logs.root_logger.handlers - if hasattr(h, 'stream') and h.stream.name == self.temp_log_file][0] + if hasattr(h, 'stream') and h.stream.name == self._logs.tmp_file.name][0] # Test that log everything is set self.assertEqual(temp_handler.level, logger.NOTSET) self.assertEqual(self._logs.root_logger.level, logger.NOTSET) diff --git a/terra/tests/utils.py b/terra/tests/utils.py index 800befd3..45ce565f 100644 --- a/terra/tests/utils.py +++ b/terra/tests/utils.py @@ -76,6 +76,14 @@ def tearDown(self): terra.core.signals.post_settings_context.disconnect(self._logs.reconfigure_logger) super().tearDown() +class TestLoggerConfigureCase(TestLoggerCase): + def setUp(self): + # Enable signals. Most logging tests require configure logger to actually + # be called. LogRecordSocketReceiver is mocked out, so no lasting side + # effects should inccur. + self.patches.append(mock.patch.dict(os.environ, TERRA_UNITTEST='0')) + super().setUp() + class TestSignalCase(TestCase): def setUp(self): From 8610871573aaa4ed3891927fbf0192cb1bdf5823 Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Thu, 28 May 2020 12:35:48 -0400 Subject: [PATCH 60/94] Tests passing Signed-off-by: Andy Neff --- terra/tests/test_logger.py | 425 +++++++++++++++++++------------------ terra/tests/utils.py | 24 ++- 2 files changed, 232 insertions(+), 217 deletions(-) diff --git a/terra/tests/test_logger.py b/terra/tests/test_logger.py index ee6cded7..b4194e34 100644 --- a/terra/tests/test_logger.py +++ b/terra/tests/test_logger.py @@ -44,223 +44,224 @@ def test_handler_logging_context(self): class TestLogger(TestLoggerConfigureCase): + pass def test_setup_working(self): self.assertFalse(settings.configured) self.assertEqual(settings.processing_dir, self.temp_dir.name) self.assertTrue(settings.configured) - def test_double_configure(self): - settings._setup() - with self.assertLogs(): - with self.assertRaises(ImproperlyConfigured): - self._logs.configure_logger(None) - - def test_temp_file_cleanup(self): - tmp_file = self._logs.tmp_file.name - self.assertExist(tmp_file) - self.assertFalse(self._logs._configured) - settings.processing_dir - self.assertNotExist(tmp_file) - self.assertTrue(self._logs._configured) - - def test_exception_hook_installed(self): - self.assertEqual( - sys.excepthook.__qualname__, - '_SetupTerraLogger.setup_logging_exception_hook.' 
- '.handle_exception') - self.assertEqual('terra.logger', sys.excepthook.__module__) - - def test_exception_hook(self): - def save_exec_info(exc_type, exc, tb): - self.exc_type = exc_type - self.exc = exc - self.tb = tb - sys.excepthook = save_exec_info - self._logs.setup_logging_exception_hook() - with mock.patch('sys.stderr', new_callable=io.StringIO): - with self.assertLogs() as cm: - # with self.assertRaises(ZeroDivisionError): - tb = make_traceback() - sys.excepthook(ZeroDivisionError, - ZeroDivisionError('division by almost zero'), tb) - - self.assertIn('division by almost zero', str(cm.output)) - # Test stack trace stuff in there - self.assertIn('test_exception_hook', str(cm.output)) - self.assertEqual(self.exc_type, ZeroDivisionError) - self.assertIsInstance(self.exc, ZeroDivisionError) - self.assertIs(self.tb, tb) - - def test_root_logger_setup(self): - self.assertEqual(self._logs.root_logger, logging.getLogger(None)) - self.assertEqual(self._logs.root_logger.level, logger.NOTSET) - # print(self._logs.root_logger.handlers) - - def test_logs_stderr(self): - stderr_handler = [h for h in self._logs.root_logger.handlers - if hasattr(h, 'stream') and h.stream == sys.stderr][0] - self.assertEqual(stderr_handler.level, logging.WARNING) - self.assertIs(self._logs.stderr_handler, stderr_handler) - - def test_logs_temp_file(self): - temp_handler = [ - h for h in self._logs.root_logger.handlers - if hasattr(h, 'stream') and h.stream.name == self._logs.tmp_file.name][0] - # Test that log everything is set - self.assertEqual(temp_handler.level, logger.NOTSET) - self.assertEqual(self._logs.root_logger.level, logger.NOTSET) - - def test_formatter(self): - settings.configure({'processing_dir': self.temp_dir.name, - 'logging': {'format': 'foo {asctime} {msg}', - 'date_format': 'bar', - 'style': '{'}}) - - # This doesn't get formatted - # with self.assertLogs(__name__, logger.ERROR) as cm: - # logger.getLogger(__name__).error('Hi') - - test_logger = logger.getLogger(f'{__name__}.test_formatter') - record = logging.LogRecord(__name__, logger.ERROR, __file__, 0, "Hiya", (), - None) - self.assertTrue(test_logger.filter(record)) - self.assertTrue(self._logs.stderr_handler.filter(record)) - self.assertEqual(self._logs.stderr_handler.format(record), "foo bar Hiya") - - def test_hostname(self): - test_logger = logger.getLogger(f'{__name__}.test_hostname') - - record = test_logger.makeRecord(__name__, logger.ERROR, __file__, 0, - "Hiya", (), None) - self.assertTrue(test_logger.filter(record)) - self.assertTrue(self._logs.stderr_handler.filter(record)) - self.assertIn(':preconfig)', self._logs.stderr_handler.format(record)) - - settings._setup() - - record = test_logger.makeRecord(__name__, logger.ERROR, __file__, 0, - "Hiya", (), None) - self.assertTrue(test_logger.filter(record)) - self.assertTrue(self._logs.stderr_handler.filter(record)) - self.assertIn(f'({platform.node()}:', - self._logs.stderr_handler.format(record)) - - # Test https://stackoverflow.com/q/19615876/4166604 - def test_funcName(self): - stream = io.StringIO() - test_logger = logger.getLogger(f'{__name__}.test_funcName') - formatter = logging.Formatter('%(filename)s:%(funcName)s %(msg)s') - handler = logging.StreamHandler(stream) - handler.setFormatter(formatter) - handler.setLevel(logger.DEBUG2) - test_logger.addHandler(handler) - test_logger.setLevel(logger.DEBUG2) - - test_logger.debug2('hiya') - self.assertEqual(stream.getvalue(), - f'{os.path.basename(__file__)}:test_funcName hiya\n') - - def test_funcName_stackinfo(self): - stream = 
io.StringIO() - test_logger = logger.getLogger(f'{__name__}.test_funcName') - formatter = logging.Formatter('%(filename)s:%(funcName)s %(msg)s') - handler = logging.StreamHandler(stream) - handler.setFormatter(formatter) - handler.setLevel(logger.DEBUG2) - test_logger.addHandler(handler) - test_logger.setLevel(logger.DEBUG2) - - test_logger.debug2('byeee', stack_info=True) - self.assertNotIn(logger._srcfiles[0], stream.getvalue()) - self.assertNotIn(logger._srcfiles[1], stream.getvalue()) - self.assertIn( - f'{os.path.basename(__file__)}:test_funcName_stackinfo byeee\n', - stream.getvalue()) - - def test_level(self): - settings.configure({'processing_dir': self.temp_dir.name, - 'logging': {'level': 'DEBUG1'}}) - - self.assertEqual(settings.logging.level, "DEBUG1") - - self.assertEqual(self._logs.root_logger.level, logger.NOTSET) - self.assertEqual(self._logs.stderr_handler.level, logger.DEBUG1) - - def test_level_case_insensitive(self): - with self.assertLogs(level=logger.DEBUG2): - settings.configure({'processing_dir': self.temp_dir.name, - 'logging': {'level': 'debug2'}}) - - self.assertEqual(settings.logging.level, "debug2") - - self.assertEqual(self._logs.root_logger.level, logger.NOTSET) - self.assertEqual(self._logs.stderr_handler.level, logger.DEBUG2) - - def test_replay(self): - # Swap out the stderr stream handler for this test - test_handler = logging.handlers.MemoryHandler(capacity=1000) - test_handler.setLevel( - logger._SetupTerraLogger.default_stderr_handler_level) - self._logs.root_logger.handlers = [ - test_handler if h is self._logs.stderr_handler - else h for h in self._logs.root_logger.handlers] - self._logs.stderr_handler = test_handler - - test_logger = logger.getLogger(f'{__name__}.test_replay') - message1 = str(uuid.uuid4()) - message2 = str(uuid.uuid4()) - message3 = str(uuid.uuid4()) - test_logger.error(message1) - test_logger.debug1(message2) - test_logger.debug2(message3) - - self.assertEqual(str(test_handler.buffer).count(message1), 1) - self.assertEqual(str(test_handler.buffer).count(message2), 0) - self.assertEqual(str(test_handler.buffer).count(message3), 0) - - settings.configure({'processing_dir': self.temp_dir.name, - 'logging': {'level': 'debug1'}}) - - self.assertEqual(str(test_handler.buffer).count(message1), 2) - self.assertEqual(str(test_handler.buffer).count(message2), 1) - self.assertEqual(str(test_handler.buffer).count(message3), 0) - - def test_configured_file(self): - settings._setup() - log_filename = os.path.join(self.temp_dir.name, - self._logs.default_log_prefix) - - log_handler = [ - h for h in self._logs.root_logger.handlers - if hasattr(h, 'stream') and h.stream.name == log_filename][0] - - # Test the defaults - self.assertEqual(log_handler.level, logger.ERROR) - self.assertEqual(self._logs.root_logger.level, logger.NOTSET) - - def test_debug1(self): - message = str(uuid.uuid4()) - with self.assertLogs(level=logger.DEBUG1) as cm: - logger.getLogger(f'{__name__}.test_debug1').debug1(message) - self.assertIn(message, str(cm.output)) - - def test_debug2(self): - message = str(uuid.uuid4()) - with self.assertLogs(level=logger.DEBUG2) as cm: - logger.getLogger(f'{__name__}.test_debug2').debug2(message) - self.assertIn(message, str(cm.output)) - - def test_debug3(self): - message = str(uuid.uuid4()) - with self.assertLogs(level=logger.DEBUG3) as cm: - logger.getLogger(f'{__name__}.test_debug3').debug3(message) - self.assertIn(message, str(cm.output)) - - def test_warnings(self): - message = str(uuid.uuid4()) - with 
self.assertLogs(level=logger.WARNING) as cm: - warnings.warn(message) - self.assertIn(message, str(cm.output)) +# def test_double_configure(self): +# settings._setup() +# with self.assertLogs(): +# with self.assertRaises(ImproperlyConfigured): +# self._logs.configure_logger(None) + +# def test_temp_file_cleanup(self): +# tmp_file = self._logs.tmp_file.name +# self.assertExist(tmp_file) +# self.assertFalse(self._logs._configured) +# settings.processing_dir +# self.assertNotExist(tmp_file) +# self.assertTrue(self._logs._configured) + +# def test_exception_hook_installed(self): +# self.assertEqual( +# sys.excepthook.__qualname__, +# '_SetupTerraLogger.setup_logging_exception_hook.' +# '.handle_exception') +# self.assertEqual('terra.logger', sys.excepthook.__module__) + +# def test_exception_hook(self): +# def save_exec_info(exc_type, exc, tb): +# self.exc_type = exc_type +# self.exc = exc +# self.tb = tb +# sys.excepthook = save_exec_info +# self._logs.setup_logging_exception_hook() +# with mock.patch('sys.stderr', new_callable=io.StringIO): +# with self.assertLogs() as cm: +# # with self.assertRaises(ZeroDivisionError): +# tb = make_traceback() +# sys.excepthook(ZeroDivisionError, +# ZeroDivisionError('division by almost zero'), tb) + +# self.assertIn('division by almost zero', str(cm.output)) +# # Test stack trace stuff in there +# self.assertIn('test_exception_hook', str(cm.output)) +# self.assertEqual(self.exc_type, ZeroDivisionError) +# self.assertIsInstance(self.exc, ZeroDivisionError) +# self.assertIs(self.tb, tb) + +# def test_root_logger_setup(self): +# self.assertEqual(self._logs.root_logger, logging.getLogger(None)) +# self.assertEqual(self._logs.root_logger.level, logger.NOTSET) +# # print(self._logs.root_logger.handlers) + +# def test_logs_stderr(self): +# stderr_handler = [h for h in self._logs.root_logger.handlers +# if hasattr(h, 'stream') and h.stream == sys.stderr][0] +# self.assertEqual(stderr_handler.level, logging.WARNING) +# self.assertIs(self._logs.stderr_handler, stderr_handler) + +# def test_logs_temp_file(self): +# temp_handler = [ +# h for h in self._logs.root_logger.handlers +# if hasattr(h, 'stream') and h.stream.name == self._logs.tmp_file.name][0] +# # Test that log everything is set +# self.assertEqual(temp_handler.level, logger.NOTSET) +# self.assertEqual(self._logs.root_logger.level, logger.NOTSET) + +# def test_formatter(self): +# settings.configure({'processing_dir': self.temp_dir.name, +# 'logging': {'format': 'foo {asctime} {msg}', +# 'date_format': 'bar', +# 'style': '{'}}) + +# # This doesn't get formatted +# # with self.assertLogs(__name__, logger.ERROR) as cm: +# # logger.getLogger(__name__).error('Hi') + +# test_logger = logger.getLogger(f'{__name__}.test_formatter') +# record = logging.LogRecord(__name__, logger.ERROR, __file__, 0, "Hiya", (), +# None) +# self.assertTrue(test_logger.filter(record)) +# self.assertTrue(self._logs.stderr_handler.filter(record)) +# self.assertEqual(self._logs.stderr_handler.format(record), "foo bar Hiya") + +# def test_hostname(self): +# test_logger = logger.getLogger(f'{__name__}.test_hostname') + +# record = test_logger.makeRecord(__name__, logger.ERROR, __file__, 0, +# "Hiya", (), None) +# self.assertTrue(test_logger.filter(record)) +# self.assertTrue(self._logs.stderr_handler.filter(record)) +# self.assertIn(':preconfig)', self._logs.stderr_handler.format(record)) + +# settings._setup() + +# record = test_logger.makeRecord(__name__, logger.ERROR, __file__, 0, +# "Hiya", (), None) +# 
self.assertTrue(test_logger.filter(record)) +# self.assertTrue(self._logs.stderr_handler.filter(record)) +# self.assertIn(f'({platform.node()}:', +# self._logs.stderr_handler.format(record)) + +# # Test https://stackoverflow.com/q/19615876/4166604 +# def test_funcName(self): +# stream = io.StringIO() +# test_logger = logger.getLogger(f'{__name__}.test_funcName') +# formatter = logging.Formatter('%(filename)s:%(funcName)s %(msg)s') +# handler = logging.StreamHandler(stream) +# handler.setFormatter(formatter) +# handler.setLevel(logger.DEBUG2) +# test_logger.addHandler(handler) +# test_logger.setLevel(logger.DEBUG2) + +# test_logger.debug2('hiya') +# self.assertEqual(stream.getvalue(), +# f'{os.path.basename(__file__)}:test_funcName hiya\n') + +# def test_funcName_stackinfo(self): +# stream = io.StringIO() +# test_logger = logger.getLogger(f'{__name__}.test_funcName') +# formatter = logging.Formatter('%(filename)s:%(funcName)s %(msg)s') +# handler = logging.StreamHandler(stream) +# handler.setFormatter(formatter) +# handler.setLevel(logger.DEBUG2) +# test_logger.addHandler(handler) +# test_logger.setLevel(logger.DEBUG2) + +# test_logger.debug2('byeee', stack_info=True) +# self.assertNotIn(logger._srcfiles[0], stream.getvalue()) +# self.assertNotIn(logger._srcfiles[1], stream.getvalue()) +# self.assertIn( +# f'{os.path.basename(__file__)}:test_funcName_stackinfo byeee\n', +# stream.getvalue()) + + # def test_level(self): + # settings.configure({'processing_dir': self.temp_dir.name, + # 'logging': {'level': 'DEBUG1'}}) + + # self.assertEqual(settings.logging.level, "DEBUG1") + + # self.assertEqual(self._logs.root_logger.level, logger.NOTSET) + # self.assertEqual(self._logs.stderr_handler.level, logger.DEBUG1) + + # def test_level_case_insensitive(self): + # with self.assertLogs(level=logger.DEBUG2): + # settings.configure({'processing_dir': self.temp_dir.name, + # 'logging': {'level': 'debug2'}}) + + # self.assertEqual(settings.logging.level, "debug2") + + # self.assertEqual(self._logs.root_logger.level, logger.NOTSET) + # self.assertEqual(self._logs.stderr_handler.level, logger.DEBUG2) + + # def test_replay(self): + # # Swap out the stderr stream handler for this test + # test_handler = logging.handlers.MemoryHandler(capacity=1000) + # test_handler.setLevel( + # logger._SetupTerraLogger.default_stderr_handler_level) + # self._logs.root_logger.handlers = [ + # test_handler if h is self._logs.stderr_handler + # else h for h in self._logs.root_logger.handlers] + # self._logs.stderr_handler = test_handler + + # test_logger = logger.getLogger(f'{__name__}.test_replay') + # message1 = str(uuid.uuid4()) + # message2 = str(uuid.uuid4()) + # message3 = str(uuid.uuid4()) + # test_logger.error(message1) + # test_logger.debug1(message2) + # test_logger.debug2(message3) + + # self.assertEqual(str(test_handler.buffer).count(message1), 1) + # self.assertEqual(str(test_handler.buffer).count(message2), 0) + # self.assertEqual(str(test_handler.buffer).count(message3), 0) + + # settings.configure({'processing_dir': self.temp_dir.name, + # 'logging': {'level': 'debug1'}}) + + # self.assertEqual(str(test_handler.buffer).count(message1), 2) + # self.assertEqual(str(test_handler.buffer).count(message2), 1) + # self.assertEqual(str(test_handler.buffer).count(message3), 0) + + # def test_configured_file(self): + # settings._setup() + # log_filename = os.path.join(self.temp_dir.name, + # self._logs.default_log_prefix) + + # log_handler = [ + # h for h in self._logs.root_logger.handlers + # if hasattr(h, 'stream') and 
h.stream.name == log_filename][0] + + # # Test the defaults + # self.assertEqual(log_handler.level, logger.ERROR) + # self.assertEqual(self._logs.root_logger.level, logger.NOTSET) + + # def test_debug1(self): + # message = str(uuid.uuid4()) + # with self.assertLogs(level=logger.DEBUG1) as cm: + # logger.getLogger(f'{__name__}.test_debug1').debug1(message) + # self.assertIn(message, str(cm.output)) + + # def test_debug2(self): + # message = str(uuid.uuid4()) + # with self.assertLogs(level=logger.DEBUG2) as cm: + # logger.getLogger(f'{__name__}.test_debug2').debug2(message) + # self.assertIn(message, str(cm.output)) + + # def test_debug3(self): + # message = str(uuid.uuid4()) + # with self.assertLogs(level=logger.DEBUG3) as cm: + # logger.getLogger(f'{__name__}.test_debug3').debug3(message) + # self.assertIn(message, str(cm.output)) + + # def test_warnings(self): + # message = str(uuid.uuid4()) + # with self.assertLogs(level=logger.WARNING) as cm: + # warnings.warn(message) + # self.assertIn(message, str(cm.output)) class TestUnitTests(TestCase): diff --git a/terra/tests/utils.py b/terra/tests/utils.py index 45ce565f..ac01ad94 100644 --- a/terra/tests/utils.py +++ b/terra/tests/utils.py @@ -76,12 +76,18 @@ def tearDown(self): terra.core.signals.post_settings_context.disconnect(self._logs.reconfigure_logger) super().tearDown() -class TestLoggerConfigureCase(TestLoggerCase): + +class TestComputeCase(TestCase): def setUp(self): - # Enable signals. Most logging tests require configure logger to actually - # be called. LogRecordSocketReceiver is mocked out, so no lasting side - # effects should inccur. - self.patches.append(mock.patch.dict(os.environ, TERRA_UNITTEST='0')) + import terra.compute.utils + self.patches.append(mock.patch.dict(terra.compute.utils.compute.__dict__)) + super().setUp() + + +class TestExecutorCase(TestCase): + def setUp(self): + import terra.executor.utils + self.patches.append(mock.patch.dict(terra.executor.utils.Executor.__dict__)) super().setUp() @@ -89,3 +95,11 @@ class TestSignalCase(TestCase): def setUp(self): self.patches.append(mock.patch.dict(os.environ, TERRA_UNITTEST='0')) super().setUp() + + +# Enable signals. Most logging tests require configure logger to actually +# be called. LogRecordSocketReceiver is mocked out, so no lasting side +# effects should inccur. 
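+#
+# As a purely illustrative sketch (hypothetical test, not part of this
+# module or of this patch), a logging test built on the combined case class
+# below would look roughly like:
+#
+#   class TestMyLogging(TestLoggerConfigureCase):
+#     def test_warning_captured(self):
+#       settings.configure({'processing_dir': self.temp_dir.name})
+#       with self.assertLogs(level=logger.WARNING) as cm:
+#         logger.getLogger(__name__).warning('something happened')
+#       self.assertIn('something happened', str(cm.output))
+#
+# where settings, logger, and self.temp_dir are used the same way as in the
+# tests in terra/tests/test_logger.py.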
+class TestLoggerConfigureCase(TestLoggerCase, TestSignalCase, + TestComputeCase, TestExecutorCase): + pass From 370d345af790c14841866ffb286ffb10d7745ccf Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Thu, 28 May 2020 13:00:03 -0400 Subject: [PATCH 61/94] Refactor tests to use common case classes Signed-off-by: Andy Neff --- terra/tests/test_compute_base.py | 163 +++++---- terra/tests/test_compute_container.py | 6 +- terra/tests/test_compute_docker.py | 6 +- terra/tests/test_compute_dummy.py | 6 +- terra/tests/test_compute_singularity.py | 6 +- terra/tests/test_compute_utils.py | 10 +- terra/tests/test_compute_virtualenv.py | 11 +- terra/tests/test_core_settings.py | 5 +- terra/tests/test_executor_utils.py | 9 +- terra/tests/test_logger.py | 424 ++++++++++++------------ terra/tests/test_utils_workflow.py | 11 +- terra/tests/utils.py | 24 +- 12 files changed, 332 insertions(+), 349 deletions(-) diff --git a/terra/tests/test_compute_base.py b/terra/tests/test_compute_base.py index d585534c..6db849da 100644 --- a/terra/tests/test_compute_base.py +++ b/terra/tests/test_compute_base.py @@ -2,94 +2,91 @@ from unittest import mock from terra import settings -from .utils import TestCase +from .utils import TestCase, TestSettingsConfiguredCase -class TestServiceBase(TestCase): +class TestServiceBase(TestSettingsConfiguredCase): def setUp(self): from terra.compute import base self.base = base - # I want to be able to use settings - self.patches.append(mock.patch.object(settings, '_wrapped', None)) super().setUp() + + # Simulate external env var + @mock.patch.dict(os.environ, {'FOO': "BAR"}) + def test_env(self): + # Test that a service inherits the environment correctly + service = self.base.BaseService() + # App specific env var + service.env['BAR'] = 'foo' + # Make sure both show up + self.assertEqual(service.env['FOO'], 'BAR') + self.assertEqual(service.env['BAR'], 'foo') + # Make sure BAR is isolated from process env + self.assertNotIn("BAR", os.environ) + + def test_add_volumes(self): + service = self.base.BaseService() + # Add a volumes + service.add_volume("/local", "/remote") + # Make sure it's in the list + self.assertIn(("/local", "/remote"), service.volumes) + + # Unconfigure settings + @mock.patch.object(settings, '_wrapped', None) + def test_volumes_and_configuration_map(self): + # Add a volumes + service = self.base.BaseService() + service.add_volume("/local", "/remote") + + # Test configuration_map settings.configure({}) + # Make sure the volume is in the map + self.assertEqual([("/local", "/remote")], + self.base.BaseCompute().configuration_map(service)) + + def test_registry(self): + with mock.patch.dict(self.base.services, clear=True): + # Registration test + class Foo: + class TestService(self.base.BaseService): + pass + + + class TestService_base(Foo.TestService, self.base.BaseService): + pass + + # Register a class class, just for fun + self.base.BaseCompute.register(Foo.TestService)(TestService_base) + + self.assertIn(Foo.TestService.__module__ + '.' 
+ \ + Foo.TestService.__qualname__, + self.base.services) + + with self.assertRaises(self.base.AlreadyRegisteredException, + msg='Compute command "car" does not have a service ' + 'implementation "car_service"'): + self.base.BaseCompute.register(Foo.TestService)(lambda x: 1) + + def test_getattr(self): + class Foo(self.base.BaseCompute): + def bar_service(self): + pass + + foo = Foo() + foo.bar + with self.assertRaises(AttributeError): + foo.car + + +class TestUnitTests(TestCase): + def setUp(self): + from terra.compute import base + self.base = base -# # Simulate external env var -# @mock.patch.dict(os.environ, {'FOO': "BAR"}) -# def test_env(self): -# # Test that a service inherits the environment correctly -# service = self.base.BaseService() -# # App specific env var -# service.env['BAR'] = 'foo' -# # Make sure both show up -# self.assertEqual(service.env['FOO'], 'BAR') -# self.assertEqual(service.env['BAR'], 'foo') -# # Make sure BAR is isolated from process env -# self.assertNotIn("BAR", os.environ) - -# def test_add_volumes(self): -# service = self.base.BaseService() -# # Add a volumes -# service.add_volume("/local", "/remote") -# # Make sure it's in the list -# self.assertIn(("/local", "/remote"), service.volumes) - -# # Unconfigure settings -# @mock.patch.object(settings, '_wrapped', None) -# def test_volumes_and_configuration_map(self): -# # Add a volumes -# service = self.base.BaseService() -# service.add_volume("/local", "/remote") - -# # Test configuration_map -# settings.configure({}) -# # Make sure the volume is in the map -# self.assertEqual([("/local", "/remote")], -# self.base.BaseCompute().configuration_map(service)) - -# def test_registry(self): -# with mock.patch.dict(self.base.services, clear=True): -# # Registration test -# class Foo: -# class TestService(self.base.BaseService): -# pass - - -# class TestService_base(Foo.TestService, self.base.BaseService): -# pass - -# # Register a class class, just for fun -# self.base.BaseCompute.register(Foo.TestService)(TestService_base) - -# self.assertIn(Foo.TestService.__module__ + '.' + \ -# Foo.TestService.__qualname__, -# self.base.services) - -# with self.assertRaises(self.base.AlreadyRegisteredException, -# msg='Compute command "car" does not have a service ' -# 'implementation "car_service"'): -# self.base.BaseCompute.register(Foo.TestService)(lambda x: 1) - -# def test_getattr(self): -# class Foo(self.base.BaseCompute): -# def bar_service(self): -# pass - -# foo = Foo() -# foo.bar -# with self.assertRaises(AttributeError): -# foo.car - - -# class TestUnitTests(TestCase): -# def setUp(self): -# from terra.compute import base -# self.base = base - -# def last_test_registered_services(self): -# self.assertFalse( -# self.base.services, -# msg="If you are seeing this, one of the other unit tests has " -# "registered a terra service. This side effect should be " -# "prevented by mocking out the terra.compute.base.services dict. " -# "Otherwise unit tests can interfere with each other.") + def last_test_registered_services(self): + self.assertFalse( + self.base.services, + msg="If you are seeing this, one of the other unit tests has " + "registered a terra service. This side effect should be " + "prevented by mocking out the terra.compute.base.services dict. 
" + "Otherwise unit tests can interfere with each other.") diff --git a/terra/tests/test_compute_container.py b/terra/tests/test_compute_container.py index 4a9aa259..fd51ca36 100644 --- a/terra/tests/test_compute_container.py +++ b/terra/tests/test_compute_container.py @@ -7,7 +7,7 @@ from terra.executor.utils import Executor from terra.compute import base import terra.compute.container -from .utils import TestCase, TestNamedTemporaryFileCase +from .utils import TestNamedTemporaryFileCase, TestSettingsUnconfiguredCase class SomeService(terra.compute.container.ContainerService): @@ -29,11 +29,9 @@ def mock_map_lcow(self, *args, **kwargs): return [('/c/foo', '/bar')] -class TestComputeContainerCase(TestCase): +class TestComputeContainerCase(TestSettingsUnconfiguredCase): def setUp(self): self.temp_dir - # Use settings - self.patches.append(mock.patch.object(settings, '_wrapped', None)) # This will resets the _connection to an uninitialized state self.patches.append( mock.patch.object(terra.compute.utils.ComputeHandler, diff --git a/terra/tests/test_compute_docker.py b/terra/tests/test_compute_docker.py index fca9c3ec..8cf23351 100644 --- a/terra/tests/test_compute_docker.py +++ b/terra/tests/test_compute_docker.py @@ -10,13 +10,11 @@ from terra.compute import docker import terra.compute.utils -from .utils import TestCase +from .utils import TestSettingsUnconfiguredCase -class TestComputeDockerCase(TestCase): +class TestComputeDockerCase(TestSettingsUnconfiguredCase): def setUp(self): - # Use settings - self.patches.append(mock.patch.object(settings, '_wrapped', None)) # This will resets the _connection to an uninitialized state self.patches.append( mock.patch.object(terra.compute.utils.ComputeHandler, diff --git a/terra/tests/test_compute_dummy.py b/terra/tests/test_compute_dummy.py index c9fd616c..19f5e411 100644 --- a/terra/tests/test_compute_dummy.py +++ b/terra/tests/test_compute_dummy.py @@ -6,7 +6,7 @@ from terra.compute import dummy import terra.compute.utils -from .utils import TestCase +from .utils import TestSettingsUnconfiguredCase # Test Dummy Definition @@ -31,10 +31,8 @@ def __init__(self): self.d = 44 -class TestComputeDummyCase(TestCase): +class TestComputeDummyCase(TestSettingsUnconfiguredCase): def setUp(self): - # Use settings - self.patches.append(mock.patch.object(settings, '_wrapped', None)) # Use registry self.patches.append(mock.patch.dict(base.services, clear=True)) # Use compute diff --git a/terra/tests/test_compute_singularity.py b/terra/tests/test_compute_singularity.py index 44f57920..aa6efa80 100644 --- a/terra/tests/test_compute_singularity.py +++ b/terra/tests/test_compute_singularity.py @@ -6,13 +6,11 @@ from terra.compute import singularity import terra.compute.utils -from .utils import TestCase +from .utils import TestSettingsUnconfiguredCase -class TestComputeSingularityCase(TestCase): +class TestComputeSingularityCase(TestSettingsUnconfiguredCase): def setUp(self): - # Use settings - self.patches.append(mock.patch.object(settings, '_wrapped', None)) # This will resets the _connection to an uninitialized state self.patches.append( mock.patch.object(terra.compute.utils.ComputeHandler, diff --git a/terra/tests/test_compute_utils.py b/terra/tests/test_compute_utils.py index d00b053a..9cf8e49f 100644 --- a/terra/tests/test_compute_utils.py +++ b/terra/tests/test_compute_utils.py @@ -3,7 +3,7 @@ import warnings from terra import settings -from .utils import TestCase +from .utils import TestSettingsUnconfiguredCase import terra.compute.utils as utils import 
terra.compute.dummy import terra.compute.docker @@ -34,7 +34,7 @@ class Service2_test: # I am purposefully showing multiple ways to mock _wrapped for demonstration # purposes -class TestComputeUtilsCase(TestCase): +class TestComputeUtilsCase(TestSettingsUnconfiguredCase): def setUp(self): # Use setting self.patches.append(mock.patch.object(settings, '_wrapped', None)) @@ -129,12 +129,12 @@ def mock_popen(*args, **kwargs): class TestBaseJust(TestComputeUtilsCase): def setUp(self): - self.patches.append(mock.patch.object(utils, 'Popen', mock_popen)) - super().setUp() - # Make a copy self.original_env = os.environ.copy() + self.patches.append(mock.patch.object(utils, 'Popen', mock_popen)) + super().setUp() + def tearDown(self): super().tearDown() # Make sure nothing inadvertently changed environ diff --git a/terra/tests/test_compute_virtualenv.py b/terra/tests/test_compute_virtualenv.py index 5a546435..fff0d54e 100644 --- a/terra/tests/test_compute_virtualenv.py +++ b/terra/tests/test_compute_virtualenv.py @@ -7,7 +7,7 @@ from terra.compute import virtualenv import terra.compute.utils -from .utils import TestCase +from .utils import TestSettingsUnconfiguredCase class MockVirtualEnvService(virtualenv.Service): @@ -17,10 +17,8 @@ def __init__(self): self.env["BAR"] = "FOO" -class TestVirtualEnv(TestCase): +class TestVirtualEnv(TestSettingsUnconfiguredCase): def setUp(self): - # Use settings - self.patches.append(mock.patch.object(settings, '_wrapped', None)) # self.run trigger Executor self.patches.append(mock.patch.dict(Executor.__dict__)) # This will resets the _connection to an uninitialized state @@ -111,10 +109,13 @@ def test_logging_code(self): env_lines = [x for x in cm.output if "Environment Modification:" in x][0] env_lines = env_lines.split('\n') - self.assertEqual(len(env_lines), 4) + self.assertEqual(len(env_lines), 5) self.assertTrue(any(o.startswith('- BAR:') for o in env_lines)) self.assertTrue(any(o.startswith('+ FOO:') for o in env_lines)) # Added by Terra self.assertTrue(any(o.startswith('+ TERRA_SETTINGS_FILE:') for o in env_lines)) + # Added by TestSettingsUnconfiguredCase + self.assertTrue(any(o.startswith('- TERRA_SETTINGS_FILE:') + for o in env_lines)) diff --git a/terra/tests/test_core_settings.py b/terra/tests/test_core_settings.py index d705f14f..f0db8cda 100644 --- a/terra/tests/test_core_settings.py +++ b/terra/tests/test_core_settings.py @@ -224,7 +224,10 @@ def test_dir(self): self.assertIn('c', dir(d.b[0][0])) -class TestSettings(TestLoggerCase, TestSettingsUnconfiguredCase): +class TestSettings(TestLoggerCase): + # TestLoggerCase sets TERRA_SETTINGS_FILE to a valid file, in order to get + # an ImproperlyConfigured Exception here, TERRA_SETTINGS_FILE must be set to + # not a file, such as the empty string. 
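+  # (mock.patch.dict restores os.environ when this test finishes, so the
+  # other tests in this class still see the valid settings file written by
+  # TestLoggerCase.setUp.)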
@mock.patch.dict(os.environ, TERRA_SETTINGS_FILE='') def test_unconfigured(self): with self.assertRaises(ImproperlyConfigured): diff --git a/terra/tests/test_executor_utils.py b/terra/tests/test_executor_utils.py index a818c969..15a1f60e 100644 --- a/terra/tests/test_executor_utils.py +++ b/terra/tests/test_executor_utils.py @@ -2,18 +2,13 @@ import concurrent.futures from terra import settings -from .utils import TestCase +from .utils import TestCase, TestExecutorCase, TestSettingsUnconfiguredCase from terra.executor.utils import ExecutorHandler, Executor from terra.executor.dummy import DummyExecutor from terra.executor.sync import SyncExecutor -class TestExecutorHandler(TestCase): - def setUp(self): - self.patches.append(mock.patch.object(settings, '_wrapped', None)) - self.patches.append(mock.patch.dict(Executor.__dict__)) - super().setUp() - +class TestExecutorHandler(TestExecutorCase, TestSettingsUnconfiguredCase): def test_executor_handler(self): settings.configure({'executor': {'type': 'DummyExecutor'}}) diff --git a/terra/tests/test_logger.py b/terra/tests/test_logger.py index b4194e34..a246dbfb 100644 --- a/terra/tests/test_logger.py +++ b/terra/tests/test_logger.py @@ -50,218 +50,218 @@ def test_setup_working(self): self.assertEqual(settings.processing_dir, self.temp_dir.name) self.assertTrue(settings.configured) -# def test_double_configure(self): -# settings._setup() -# with self.assertLogs(): -# with self.assertRaises(ImproperlyConfigured): -# self._logs.configure_logger(None) - -# def test_temp_file_cleanup(self): -# tmp_file = self._logs.tmp_file.name -# self.assertExist(tmp_file) -# self.assertFalse(self._logs._configured) -# settings.processing_dir -# self.assertNotExist(tmp_file) -# self.assertTrue(self._logs._configured) - -# def test_exception_hook_installed(self): -# self.assertEqual( -# sys.excepthook.__qualname__, -# '_SetupTerraLogger.setup_logging_exception_hook.' 
-# '.handle_exception') -# self.assertEqual('terra.logger', sys.excepthook.__module__) - -# def test_exception_hook(self): -# def save_exec_info(exc_type, exc, tb): -# self.exc_type = exc_type -# self.exc = exc -# self.tb = tb -# sys.excepthook = save_exec_info -# self._logs.setup_logging_exception_hook() -# with mock.patch('sys.stderr', new_callable=io.StringIO): -# with self.assertLogs() as cm: -# # with self.assertRaises(ZeroDivisionError): -# tb = make_traceback() -# sys.excepthook(ZeroDivisionError, -# ZeroDivisionError('division by almost zero'), tb) - -# self.assertIn('division by almost zero', str(cm.output)) -# # Test stack trace stuff in there -# self.assertIn('test_exception_hook', str(cm.output)) -# self.assertEqual(self.exc_type, ZeroDivisionError) -# self.assertIsInstance(self.exc, ZeroDivisionError) -# self.assertIs(self.tb, tb) - -# def test_root_logger_setup(self): -# self.assertEqual(self._logs.root_logger, logging.getLogger(None)) -# self.assertEqual(self._logs.root_logger.level, logger.NOTSET) -# # print(self._logs.root_logger.handlers) - -# def test_logs_stderr(self): -# stderr_handler = [h for h in self._logs.root_logger.handlers -# if hasattr(h, 'stream') and h.stream == sys.stderr][0] -# self.assertEqual(stderr_handler.level, logging.WARNING) -# self.assertIs(self._logs.stderr_handler, stderr_handler) - -# def test_logs_temp_file(self): -# temp_handler = [ -# h for h in self._logs.root_logger.handlers -# if hasattr(h, 'stream') and h.stream.name == self._logs.tmp_file.name][0] -# # Test that log everything is set -# self.assertEqual(temp_handler.level, logger.NOTSET) -# self.assertEqual(self._logs.root_logger.level, logger.NOTSET) - -# def test_formatter(self): -# settings.configure({'processing_dir': self.temp_dir.name, -# 'logging': {'format': 'foo {asctime} {msg}', -# 'date_format': 'bar', -# 'style': '{'}}) - -# # This doesn't get formatted -# # with self.assertLogs(__name__, logger.ERROR) as cm: -# # logger.getLogger(__name__).error('Hi') - -# test_logger = logger.getLogger(f'{__name__}.test_formatter') -# record = logging.LogRecord(__name__, logger.ERROR, __file__, 0, "Hiya", (), -# None) -# self.assertTrue(test_logger.filter(record)) -# self.assertTrue(self._logs.stderr_handler.filter(record)) -# self.assertEqual(self._logs.stderr_handler.format(record), "foo bar Hiya") - -# def test_hostname(self): -# test_logger = logger.getLogger(f'{__name__}.test_hostname') - -# record = test_logger.makeRecord(__name__, logger.ERROR, __file__, 0, -# "Hiya", (), None) -# self.assertTrue(test_logger.filter(record)) -# self.assertTrue(self._logs.stderr_handler.filter(record)) -# self.assertIn(':preconfig)', self._logs.stderr_handler.format(record)) - -# settings._setup() - -# record = test_logger.makeRecord(__name__, logger.ERROR, __file__, 0, -# "Hiya", (), None) -# self.assertTrue(test_logger.filter(record)) -# self.assertTrue(self._logs.stderr_handler.filter(record)) -# self.assertIn(f'({platform.node()}:', -# self._logs.stderr_handler.format(record)) - -# # Test https://stackoverflow.com/q/19615876/4166604 -# def test_funcName(self): -# stream = io.StringIO() -# test_logger = logger.getLogger(f'{__name__}.test_funcName') -# formatter = logging.Formatter('%(filename)s:%(funcName)s %(msg)s') -# handler = logging.StreamHandler(stream) -# handler.setFormatter(formatter) -# handler.setLevel(logger.DEBUG2) -# test_logger.addHandler(handler) -# test_logger.setLevel(logger.DEBUG2) - -# test_logger.debug2('hiya') -# self.assertEqual(stream.getvalue(), -# 
f'{os.path.basename(__file__)}:test_funcName hiya\n') - -# def test_funcName_stackinfo(self): -# stream = io.StringIO() -# test_logger = logger.getLogger(f'{__name__}.test_funcName') -# formatter = logging.Formatter('%(filename)s:%(funcName)s %(msg)s') -# handler = logging.StreamHandler(stream) -# handler.setFormatter(formatter) -# handler.setLevel(logger.DEBUG2) -# test_logger.addHandler(handler) -# test_logger.setLevel(logger.DEBUG2) - -# test_logger.debug2('byeee', stack_info=True) -# self.assertNotIn(logger._srcfiles[0], stream.getvalue()) -# self.assertNotIn(logger._srcfiles[1], stream.getvalue()) -# self.assertIn( -# f'{os.path.basename(__file__)}:test_funcName_stackinfo byeee\n', -# stream.getvalue()) - - # def test_level(self): - # settings.configure({'processing_dir': self.temp_dir.name, - # 'logging': {'level': 'DEBUG1'}}) - - # self.assertEqual(settings.logging.level, "DEBUG1") - - # self.assertEqual(self._logs.root_logger.level, logger.NOTSET) - # self.assertEqual(self._logs.stderr_handler.level, logger.DEBUG1) - - # def test_level_case_insensitive(self): - # with self.assertLogs(level=logger.DEBUG2): - # settings.configure({'processing_dir': self.temp_dir.name, - # 'logging': {'level': 'debug2'}}) - - # self.assertEqual(settings.logging.level, "debug2") - - # self.assertEqual(self._logs.root_logger.level, logger.NOTSET) - # self.assertEqual(self._logs.stderr_handler.level, logger.DEBUG2) - - # def test_replay(self): - # # Swap out the stderr stream handler for this test - # test_handler = logging.handlers.MemoryHandler(capacity=1000) - # test_handler.setLevel( - # logger._SetupTerraLogger.default_stderr_handler_level) - # self._logs.root_logger.handlers = [ - # test_handler if h is self._logs.stderr_handler - # else h for h in self._logs.root_logger.handlers] - # self._logs.stderr_handler = test_handler - - # test_logger = logger.getLogger(f'{__name__}.test_replay') - # message1 = str(uuid.uuid4()) - # message2 = str(uuid.uuid4()) - # message3 = str(uuid.uuid4()) - # test_logger.error(message1) - # test_logger.debug1(message2) - # test_logger.debug2(message3) - - # self.assertEqual(str(test_handler.buffer).count(message1), 1) - # self.assertEqual(str(test_handler.buffer).count(message2), 0) - # self.assertEqual(str(test_handler.buffer).count(message3), 0) - - # settings.configure({'processing_dir': self.temp_dir.name, - # 'logging': {'level': 'debug1'}}) - - # self.assertEqual(str(test_handler.buffer).count(message1), 2) - # self.assertEqual(str(test_handler.buffer).count(message2), 1) - # self.assertEqual(str(test_handler.buffer).count(message3), 0) - - # def test_configured_file(self): - # settings._setup() - # log_filename = os.path.join(self.temp_dir.name, - # self._logs.default_log_prefix) - - # log_handler = [ - # h for h in self._logs.root_logger.handlers - # if hasattr(h, 'stream') and h.stream.name == log_filename][0] - - # # Test the defaults - # self.assertEqual(log_handler.level, logger.ERROR) - # self.assertEqual(self._logs.root_logger.level, logger.NOTSET) - - # def test_debug1(self): - # message = str(uuid.uuid4()) - # with self.assertLogs(level=logger.DEBUG1) as cm: - # logger.getLogger(f'{__name__}.test_debug1').debug1(message) - # self.assertIn(message, str(cm.output)) - - # def test_debug2(self): - # message = str(uuid.uuid4()) - # with self.assertLogs(level=logger.DEBUG2) as cm: - # logger.getLogger(f'{__name__}.test_debug2').debug2(message) - # self.assertIn(message, str(cm.output)) - - # def test_debug3(self): - # message = str(uuid.uuid4()) - # with 
self.assertLogs(level=logger.DEBUG3) as cm: - # logger.getLogger(f'{__name__}.test_debug3').debug3(message) - # self.assertIn(message, str(cm.output)) - - # def test_warnings(self): - # message = str(uuid.uuid4()) - # with self.assertLogs(level=logger.WARNING) as cm: - # warnings.warn(message) - # self.assertIn(message, str(cm.output)) + def test_double_configure(self): + settings._setup() + with self.assertLogs(): + with self.assertRaises(ImproperlyConfigured): + self._logs.configure_logger(None) + + def test_temp_file_cleanup(self): + tmp_file = self._logs.tmp_file.name + self.assertExist(tmp_file) + self.assertFalse(self._logs._configured) + settings.processing_dir + self.assertNotExist(tmp_file) + self.assertTrue(self._logs._configured) + + def test_exception_hook_installed(self): + self.assertEqual( + sys.excepthook.__qualname__, + '_SetupTerraLogger.setup_logging_exception_hook.' + '.handle_exception') + self.assertEqual('terra.logger', sys.excepthook.__module__) + + def test_exception_hook(self): + def save_exec_info(exc_type, exc, tb): + self.exc_type = exc_type + self.exc = exc + self.tb = tb + sys.excepthook = save_exec_info + self._logs.setup_logging_exception_hook() + with mock.patch('sys.stderr', new_callable=io.StringIO): + with self.assertLogs() as cm: + # with self.assertRaises(ZeroDivisionError): + tb = make_traceback() + sys.excepthook(ZeroDivisionError, + ZeroDivisionError('division by almost zero'), tb) + + self.assertIn('division by almost zero', str(cm.output)) + # Test stack trace stuff in there + self.assertIn('test_exception_hook', str(cm.output)) + self.assertEqual(self.exc_type, ZeroDivisionError) + self.assertIsInstance(self.exc, ZeroDivisionError) + self.assertIs(self.tb, tb) + + def test_root_logger_setup(self): + self.assertEqual(self._logs.root_logger, logging.getLogger(None)) + self.assertEqual(self._logs.root_logger.level, logger.NOTSET) + # print(self._logs.root_logger.handlers) + + def test_logs_stderr(self): + stderr_handler = [h for h in self._logs.root_logger.handlers + if hasattr(h, 'stream') and h.stream == sys.stderr][0] + self.assertEqual(stderr_handler.level, logging.WARNING) + self.assertIs(self._logs.stderr_handler, stderr_handler) + + def test_logs_temp_file(self): + temp_handler = [ + h for h in self._logs.root_logger.handlers + if hasattr(h, 'stream') and h.stream.name == self._logs.tmp_file.name][0] + # Test that log everything is set + self.assertEqual(temp_handler.level, logger.NOTSET) + self.assertEqual(self._logs.root_logger.level, logger.NOTSET) + + def test_formatter(self): + settings.configure({'processing_dir': self.temp_dir.name, + 'logging': {'format': 'foo {asctime} {msg}', + 'date_format': 'bar', + 'style': '{'}}) + + # This doesn't get formatted + # with self.assertLogs(__name__, logger.ERROR) as cm: + # logger.getLogger(__name__).error('Hi') + + test_logger = logger.getLogger(f'{__name__}.test_formatter') + record = logging.LogRecord(__name__, logger.ERROR, __file__, 0, "Hiya", (), + None) + self.assertTrue(test_logger.filter(record)) + self.assertTrue(self._logs.stderr_handler.filter(record)) + self.assertEqual(self._logs.stderr_handler.format(record), "foo bar Hiya") + + def test_hostname(self): + test_logger = logger.getLogger(f'{__name__}.test_hostname') + + record = test_logger.makeRecord(__name__, logger.ERROR, __file__, 0, + "Hiya", (), None) + self.assertTrue(test_logger.filter(record)) + self.assertTrue(self._logs.stderr_handler.filter(record)) + self.assertIn(':preconfig)', self._logs.stderr_handler.format(record)) + 
+ settings._setup() + + record = test_logger.makeRecord(__name__, logger.ERROR, __file__, 0, + "Hiya", (), None) + self.assertTrue(test_logger.filter(record)) + self.assertTrue(self._logs.stderr_handler.filter(record)) + self.assertIn(f'({platform.node()}:', + self._logs.stderr_handler.format(record)) + + # Test https://stackoverflow.com/q/19615876/4166604 + def test_funcName(self): + stream = io.StringIO() + test_logger = logger.getLogger(f'{__name__}.test_funcName') + formatter = logging.Formatter('%(filename)s:%(funcName)s %(msg)s') + handler = logging.StreamHandler(stream) + handler.setFormatter(formatter) + handler.setLevel(logger.DEBUG2) + test_logger.addHandler(handler) + test_logger.setLevel(logger.DEBUG2) + + test_logger.debug2('hiya') + self.assertEqual(stream.getvalue(), + f'{os.path.basename(__file__)}:test_funcName hiya\n') + + def test_funcName_stackinfo(self): + stream = io.StringIO() + test_logger = logger.getLogger(f'{__name__}.test_funcName') + formatter = logging.Formatter('%(filename)s:%(funcName)s %(msg)s') + handler = logging.StreamHandler(stream) + handler.setFormatter(formatter) + handler.setLevel(logger.DEBUG2) + test_logger.addHandler(handler) + test_logger.setLevel(logger.DEBUG2) + + test_logger.debug2('byeee', stack_info=True) + self.assertNotIn(logger._srcfiles[0], stream.getvalue()) + self.assertNotIn(logger._srcfiles[1], stream.getvalue()) + self.assertIn( + f'{os.path.basename(__file__)}:test_funcName_stackinfo byeee\n', + stream.getvalue()) + + def test_level(self): + settings.configure({'processing_dir': self.temp_dir.name, + 'logging': {'level': 'DEBUG1'}}) + + self.assertEqual(settings.logging.level, "DEBUG1") + + self.assertEqual(self._logs.root_logger.level, logger.NOTSET) + self.assertEqual(self._logs.stderr_handler.level, logger.DEBUG1) + + def test_level_case_insensitive(self): + with self.assertLogs(level=logger.DEBUG2): + settings.configure({'processing_dir': self.temp_dir.name, + 'logging': {'level': 'debug2'}}) + + self.assertEqual(settings.logging.level, "debug2") + + self.assertEqual(self._logs.root_logger.level, logger.NOTSET) + self.assertEqual(self._logs.stderr_handler.level, logger.DEBUG2) + + def test_replay(self): + # Swap out the stderr stream handler for this test + test_handler = logging.handlers.MemoryHandler(capacity=1000) + test_handler.setLevel( + logger._SetupTerraLogger.default_stderr_handler_level) + self._logs.root_logger.handlers = [ + test_handler if h is self._logs.stderr_handler + else h for h in self._logs.root_logger.handlers] + self._logs.stderr_handler = test_handler + + test_logger = logger.getLogger(f'{__name__}.test_replay') + message1 = str(uuid.uuid4()) + message2 = str(uuid.uuid4()) + message3 = str(uuid.uuid4()) + test_logger.error(message1) + test_logger.debug1(message2) + test_logger.debug2(message3) + + self.assertEqual(str(test_handler.buffer).count(message1), 1) + self.assertEqual(str(test_handler.buffer).count(message2), 0) + self.assertEqual(str(test_handler.buffer).count(message3), 0) + + settings.configure({'processing_dir': self.temp_dir.name, + 'logging': {'level': 'debug1'}}) + + self.assertEqual(str(test_handler.buffer).count(message1), 2) + self.assertEqual(str(test_handler.buffer).count(message2), 1) + self.assertEqual(str(test_handler.buffer).count(message3), 0) + + def test_configured_file(self): + settings._setup() + log_filename = os.path.join(self.temp_dir.name, + self._logs.default_log_prefix) + + log_handler = [ + h for h in self._logs.root_logger.handlers + if hasattr(h, 'stream') and 
h.stream.name == log_filename][0] + + # Test the defaults + self.assertEqual(log_handler.level, logger.ERROR) + self.assertEqual(self._logs.root_logger.level, logger.NOTSET) + + def test_debug1(self): + message = str(uuid.uuid4()) + with self.assertLogs(level=logger.DEBUG1) as cm: + logger.getLogger(f'{__name__}.test_debug1').debug1(message) + self.assertIn(message, str(cm.output)) + + def test_debug2(self): + message = str(uuid.uuid4()) + with self.assertLogs(level=logger.DEBUG2) as cm: + logger.getLogger(f'{__name__}.test_debug2').debug2(message) + self.assertIn(message, str(cm.output)) + + def test_debug3(self): + message = str(uuid.uuid4()) + with self.assertLogs(level=logger.DEBUG3) as cm: + logger.getLogger(f'{__name__}.test_debug3').debug3(message) + self.assertIn(message, str(cm.output)) + + def test_warnings(self): + message = str(uuid.uuid4()) + with self.assertLogs(level=logger.WARNING) as cm: + warnings.warn(message) + self.assertIn(message, str(cm.output)) class TestUnitTests(TestCase): diff --git a/terra/tests/test_utils_workflow.py b/terra/tests/test_utils_workflow.py index 93b7b815..56f2ca54 100644 --- a/terra/tests/test_utils_workflow.py +++ b/terra/tests/test_utils_workflow.py @@ -6,22 +6,15 @@ from terra.utils.workflow import resumable, AlreadyRunException from terra import settings from terra.logger import DEBUG1 -from .utils import TestCase +from .utils import TestSettingsUnconfiguredCase class Klass: pass -class TestResumable(TestCase): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.patches = [] - +class TestResumable(TestSettingsUnconfiguredCase): def setUp(self): - self.patches.append(mock.patch.dict(os.environ, - {'TERRA_SETTINGS_FILE': ""})) - self.patches.append(mock.patch.object(settings, '_wrapped', None)) super().setUp() settings.configure({'processing_dir': self.temp_dir.name}) diff --git a/terra/tests/utils.py b/terra/tests/utils.py index ac01ad94..9af2ee6a 100644 --- a/terra/tests/utils.py +++ b/terra/tests/utils.py @@ -4,18 +4,16 @@ from unittest import mock from vsi.test.utils import ( - TestCase as TestCase_original, make_traceback, TestNamedTemporaryFileCase + TestCase, make_traceback, TestNamedTemporaryFileCase ) from terra import settings __all__ = ["TestCase", "make_traceback", "TestNamedTemporaryFileCase", - "TestSettingsUnconfiguredCase", "TestSettingsConfiguredCase"] - - -class TestCase(TestCase_original): - pass + "TestSettingsUnconfiguredCase", "TestSettingsConfiguredCase", + "TestComputeCase", "TestExecutorCase", "TestSignalCase", + "TestLoggerConfigureCase"] class TestSettingsUnconfiguredCase(TestCase): @@ -45,8 +43,9 @@ def setUp(self): MockLogRecordSocketReceiver = mock.Mock(**attrs) self.patches.append(mock.patch('terra.logger.LogRecordSocketReceiver', MockLogRecordSocketReceiver)) - self.patches.append(mock.patch('terra.compute.base.LogRecordSocketReceiver', - MockLogRecordSocketReceiver)) + self.patches.append(mock.patch( + 'terra.compute.base.LogRecordSocketReceiver', + MockLogRecordSocketReceiver)) # Special customization of TestSettingsUnconfiguredCase self.settings_filename = os.path.join(self.temp_dir.name, 'config.json') config = {"processing_dir": self.temp_dir.name} @@ -72,8 +71,10 @@ def tearDown(self): pass self._logs.root_logger.handlers = [] import terra.core.signals - terra.core.signals.post_settings_configured.disconnect(self._logs.configure_logger) - terra.core.signals.post_settings_context.disconnect(self._logs.reconfigure_logger) + terra.core.signals.post_settings_configured.disconnect( 
+ self._logs.configure_logger) + terra.core.signals.post_settings_context.disconnect( + self._logs.reconfigure_logger) super().tearDown() @@ -87,7 +88,8 @@ def setUp(self): class TestExecutorCase(TestCase): def setUp(self): import terra.executor.utils - self.patches.append(mock.patch.dict(terra.executor.utils.Executor.__dict__)) + self.patches.append(mock.patch.dict( + terra.executor.utils.Executor.__dict__)) super().setUp() From b8e79dcb0253d2f98ee603de416f57ac9f4285f9 Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Thu, 28 May 2020 14:31:00 -0400 Subject: [PATCH 62/94] Pep8 Signed-off-by: Andy Neff --- external/vsi_common | 2 +- terra/compute/base.py | 19 +++++----- terra/compute/container.py | 6 ++-- terra/compute/utils.py | 1 + terra/core/exceptions.py | 3 +- terra/core/settings.py | 15 +++++--- terra/core/utils.py | 3 +- terra/executor/base.py | 8 ++--- terra/executor/celery/__init__.py | 22 ++---------- terra/executor/celery/__main__.py | 1 + terra/executor/celery/executor.py | 5 ++- terra/executor/process.py | 3 +- terra/executor/thread.py | 3 +- terra/executor/utils.py | 11 +++--- terra/logger.py | 58 +++++++++++------------------- terra/task.py | 19 +++++----- terra/tests/__init__.py | 14 +++----- terra/tests/demo/__main__.py | 2 ++ terra/tests/demo/runners/demo1.py | 8 +---- terra/tests/demo/runners/demo2.py | 3 +- terra/tests/demo/services.py | 1 + terra/tests/demo/tasks.py | 2 +- terra/tests/demo/workflows.py | 4 +-- terra/tests/test_compute_base.py | 9 +++-- terra/tests/test_core_settings.py | 9 ++--- terra/tests/test_executor_utils.py | 2 +- terra/tests/test_logger.py | 16 +++------ terra/tests/test_other.py | 9 +++++ terra/tests/test_signals.py | 8 ++--- terra/tests/test_utils_workflow.py | 2 -- terra/tests/utils.py | 3 +- 31 files changed, 118 insertions(+), 153 deletions(-) create mode 100644 terra/tests/test_other.py diff --git a/external/vsi_common b/external/vsi_common index d8ef64e1..2047fe85 160000 --- a/external/vsi_common +++ b/external/vsi_common @@ -1 +1 @@ -Subproject commit d8ef64e1b0003a3fe7d58c81c8dd65bb13b4bfcc +Subproject commit 2047fe85503b7884735cae953e3dcf1c8217343c diff --git a/terra/compute/base.py b/terra/compute/base.py index f034b964..14e8696c 100644 --- a/terra/compute/base.py +++ b/terra/compute/base.py @@ -1,5 +1,4 @@ import os -import json import time import atexit from logging import StreamHandler @@ -10,7 +9,9 @@ from terra import settings import terra.compute.utils from terra.executor import Executor -from terra.logger import getLogger, LogRecordSocketReceiver, SkipStdErrAddFilter +from terra.logger import ( + getLogger, LogRecordSocketReceiver, SkipStdErrAddFilter +) logger = getLogger(__name__) @@ -196,14 +197,14 @@ def configuration_map_service(self, service_info): def configure_logger(sender, **kwargs): if settings.terra.zone == 'controller': # Setup log file for use in configure - sender._log_file = os.path.join(settings.processing_dir, + sender._log_file = os.path.join( + settings.processing_dir, terra.logger._SetupTerraLogger.default_log_prefix) os.makedirs(settings.processing_dir, exist_ok=True) sender._log_file = open(sender._log_file, 'a') sender.main_log_handler = StreamHandler(stream=sender._log_file) sender.root_logger.addHandler(sender.main_log_handler) - # setup the TCP socket listener sender.tcp_logging_server = LogRecordSocketReceiver( settings.logging.server.hostname, settings.logging.server.port) @@ -217,7 +218,7 @@ def configure_logger(sender, **kwargs): if sender.tcp_logging_server.ready: break time.sleep(0.001) - else: # 
pragma: no cover + else: # pragma: no cover warnings.warn("TCP Logging server thread did not startup. " "This is probably not a problem, unless logging isn't " "working.", RuntimeWarning) @@ -227,7 +228,7 @@ def configure_logger(sender, **kwargs): def cleanup_thread(): sender.tcp_logging_server.abort = 1 listener_thread.join(timeout=5) - if listener_thread.is_alive(): # pragma: no cover + if listener_thread.is_alive(): # pragma: no cover warnings.warn("TCP Logger Server Thread did not shut down " "gracefully. Attempting to exit anyways.", RuntimeWarning) @@ -248,7 +249,8 @@ def reconfigure_logger(sender, **kwargs): # output stream if settings.terra.zone == 'controller': - log_file = os.path.join(settings.processing_dir, + log_file = os.path.join( + settings.processing_dir, terra.logger._SetupTerraLogger.default_log_prefix) # Check to see if _log_file is unset. If it is, this is due to _log_file @@ -268,11 +270,12 @@ def reconfigure_logger(sender, **kwargs): sender.main_log_handler.close() try: sender.root_logger.removeHandler(sender.main_log_handler) - except ValueError: # pragma: no cover + except ValueError: # pragma: no cover pass sender.main_log_handler = SocketHandler( settings.logging.server.hostname, settings.logging.server.port) sender.root_logger.addHandler(sender.main_log_handler) + services = {} diff --git a/terra/compute/container.py b/terra/compute/container.py index b8575d36..ef3091c0 100644 --- a/terra/compute/container.py +++ b/terra/compute/container.py @@ -45,7 +45,8 @@ def pre_run(self): env_volume_index += 1 # Setup volumes for container - self.env[f'{self.env["JUST_PROJECT_PREFIX"]}_VOLUME_{env_volume_index}'] = \ + self.env[f'{self.env["JUST_PROJECT_PREFIX"]}_' + f'VOLUME_{env_volume_index}'] = \ f'{str(temp_dir)}:/tmp_settings:rw' env_volume_index += 1 @@ -55,7 +56,8 @@ def pre_run(self): volume_str = f'{volume_host}:{volume_container}' if volume_flags: volume_str += f':{volume_flags}' - self.env[f'{self.env["JUST_PROJECT_PREFIX"]}_VOLUME_{env_volume_index}'] = \ + self.env[f'{self.env["JUST_PROJECT_PREFIX"]}_' + f'VOLUME_{env_volume_index}'] = \ volume_str env_volume_index += 1 diff --git a/terra/compute/utils.py b/terra/compute/utils.py index 95e30468..912d8121 100644 --- a/terra/compute/utils.py +++ b/terra/compute/utils.py @@ -100,6 +100,7 @@ def _connect_backend(self): lambda *args, **kwargs: compute.reconfigure_logger(*args, **kwargs), weak=False) + def get_default_service_class(cls): ''' Gets a compute class' default Service class from the class object. 
diff --git a/terra/core/exceptions.py b/terra/core/exceptions.py index 6066b698..3d2dbae4 100644 --- a/terra/core/exceptions.py +++ b/terra/core/exceptions.py @@ -3,7 +3,8 @@ class ImproperlyConfigured(Exception): Exception for Terra is somehow improperly configured """ + class ConfigurationWarning(Warning): """ Warning for Terra may somehow be improperly configured - """ \ No newline at end of file + """ diff --git a/terra/core/settings.py b/terra/core/settings.py index 3571cfc4..2bd6a7d3 100644 --- a/terra/core/settings.py +++ b/terra/core/settings.py @@ -153,6 +153,7 @@ from functools import wraps from json import JSONEncoder import platform +import warnings from terra.core.exceptions import ImproperlyConfigured, ConfigurationWarning # Do not import terra.logger or terra.signals here, or any module that @@ -252,6 +253,7 @@ def unittest(self): return os.environ.get('TERRA_UNITTEST', None) == "1" + @settings_property def need_to_set_virtualenv_dir(self): warnings.warn("You are using the virtualenv compute, and did not set " @@ -259,10 +261,12 @@ def need_to_set_virtualenv_dir(self): "Using system python.", ConfigurationWarning) return None + @settings_property def terra_uuid(self): return str(uuid4()) + global_templates = [ ( # Global Defaults @@ -270,7 +274,8 @@ def terra_uuid(self): { "logging": { "level": "ERROR", - "format": "%(asctime)s (%(hostname)s:%(zone)s): %(levelname)s/%(processName)s - %(filename)s - %(message)s", + "format": "%(asctime)s (%(hostname)s:%(zone)s): " + "%(levelname)s/%(processName)s - %(filename)s - %(message)s", "date_format": None, "style": "%", "server": { @@ -526,8 +531,8 @@ def read_json(json_file): lambda key, value: read_json(value)) # Importing these here is intentional - import terra.executor - import terra.compute + import terra.executor # noqa + import terra.compute # noqa # compute._connection # call a cached property from terra.core.signals import post_settings_configured @@ -720,7 +725,9 @@ def serializableSettings(obj): obj = nested_patch( obj, - lambda k, v: any(v is not None and isinstance(k, str) and k.endswith(pattern) for pattern in filename_suffixes), + lambda k, v: any(v is not None and isinstance(k, str) + and k.endswith(pattern) + for pattern in filename_suffixes), lambda k, v: os.path.expanduser(v)) return obj diff --git a/terra/core/utils.py b/terra/core/utils.py index 57781fff..ce0df3b8 100644 --- a/terra/core/utils.py +++ b/terra/core/utils.py @@ -194,8 +194,7 @@ def _connect_backend(self, *args, **kwargs): def __call__(self, *args, **kwargs): return self._connection(*args, **kwargs) -import threading class ThreadedHandler(Handler): def _connection(self): - return self._connect_backend() \ No newline at end of file + return self._connect_backend() diff --git a/terra/executor/base.py b/terra/executor/base.py index 171d689c..288be72e 100644 --- a/terra/executor/base.py +++ b/terra/executor/base.py @@ -1,10 +1,5 @@ -import os -import logging -import logging.handlers -from concurrent.futures import Future, Executor, as_completed +from concurrent.futures import Future, Executor -import terra -from terra import settings from terra.logger import getLogger logger = getLogger(__name__) @@ -18,5 +13,6 @@ def configure_logger(sender, **kwargs): def reconfigure_logger(sender, **kwargs): pass + class BaseFuture(Future): pass diff --git a/terra/executor/celery/__init__.py b/terra/executor/celery/__init__.py index c103f1d7..1e9d73a6 100644 --- a/terra/executor/celery/__init__.py +++ b/terra/executor/celery/__init__.py @@ -1,6 +1,7 @@ import sys from 
os import environ as env +from celery.signals import worker_process_init from celery import Celery from .executor import CeleryExecutor @@ -16,34 +17,17 @@ # FIXME __spec__ is None (__main__ is builtin) main_name = sys.modules['__main__'].__spec__.name # REVIEW can we catch a specific exception here, like AttributeError - except: + except Exception: main_name = "main_name_unset__set_TERRA_CELERY_MAIN_NAME" app = Celery(main_name) app.config_from_object(env['TERRA_CELERY_CONF']) -from celery.signals import worker_process_init, worker_init + @worker_process_init.connect def start_worker_child(*args, **kwargs): from terra import settings settings.terra.zone = 'task' -# print(args) -# print(kwargs) -# logger.info('hi') - -# @worker_init.connect -# def start_worker(*args, **kwargs): -# logger.info('Hi') - - -# app.running = False -# from celery.signals import worker_process_init -# @worker_process_init.connect -# def set_running(*args, **kwargs): -# app.running = True - -# import traceback -# traceback.print_stack() # Running on windows. # https://stackoverflow.com/questions/37255548/how-to-run-celery-on-windows diff --git a/terra/executor/celery/__main__.py b/terra/executor/celery/__main__.py index 6f593ef8..c1d0dadb 100644 --- a/terra/executor/celery/__main__.py +++ b/terra/executor/celery/__main__.py @@ -19,5 +19,6 @@ def main(): app.start() + if __name__ == '__main__': # pragma: no cover main() diff --git a/terra/executor/celery/executor.py b/terra/executor/celery/executor.py index e07d0904..7f3fbd5a 100644 --- a/terra/executor/celery/executor.py +++ b/terra/executor/celery/executor.py @@ -17,7 +17,6 @@ # limitations under the License. import os -import atexit from os import environ as env from concurrent.futures import as_completed from concurrent.futures._base import (RUNNING, FINISHED, CANCELLED, @@ -30,7 +29,6 @@ from celery.signals import setup_logging from terra.executor.base import BaseFuture, BaseExecutor -from terra.core.exceptions import ImproperlyConfigured import terra from terra import settings from terra.logger import getLogger @@ -42,6 +40,7 @@ def setup_loggers(*args, **kwargs): pass + class CeleryExecutorFuture(BaseFuture): def __init__(self, asyncresult): self._ar = asyncresult @@ -252,7 +251,7 @@ def __init__(self, service_info): @staticmethod def configure_logger(sender, **kwargs): - if settings.terra.zone == 'task': # pragma: no cover + if settings.terra.zone == 'task': # pragma: no cover # This will never really be reached, because the task_controller will # configure the logger, and than fork. 
sender.main_log_handler = NullHandler() diff --git a/terra/executor/process.py b/terra/executor/process.py index 2b99cc24..bc42e7a0 100644 --- a/terra/executor/process.py +++ b/terra/executor/process.py @@ -3,6 +3,7 @@ __all__ = ['ProcessPoolExecutor'] + class ProcessPoolExecutor(concurrent.futures.ProcessPoolExecutor, terra.executor.base.BaseExecutor): - pass \ No newline at end of file + pass diff --git a/terra/executor/thread.py b/terra/executor/thread.py index 5a8ea611..355c6db5 100644 --- a/terra/executor/thread.py +++ b/terra/executor/thread.py @@ -3,6 +3,7 @@ __all__ = ['ThreadPoolExecutor'] + class ThreadPoolExecutor(concurrent.futures.ThreadPoolExecutor, terra.executor.base.BaseExecutor): - pass \ No newline at end of file + pass diff --git a/terra/executor/utils.py b/terra/executor/utils.py index 01facb3d..75bbcc4e 100644 --- a/terra/executor/utils.py +++ b/terra/executor/utils.py @@ -1,6 +1,3 @@ -import os -import logging -import concurrent.futures from importlib import import_module from terra import settings @@ -42,13 +39,13 @@ def _connect_backend(self): from terra.executor.sync import SyncExecutor return SyncExecutor elif backend_name == "ThreadPoolExecutor" or \ - backend_name == "concurrent.futures.ThreadPoolExecutor": + backend_name == "concurrent.futures.ThreadPoolExecutor": from terra.executor.thread import ThreadPoolExecutor - return terra.executor.thread.ThreadPoolExecutor + return ThreadPoolExecutor elif backend_name == "ProcessPoolExecutor" or \ - backend_name == "concurrent.futures.ProcessPoolExecutor": + backend_name == "concurrent.futures.ProcessPoolExecutor": from terra.executor.process import ProcessPoolExecutor - return terra.executor.process.ProcessPoolExecutor + return ProcessPoolExecutor elif backend_name == "CeleryExecutor": from terra.executor.celery import CeleryExecutor return CeleryExecutor diff --git a/terra/logger.py b/terra/logger.py index 75386159..95ba789a 100644 --- a/terra/logger.py +++ b/terra/logger.py @@ -72,6 +72,7 @@ import select import pickle +import terra from terra.core.exceptions import ImproperlyConfigured # Do not import terra.settings or terra.signals here, or any module that # imports them @@ -191,8 +192,8 @@ def serve_until_stopped(self): self.ready = True while not abort: rd, wr, ex = select.select([self.socket.fileno()], - [], [], - self.timeout) + [], [], + self.timeout) if rd: self.handle_request() abort = self.abort @@ -262,11 +263,11 @@ def __init__(self): # Disable known warnings that there's nothing to be done about. for module in ('yaml', 'celery.app.amqp'): warnings.filterwarnings("ignore", - category=DeprecationWarning, module=module, - message="Using or importing the ABCs") + category=DeprecationWarning, module=module, + message="Using or importing the ABCs") warnings.filterwarnings("ignore", - category=DeprecationWarning, module='osgeo', - message="the imp module is deprecated") + category=DeprecationWarning, module='osgeo', + message="the imp module is deprecated") # This disables a message that spams the screen: # "pipbox received method enable_events() [reply_to:None ticket:None]" @@ -283,6 +284,7 @@ def main_log_handler(self): raise AttributeError("'_logs' has no 'main_log_handler'. 
An executor " "class' 'configure_logger' method should setup a " "'main_log_handler'.") + @main_log_handler.setter def main_log_handler(self, value): self.__main_log_handler = value @@ -312,7 +314,7 @@ def handle_exception(exc_type, exc_value, exc_traceback): try: from terra import settings zone = settings.terra.zone - except: + except Exception: zone = 'preconfig' print(f'Exception in {zone} on {platform.node()}', file=sys.stderr) @@ -347,7 +349,7 @@ def handle_traceback(*args, **kwargs): # pragma: no cover try: from terra import settings zone = settings.terra.zone - except: + except Exception: zone = 'preconfig' print(f'Exception in {zone} on {platform.node()}', file=sys.stderr) @@ -409,9 +411,10 @@ def configure_logger(self, sender=None, signal=None, **kwargs): self.root_logger.removeHandler(self.preconfig_main_log_handler) self.root_logger.removeHandler(self.tmp_handler) - settings_dump = os.path.join(settings.processing_dir, + settings_dump = os.path.join( + settings.processing_dir, datetime.now(timezone.utc).strftime( - f'settings_{settings.terra.uuid}_%Y_%m_%d_%H_%M_%S_%f.json')) + f'settings_{settings.terra.uuid}_%Y_%m_%d_%H_%M_%S_%f.json')) with open(settings_dump, 'w') as fid: fid.write(TerraJSONEncoder.dumps(settings, indent=2)) @@ -465,28 +468,27 @@ def reconfigure_logger(self, sender=None, signal=None, **kwargs): self.set_level_and_formatter() -class TerraAddFilter(logging.Filter): +class TerraAddFilter(Filter): def filter(self, record): if not hasattr(record, 'hostname'): record.hostname = platform.node() if not hasattr(record, 'zone'): try: - from terra import settings if terra.settings.configured: record.zone = terra.settings.terra.zone else: record.zone = 'preconfig' - except: + except BaseException: record.zone = 'preconfig' return True -class StdErrFilter(logging.Filter): +class StdErrFilter(Filter): def filter(self, record): return not getattr(record, 'skip_stderr', False) -class SkipStdErrAddFilter(logging.Filter): +class SkipStdErrAddFilter(Filter): def filter(self, record): record.skip_stderr = getattr(record, 'skip_stderr', True) return True @@ -494,6 +496,7 @@ def filter(self, record): class ColorFormatter(Formatter): use_color = True + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) # self.use_color = use_color @@ -519,27 +522,6 @@ def format(self, record): return super().format(record) -# def show_log(k, v): -# def show_dict_fields(prefix, dict1): -# for fld,val in dict1.items(): -# print('%s%s=%s' %(prefix, fld,val) ) - -# if not isinstance(v, logging.PlaceHolder): -# print('+ [%s] {%s} (%s) ' % (str.ljust( k, 20), str(v.__class__)[8:-2], logging.getLevelName(v.level)) ) -# print(str.ljust( '-------------------------',20) ) -# show_dict_fields(' -', v.__dict__) - -# for h in v.handlers: -# print(' +++%s (%s)' %(str(h.__class__)[8:-2], logging.getLevelName(h.level) )) -# show_dict_fields(' -', h.__dict__) - -# # from https://github.com/mickeyperlstein/logging_debugger/blob/master/__init__.py -# def show_logs_and_handlers(): -# show_log('root', logging.getLogger('')) -# for k,v in logging.Logger.manager.loggerDict.items(): -# show_log(k,v) - - class Logger(Logger_original): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -686,6 +668,7 @@ def handle_warning(message, category, filename, lineno, file=None, line=None): # Get the logger here, AFTER all the changes to the logger class logger = getLogger(__name__) + def _setup_terra_logger(): # Must be import signal after getLogger is defined... 
Currently this is # imported from logger. But if a custom getLogger is defined eventually, it @@ -703,6 +686,7 @@ def _setup_terra_logger(): return logs + # Disable log setup for unittests. Can't use settings here ;) if os.environ.get('TERRA_UNITTEST', None) != "1": # pragma: no cover - _logs = _setup_terra_logger() \ No newline at end of file + _logs = _setup_terra_logger() diff --git a/terra/task.py b/terra/task.py index d55c392f..59f89fcc 100644 --- a/terra/task.py +++ b/terra/task.py @@ -1,5 +1,3 @@ -import json -from os import environ as env import os from tempfile import gettempdir @@ -9,7 +7,6 @@ from terra import settings from terra.core.settings import TerraJSONEncoder -from terra.executor import Executor import terra.logger import terra.compute.utils from terra.logger import getLogger @@ -41,7 +38,7 @@ def _get_volume_mappings(self): reverse_compute_volume_map.reverse() reverse_executor_volume_map = [[x[1], x[0]] - for x in executor_volume_map] + for x in executor_volume_map] reverse_executor_volume_map.reverse() else: @@ -92,13 +89,15 @@ def __call__(self, *args, **kwargs): with settings: # Calculate the exector's mapped version of the runner's settings compute_volume_map, reverse_compute_volume_map, \ - executor_volume_map, reverse_executor_volume_map = \ + executor_volume_map, reverse_executor_volume_map = \ self._get_volume_mappings() # Load the executor version of the runner's settings settings._wrapped.clear() - settings._wrapped.update(self.translate_paths(self.request.settings, - reverse_compute_volume_map, executor_volume_map)) + settings._wrapped.update(self.translate_paths( + self.request.settings, + reverse_compute_volume_map, + executor_volume_map)) # This is needed here because I just loaded settings from a runner! settings.terra.zone = 'task' @@ -116,14 +115,16 @@ def __call__(self, *args, **kwargs): args_only = kwargs.pop(ARGS, ()) kwargs.update(kwargs.pop(KWARGS, ())) kwargs = self.translate_paths(kwargs, - reverse_compute_volume_map, executor_volume_map) + reverse_compute_volume_map, + executor_volume_map) # Set up logger to talk to master controller terra.logger._logs.reconfigure_logger(pre_run_task=True) return_value = self.run(*args_only, **kwargs) # Calculate the runner mapped version of the executor's return value return_value = self.translate_paths(return_value, - reverse_executor_volume_map, compute_volume_map) + reverse_executor_volume_map, + compute_volume_map) else: # Must call (synchronous) apply or python __call__ with no volume # mappings diff --git a/terra/tests/__init__.py b/terra/tests/__init__.py index b7904075..a4d1bfa7 100644 --- a/terra/tests/__init__.py +++ b/terra/tests/__init__.py @@ -1,21 +1,17 @@ import os import warnings -# from terra.core.signals import logger_configure, logger_reconfigure - - -# # Disconnect signal receivers -# logger_configure.receivers = [] -# logger_reconfigure.receivers = [] original_environ = os.environ.copy() + # Use this as a package level setup def load_tests(loader, standard_tests, pattern): if os.environ.get('TERRA_UNITTEST', None) != "1": - warnings.warn('WARNING: Running terra tests without setting TERRA_UNITTEST will ' - 'result in side effects such as extraneouse log files being ' - 'generated') + warnings.warn( + 'WARNING: Running terra tests without setting TERRA_UNITTEST will ' + 'result in side effects such as extraneouse log files being ' + 'generated') this_dir = os.path.dirname(__file__) package_tests = loader.discover(start_dir=this_dir, pattern=pattern) diff --git a/terra/tests/demo/__main__.py 
b/terra/tests/demo/__main__.py index 1fc7c53c..2b0a8e8c 100644 --- a/terra/tests/demo/__main__.py +++ b/terra/tests/demo/__main__.py @@ -16,11 +16,13 @@ from terra.core.exceptions import ImproperlyConfigured from terra.utils.cli import FullPaths, ArgumentParser + @settings_property def singularity_unset(self): raise ImproperlyConfigured('You must to set --compose and --service for ' 'singularity') + def demo_templates(): docker = { "demo": {"compose": os.path.join(env['TERRA_TERRA_DIR'], diff --git a/terra/tests/demo/runners/demo1.py b/terra/tests/demo/runners/demo1.py index b919111b..0861a97f 100644 --- a/terra/tests/demo/runners/demo1.py +++ b/terra/tests/demo/runners/demo1.py @@ -5,13 +5,6 @@ configured to do so. May result in a small amount of billing. ''' -import argparse -from os import environ as env -import tempfile -import os -import json -import pydoc - from terra.utils.cli import ArgumentParser from terra import settings from terra.logger import getLogger @@ -22,6 +15,7 @@ def main(args=None): settings.terra.zone logger.critical('Demo 1') + if __name__ == '__main__': ArgumentParser().parse_args() main() diff --git a/terra/tests/demo/runners/demo2.py b/terra/tests/demo/runners/demo2.py index 831ab4c5..0621fae8 100644 --- a/terra/tests/demo/runners/demo2.py +++ b/terra/tests/demo/runners/demo2.py @@ -24,7 +24,7 @@ def main(args=None): with Executor(max_workers=4) as executor: for x in range(1, 3): for y in range(4, 6): - futures[executor.submit(demo2, x, y)] = (x,y) + futures[executor.submit(demo2, x, y)] = (x, y) for future in concurrent.futures.as_completed(futures): logger.info(f'Completed: {settings.terra.zone} {futures[future]}') @@ -32,7 +32,6 @@ def main(args=None): logger.critical('Demo 2 Done') - if __name__ == '__main__': ArgumentParser().parse_args() main() diff --git a/terra/tests/demo/services.py b/terra/tests/demo/services.py index f4638f1b..f33629e8 100644 --- a/terra/tests/demo/services.py +++ b/terra/tests/demo/services.py @@ -51,6 +51,7 @@ class Demo2(Demo1): ''' Simple Demo Service ''' command = ['python', '-m', 'terra.tests.demo.runners.demo2'] + @DockerCompute.register(Demo2) class Demo2_docker(DockerService, Demo2): def __init__(self): diff --git a/terra/tests/demo/tasks.py b/terra/tests/demo/tasks.py index e9573050..e38b650d 100644 --- a/terra/tests/demo/tasks.py +++ b/terra/tests/demo/tasks.py @@ -8,4 +8,4 @@ @shared_task def demo2(self, x, y): logger.critical(f"Task: {x} {y}") - return x * y \ No newline at end of file + return x * y diff --git a/terra/tests/demo/workflows.py b/terra/tests/demo/workflows.py index 91508832..61fe1081 100644 --- a/terra/tests/demo/workflows.py +++ b/terra/tests/demo/workflows.py @@ -1,12 +1,12 @@ -from terra import settings from terra.compute import compute from terra.logger import getLogger logger = getLogger(__name__) + class DemoWorkflow: def demonate(self): logger.critical('Starting demo workflow') compute.run('terra.tests.demo.services.Demo1') compute.run('terra.tests.demo.services.Demo2') - logger.critical('Ran demo workflow') \ No newline at end of file + logger.critical('Ran demo workflow') diff --git a/terra/tests/test_compute_base.py b/terra/tests/test_compute_base.py index 6db849da..55489a18 100644 --- a/terra/tests/test_compute_base.py +++ b/terra/tests/test_compute_base.py @@ -51,20 +51,19 @@ class Foo: class TestService(self.base.BaseService): pass - class TestService_base(Foo.TestService, self.base.BaseService): pass # Register a class class, just for fun 
self.base.BaseCompute.register(Foo.TestService)(TestService_base) - self.assertIn(Foo.TestService.__module__ + '.' + \ - Foo.TestService.__qualname__, + self.assertIn(Foo.TestService.__module__ + '.' + + Foo.TestService.__qualname__, self.base.services) with self.assertRaises(self.base.AlreadyRegisteredException, - msg='Compute command "car" does not have a service ' - 'implementation "car_service"'): + msg='Compute command "car" does not have a ' + 'service implementation "car_service"'): self.base.BaseCompute.register(Foo.TestService)(lambda x: 1) def test_getattr(self): diff --git a/terra/tests/test_core_settings.py b/terra/tests/test_core_settings.py index f0db8cda..ce605f7a 100644 --- a/terra/tests/test_core_settings.py +++ b/terra/tests/test_core_settings.py @@ -8,11 +8,7 @@ from envcontext import EnvironmentContext -from .utils import ( - TestCase, TestLoggerCase, TestLoggerConfigureCase, - TestSettingsUnconfiguredCase -) - +from .utils import TestCase, TestLoggerCase, TestLoggerConfigureCase from terra import settings from terra.core.exceptions import ImproperlyConfigured from terra.core.settings import ( @@ -671,7 +667,8 @@ def last_test_import_settings(self): break time.sleep(0.001) else: - self.assertFalse(terra.logger._logs.tcp_logging_server.ready, 'TCP Server did not shut down within a second') + self.assertFalse(terra.logger._logs.tcp_logging_server.ready, + 'TCP Server did not shut down within a second') # Picky windows import terra.logger diff --git a/terra/tests/test_executor_utils.py b/terra/tests/test_executor_utils.py index 15a1f60e..457555d3 100644 --- a/terra/tests/test_executor_utils.py +++ b/terra/tests/test_executor_utils.py @@ -1,4 +1,4 @@ -from unittest import mock, SkipTest +from unittest import SkipTest import concurrent.futures from terra import settings diff --git a/terra/tests/test_logger.py b/terra/tests/test_logger.py index a246dbfb..37a4666e 100644 --- a/terra/tests/test_logger.py +++ b/terra/tests/test_logger.py @@ -2,24 +2,16 @@ import io import os import sys -import json import logging import uuid -import tempfile import platform import warnings from terra.core.exceptions import ImproperlyConfigured from terra import settings -from .utils import ( - TestCase, make_traceback, TestNamedTemporaryFileCase, - TestSettingsUnconfiguredCase, - TestLoggerConfigureCase -) +from .utils import TestCase, make_traceback, TestLoggerConfigureCase from terra import logger -from terra.core import signals -# import terra.compute.utils class TestHandlerLoggingContext(TestCase): def test_handler_logging_context(self): @@ -44,7 +36,6 @@ def test_handler_logging_context(self): class TestLogger(TestLoggerConfigureCase): - pass def test_setup_working(self): self.assertFalse(settings.configured) self.assertEqual(settings.processing_dir, self.temp_dir.name) @@ -83,7 +74,7 @@ def save_exec_info(exc_type, exc, tb): # with self.assertRaises(ZeroDivisionError): tb = make_traceback() sys.excepthook(ZeroDivisionError, - ZeroDivisionError('division by almost zero'), tb) + ZeroDivisionError('division by almost zero'), tb) self.assertIn('division by almost zero', str(cm.output)) # Test stack trace stuff in there @@ -106,7 +97,8 @@ def test_logs_stderr(self): def test_logs_temp_file(self): temp_handler = [ h for h in self._logs.root_logger.handlers - if hasattr(h, 'stream') and h.stream.name == self._logs.tmp_file.name][0] + if hasattr(h, 'stream') + and h.stream.name == self._logs.tmp_file.name][0] # Test that log everything is set self.assertEqual(temp_handler.level, logger.NOTSET) 
self.assertEqual(self._logs.root_logger.level, logger.NOTSET) diff --git a/terra/tests/test_other.py b/terra/tests/test_other.py new file mode 100644 index 00000000..7ea5c15d --- /dev/null +++ b/terra/tests/test_other.py @@ -0,0 +1,9 @@ +import os + +from .utils import TestCase +from terra.tests import original_environ + + +class TestOtherThings(TestCase): + def last_test_environ_change(self): + self.assertEqual(os.environ, original_environ) diff --git a/terra/tests/test_signals.py b/terra/tests/test_signals.py index 024a80c5..b2bb7fb9 100644 --- a/terra/tests/test_signals.py +++ b/terra/tests/test_signals.py @@ -1,6 +1,6 @@ from terra.core.signals import Signal, receiver -import terra.core.signals as signals -from .utils import TestCase, TestSignalCase +from .utils import TestSignalCase + class TestSignals(TestSignalCase): def signal_handle1(self, sender, **kwargs): @@ -160,5 +160,5 @@ def decorated2(sender, **kwargs): # signal.receivers, # msg="If you are seeing this, one of the other unit tests has " # "left a signal connected. This side effect should " -# "be prevented by disconnecting any functions you connected to a " -# "signal.") +# "be prevented by disconnecting any functions you connected to " +# "a signal.") diff --git a/terra/tests/test_utils_workflow.py b/terra/tests/test_utils_workflow.py index 56f2ca54..735163e3 100644 --- a/terra/tests/test_utils_workflow.py +++ b/terra/tests/test_utils_workflow.py @@ -1,6 +1,4 @@ -from unittest import mock import re -import os import json from terra.utils.workflow import resumable, AlreadyRunException diff --git a/terra/tests/utils.py b/terra/tests/utils.py index 9af2ee6a..8db1049f 100644 --- a/terra/tests/utils.py +++ b/terra/tests/utils.py @@ -23,7 +23,8 @@ def __init__(self, *args, **kwargs): def setUp(self): # Useful for tests that set this - self.patches.append(mock.patch.dict(os.environ, + self.patches.append(mock.patch.dict( + os.environ, {'TERRA_SETTINGS_FILE': self.settings_filename})) # Use settings self.patches.append(mock.patch.object(settings, '_wrapped', None)) From be0e0f857d94cc4478dc69e16cc5ec4b22c0f3c5 Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Mon, 1 Jun 2020 14:14:13 -0400 Subject: [PATCH 63/94] Docs Signed-off-by: Andy Neff --- docs/conf.py | 3 +++ external/vsi_common | 2 +- terra/task.py | 3 ++- terra/tests/utils.py | 6 +++++- 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 1e3a542e..c0615110 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -51,6 +51,7 @@ intersphinx_mapping = { 'python': ('https://docs.python.org/3.6', None), 'vsi_common': ('https://visionsystemsinc.github.io/vsi_common/', None), + 'celery': ('https://docs.celeryproject.org/en/stable/', None) } # Add any paths that contain templates here, relative to this directory. 
@@ -103,6 +104,8 @@ ('py:class', 'json.encoder.JSONEncoder'), ('py:class', 'concurrent.futures._base.Executor'), ('py:class', 'concurrent.futures._base.Future'), + ('py:class', 'concurrent.futures.process.ProcessPoolExecutor'), + ('py:class', 'concurrent.futures.thread.ThreadPoolExecutor'), ('py:class', 'argparse._AppendAction'), ('py:data', 'logging.DEBUG'), ('py:data', 'logging.WARNING'), diff --git a/external/vsi_common b/external/vsi_common index 2047fe85..7d6857d4 160000 --- a/external/vsi_common +++ b/external/vsi_common @@ -1 +1 @@ -Subproject commit 2047fe85503b7884735cae953e3dcf1c8217343c +Subproject commit 7d6857d497a7b1a2446d988f367f8f52dcd08836 diff --git a/terra/task.py b/terra/task.py index 59f89fcc..8a071aca 100644 --- a/terra/task.py +++ b/terra/task.py @@ -1,7 +1,8 @@ import os from tempfile import gettempdir -from celery import Task, shared_task as original_shared_task +from celery import shared_task as original_shared_task +from celery.app.task import Task from vsi.tools.python import args_to_kwargs, ARGS, KWARGS diff --git a/terra/tests/utils.py b/terra/tests/utils.py index 8db1049f..b5444025 100644 --- a/terra/tests/utils.py +++ b/terra/tests/utils.py @@ -53,9 +53,13 @@ def setUp(self): with open(self.settings_filename, 'w') as fid: json.dump(config, fid) + super().setUp() + + # Run _setup_terra_logger AFTER the patches have been applied, or else the + # temp files will be in /tmp, not self.temp_dir, and the terra_initial_tmp_ + # files won't get auto cleaned up import terra.logger self._logs = terra.logger._setup_terra_logger() - super().setUp() def tearDown(self): sys.excepthook = self.original_system_hook From f5e8fe9b52e976ab70989b1d1acc19b58bee8777 Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Mon, 1 Jun 2020 15:04:23 -0400 Subject: [PATCH 64/94] Fix CI Signed-off-by: Andy Neff --- Justfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Justfile b/Justfile index 9f4c2307..26be8785 100755 --- a/Justfile +++ b/Justfile @@ -220,7 +220,8 @@ function terra_caseify() terra_test-pep8) # Run pep8 test justify terra pep8 echo "Running flake8..." 
- Terra_Pipenv run bash -c 'flake8 \ + Terra_Pipenv run bash -c 'cd ${TERRA_TERRA_DIR}; + flake8 \ "${TERRA_TERRA_DIR}/terra"' ;; From 30d51d8de99ecfc0d07b5a18c96d05545b27767a Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Mon, 1 Jun 2020 16:36:18 -0400 Subject: [PATCH 65/94] Minor cleanup Signed-off-by: Andy Neff --- external/vsi_common | 2 +- terra/executor/celery/__init__.py | 5 ++--- terra/executor/utils.py | 3 --- terra/logger.py | 1 - 4 files changed, 3 insertions(+), 8 deletions(-) diff --git a/external/vsi_common b/external/vsi_common index 7d6857d4..b69c6d51 160000 --- a/external/vsi_common +++ b/external/vsi_common @@ -1 +1 @@ -Subproject commit 7d6857d497a7b1a2446d988f367f8f52dcd08836 +Subproject commit b69c6d51d40db4768ada2fc2e4aa9903fca6a335 diff --git a/terra/executor/celery/__init__.py b/terra/executor/celery/__init__.py index 1e9d73a6..dc6b447c 100644 --- a/terra/executor/celery/__init__.py +++ b/terra/executor/celery/__init__.py @@ -14,10 +14,9 @@ main_name = env.get('TERRA_CELERY_MAIN_NAME', None) if main_name is None: try: - # FIXME __spec__ is None (__main__ is builtin) main_name = sys.modules['__main__'].__spec__.name - # REVIEW can we catch a specific exception here, like AttributeError - except Exception: + except AttributeError: + # if __spec__ is None, then __main__ is a builtin main_name = "main_name_unset__set_TERRA_CELERY_MAIN_NAME" app = Celery(main_name) diff --git a/terra/executor/utils.py b/terra/executor/utils.py index 75bbcc4e..ad199ac3 100644 --- a/terra/executor/utils.py +++ b/terra/executor/utils.py @@ -12,9 +12,6 @@ class ExecutorHandler(ClassHandler): the ``concurrent.futures`` executor class. ''' - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - def _connect_backend(self): ''' Loads the executor backend's base module, given either a fully qualified diff --git a/terra/logger.py b/terra/logger.py index 95ba789a..cb102904 100644 --- a/terra/logger.py +++ b/terra/logger.py @@ -180,7 +180,6 @@ class LogRecordSocketReceiver(socketserver.ThreadingTCPServer): def __init__(self, host='localhost', port=logging.handlers.DEFAULT_TCP_LOGGING_PORT, handler=LogRecordStreamHandler): - print('SGR - LRSR init') socketserver.ThreadingTCPServer.__init__(self, (host, port), handler) self.abort = False self.ready = False From 9987b7f4a3370d5451023348ac04f9efc8a8514c Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Tue, 2 Jun 2020 11:39:22 -0400 Subject: [PATCH 66/94] Minor cleanup Signed-off-by: Andy Neff --- terra/core/settings.py | 8 ++++++-- terra/core/utils.py | 5 ----- terra/logger.py | 2 +- terra/workflow.py | 7 ------- 4 files changed, 7 insertions(+), 15 deletions(-) diff --git a/terra/core/settings.py b/terra/core/settings.py index 2bd6a7d3..a5c7078b 100644 --- a/terra/core/settings.py +++ b/terra/core/settings.py @@ -148,6 +148,7 @@ import os from uuid import uuid4 +# from datetime import datetime from logging.handlers import DEFAULT_TCP_LOGGING_PORT from inspect import isfunction from functools import wraps @@ -299,6 +300,7 @@ def terra_uuid(self): # unlike other settings, this should NOT be overwritten by a # config.json file, there is currently nothing to prevent that 'zone': 'controller', + # 'start_time': datetime.now(), # Not json serializable yet 'uuid': terra_uuid }, 'status_file': status_file, @@ -530,10 +532,10 @@ def read_json(json_file): for pattern in json_include_suffixes)), lambda key, value: read_json(value)) - # Importing these here is intentional + # Importing these here is intentional, it guarantees the signals are + # 
connected so that executor and computes can setup logging if need be import terra.executor # noqa import terra.compute # noqa - # compute._connection # call a cached property from terra.core.signals import post_settings_configured post_settings_configured.send(sender=self) @@ -694,6 +696,8 @@ def default(self, obj): if obj._wrapped is None: raise ImproperlyConfigured('Settings not initialized') return TerraJSONEncoder.serializableSettings(obj._wrapped) + # elif isinstance(obj, datetime): + # return str(obj) return JSONEncoder.default(self, obj) # pragma: no cover @staticmethod diff --git a/terra/core/utils.py b/terra/core/utils.py index ce0df3b8..7aae9eee 100644 --- a/terra/core/utils.py +++ b/terra/core/utils.py @@ -193,8 +193,3 @@ def _connect_backend(self, *args, **kwargs): def __call__(self, *args, **kwargs): return self._connection(*args, **kwargs) - - -class ThreadedHandler(Handler): - def _connection(self): - return self._connect_backend() diff --git a/terra/logger.py b/terra/logger.py index cb102904..92231c81 100644 --- a/terra/logger.py +++ b/terra/logger.py @@ -306,7 +306,7 @@ def handle_exception(exc_type, exc_value, exc_traceback): logger.critical("Uncaught exception", extra={'skip_stderr': True}, exc_info=(exc_type, exc_value, exc_traceback)) except Exception: # pragma: no cover - print('There was an exception logging in the execpetion handler!', + print('There was an exception logging in the exception handler!', file=sys.stderr) traceback.print_exc() diff --git a/terra/workflow.py b/terra/workflow.py index 1f8fd060..778e3fea 100644 --- a/terra/workflow.py +++ b/terra/workflow.py @@ -1,6 +1,3 @@ -from uuid import uuid4 -from datetime import datetime - from terra import settings from terra.logger import getLogger logger = getLogger(__name__) @@ -11,10 +8,6 @@ class BaseWorkflow: The base class for all Terra Workflows ''' - def __init__(self): - self.uuid = uuid4() - self.start_time = datetime.now() - def run(self): pass From e9fbd355cc7a7c2dfb55de1c71072534cab59afa Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Mon, 4 May 2020 19:40:43 -0400 Subject: [PATCH 67/94] Fixing tests Signed-off-by: Andy Neff --- terra/compute/virtualenv.py | 6 +++--- terra/core/settings.py | 4 +++- terra/logger.py | 17 ++++++++++++++++- terra/task.py | 6 ++++-- terra/tests/test_executor_dummy.py | 10 ++++++++++ terra/tests/test_logger.py | 16 ++++++++++++++-- 6 files changed, 50 insertions(+), 9 deletions(-) diff --git a/terra/compute/virtualenv.py b/terra/compute/virtualenv.py index fc807b8b..a7b679c3 100644 --- a/terra/compute/virtualenv.py +++ b/terra/compute/virtualenv.py @@ -99,9 +99,6 @@ class Service(BaseService): ''' def pre_run(self): - """ - - """ super().pre_run() # Create a temp directory, store it in this instance @@ -128,4 +125,7 @@ def post_run(self): super().post_run() # Delete temp_dir if self.env.get('TERRA_KEEP_TEMP_DIR', None) != "1": + # Calling this just prevents the annoying warning from saying "Hey, you + # know that automatic cleanup? It happened! Maybe you should manually + # call the automatic cleanup, cause yeah, that makes sense!" 
self.temp_dir.cleanup() diff --git a/terra/core/settings.py b/terra/core/settings.py index a5c7078b..7c3a3abf 100644 --- a/terra/core/settings.py +++ b/terra/core/settings.py @@ -527,7 +527,9 @@ def read_json(json_file): nested_patch_inplace( self._wrapped, - lambda key, value: (isinstance(key, str) + lambda key, value: (isinstance(key, str) and + (isinstance(value, str) or + getattr(value, 'settings_property', False)) and any(key.endswith(pattern) for pattern in json_include_suffixes)), lambda key, value: read_json(value)) diff --git a/terra/logger.py b/terra/logger.py index 92231c81..c5f0790b 100644 --- a/terra/logger.py +++ b/terra/logger.py @@ -213,6 +213,12 @@ class _SetupTerraLogger(): def __init__(self): self._configured = False + self.root_logger = None + self.stderr_handler = None + + def setup(self): + # This must always use logging's getLogger. If a custom Terra getLogger is + # ever defined, don't use it to get the root logger self.root_logger = logging.getLogger(None) self.root_logger.setLevel(0) @@ -613,6 +619,10 @@ def handle_warning(message, category, filename, lineno, file=None, line=None): it will call warnings.formatwarning and will log the resulting string to a warnings logger named "py.warnings" with level logging.WARNING. """ + + # import traceback + # traceback.print_stack() + if file is not None: # I don't actually know how this can be not None if _warnings_showwarning is not None: # pragma: no cover _warnings_showwarning(message, category, filename, lineno, file, line) @@ -622,8 +632,13 @@ def handle_warning(message, category, filename, lineno, file=None, line=None): logger.warning("%s", s) +# Ordinarily we would use __file__ for this, but frozen modules don't always +# have __file__ set, for some reason (see Issue CPython#21736). Thus, we get +# the filename from a handy code object from a function defined in this +# module. (There's no particular reason for picking debug1.) 
_srcfiles = (logging_srcfile, - os.path.normcase(Logger.debug1.__code__.co_filename)) + os.path.normcase(Logger.debug1.__code__.co_filename), + warnings.showwarning.__code__.co_filename) DEBUG1 = 10 diff --git a/terra/task.py b/terra/task.py index 8a071aca..eb54de43 100644 --- a/terra/task.py +++ b/terra/task.py @@ -129,12 +129,14 @@ def __call__(self, *args, **kwargs): else: # Must call (synchronous) apply or python __call__ with no volume # mappings - original_zone = settings.terra.zone + if settings.configured: + original_zone = settings.terra.zone settings.terra.zone = 'task' try: return_value = self.run(*args, **kwargs) finally: - settings.terra.zone = original_zone + if settings.configured: + settings.terra.zone = original_zone return return_value # # from https://stackoverflow.com/a/45333231/1771778 diff --git a/terra/tests/test_executor_dummy.py b/terra/tests/test_executor_dummy.py index 77c62c87..04283b1c 100644 --- a/terra/tests/test_executor_dummy.py +++ b/terra/tests/test_executor_dummy.py @@ -1,3 +1,6 @@ +from unittest import mock + +from terra import settings from terra.executor import dummy from .utils import TestSettingsConfiguredCase @@ -12,9 +15,16 @@ def test2(x): class TestExecutorDummy(TestSettingsConfiguredCase): def setUp(self): + # Use settings + self.patches.append(mock.patch.object(settings, '_wrapped', None)) + super().setUp() self.executor = dummy.DummyExecutor() + settings.configure({ + 'executor': {'type': 'dummy'}, + 'processing_dir': self.temp_dir.name}) + def test_simple(self): future = self.executor.submit(test1, 15) self.assertEqual(future.result(), None) diff --git a/terra/tests/test_logger.py b/terra/tests/test_logger.py index 37a4666e..a7655973 100644 --- a/terra/tests/test_logger.py +++ b/terra/tests/test_logger.py @@ -11,6 +11,11 @@ from terra import settings from .utils import TestCase, make_traceback, TestLoggerConfigureCase from terra import logger +<<<<<<< HEAD +======= +from terra.core import signals +from terra.executor.utils import Executor +>>>>>>> Fixing tests class TestHandlerLoggingContext(TestCase): @@ -127,7 +132,7 @@ def test_hostname(self): "Hiya", (), None) self.assertTrue(test_logger.filter(record)) self.assertTrue(self._logs.stderr_handler.filter(record)) - self.assertIn(':preconfig)', self._logs.stderr_handler.format(record)) + self.assertIn(f'({platform.node()}:preconfig)', self._logs.stderr_handler.format(record)) settings._setup() @@ -135,7 +140,7 @@ def test_hostname(self): "Hiya", (), None) self.assertTrue(test_logger.filter(record)) self.assertTrue(self._logs.stderr_handler.filter(record)) - self.assertIn(f'({platform.node()}:', + self.assertIn(f'({platform.node()}:controller)', self._logs.stderr_handler.format(record)) # Test https://stackoverflow.com/q/19615876/4166604 @@ -268,3 +273,10 @@ def last_test_logger(self): "prevented for you automatically. 
If you are seeing this, you " "have configured logging manually, and should make sure you " "restore it.") + + def last_test_executor_logfile(self): + from terra.executor.utils import Executor + + self.assertIsNone(getattr(Executor, '_log_file', None)) + # self.assertIsNone(getattr(Executor, '_logging_handler', None)) + # self.assertFalse(True) \ No newline at end of file From 1eb80f69bfb482a7a4c515aec3362607f727c9c6 Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Wed, 6 May 2020 13:12:17 -0400 Subject: [PATCH 68/94] Working on logging/executor mock in tests Signed-off-by: Andy Neff --- terra/tests/test_logger.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/terra/tests/test_logger.py b/terra/tests/test_logger.py index a7655973..cac5e79f 100644 --- a/terra/tests/test_logger.py +++ b/terra/tests/test_logger.py @@ -11,11 +11,7 @@ from terra import settings from .utils import TestCase, make_traceback, TestLoggerConfigureCase from terra import logger -<<<<<<< HEAD -======= -from terra.core import signals from terra.executor.utils import Executor ->>>>>>> Fixing tests class TestHandlerLoggingContext(TestCase): @@ -274,9 +270,13 @@ def last_test_logger(self): "have configured logging manually, and should make sure you " "restore it.") + @mock.patch.object(settings, '_wrapped', None) + @mock.patch.dict(Executor.__dict__) def last_test_executor_logfile(self): from terra.executor.utils import Executor + self.assertNotIn('_connection', Executor.__dict__) + + settings.configure({}) self.assertIsNone(getattr(Executor, '_log_file', None)) - # self.assertIsNone(getattr(Executor, '_logging_handler', None)) - # self.assertFalse(True) \ No newline at end of file + self.assertIsNone(getattr(Executor, '_logging_handler', None)) From b7fb55dab33ade98610179dfc00ec0ca7442d52e Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Tue, 2 Jun 2020 12:16:51 -0400 Subject: [PATCH 69/94] Rebase forgotten commit onto celery Signed-off-by: Andy Neff --- terra/logger.py | 6 ------ terra/tests/test_executor_dummy.py | 10 ---------- terra/tests/test_logger.py | 12 +----------- 3 files changed, 1 insertion(+), 27 deletions(-) diff --git a/terra/logger.py b/terra/logger.py index c5f0790b..c9235aa2 100644 --- a/terra/logger.py +++ b/terra/logger.py @@ -213,10 +213,7 @@ class _SetupTerraLogger(): def __init__(self): self._configured = False - self.root_logger = None - self.stderr_handler = None - def setup(self): # This must always use logging's getLogger. If a custom Terra getLogger is # ever defined, don't use it to get the root logger self.root_logger = logging.getLogger(None) @@ -620,9 +617,6 @@ def handle_warning(message, category, filename, lineno, file=None, line=None): warnings logger named "py.warnings" with level logging.WARNING. 
""" - # import traceback - # traceback.print_stack() - if file is not None: # I don't actually know how this can be not None if _warnings_showwarning is not None: # pragma: no cover _warnings_showwarning(message, category, filename, lineno, file, line) diff --git a/terra/tests/test_executor_dummy.py b/terra/tests/test_executor_dummy.py index 04283b1c..77c62c87 100644 --- a/terra/tests/test_executor_dummy.py +++ b/terra/tests/test_executor_dummy.py @@ -1,6 +1,3 @@ -from unittest import mock - -from terra import settings from terra.executor import dummy from .utils import TestSettingsConfiguredCase @@ -15,16 +12,9 @@ def test2(x): class TestExecutorDummy(TestSettingsConfiguredCase): def setUp(self): - # Use settings - self.patches.append(mock.patch.object(settings, '_wrapped', None)) - super().setUp() self.executor = dummy.DummyExecutor() - settings.configure({ - 'executor': {'type': 'dummy'}, - 'processing_dir': self.temp_dir.name}) - def test_simple(self): future = self.executor.submit(test1, 15) self.assertEqual(future.result(), None) diff --git a/terra/tests/test_logger.py b/terra/tests/test_logger.py index cac5e79f..bc1e4757 100644 --- a/terra/tests/test_logger.py +++ b/terra/tests/test_logger.py @@ -121,6 +121,7 @@ def test_formatter(self): self.assertTrue(self._logs.stderr_handler.filter(record)) self.assertEqual(self._logs.stderr_handler.format(record), "foo bar Hiya") + @mock.patch('terra.logger.ColorFormatter.use_color', False) def test_hostname(self): test_logger = logger.getLogger(f'{__name__}.test_hostname') @@ -269,14 +270,3 @@ def last_test_logger(self): "prevented for you automatically. If you are seeing this, you " "have configured logging manually, and should make sure you " "restore it.") - - @mock.patch.object(settings, '_wrapped', None) - @mock.patch.dict(Executor.__dict__) - def last_test_executor_logfile(self): - from terra.executor.utils import Executor - - self.assertNotIn('_connection', Executor.__dict__) - - settings.configure({}) - self.assertIsNone(getattr(Executor, '_log_file', None)) - self.assertIsNone(getattr(Executor, '_logging_handler', None)) From 54def9f2a0a77c2989295189d79f2ef59a9dab87 Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Tue, 2 Jun 2020 12:43:03 -0400 Subject: [PATCH 70/94] Pep 8 Signed-off-by: Andy Neff --- terra/core/settings.py | 6 +++--- terra/tests/test_logger.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/terra/core/settings.py b/terra/core/settings.py index 7c3a3abf..8d27dcc2 100644 --- a/terra/core/settings.py +++ b/terra/core/settings.py @@ -527,9 +527,9 @@ def read_json(json_file): nested_patch_inplace( self._wrapped, - lambda key, value: (isinstance(key, str) and - (isinstance(value, str) or - getattr(value, 'settings_property', False)) + lambda key, value: (isinstance(key, str) + and (isinstance(value, str) + or getattr(value, 'settings_property', False)) and any(key.endswith(pattern) for pattern in json_include_suffixes)), lambda key, value: read_json(value)) diff --git a/terra/tests/test_logger.py b/terra/tests/test_logger.py index bc1e4757..7c49b50b 100644 --- a/terra/tests/test_logger.py +++ b/terra/tests/test_logger.py @@ -11,7 +11,6 @@ from terra import settings from .utils import TestCase, make_traceback, TestLoggerConfigureCase from terra import logger -from terra.executor.utils import Executor class TestHandlerLoggingContext(TestCase): @@ -129,7 +128,8 @@ def test_hostname(self): "Hiya", (), None) self.assertTrue(test_logger.filter(record)) 
self.assertTrue(self._logs.stderr_handler.filter(record)) - self.assertIn(f'({platform.node()}:preconfig)', self._logs.stderr_handler.format(record)) + self.assertIn(f'({platform.node()}:preconfig)', + self._logs.stderr_handler.format(record)) settings._setup() From 3eb2a843b578a7c1e4566488c2c0cde4e95ddb58 Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Wed, 3 Jun 2020 12:27:53 -0400 Subject: [PATCH 71/94] Add warning for Thread Pool Signed-off-by: Andy Neff --- terra/executor/utils.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/terra/executor/utils.py b/terra/executor/utils.py index ad199ac3..3f3d9f71 100644 --- a/terra/executor/utils.py +++ b/terra/executor/utils.py @@ -1,4 +1,5 @@ from importlib import import_module +import warnings from terra import settings import terra.core.signals @@ -38,6 +39,12 @@ def _connect_backend(self): elif backend_name == "ThreadPoolExecutor" or \ backend_name == "concurrent.futures.ThreadPoolExecutor": from terra.executor.thread import ThreadPoolExecutor + # An example of unexpected behavior is the zone being set incorrectly in + # logging messages. But this is just one such example! + # You have been warned + warnings.warn("ThreadPoolExecutor is for testing purposes only. " + "settings.terra.zone is not threadsafe and can create " + "unexpected behavior", RuntimeWarning) return ThreadPoolExecutor elif backend_name == "ProcessPoolExecutor" or \ backend_name == "concurrent.futures.ProcessPoolExecutor": From fc8070de0809e2dbf5ce368c3ef4c584c0172e0f Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Wed, 3 Jun 2020 13:52:58 -0400 Subject: [PATCH 72/94] Remove old commented code Signed-off-by: Andy Neff --- Justfile | 5 +++-- terra/logger.py | 10 ++-------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/Justfile b/Justfile index 26be8785..117f8215 100755 --- a/Justfile +++ b/Justfile @@ -34,7 +34,7 @@ function Terra_Pipenv() ask_question "Continue?" answer_continue n [ "$answer_continue" == "0" ] && return 1 fi - PIPENV_PIPFILE="${TERRA_CWD}/Pipfile" pipenv ${@+"${@}"} || return $? + ${DRYRUN} env PIPENV_PIPFILE="${TERRA_CWD}/Pipfile" pipenv ${@+"${@}"} || return $? else Just-docker-compose -f "${TERRA_CWD}/docker-compose-main.yml" run ${TERRA_PIPENV_IMAGE-terra} pipenv ${@+"${@}"} || return $? 
fi @@ -141,7 +141,8 @@ function terra_caseify() ;; run_flower) # Start the flower server - Terra_Pipenv run python -m terra.executor.celery -A terra.executor.celery.app flower + # Flower doesn't actually need the tasks an app, so clear it + TERRA_CELERY_INCLUDE='[]' Terra_Pipenv run python -m terra.executor.celery -A terra.executor.celery.app flower ;; shutdown_celery) # Shuts down all celery works on all nodes Terra_Pipenv run python -c "from terra.executor.celery import app; app.control.broadcast('shutdown')" diff --git a/terra/logger.py b/terra/logger.py index c9235aa2..da23a357 100644 --- a/terra/logger.py +++ b/terra/logger.py @@ -499,10 +499,6 @@ def filter(self, record): class ColorFormatter(Formatter): use_color = True - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - # self.use_color = use_color - def format(self, record): if self.use_color: zone = record.__dict__['zone'] @@ -569,10 +565,8 @@ def findCaller(self, stack_info=False, stacklevel=1): break return rv - # Define _log instead of logger adapter, this works better (setLoggerClass) - # https://stackoverflow.com/a/28050837/4166604 - # def _log(self,*args, **kwargs): - # return super()._log(*args, **kwargs) + # Define _log instead of logger adapter if needed, this works better + # (setLoggerClass) https://stackoverflow.com/a/28050837/4166604 def debug1(self, msg, *args, **kwargs): ''' From 806614a5deebc4e05e03a4cbdfd464ed5e11012e Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Thu, 4 Jun 2020 12:45:20 -0400 Subject: [PATCH 73/94] Added review changes Signed-off-by: Andy Neff --- terra/compute/container.py | 4 ++-- terra/core/exceptions.py | 2 +- terra/core/settings.py | 4 ++-- terra/executor/celery/executor.py | 2 +- terra/task.py | 12 ++++++------ terra/tests/__init__.py | 2 +- terra/tests/demo/__main__.py | 2 +- terra/tests/test_compute_singularity.py | 2 +- terra/tests/utils.py | 2 +- 9 files changed, 16 insertions(+), 16 deletions(-) diff --git a/terra/compute/container.py b/terra/compute/container.py index ef3091c0..0ad4f3d3 100644 --- a/terra/compute/container.py +++ b/terra/compute/container.py @@ -28,8 +28,8 @@ def __init__(self): self.extra_compose_files = [] def pre_run(self): - # Need to run Base's pre_run first, so it has a change to update settings - # for special exectutors, etc... + # Need to run Base's pre_run first, so it has a chance to update settings + # for special executors, etc... super().pre_run() self.temp_dir = TemporaryDirectory(suffix=f"_{type(self).__name__}") diff --git a/terra/core/exceptions.py b/terra/core/exceptions.py index 3d2dbae4..c3438415 100644 --- a/terra/core/exceptions.py +++ b/terra/core/exceptions.py @@ -6,5 +6,5 @@ class ImproperlyConfigured(Exception): class ConfigurationWarning(Warning): """ - Warning for Terra may somehow be improperly configured + Warning that Terra may be improperly configured """ diff --git a/terra/core/settings.py b/terra/core/settings.py index 8d27dcc2..35bd1c05 100644 --- a/terra/core/settings.py +++ b/terra/core/settings.py @@ -473,8 +473,8 @@ def __getstate__(self): def __setstate__(self, state): self._wrapped = state['_wrapped'] - # This should NOT be done on a pre instance basis, this if only for - # the global terra.settings. So maybe this should be done in context + # This should NOT be done on a per instance basis, this is only for + # the global terra.settings. So maybe this should be done in a context # manager?? 
# from terra.core.signals import post_settings_configured # post_settings_configured.send(sender=self) diff --git a/terra/executor/celery/executor.py b/terra/executor/celery/executor.py index 7f3fbd5a..ec253483 100644 --- a/terra/executor/celery/executor.py +++ b/terra/executor/celery/executor.py @@ -253,7 +253,7 @@ def __init__(self, service_info): def configure_logger(sender, **kwargs): if settings.terra.zone == 'task': # pragma: no cover # This will never really be reached, because the task_controller will - # configure the logger, and than fork. + # configure the logger, and then fork. sender.main_log_handler = NullHandler() elif settings.terra.zone == 'task_controller': # Setup log file for use in configure diff --git a/terra/task.py b/terra/task.py index eb54de43..e9553371 100644 --- a/terra/task.py +++ b/terra/task.py @@ -34,7 +34,7 @@ def _get_volume_mappings(self): # Flip each mount point, so it goes from runner to controller reverse_compute_volume_map = [[x[1], x[0]] for x in compute_volume_map] - # Revere order. This will be important in case one mount point mounts + # Reverse order. This will be important in case one mount point mounts # inside another reverse_compute_volume_map.reverse() @@ -54,14 +54,14 @@ def translate_paths(self, payload, reverse_compute_volume_map, executor_volume_map): if reverse_compute_volume_map or executor_volume_map: # If either translation is needed, start by applying the ~ home dir - # expansion and settings_property(which wouldn't have made it through - # pure json conversion, but the ~ will + # expansion and settings_property (which wouldn't have made it through + # pure json conversion, but the ~ will) payload = TerraJSONEncoder.serializableSettings(payload) # Go from compute runner to master controller if reverse_compute_volume_map: payload = terra.compute.utils.translate_settings_paths( payload, reverse_compute_volume_map) - # Go from master controller to exector + # Go from master controller to executor if executor_volume_map: payload = terra.compute.utils.translate_settings_paths( payload, executor_volume_map) @@ -102,7 +102,7 @@ def __call__(self, *args, **kwargs): # This is needed here because I just loaded settings from a runner! 
settings.terra.zone = 'task' - # Just in case processing dir doesn't exists + # Just in case processing dir doesn't exist if not os.path.exists(settings.processing_dir): logger.critical(f'Dir "{settings.processing_dir}" is not accessible ' 'by the executor, please make sure the worker has ' @@ -111,7 +111,7 @@ def __call__(self, *args, **kwargs): logger.warning('Using temporary directory: ' f'"{settings.processing_dir}" for the processing dir') - # Calculate the exector's mapped version of the arguments + # Calculate the executor's mapped version of the arguments kwargs = args_to_kwargs(self.run, args, kwargs) args_only = kwargs.pop(ARGS, ()) kwargs.update(kwargs.pop(KWARGS, ())) diff --git a/terra/tests/__init__.py b/terra/tests/__init__.py index a4d1bfa7..ae00379d 100644 --- a/terra/tests/__init__.py +++ b/terra/tests/__init__.py @@ -10,7 +10,7 @@ def load_tests(loader, standard_tests, pattern): if os.environ.get('TERRA_UNITTEST', None) != "1": warnings.warn( 'WARNING: Running terra tests without setting TERRA_UNITTEST will ' - 'result in side effects such as extraneouse log files being ' + 'result in side effects such as extraneous log files being ' 'generated') this_dir = os.path.dirname(__file__) diff --git a/terra/tests/demo/__main__.py b/terra/tests/demo/__main__.py index 2b0a8e8c..ec090bdf 100644 --- a/terra/tests/demo/__main__.py +++ b/terra/tests/demo/__main__.py @@ -19,7 +19,7 @@ @settings_property def singularity_unset(self): - raise ImproperlyConfigured('You must to set --compose and --service for ' + raise ImproperlyConfigured('You must set --compose and --service for ' 'singularity') diff --git a/terra/tests/test_compute_singularity.py b/terra/tests/test_compute_singularity.py index aa6efa80..e287fe30 100644 --- a/terra/tests/test_compute_singularity.py +++ b/terra/tests/test_compute_singularity.py @@ -11,7 +11,7 @@ class TestComputeSingularityCase(TestSettingsUnconfiguredCase): def setUp(self): - # This will resets the _connection to an uninitialized state + # This will reset the _connection to an uninitialized state self.patches.append( mock.patch.object(terra.compute.utils.ComputeHandler, '_connection', diff --git a/terra/tests/utils.py b/terra/tests/utils.py index b5444025..00c9737f 100644 --- a/terra/tests/utils.py +++ b/terra/tests/utils.py @@ -106,7 +106,7 @@ def setUp(self): # Enable signals. Most logging tests require configure logger to actually # be called. LogRecordSocketReceiver is mocked out, so no lasting side -# effects should inccur. +# effects should occur. 
class TestLoggerConfigureCase(TestLoggerCase, TestSignalCase, TestComputeCase, TestExecutorCase): pass From dbcb59e2391a2c1831c1b085ab516d0e987b07b6 Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Thu, 4 Jun 2020 14:07:09 -0400 Subject: [PATCH 74/94] Fixed exceptions printing to stderr in ipython Signed-off-by: Andy Neff --- terra/logger.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/terra/logger.py b/terra/logger.py index da23a357..8450fdc9 100644 --- a/terra/logger.py +++ b/terra/logger.py @@ -306,8 +306,14 @@ def setup_logging_exception_hook(self): def handle_exception(exc_type, exc_value, exc_traceback): # Try catch here because I want to make sure the original hook is called try: - logger.critical("Uncaught exception", extra={'skip_stderr': True}, - exc_info=(exc_type, exc_value, exc_traceback)) + # Use getLogger instead of logger (defined below) incase there is an + # exception on import, this will make it easier to get a normal error + # message + getLogger(__name__).critical("Uncaught exception", + extra={'skip_stderr': True}, + exc_info=(exc_type, + exc_value, + exc_traceback)) except Exception: # pragma: no cover print('There was an exception logging in the exception handler!', file=sys.stderr) @@ -345,8 +351,14 @@ def setup_logging_ipython_exception_hook(self): original_exception = InteractiveShell.showtraceback def handle_traceback(*args, **kwargs): # pragma: no cover - getLogger(__name__).critical("Uncaught exception", - exc_info=sys.exc_info()) + try: + getLogger(__name__).critical("Uncaught exception", + extra={'skip_stderr': True}, + exc_info=sys.exc_info()) + except Exception: + print('There was an exception logging in the exception handler!', + file=sys.stderr) + traceback.print_exc() try: from terra import settings From 90eaa92e3374bbd7d0d11b72802641b99d84eb4f Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Thu, 4 Jun 2020 14:11:11 -0400 Subject: [PATCH 75/94] Changed default compute Signed-off-by: Andy Neff --- terra/core/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terra/core/settings.py b/terra/core/settings.py index 35bd1c05..1512be9a 100644 --- a/terra/core/settings.py +++ b/terra/core/settings.py @@ -289,7 +289,7 @@ def terra_uuid(self): } }, "executor": { - "type": "ThreadPoolExecutor", + "type": "ProcessPoolExecutor", 'volume_map': [] }, "compute": { From ac5f5bc5f46bfa7e07a7d13aadfed60a4d1e609c Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Thu, 4 Jun 2020 14:10:07 -0400 Subject: [PATCH 76/94] Added TERRA_DISABLE_SETTINGS_DUMP Signed-off-by: Andy Neff --- terra.env | 4 ++++ terra/logger.py | 13 +++++++------ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/terra.env b/terra.env index c8ddbac5..78c27c97 100644 --- a/terra.env +++ b/terra.env @@ -84,6 +84,10 @@ fi # .. envvar:: TERRA_KEEP_TEMP_DIR # # Optional environment variable, that when set to ``1`` will keep the temporary config files generated for containers. For debug use. +# +# .. envvar:: TERRA_DISABLE_SETTINGS_DUMP +# +# Optional environment variable, that when set to ``1`` will disable the saving of ``settings.json`` files in the processing dir. This is particularly useful for test script or jupyter notebooks where you do not want to litter ``settings.json`` files everywhere. For debug use. 
#** #** diff --git a/terra/logger.py b/terra/logger.py index 8450fdc9..503ba698 100644 --- a/terra/logger.py +++ b/terra/logger.py @@ -425,12 +425,13 @@ def configure_logger(self, sender=None, signal=None, **kwargs): self.root_logger.removeHandler(self.preconfig_main_log_handler) self.root_logger.removeHandler(self.tmp_handler) - settings_dump = os.path.join( - settings.processing_dir, - datetime.now(timezone.utc).strftime( - f'settings_{settings.terra.uuid}_%Y_%m_%d_%H_%M_%S_%f.json')) - with open(settings_dump, 'w') as fid: - fid.write(TerraJSONEncoder.dumps(settings, indent=2)) + if os.environ.get('TERRA_DISABLE_SETTINGS_DUMP') != '1': + settings_dump = os.path.join( + settings.processing_dir, + datetime.now(timezone.utc).strftime( + f'settings_{settings.terra.uuid}_%Y_%m_%d_%H_%M_%S_%f.json')) + with open(settings_dump, 'w') as fid: + fid.write(TerraJSONEncoder.dumps(settings, indent=2)) # filter the stderr buffer self.preconfig_stderr_handler.buffer = \ From db02b57f0b26358cd89762471be51f9662e4a43c Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Thu, 4 Jun 2020 14:10:47 -0400 Subject: [PATCH 77/94] Added TERRA_DISABLE_TERRA_LOG Signed-off-by: Andy Neff --- terra.env | 4 ++++ terra/compute/base.py | 18 ++++++++++++------ terra/executor/celery/executor.py | 14 ++++++++++---- terra/logger.py | 10 ++++++---- 4 files changed, 32 insertions(+), 14 deletions(-) diff --git a/terra.env b/terra.env index 78c27c97..e3a4a7cf 100644 --- a/terra.env +++ b/terra.env @@ -88,6 +88,10 @@ fi # .. envvar:: TERRA_DISABLE_SETTINGS_DUMP # # Optional environment variable, that when set to ``1`` will disable the saving of ``settings.json`` files in the processing dir. This is particularly useful for test script or jupyter notebooks where you do not want to litter ``settings.json`` files everywhere. For debug use. +# +# .. envvar:: TERRA_DISABLE_TERRA_LOG +# +# Optional environment variable, that when set to ``1`` will disable the saving of the ``terra_log`` file in the processing dir. This is particularly useful for test script or jupyter notebooks where you do not want to litter ``terra_log`` files everywhere. For debug use. #** #** diff --git a/terra/compute/base.py b/terra/compute/base.py index 14e8696c..989e89d2 100644 --- a/terra/compute/base.py +++ b/terra/compute/base.py @@ -197,9 +197,12 @@ def configuration_map_service(self, service_info): def configure_logger(sender, **kwargs): if settings.terra.zone == 'controller': # Setup log file for use in configure - sender._log_file = os.path.join( - settings.processing_dir, - terra.logger._SetupTerraLogger.default_log_prefix) + if os.environ.get('TERRA_DISABLE_TERRA_LOG') != '1': + sender._log_file = os.path.join( + settings.processing_dir, + terra.logger._SetupTerraLogger.default_log_prefix) + else: + sender._log_file = os.devnull os.makedirs(settings.processing_dir, exist_ok=True) sender._log_file = open(sender._log_file, 'a') sender.main_log_handler = StreamHandler(stream=sender._log_file) @@ -249,9 +252,12 @@ def reconfigure_logger(sender, **kwargs): # output stream if settings.terra.zone == 'controller': - log_file = os.path.join( - settings.processing_dir, - terra.logger._SetupTerraLogger.default_log_prefix) + if os.environ.get('TERRA_DISABLE_TERRA_LOG') != '1': + log_file = os.path.join( + settings.processing_dir, + terra.logger._SetupTerraLogger.default_log_prefix) + else: + log_file = os.devnull # Check to see if _log_file is unset. If it is, this is due to _log_file # being called without configure being called. 
While it is not important diff --git a/terra/executor/celery/executor.py b/terra/executor/celery/executor.py index ec253483..6b607aba 100644 --- a/terra/executor/celery/executor.py +++ b/terra/executor/celery/executor.py @@ -257,8 +257,11 @@ def configure_logger(sender, **kwargs): sender.main_log_handler = NullHandler() elif settings.terra.zone == 'task_controller': # Setup log file for use in configure - sender._log_file = os.path.join(settings.processing_dir, - terra.logger._logs.default_log_prefix) + if os.environ.get('TERRA_DISABLE_TERRA_LOG') != '1': + sender._log_file = os.path.join(settings.processing_dir, + terra.logger._logs.default_log_prefix) + else: + sender._log_file = os.devnull os.makedirs(settings.processing_dir, exist_ok=True) sender._log_file = open(sender._log_file, 'a') sender.main_log_handler = StreamHandler(stream=sender._log_file) @@ -291,8 +294,11 @@ def reconfigure_logger(sender, pre_run_task=False, sender.main_log_handler = NullHandler() sender.root_logger.addHandler(sender.main_log_handler) elif settings.terra.zone == 'task_controller': - log_file = os.path.join(settings.processing_dir, - terra.logger._logs.default_log_prefix) + if os.environ.get('TERRA_DISABLE_TERRA_LOG') != '1': + log_file = os.path.join(settings.processing_dir, + terra.logger._logs.default_log_prefix) + else: + log_file = os.devnull if log_file != sender._log_file.name: os.makedirs(settings.processing_dir, exist_ok=True) diff --git a/terra/logger.py b/terra/logger.py index 503ba698..853dce90 100644 --- a/terra/logger.py +++ b/terra/logger.py @@ -227,9 +227,11 @@ def __init__(self): self.root_logger.addHandler(self.stderr_handler) # Set up temporary file logger - self.tmp_file = tempfile.NamedTemporaryFile(mode="w+", - prefix=self.default_tmp_prefix, - delete=False) + if os.environ.get('TERRA_DISABLE_TERRA_LOG') != '1': + self.tmp_file = tempfile.NamedTemporaryFile( + mode="w+", prefix=self.default_tmp_prefix, delete=False) + else: + self.tmp_file = open(os.devnull, mode='w+') self.tmp_handler = logging.StreamHandler(stream=self.tmp_file) self.tmp_handler.setLevel(0) self.tmp_handler.setFormatter(self.default_formatter) @@ -462,7 +464,7 @@ def configure_logger(self, sender=None, signal=None, **kwargs): # Remove the temporary file now that you are done with it self.tmp_file.close() - if os.path.exists(self.tmp_file.name): + if os.path.exists(self.tmp_file.name) and self.tmp_file.name != os.devnull: os.unlink(self.tmp_file.name) self.tmp_file = None From 63057a1ff9fa573a4955b03510d1a3510643a0d9 Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Thu, 4 Jun 2020 14:40:12 -0400 Subject: [PATCH 78/94] Fix root logger Signed-off-by: Andy Neff --- terra/logger.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/terra/logger.py b/terra/logger.py index 853dce90..c63b18a0 100644 --- a/terra/logger.py +++ b/terra/logger.py @@ -218,6 +218,9 @@ def __init__(self): # ever defined, don't use it to get the root logger self.root_logger = logging.getLogger(None) self.root_logger.setLevel(0) + # Add the Terra filter to the rootlogger, so that it gets the same extra + # args any other terra.logger.Logger would get + self.root_logger.addFilter(TerraAddFilter()) # stream -> stderr self.stderr_handler = logging.StreamHandler(sys.stderr) @@ -516,7 +519,7 @@ class ColorFormatter(Formatter): def format(self, record): if self.use_color: - zone = record.__dict__['zone'] + zone = record.__dict__.get('zone', 'preconfig') if zone == "preconfig": record.__dict__['zone'] = '\033[33mpreconfig\033[0m' elif zone 
== "controller": From cac374f5b33e728186f57bf6e35de0f5e1f2fb59 Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Fri, 5 Jun 2020 10:22:15 -0400 Subject: [PATCH 79/94] Customized ThreadPoolExecutor _adjust_thread_count Signed-off-by: Andy Neff --- terra/executor/thread.py | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/terra/executor/thread.py b/terra/executor/thread.py index 355c6db5..85a88e97 100644 --- a/terra/executor/thread.py +++ b/terra/executor/thread.py @@ -1,4 +1,8 @@ import concurrent.futures +import concurrent.futures.thread +import weakref +import threading + import terra.executor.base __all__ = ['ThreadPoolExecutor'] @@ -6,4 +10,30 @@ class ThreadPoolExecutor(concurrent.futures.ThreadPoolExecutor, terra.executor.base.BaseExecutor): - pass + # This function works in python 3.6-3.9 + def _adjust_thread_count(self): + # 3.8 compatibility + if hasattr(self, '_idle_semaphore'): + # if idle threads are available, don't spin new threads + if self._idle_semaphore.acquire(timeout=0): + return + # When the executor gets lost, the weakref callback will wake up + # the worker threads. + def weakref_cb(_, q=self._work_queue): + q.put(None) + num_threads = len(self._threads) + if num_threads < self._max_workers: + thread_name = '%s_%d' % (self._thread_name_prefix or self, num_threads) + args = (weakref.ref(self, weakref_cb), self._work_queue) + # 3.7 compatibility + if hasattr(self, '_initializer'): + args += (self._initializer, self._initargs) + t = threading.Thread(name=thread_name, + target=concurrent.futures.thread._worker, + args=args) + t.daemon = True + self._threads.add(t) + concurrent.futures.thread._threads_queues[t] = self._work_queue + # Start thread AFTER adding it to the queue, so any check to see if + # current thread is in _threads_queues work correctly. + t.start() \ No newline at end of file From 7a0825274a062408e2d510a755a3c9f9a6a1b07a Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Fri, 5 Jun 2020 10:22:41 -0400 Subject: [PATCH 80/94] Restored ThreadPoolExecutor.py Signed-off-by: Andy Neff --- terra/executor/thread.py | 31 +------------------------------ 1 file changed, 1 insertion(+), 30 deletions(-) diff --git a/terra/executor/thread.py b/terra/executor/thread.py index 85a88e97..8e394e2b 100644 --- a/terra/executor/thread.py +++ b/terra/executor/thread.py @@ -1,7 +1,4 @@ import concurrent.futures -import concurrent.futures.thread -import weakref -import threading import terra.executor.base @@ -10,30 +7,4 @@ class ThreadPoolExecutor(concurrent.futures.ThreadPoolExecutor, terra.executor.base.BaseExecutor): - # This function works in python 3.6-3.9 - def _adjust_thread_count(self): - # 3.8 compatibility - if hasattr(self, '_idle_semaphore'): - # if idle threads are available, don't spin new threads - if self._idle_semaphore.acquire(timeout=0): - return - # When the executor gets lost, the weakref callback will wake up - # the worker threads. 
- def weakref_cb(_, q=self._work_queue): - q.put(None) - num_threads = len(self._threads) - if num_threads < self._max_workers: - thread_name = '%s_%d' % (self._thread_name_prefix or self, num_threads) - args = (weakref.ref(self, weakref_cb), self._work_queue) - # 3.7 compatibility - if hasattr(self, '_initializer'): - args += (self._initializer, self._initargs) - t = threading.Thread(name=thread_name, - target=concurrent.futures.thread._worker, - args=args) - t.daemon = True - self._threads.add(t) - concurrent.futures.thread._threads_queues[t] = self._work_queue - # Start thread AFTER adding it to the queue, so any check to see if - # current thread is in _threads_queues work correctly. - t.start() \ No newline at end of file + pass From 1a1d1cec3bfaba7acf002ba38891466d65c1056e Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Sat, 6 Jun 2020 09:12:02 -0400 Subject: [PATCH 81/94] Multithreaded safe settings POC working Signed-off-by: Andy Neff --- terra/core/settings.py | 50 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/terra/core/settings.py b/terra/core/settings.py index 1512be9a..b2986380 100644 --- a/terra/core/settings.py +++ b/terra/core/settings.py @@ -155,6 +155,10 @@ from json import JSONEncoder import platform import warnings +import threading +import concurrent.futures +import weakref +import copy from terra.core.exceptions import ImproperlyConfigured, ConfigurationWarning # Do not import terra.logger or terra.signals here, or any module that @@ -337,7 +341,6 @@ class LazyObject: Based off of Django's LazyObject ''' - _wrapped = None ''' The internal object being wrapped ''' @@ -360,6 +363,7 @@ def __init__(self): def __getattr__(self, name, *args, **kwargs): '''Supported''' + print('getattr:', name) if self._wrapped is None: self._setup() return getattr(self._wrapped, name, *args, **kwargs) @@ -367,8 +371,8 @@ def __getattr__(self, name, *args, **kwargs): def __setattr__(self, name, value): '''Supported''' if name == "_wrapped": - # Assign to __dict__ to avoid infinite __setattr__ loops. - self.__dict__["_wrapped"] = value + # Call super to avoid infinite __setattr__ loops. + super().__setattr__(name, value) else: if self._wrapped is None: self._setup() @@ -585,6 +589,46 @@ def __exit__(self, exc_type=None, exc_value=None, traceback=None): return return_value +class LazySettingsThreaded(LazySettings): + @classmethod + def downcast(cls, obj): + assert(type(obj) == LazySettings) + settings = obj._wrapped + # Put settings in __wrapped where property below expects it. + obj.__wrapped = settings + object.__setattr__(obj, '__class__', cls) + object.__setattr__(obj, '_LazySettingsThreaded__wrapped', settings) + # obj.__threaded_wrapped = weakref.WeakKeyDictionary() + object.__setattr__(obj, '_LazySettingsThreaded__threaded_wrapped', weakref.WeakKeyDictionary()) + + + @property + def _wrapped(self): + thread = threading.current_thread() + if thread._target == concurrent.futures.thread._worker: + print('thread pool thread') + if thread not in self.__threaded_wrapped: + self.__threaded_wrapped[thread] = copy.deepcopy(self.__wrapped) + return self.__threaded_wrapped[thread] + else: + print('main threads') + print(self.__dict__.keys()) + return self.__wrapped + + def __setattr__(self, name, value): + '''Supported''' + print('name:', name) + if name in ("_wrapped", + "_LazySettingsThreaded__wrapped", + "_LazySettingsThreaded__threaded_wrapped"): + # Call super to avoid infinite __setattr__ loops. 
+ object.__setattr__(name, value) + else: + if self._wrapped is None: + self._setup() + setattr(self._wrapped, name, value) + + class ObjectDict(dict): ''' An object dictionary, that accesses dictionary keys using attributes (``.``) From 60411760baa5ad7819ae76a47fe923b75af6a424 Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Sat, 6 Jun 2020 10:53:08 -0400 Subject: [PATCH 82/94] Cleanup of thread safe settings Signed-off-by: Andy Neff --- terra/core/settings.py | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/terra/core/settings.py b/terra/core/settings.py index b2986380..76a88e0b 100644 --- a/terra/core/settings.py +++ b/terra/core/settings.py @@ -592,41 +592,40 @@ def __exit__(self, exc_type=None, exc_value=None, traceback=None): class LazySettingsThreaded(LazySettings): @classmethod def downcast(cls, obj): - assert(type(obj) == LazySettings) - settings = obj._wrapped + assert(type(obj) == LazySettings, + "This downcast function was intended for LazySettings instances " + "only") # Put settings in __wrapped where property below expects it. - obj.__wrapped = settings + settings = obj._wrapped + # Use object setattr, or else this will be treated as a normal key in the + # settings._wrapped ObjectDict, which is not what we want object.__setattr__(obj, '__class__', cls) - object.__setattr__(obj, '_LazySettingsThreaded__wrapped', settings) - # obj.__threaded_wrapped = weakref.WeakKeyDictionary() - object.__setattr__(obj, '_LazySettingsThreaded__threaded_wrapped', weakref.WeakKeyDictionary()) - + obj.__wrapped = settings + obj.__tls = threading.local() @property def _wrapped(self): + ''' + Thread safe version of _wrapped getter + ''' thread = threading.current_thread() if thread._target == concurrent.futures.thread._worker: - print('thread pool thread') - if thread not in self.__threaded_wrapped: - self.__threaded_wrapped[thread] = copy.deepcopy(self.__wrapped) - return self.__threaded_wrapped[thread] + if not hasattr(self.__tls, 'settings'): + self.__tls.settings = copy.deepcopy(self.__wrapped) + return self.__tls.settings else: - print('main threads') print(self.__dict__.keys()) return self.__wrapped def __setattr__(self, name, value): '''Supported''' - print('name:', name) - if name in ("_wrapped", - "_LazySettingsThreaded__wrapped", - "_LazySettingsThreaded__threaded_wrapped"): - # Call super to avoid infinite __setattr__ loops. - object.__setattr__(name, value) + if name in ("_LazySettingsThreaded__wrapped", + "_LazySettingsThreaded__tls"): + # Call original __setattr__ to avoid infinite __setattr__ loops. 
+ object.__setattr__(self, name, value) else: - if self._wrapped is None: - self._setup() - setattr(self._wrapped, name, value) + # Normal LazyObject setter + super().__setattr__(name, value) class ObjectDict(dict): From cdb5df366bc21be011be73e1dce2d2370531fbbe Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Sat, 6 Jun 2020 11:07:38 -0400 Subject: [PATCH 83/94] ThreadPoolExecutor auto downcasts settings Signed-off-by: Andy Neff --- terra/core/settings.py | 7 ++----- terra/executor/thread.py | 8 +++++++- terra/executor/utils.py | 6 ------ 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/terra/core/settings.py b/terra/core/settings.py index 76a88e0b..0f203514 100644 --- a/terra/core/settings.py +++ b/terra/core/settings.py @@ -363,7 +363,6 @@ def __init__(self): def __getattr__(self, name, *args, **kwargs): '''Supported''' - print('getattr:', name) if self._wrapped is None: self._setup() return getattr(self._wrapped, name, *args, **kwargs) @@ -592,9 +591,8 @@ def __exit__(self, exc_type=None, exc_value=None, traceback=None): class LazySettingsThreaded(LazySettings): @classmethod def downcast(cls, obj): - assert(type(obj) == LazySettings, - "This downcast function was intended for LazySettings instances " - "only") + # This downcast function was intended for LazySettings instances only + assert type(obj) == LazySettings # Put settings in __wrapped where property below expects it. settings = obj._wrapped # Use object setattr, or else this will be treated as a normal key in the @@ -614,7 +612,6 @@ def _wrapped(self): self.__tls.settings = copy.deepcopy(self.__wrapped) return self.__tls.settings else: - print(self.__dict__.keys()) return self.__wrapped def __setattr__(self, name, value): diff --git a/terra/executor/thread.py b/terra/executor/thread.py index 8e394e2b..f735a827 100644 --- a/terra/executor/thread.py +++ b/terra/executor/thread.py @@ -1,10 +1,16 @@ import concurrent.futures import terra.executor.base +import terra.core.settings __all__ = ['ThreadPoolExecutor'] class ThreadPoolExecutor(concurrent.futures.ThreadPoolExecutor, terra.executor.base.BaseExecutor): - pass + def __init__(self, *args, **kwargs): + # Make terra.setting "thread safe" + if not isinstance(terra.settings, + terra.core.settings.LazySettingsThreaded): + terra.core.settings.LazySettingsThreaded.downcast(terra.settings) + super().__init__(*args, **kwargs) diff --git a/terra/executor/utils.py b/terra/executor/utils.py index 3f3d9f71..64b6b979 100644 --- a/terra/executor/utils.py +++ b/terra/executor/utils.py @@ -39,12 +39,6 @@ def _connect_backend(self): elif backend_name == "ThreadPoolExecutor" or \ backend_name == "concurrent.futures.ThreadPoolExecutor": from terra.executor.thread import ThreadPoolExecutor - # An example of unexpected behavior is the zone being set incorrectly in - # logging messages. But this is just one such example! - # You have been warned - warnings.warn("ThreadPoolExecutor is for testing purposes only. 
" - "settings.terra.zone is not threadsafe and can create " - "unexpected behavior", RuntimeWarning) return ThreadPoolExecutor elif backend_name == "ProcessPoolExecutor" or \ backend_name == "concurrent.futures.ProcessPoolExecutor": From 4a770e9e2eeb77fcd05a50d051492634888e956a Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Sat, 6 Jun 2020 11:19:37 -0400 Subject: [PATCH 84/94] Unittest and pep8 Signed-off-by: Andy Neff --- terra/core/settings.py | 3 +-- terra/executor/utils.py | 1 - terra/tests/test_executor_utils.py | 13 +++++++------ 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/terra/core/settings.py b/terra/core/settings.py index 0f203514..a5fcd935 100644 --- a/terra/core/settings.py +++ b/terra/core/settings.py @@ -157,7 +157,6 @@ import warnings import threading import concurrent.futures -import weakref import copy from terra.core.exceptions import ImproperlyConfigured, ConfigurationWarning @@ -599,7 +598,7 @@ def downcast(cls, obj): # settings._wrapped ObjectDict, which is not what we want object.__setattr__(obj, '__class__', cls) obj.__wrapped = settings - obj.__tls = threading.local() + obj.__tls = threading.local() @property def _wrapped(self): diff --git a/terra/executor/utils.py b/terra/executor/utils.py index 64b6b979..ad199ac3 100644 --- a/terra/executor/utils.py +++ b/terra/executor/utils.py @@ -1,5 +1,4 @@ from importlib import import_module -import warnings from terra import settings import terra.core.signals diff --git a/terra/tests/test_executor_utils.py b/terra/tests/test_executor_utils.py index 457555d3..1c60348a 100644 --- a/terra/tests/test_executor_utils.py +++ b/terra/tests/test_executor_utils.py @@ -33,10 +33,11 @@ def test_executor_name_sync(self): settings.configure({'executor': {'type': 'SyncExecutor'}}) self.assertIsInstance(Executor._connection(), SyncExecutor) - def test_executor_name_thread(self): - settings.configure({'executor': {'type': 'ThreadPoolExecutor'}}) - self.assertIsInstance(Executor._connection(), - concurrent.futures.ThreadPoolExecutor) + # TODO: It takes more mocking to make this test pass now + # def test_executor_name_thread(self): + # settings.configure({'executor': {'type': 'ThreadPoolExecutor'}}) + # self.assertIsInstance(Executor._connection(), + # concurrent.futures.ThreadPoolExecutor) def test_executor_name_process(self): settings.configure({'executor': {'type': 'ProcessPoolExecutor'}}) @@ -55,9 +56,9 @@ def test_executor_name_celery(self): def test_executor_name_by_name(self): settings.configure( - {'executor': {'type': 'concurrent.futures.ThreadPoolExecutor'}}) + {'executor': {'type': 'concurrent.futures.ProcessPoolExecutor'}}) self.assertIsInstance(Executor._connection(), - concurrent.futures.ThreadPoolExecutor) + concurrent.futures.ProcessPoolExecutor) class TestUnitTests(TestCase): From 2f9af4f2d31b6d945a4fe040299471645c2b37b3 Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Sat, 6 Jun 2020 11:33:32 -0400 Subject: [PATCH 85/94] Add a little doc Signed-off-by: Andy Neff --- terra/executor/thread.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/terra/executor/thread.py b/terra/executor/thread.py index f735a827..a0795cc7 100644 --- a/terra/executor/thread.py +++ b/terra/executor/thread.py @@ -8,6 +8,29 @@ class ThreadPoolExecutor(concurrent.futures.ThreadPoolExecutor, terra.executor.base.BaseExecutor): + ''' + Terra version of :class:`concurrent.futures.ThreadPoolExecutor` + + Unlike other executors, :class:`ThreadPoolExecutor` has no process isolation. 
+ This results in a scenario where multiple threads could be terra settings, + actually influence each other, which is not typical behavior, given that + all other executors have process isolation and do not allow this. + + :class:`ThreadPoolExecutor` will downcast :obj:`terra.core.settings` to a + thread-safe :class:`terra.core.settings.LazySettingsThreaded` where each + Executor thread has its own thread local storage version of the settings + structure. + + This behavior is limited to threads started by ThreadPoolExecutor only. All + other threads will have normal thread behavior with the runner threads, and + use a single version of the settings. The only side effect is if a task + starts its own thread, it will be treated as one of the runner threads, not a + task thread. The currently known downside to this is log messages will be + reported as coming from the runner rather than task zone. However, any + attempts to edit settings from this rouge thread can have unintended + consequences. + ''' + def __init__(self, *args, **kwargs): # Make terra.setting "thread safe" if not isinstance(terra.settings, terra.core.settings.LazySettingsThreaded): terra.core.settings.LazySettingsThreaded.downcast(terra.settings) super().__init__(*args, **kwargs) From bb145a389571a58210a2bb8690b6bf4e03c48782 Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Mon, 8 Jun 2020 13:29:17 -0400 Subject: [PATCH 86/94] Added reviewed changes Signed-off-by: Andy Neff --- terra/executor/thread.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/terra/executor/thread.py b/terra/executor/thread.py index a0795cc7..96940055 100644 --- a/terra/executor/thread.py +++ b/terra/executor/thread.py @@ -12,23 +12,23 @@ class ThreadPoolExecutor(concurrent.futures.ThreadPoolExecutor, Terra version of :class:`concurrent.futures.ThreadPoolExecutor` Unlike other executors, :class:`ThreadPoolExecutor` has no process isolation. - This results in a scenario where multiple threads could be terra settings, - actually influence each other, which is not typical behavior, given that - all other executors have process isolation and do not allow this. + This results in a scenario where multiple threads could use terra settings to + influence each other, which is not typical behavior, given that all other + executors have process isolation and do not allow this. :class:`ThreadPoolExecutor` will downcast :obj:`terra.core.settings` to a thread-safe :class:`terra.core.settings.LazySettingsThreaded` where each Executor thread has its own thread local storage version of the settings structure. - This behavior is limited to threads started by ThreadPoolExecutor only. All - other threads will have normal thread behavior with the runner threads, and - use a single version of the settings. The only side effect is if a task - starts its own thread, it will be treated as one of the runner threads, not a - task thread. The currently known downside to this is log messages will be - reported as coming from the runner rather than task zone. However, any - attempts to edit settings from this rouge thread can have unintended - consequences. + This behavior is limited to threads started by :class:`ThreadPoolExecutor` + only. All other threads will have normal thread behavior with the runner + threads, and use a single version of the settings. The only side effect is if + a task starts its own thread, it will be treated as one of the runner + threads, not a task thread. The currently known downside to this is log + messages will be reported as coming from the runner rather than task zone.
+ However, any attempts to edit settings from this rogue thread could + potentially have other unintended consequences. ''' def __init__(self, *args, **kwargs): From 96d308f2171ff108cd8a303457bef6c6cf56dd0f Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Mon, 8 Jun 2020 13:57:27 -0400 Subject: [PATCH 87/94] Get ThreadPoolExecutor tests working again Signed-off-by: Andy Neff --- terra/core/settings.py | 7 +++---- terra/tests/test_core_settings.py | 7 ++++++- terra/tests/test_executor_utils.py | 13 ++++++++++++- terra/tests/utils.py | 25 +++++++++++++++++++++++++ 4 files changed, 46 insertions(+), 6 deletions(-) diff --git a/terra/core/settings.py b/terra/core/settings.py index a5fcd935..a9f42a56 100644 --- a/terra/core/settings.py +++ b/terra/core/settings.py @@ -368,7 +368,7 @@ def __getattr__(self, name, *args, **kwargs): def __setattr__(self, name, value): '''Supported''' - if name == "_wrapped": + if name in ("_wrapped", "__class__"): # Call super to avoid infinite __setattr__ loops. super().__setattr__(name, value) else: @@ -594,9 +594,8 @@ def downcast(cls, obj): assert type(obj) == LazySettings # Put settings in __wrapped where property below expects it. settings = obj._wrapped - # Use object setattr, or else this will be treated as a normal key in the - # settings._wrapped ObjectDict, which is not what we want - object.__setattr__(obj, '__class__', cls) + # Downcast + obj.__class__ = cls obj.__wrapped = settings obj.__tls = threading.local() diff --git a/terra/tests/test_core_settings.py b/terra/tests/test_core_settings.py index ce605f7a..b8fcaf65 100644 --- a/terra/tests/test_core_settings.py +++ b/terra/tests/test_core_settings.py @@ -13,7 +13,7 @@ from terra.core.exceptions import ImproperlyConfigured from terra.core.settings import ( ObjectDict, settings_property, Settings, LazyObject, TerraJSONEncoder, - ExpandedString + ExpandedString, LazySettings ) @@ -645,6 +645,11 @@ def last_test_settings(self): "Otherwise unit tests can interfere with each other") +class TestSettingsClass(TestCase): + def last_test_settings_class(self): + self.assertEqual(type(settings), LazySettings) + + class TestCircularDependency(TestLoggerConfigureCase): # I don't want this unloading terra to interfere with other last_tests, as # this would reset modules to their initial state, giving false positives to diff --git a/terra/tests/test_executor_utils.py b/terra/tests/test_executor_utils.py index 1c60348a..baa7427c 100644 --- a/terra/tests/test_executor_utils.py +++ b/terra/tests/test_executor_utils.py @@ -2,7 +2,10 @@ import concurrent.futures from terra import settings -from .utils import TestCase, TestExecutorCase, TestSettingsUnconfiguredCase +from .utils import ( + TestCase, TestExecutorCase, TestThreadPoolExecutorCase, + TestSettingsUnconfiguredCase +) from terra.executor.utils import ExecutorHandler, Executor from terra.executor.dummy import DummyExecutor from terra.executor.sync import SyncExecutor @@ -61,6 +64,14 @@ def test_executor_name_by_name(self): concurrent.futures.ProcessPoolExecutor) +class TestThreadExecutorHandler(TestThreadPoolExecutorCase, + TestSettingsUnconfiguredCase): + def test_executor_name_thread(self): + settings.configure({'executor': {'type': 'ThreadPoolExecutor'}}) + self.assertIsInstance(Executor._connection(), + concurrent.futures.ThreadPoolExecutor) + + class TestUnitTests(TestCase): # Don't name this "test*" so normal discover doesn't pick it up, "last*" are # run last diff --git a/terra/tests/utils.py b/terra/tests/utils.py index 00c9737f..fa86591f 100644 --- 
a/terra/tests/utils.py +++ b/terra/tests/utils.py @@ -98,6 +98,31 @@ def setUp(self): super().setUp() +class TestThreadPoolExecutorCase(TestExecutorCase): + ''' + Special care is needed for ThreadPoolExecutor because it downcasts settings + ''' + + def setUp(self): + self.settings_class_patch = mock.patch.object( + settings, '__class__', type(settings), create=False) + super().setUp() + self.settings_class_patch.start() + # This mock behavior needs to be modified, because setting __class__ is + # unlike normal attributes, it doesn't get overwritten in __dict__, so + # setting is_local prevents delattr being called on __class__, which would + # be the wrong thing to do. + self.settings_class_patch.is_local = True + + # This class does not mock or clean up __wrapped or __tls, but they do not + # introduce sideeffects. + + def tearDown(self): + # This has to be stopped before the rest, or else a setattr error occurs. + self.settings_class_patch.stop() + super().tearDown() + + class TestSignalCase(TestCase): def setUp(self): self.patches.append(mock.patch.dict(os.environ, TERRA_UNITTEST='0')) From 504292a13bb88b8a5aaf2e39d2503c3d97acaf21 Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Tue, 9 Jun 2020 08:24:07 -0400 Subject: [PATCH 88/94] Documentation fixes [ci skip] Signed-off-by: Andy Neff --- Justfile | 4 ++-- terra.env | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Justfile b/Justfile index 117f8215..02c8d610 100755 --- a/Justfile +++ b/Justfile @@ -141,10 +141,10 @@ function terra_caseify() ;; run_flower) # Start the flower server - # Flower doesn't actually need the tasks an app, so clear it + # Flower doesn't actually need the tasks loaded in the app, so clear it TERRA_CELERY_INCLUDE='[]' Terra_Pipenv run python -m terra.executor.celery -A terra.executor.celery.app flower ;; - shutdown_celery) # Shuts down all celery works on all nodes + shutdown_celery) # Shuts down all celery workers on all nodes Terra_Pipenv run python -c "from terra.executor.celery import app; app.control.broadcast('shutdown')" ;; diff --git a/terra.env b/terra.env index e3a4a7cf..9645f701 100644 --- a/terra.env +++ b/terra.env @@ -83,15 +83,15 @@ fi # # .. envvar:: TERRA_KEEP_TEMP_DIR # -# Optional environment variable, that when set to ``1`` will keep the temporary config files generated for containers. For debug use. +# Optional environment variable that, when set to ``1``, will keep the temporary config files generated for containers. For debug use. # # .. envvar:: TERRA_DISABLE_SETTINGS_DUMP # -# Optional environment variable, that when set to ``1`` will disable the saving of ``settings.json`` files in the processing dir. This is particularly useful for test script or jupyter notebooks where you do not want to litter ``settings.json`` files everywhere. For debug use. +# Optional environment variable that, when set to ``1``, will disable the saving of ``settings.json`` files in the processing dir. This is particularly useful for test script or jupyter notebooks where you do not want to litter ``settings.json`` files everywhere. For debug use. # # .. envvar:: TERRA_DISABLE_TERRA_LOG # -# Optional environment variable, that when set to ``1`` will disable the saving of the ``terra_log`` file in the processing dir. This is particularly useful for test script or jupyter notebooks where you do not want to litter ``terra_log`` files everywhere. For debug use. +# Optional environment variable that, when set to ``1``, will disable the saving of the ``terra_log`` file in the processing dir. 
This is particularly useful for test script or jupyter notebooks where you do not want to litter ``terra_log`` files everywhere. For debug use. #** #** From b7a259084ccc6c7077760aa1f100bbb9a20a3741 Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Tue, 9 Jun 2020 18:55:17 -0400 Subject: [PATCH 89/94] Remove TERRA_DOCKER_RUNTIME, not a Terra var Signed-off-by: Andy Neff --- terra.env | 1 - 1 file changed, 1 deletion(-) diff --git a/terra.env b/terra.env index 9645f701..2449de1a 100644 --- a/terra.env +++ b/terra.env @@ -42,7 +42,6 @@ if [ -d "/tmp/.X11-unix" ]; then ${TERRA_VOLUMES+"${TERRA_VOLUMES[@]}"}) fi -: ${TERRA_DOCKER_RUNTIME="$([[ "$(nvidia-docker version 2>/dev/null)" = "NVIDIA Docker: 2"* ]] && echo nvidia)"} # Redis values : ${TERRA_REDIS_PORT=6379} : ${TERRA_REDIS_PORT_DOCKER=6379} From 2998faa2d53a4c56fa19d0111710fbfb0b57edaa Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Tue, 9 Jun 2020 18:55:54 -0400 Subject: [PATCH 90/94] Added terra_setup Signed-off-by: Andy Neff --- Justfile | 90 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/Justfile b/Justfile index 02c8d610..2266f805 100755 --- a/Justfile +++ b/Justfile @@ -3,6 +3,8 @@ source "${VSI_COMMON_DIR}/linux/just_env" "$(dirname "${BASH_SOURCE[0]}")"/'terra.env' # Plugins +source "${VSI_COMMON_DIR}/linux/ask_question" +source "${VSI_COMMON_DIR}/linux/command_tools.bsh" source "${VSI_COMMON_DIR}/linux/docker_functions.bsh" source "${VSI_COMMON_DIR}/linux/just_docker_functions.bsh" source "${VSI_COMMON_DIR}/linux/just_singularity_functions.bsh" @@ -259,6 +261,94 @@ function terra_caseify() extra_args=$# ;; + terra_setup) # Setup pipenv using system python and/or conda + local output_dir + local CONDA + local PYTHON + + parse_args extra_args --dir output_dir: --python PYTHON: --conda CONDA: -- ${@+"${@}"} + + if [ -z "${output_dir:+set}" ]; then + echo "--dir must be specified" >& 2 + exit 2 + fi + + mkdir -p "${output_dir}" + # relative to absolute + output_dir="$(cd "${output_dir}"; pwd)" + + local use_conda + local platform_bin + if [ "${OS-}" = "Windows_NT" ]; then + platform_bin=Scripts + else + platform_bin=bin + fi + + if [ -n "${PYTHON:+set}" ]; then + use_conda=0 + elif [ -n "${CONDA:+set}" ]; then + use_conda=1 + else + if command -v Xpython3 &> /dev/null; then + PYTHON=python3 + use_conda=0 + elif command -v Xpython &> /dev/null; then + PYTHON=python + use_conda=0 + elif command -v Xconda3 &> /dev/null; then + CONDA=conda3 + use_conda=1 + elif command -v Xconda2 &> /dev/null; then + CONDA=conda2 + use_conda=1 + else + source "${VSI_COMMON_DIR}/linux/web_tools.bsh" + source "${VSI_COMMON_DIR}/linux/dir_tools.bsh" + make_temp_path temp_dir -d + if [ "${OS-}" = "Windows_NT" ]; then + download_to_stdout "https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe" > "${temp_dir}/install_conda.exe" + MSYS2_ARG_CONV_EXCL="*" "${temp_dir}/install_conda.exe" /NoRegistry=1 /InstallationType=JustMe /S "/D=$(cygpath -aw "${temp_dir}/conda")" + CONDA="${temp_dir}/conda/Scripts/conda" + else + if [[ ${OSTYPE-} = darwin* ]]; then + URL="https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh" + else + URL="https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh" + fi + download_to_stdout "${URL}" > "${temp_dir}/install_conda.sh" + bash "${temp_dir}/install_conda.sh" -b -p "${temp_dir}/conda" -s + CONDA="${temp_dir}/conda/bin/conda" + fi + use_conda=1 + fi + fi + + if [ "${use_conda}" = "1" ]; then + "${CONDA}" create -y -p 
"${output_dir}/.python" 'python<=3.8' + PYTHON="${output_dir}/.python/${platform_bin}/python" + fi + + # Make sure python is 3.6 or newer + local python_version="$("${PYTHON}" --version | awk '{print $2}')" + source "${VSI_COMMON_DIR}/linux/requirements.bsh" + if ! meet_requirements "${python_version}" '>=3.6' '<3.9'; then + echo "Python version ${python_version} does not meet the expected requirements" >&2 + read -srn1 -d '' -p "Press any key to continue" + echo + fi + + source "${VSI_COMMON_DIR}/docker/recipes/get-pipenv" + PIPENV_VIRTUALENV="${output_dir}" install_pipenv + + local add_to_local + echo "" >&2 + ask_question "Do you want to add \"${output_dir}/${platform_bin}\" to your local.env automatically?" add_to_local y + if [ "${add_to_local}" == "1" ]; then + echo $'\n'"PATH=\"${output_dir}/${platform_bin}:\${PATH}\"" >> "${TERRA_CWD}/local.env" + fi + ;; + terra_pipenv) # Run pipenv commands in Terra's pipenv conatainer. Useful for \ # installing/updating pipenv packages into terra TERRA_PIPENV_IMAGE=terra_pipenv Terra_Pipenv ${@+"${@}"} From 786719a2c5c585787eb283712a4df61a9a534561 Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Tue, 9 Jun 2020 18:57:14 -0400 Subject: [PATCH 91/94] Updated vsi_common Signed-off-by: Andy Neff --- external/vsi_common | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/external/vsi_common b/external/vsi_common index b69c6d51..f1d5e62a 160000 --- a/external/vsi_common +++ b/external/vsi_common @@ -1 +1 @@ -Subproject commit b69c6d51d40db4768ada2fc2e4aa9903fca6a335 +Subproject commit f1d5e62aa8be566fe1ba88095ba9386819b2379d From 28de71cdbf84ca902f2f2c08ce3482f34b85ae49 Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Wed, 10 Jun 2020 09:29:49 -0400 Subject: [PATCH 92/94] Oops [skip ci] Signed-off-by: Andy Neff --- Justfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Justfile b/Justfile index 2266f805..e14e5b6b 100755 --- a/Justfile +++ b/Justfile @@ -290,16 +290,16 @@ function terra_caseify() elif [ -n "${CONDA:+set}" ]; then use_conda=1 else - if command -v Xpython3 &> /dev/null; then + if command -v python3 &> /dev/null; then PYTHON=python3 use_conda=0 - elif command -v Xpython &> /dev/null; then + elif command -v python &> /dev/null; then PYTHON=python use_conda=0 - elif command -v Xconda3 &> /dev/null; then + elif command -v conda3 &> /dev/null; then CONDA=conda3 use_conda=1 - elif command -v Xconda2 &> /dev/null; then + elif command -v conda2 &> /dev/null; then CONDA=conda2 use_conda=1 else From 454948000f206f7f2b77fc33d221790ab3f41d26 Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Wed, 10 Jun 2020 14:31:32 -0400 Subject: [PATCH 93/94] Restore test to no longer need a delayed import Signed-off-by: Andy Neff --- terra/tests/test_compute_base.py | 64 +++++++++++++++----------------- 1 file changed, 30 insertions(+), 34 deletions(-) diff --git a/terra/tests/test_compute_base.py b/terra/tests/test_compute_base.py index 55489a18..67bfa0f9 100644 --- a/terra/tests/test_compute_base.py +++ b/terra/tests/test_compute_base.py @@ -2,20 +2,18 @@ from unittest import mock from terra import settings -from .utils import TestCase, TestSettingsConfiguredCase +import terra.compute.base +from .utils import ( + TestCase, TestSettingsConfiguredCase, TestSettingsUnconfiguredCase +) class TestServiceBase(TestSettingsConfiguredCase): - def setUp(self): - from terra.compute import base - self.base = base - super().setUp() - # Simulate external env var @mock.patch.dict(os.environ, {'FOO': "BAR"}) def test_env(self): # Test that a 
service inherits the environment correctly - service = self.base.BaseService() + service = terra.compute.base.BaseService() # App specific env var service.env['BAR'] = 'foo' # Make sure both show up @@ -25,49 +23,37 @@ def test_env(self): self.assertNotIn("BAR", os.environ) def test_add_volumes(self): - service = self.base.BaseService() + service = terra.compute.base.BaseService() # Add a volumes service.add_volume("/local", "/remote") # Make sure it's in the list self.assertIn(("/local", "/remote"), service.volumes) - # Unconfigure settings - @mock.patch.object(settings, '_wrapped', None) - def test_volumes_and_configuration_map(self): - # Add a volumes - service = self.base.BaseService() - service.add_volume("/local", "/remote") - - # Test configuration_map - settings.configure({}) - # Make sure the volume is in the map - self.assertEqual([("/local", "/remote")], - self.base.BaseCompute().configuration_map(service)) - def test_registry(self): - with mock.patch.dict(self.base.services, clear=True): + with mock.patch.dict(terra.compute.base.services, clear=True): # Registration test class Foo: - class TestService(self.base.BaseService): + class TestService(terra.compute.base.BaseService): pass - class TestService_base(Foo.TestService, self.base.BaseService): + class TestService_base(Foo.TestService, terra.compute.base.BaseService): pass # Register a class class, just for fun - self.base.BaseCompute.register(Foo.TestService)(TestService_base) + terra.compute.base.BaseCompute.register(Foo.TestService)( + TestService_base) self.assertIn(Foo.TestService.__module__ + '.' + Foo.TestService.__qualname__, - self.base.services) + terra.compute.base.services) - with self.assertRaises(self.base.AlreadyRegisteredException, + with self.assertRaises(terra.compute.base.AlreadyRegisteredException, msg='Compute command "car" does not have a ' 'service implementation "car_service"'): - self.base.BaseCompute.register(Foo.TestService)(lambda x: 1) + terra.compute.base.BaseCompute.register(Foo.TestService)(lambda x: 1) def test_getattr(self): - class Foo(self.base.BaseCompute): + class Foo(terra.compute.base.BaseCompute): def bar_service(self): pass @@ -77,14 +63,24 @@ def bar_service(self): foo.car -class TestUnitTests(TestCase): - def setUp(self): - from terra.compute import base - self.base = base +class TestServiceBaseUnconfigured(TestSettingsUnconfiguredCase): + def test_volumes_and_configuration_map(self): + # Add a volumes + service = terra.compute.base.BaseService() + service.add_volume("/local", "/remote") + # Test configuration_map + settings.configure({}) + # Make sure the volume is in the map + self.assertEqual( + [("/local", "/remote")], + terra.compute.base.BaseCompute().configuration_map(service)) + + +class TestUnitTests(TestCase): def last_test_registered_services(self): self.assertFalse( - self.base.services, + terra.compute.base.services, msg="If you are seeing this, one of the other unit tests has " "registered a terra service. This side effect should be " "prevented by mocking out the terra.compute.base.services dict. 
" From 3620ac23daedeb409b660831b0ff6d9540dbf512 Mon Sep 17 00:00:00 2001 From: Andy Neff Date: Wed, 10 Jun 2020 14:32:05 -0400 Subject: [PATCH 94/94] Add README steps for terra_setup Signed-off-by: Andy Neff --- Justfile | 14 +++++++++----- README.md | 14 ++++++++++++++ 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/Justfile b/Justfile index e14e5b6b..e4d537d4 100755 --- a/Justfile +++ b/Justfile @@ -265,8 +265,9 @@ function terra_caseify() local output_dir local CONDA local PYTHON + local download_conda=0 - parse_args extra_args --dir output_dir: --python PYTHON: --conda CONDA: -- ${@+"${@}"} + parse_args extra_args --dir output_dir: --python PYTHON: --conda CONDA: --download download_conda -- ${@+"${@}"} if [ -z "${output_dir:+set}" ]; then echo "--dir must be specified" >& 2 @@ -290,16 +291,19 @@ function terra_caseify() elif [ -n "${CONDA:+set}" ]; then use_conda=1 else - if command -v python3 &> /dev/null; then + if [ "${download_conda}" == "0" ] && command -v python3 &> /dev/null; then PYTHON=python3 use_conda=0 - elif command -v python &> /dev/null; then + elif [ "${download_conda}" == "0" ] && command -v python &> /dev/null; then PYTHON=python use_conda=0 - elif command -v conda3 &> /dev/null; then + elif [ "${download_conda}" == "0" ] && command -v conda3 &> /dev/null; then CONDA=conda3 use_conda=1 - elif command -v conda2 &> /dev/null; then + elif [ "${download_conda}" == "0" ] && command -v conda &> /dev/null; then + CONDA=conda + use_conda=1 + elif [ "${download_conda}" == "0" ] && command -v conda2 &> /dev/null; then CONDA=conda2 use_conda=1 else diff --git a/README.md b/README.md index 2167ed72..3bc65ef8 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,20 @@ just terra sync just terra run ``` +## Setting up pipenv + +There are a number of reasons `pipenv` running python 3.6 or newer may not be available, especially on older operating systems. To automatically setup `pipenv` in a directory for you, run `just terra setup --dir {directory to install pipenv in}`. This does not require elevated permissions. + +`just terra setup` will attempt to setup pipenv using a series of different strategies: + +1. It will look for the Python 3 executable (`python3` or `python`). If this is found, it will be used to setup `pipenv` + - A specific python executable can be specified using the `--python` flag +2. If `python` cannot be found, it will look for the `conda3`/`conda`/`conda2` executable and use that to first setup Python 3.7, and then setup `pipenv` + - A specific executable of conda can be specified using the `--conda` flag +3. If all else fails, MiniConda will be downloaded from the internet, installed, and used to first setup Python 3.7, and then setup `pipenv` +4. If an invalid version of python or conda is detected, the download approach can be forced using the `--download` flag. +5. Once `pipenv` is setup, it should be added to your `PATH` using the `local.env` file. This will be done for you if you answer yes to the final question at the end. + ## Running an app in celery 1. `just terra up` - To start redis queue (only once)