From 57bd6fb2925a7d505a80b83140811b94b363f49c Mon Sep 17 00:00:00 2001 From: Ash Berlin-Taylor Date: Mon, 7 Jun 2021 09:27:01 +0100 Subject: [PATCH] Don't fail to log if we can't redact something (#16118) Rather than dying with an exception, catch it and warn about that, asking users to report it to us. Additionally handle the specific case where a file handle/IO object is logged -- we definitely don't want to iterate over that! --- airflow/utils/log/secrets_masker.py | 53 +++++++++++++++++--------- tests/utils/log/test_secrets_masker.py | 24 ++++++++++++ 2 files changed, 59 insertions(+), 18 deletions(-) diff --git a/airflow/utils/log/secrets_masker.py b/airflow/utils/log/secrets_masker.py index 6b1d88f530210..bb09a2d733f8d 100644 --- a/airflow/utils/log/secrets_masker.py +++ b/airflow/utils/log/secrets_masker.py @@ -16,6 +16,7 @@ # under the License. """Mask sensitive information from logs""" import collections +import io import logging import re from typing import TYPE_CHECKING, Iterable, Optional, Set, TypeVar, Union @@ -27,6 +28,10 @@ RedactableItem = TypeVar('RedactableItem') + +log = logging.getLogger(__name__) + + DEFAULT_SENSITIVE_FIELDS = frozenset( { 'password', @@ -173,24 +178,36 @@ def redact(self, item: "RedactableItem", name: str = None) -> "RedactableItem": is redacted. """ - if name and should_hide_value_for_key(name): - return self._redact_all(item) - - if isinstance(item, dict): - return {dict_key: self.redact(subval, dict_key) for dict_key, subval in item.items()} - elif isinstance(item, str): - if self.replacer: - # We can't replace specific values, but the key-based redacting - # can still happen, so we can't short-circuit, we need to walk - # the structure. - return self.replacer.sub('***', item) - return item - elif isinstance(item, (tuple, set)): - # Turn set in to tuple! - return tuple(self.redact(subval) for subval in item) - elif isinstance(item, Iterable): - return list(self.redact(subval) for subval in item) - else: + try: + if name and should_hide_value_for_key(name): + return self._redact_all(item) + + if isinstance(item, dict): + return {dict_key: self.redact(subval, dict_key) for dict_key, subval in item.items()} + elif isinstance(item, str): + if self.replacer: + # We can't replace specific values, but the key-based redacting + # can still happen, so we can't short-circuit, we need to walk + # the structure. + return self.replacer.sub('***', item) + return item + elif isinstance(item, (tuple, set)): + # Turn set in to tuple! + return tuple(self.redact(subval) for subval in item) + elif isinstance(item, io.IOBase): + return item + elif isinstance(item, Iterable): + return list(self.redact(subval) for subval in item) + else: + return item + except Exception as e: # pylint: disable=broad-except + log.warning( + "Unable to redact %r, please report this via . " + "Error was: %s: %s", + item, + type(e).__name__, + str(e), + ) return item # pylint: enable=too-many-return-statements diff --git a/tests/utils/log/test_secrets_masker.py b/tests/utils/log/test_secrets_masker.py index 8c88bdd6f3091..24e86c1b297f6 100644 --- a/tests/utils/log/test_secrets_masker.py +++ b/tests/utils/log/test_secrets_masker.py @@ -72,6 +72,22 @@ def test_args(self, logger, caplog): assert caplog.text == "INFO Cannot connect to user:***\n" + def test_non_redactable(self, logger, caplog): + class NonReactable: + def __iter__(self): + raise RuntimeError("force fail") + + def __repr__(self): + return "" + + logger.info("Logging %s", NonReactable()) + + assert caplog.messages == [ + "Unable to redact , please report this via " + + ". Error was: RuntimeError: force fail", + "Logging ", + ] + def test_extra(self, logger, caplog): logger.handlers[0].formatter = ShortExcFormatter("%(levelname)s %(message)s %(conn)s") logger.info("Cannot connect", extra={'conn': "user:password"}) @@ -202,6 +218,14 @@ def test_redact(self, patterns, name, value, expected): assert filt.redact(value, name) == expected + def test_redact_filehandles(self, caplog): + filt = SecretsMasker() + with open("/dev/null", "w") as handle: + assert filt.redact(handle, None) == handle + + # We shouldn't have logged a warning here + assert caplog.messages == [] + class TestShouldHideValueForKey: @pytest.mark.parametrize(