From 4c37aeab97fb1f40a20b912402d2747cd5fc1d5a Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Wed, 16 Jun 2021 11:29:45 +0200 Subject: [PATCH] Switch to built-in data structures in SecretsMasker (#16424) Using Iterable in SecretsMasker might cause undesireable side effect in case the object passed as log parameter is an iterable object and actually iterating it is not idempotent. For example in case of botocore, it passes StreamingBody object to log and this object is Iterable. However it can be iterated only once. Masking causes the object to be iterated during logging and results in empty body when actual results are retrieved later. This change only iterates list type of objects and recurrently redacts only dicts/strs/tuples/sets/lists which should never produce any side effects as all those objects do not have side effects when they are accessed. Fixes: #16148 (cherry picked from commit d1d02b62e3436dedfe9a2b80cd1e61954639ca4d) --- airflow/utils/log/secrets_masker.py | 8 +++----- tests/utils/log/test_secrets_masker.py | 16 ---------------- 2 files changed, 3 insertions(+), 21 deletions(-) diff --git a/airflow/utils/log/secrets_masker.py b/airflow/utils/log/secrets_masker.py index 3177c58d14525..2fd0d0aaf97ad 100644 --- a/airflow/utils/log/secrets_masker.py +++ b/airflow/utils/log/secrets_masker.py @@ -16,7 +16,6 @@ # under the License. """Mask sensitive information from logs""" import collections -import io import logging import re from typing import TYPE_CHECKING, Iterable, Optional, Set, TypeVar, Union @@ -178,7 +177,7 @@ def _redact_all(self, item: "RedactableItem") -> "RedactableItem": elif isinstance(item, (tuple, set)): # Turn set in to tuple! return tuple(self._redact_all(subval) for subval in item) - elif isinstance(item, Iterable): + elif isinstance(item, list): return list(self._redact_all(subval) for subval in item) else: return item @@ -209,12 +208,11 @@ def redact(self, item: "RedactableItem", name: str = None) -> "RedactableItem": elif isinstance(item, (tuple, set)): # Turn set in to tuple! return tuple(self.redact(subval) for subval in item) - elif isinstance(item, io.IOBase): - return item - elif isinstance(item, Iterable): + elif isinstance(item, list): return list(self.redact(subval) for subval in item) else: return item + # I think this should never happen, but it does not hurt to leave it just in case except Exception as e: # pylint: disable=broad-except log.warning( "Unable to redact %r, please report this via . " diff --git a/tests/utils/log/test_secrets_masker.py b/tests/utils/log/test_secrets_masker.py index 24e86c1b297f6..5d3b40404d9fd 100644 --- a/tests/utils/log/test_secrets_masker.py +++ b/tests/utils/log/test_secrets_masker.py @@ -72,22 +72,6 @@ def test_args(self, logger, caplog): assert caplog.text == "INFO Cannot connect to user:***\n" - def test_non_redactable(self, logger, caplog): - class NonReactable: - def __iter__(self): - raise RuntimeError("force fail") - - def __repr__(self): - return "" - - logger.info("Logging %s", NonReactable()) - - assert caplog.messages == [ - "Unable to redact , please report this via " - + ". Error was: RuntimeError: force fail", - "Logging ", - ] - def test_extra(self, logger, caplog): logger.handlers[0].formatter = ShortExcFormatter("%(levelname)s %(message)s %(conn)s") logger.info("Cannot connect", extra={'conn': "user:password"})