From 24630743cecd3d0e9db7bee7e47381dbadf2d789 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bern=C3=A1t=20G=C3=A1bor?= Date: Wed, 7 Jul 2021 01:14:51 +0100 Subject: [PATCH] Avoid zip extract racing condition by using read+write instead extract (#5707) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract also creates the folder hierarchy; however, we do not need that: the file itself being extracted to a temporary folder is good enough. Instead we read the content of the zip and then write it. The write is not locked, but it's OK to update the same file multiple times given that the update operation will not alter the content of the file. By not creating the folder hierarchy (the default via extract) we can no longer run into the problem of two parallel extracts both trying to create the folder hierarchy without the exist_ok flag, in which case one of them must fail. Resolves #5223. Signed-off-by: Bernát Gábor --- requests/utils.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/requests/utils.py b/requests/utils.py index db67938e67..6705c9a300 100644 --- a/requests/utils.py +++ b/requests/utils.py @@ -256,13 +256,28 @@ def extract_zipped_paths(path): # we have a valid zip archive and a valid member of that archive tmp = tempfile.gettempdir() - extracted_path = os.path.join(tmp, *member.split('/')) + extracted_path = os.path.join(tmp, member.split('/')[-1]) if not os.path.exists(extracted_path): - extracted_path = zip_file.extract(member, path=tmp) - + # use read + write to avoid the creating nested folders, we only want the file, avoids mkdir racing condition + with atomic_open(extracted_path) as file_handler: + file_handler.write(zip_file.read(member)) return extracted_path +@contextlib.contextmanager +def atomic_open(filename): + """Write a file to the disk in an atomic fashion""" + replacer = os.rename if sys.version_info[0] == 2 else os.replace + tmp_descriptor, tmp_name = 
tempfile.mkstemp(dir=os.path.dirname(filename)) + try: + with os.fdopen(tmp_descriptor, 'wb') as tmp_handler: + yield tmp_handler + replacer(tmp_name, filename) + except BaseException: + os.remove(tmp_name) + raise + + def from_key_val_list(value): """Take an object and test to see if it can be represented as a dictionary. Unless it can not be represented as such, return an