From d85a28cf712d4f47594d043f6d3686a9ceb5a3d8 Mon Sep 17 00:00:00 2001 From: Pradyun Gedam Date: Sat, 24 Apr 2021 10:13:15 +0100 Subject: [PATCH] Don't split git references on unicode separators Previously, maliciously formatted tags could be used to hijack a commit-based pin. Using the fact that the split here allowed for all of Unicode's whitespace characters as separators -- which git allows as a part of a tag name -- it was possible to force a different revision to be installed if an attacker gained access to the repository. This change stops splitting the string on Unicode separators by forcing the splits to happen only on newlines and ASCII spaces. --- src/pip/_internal/vcs/git.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/pip/_internal/vcs/git.py b/src/pip/_internal/vcs/git.py index 9f24ccdf5ee..d80b638111d 100644 --- a/src/pip/_internal/vcs/git.py +++ b/src/pip/_internal/vcs/git.py @@ -131,9 +131,12 @@ def get_revision_sha(cls, dest, rev): on_returncode='ignore', ) refs = {} - for line in output.strip().splitlines(): + # NOTE: We do not use splitlines here since that would split on other + # unicode separators, which can be maliciously used to install a + # different revision. + for line in output.strip().split("\n"): try: - ref_sha, ref_name = line.split() + ref_sha, ref_name = line.rstrip("\r").split(" ", maxsplit=2) except ValueError: # Include the offending line to simplify troubleshooting if # this error ever occurs.