Generate hashes for all available candidates #1723

Merged · 7 commits · Nov 19, 2022
50 changes: 31 additions & 19 deletions piptools/repositories/pypi.py
@@ -322,45 +322,61 @@ def get_hashes(self, ireq: InstallRequirement) -> set[str]:
log.debug(ireq.name)

with log.indentation():
hashes = self._get_hashes_from_pypi(ireq)
if hashes is None:
log.debug("Couldn't get hashes from PyPI, fallback to hashing files")
return self._get_hashes_from_files(ireq)
return self._get_req_hashes(ireq)

return hashes
def _get_req_hashes(self, ireq: InstallRequirement) -> set[str]:
"""
Collects the hashes for all candidates satisfying the given InstallRequirement. Computes
the hashes for the candidates that don't have one reported by their index.
"""
matching_candidates = self._get_matching_candidates(ireq)
pypi_hashes_by_link = self._get_hashes_from_pypi(ireq)
pypi_hashes = {
pypi_hashes_by_link[candidate.link.url]
for candidate in matching_candidates
if candidate.link.url in pypi_hashes_by_link
}
local_hashes = {
self._get_file_hash(candidate.link)
for candidate in matching_candidates
if candidate.link.url not in pypi_hashes_by_link
}
return pypi_hashes | local_hashes

def _get_hashes_from_pypi(self, ireq: InstallRequirement) -> set[str] | None:
def _get_hashes_from_pypi(self, ireq: InstallRequirement) -> dict[str, str]:
"""
Return a set of hashes from PyPI JSON API for a given InstallRequirement.
Return None if fetching data is failed or missing digests.
Builds a mapping from the release URLs to their hashes as reported by the PyPI JSON API
for a given InstallRequirement.
"""
project = self._get_project(ireq)
if project is None:
return None
return {}

_, version, _ = as_tuple(ireq)

try:
release_files = project["releases"][version]
except KeyError:
log.debug("Missing release files on PyPI")
return None
return {}

try:
hashes = {
f"{FAVORITE_HASH}:{file_['digests'][FAVORITE_HASH]}"
file_["url"]: f"{FAVORITE_HASH}:{file_['digests'][FAVORITE_HASH]}"
for file_ in release_files
if file_["packagetype"] in self.HASHABLE_PACKAGE_TYPES
}
except KeyError:
log.debug("Missing digests of release files on PyPI")
return None
return {}

return hashes

def _get_hashes_from_files(self, ireq: InstallRequirement) -> set[str]:
def _get_matching_candidates(
self, ireq: InstallRequirement
) -> set[InstallationCandidate]:
"""
Return a set of hashes for all release files of a given InstallRequirement.
Returns all candidates that satisfy the given InstallRequirement.
"""
# We need to get all of the candidates that match our current version
# pin, these will represent all of the files that could possibly
@@ -370,11 +386,7 @@ def _get_hashes_from_files(self, ireq: InstallRequirement) -> set[str]:
matching_versions = list(
ireq.specifier.filter(candidate.version for candidate in all_candidates)
)
matching_candidates = candidates_by_version[matching_versions[0]]

return {
self._get_file_hash(candidate.link) for candidate in matching_candidates
}
return candidates_by_version[matching_versions[0]]

def _get_file_hash(self, link: Link) -> str:
log.debug(f"Hashing {link.show_url}")
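The net effect of the pypi.py changes above: get_hashes() now collects every candidate that matches the pinned version, reuses the digest the PyPI JSON API reports for a candidate's URL when one exists, and hashes the file locally otherwise. A minimal standalone sketch of that merge rule follows; merge_candidate_hashes and hash_local_file are hypothetical names used for illustration, not the actual PyPIRepository methods.

from __future__ import annotations

def merge_candidate_hashes(
    candidate_urls: list[str],
    pypi_hashes_by_url: dict[str, str],
    hash_local_file,  # stand-in for PyPIRepository._get_file_hash
) -> set[str]:
    # Keep the hash the index reported for a URL; compute the rest locally.
    reported = {
        pypi_hashes_by_url[url]
        for url in candidate_urls
        if url in pypi_hashes_by_url
    }
    computed = {
        hash_local_file(url)
        for url in candidate_urls
        if url not in pypi_hashes_by_url
    }
    return reported | computed
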
60 changes: 60 additions & 0 deletions tests/test_cli_compile.py
@@ -1,5 +1,6 @@
from __future__ import annotations

import hashlib
import os
import shutil
import subprocess
@@ -8,6 +9,7 @@
from unittest import mock

import pytest
from pip._internal.utils.hashes import FAVORITE_HASH
from pip._internal.utils.urls import path_to_url

from piptools.scripts.compile import cli
@@ -1212,6 +1214,64 @@ def test_generate_hashes_with_line_style_annotations(runner):
)


@pytest.mark.network
def test_generate_hashes_with_mixed_sources(
runner, make_package, make_wheel, make_sdist, tmp_path
):
"""
Test that pip-compile generates hashes for every file from all given sources:
PyPI and/or --find-links.
"""

wheels_dir = tmp_path / "wheels"
wheels_dir.mkdir()

dummy_six_pkg = make_package(name="six", version="1.16.0")
make_wheel(dummy_six_pkg, wheels_dir, "--build-number", "123")

fav_hasher = hashlib.new(FAVORITE_HASH)
fav_hasher.update((wheels_dir / "six-1.16.0-123-py3-none-any.whl").read_bytes())
dummy_six_wheel_digest = fav_hasher.hexdigest()

with open("requirements.in", "w") as fp:
fp.write("six==1.16.0\n")

out = runner.invoke(
cli,
[
"--output-file",
"-",
"--quiet",
"--no-header",
"--generate-hashes",
"--no-emit-options",
"--no-annotate",
"--find-links",
wheels_dir.as_uri(),
],
)

expected_digests = sorted(
(
# sdist hash for six-1.16.0.tar.gz from PyPI
"1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926",
# wheel hash for six-1.16.0-py2.py3-none-any.whl from PyPI
"8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254",
# wheel hash for local six-1.16.0-123-py3-none-any.whl file
dummy_six_wheel_digest,
)
)
expected_output = dedent(
f"""\
six==1.16.0 \\
--hash=sha256:{expected_digests[0]} \\
--hash=sha256:{expected_digests[1]} \\
--hash=sha256:{expected_digests[2]}
"""
)
assert out.stdout == expected_output


def test_filter_pip_markers(pip_conf, runner):
"""
Check that pip-compile works with pip environment markers (PEP496)
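To sanity-check the expected_digests used in test_generate_hashes_with_mixed_sources above, a digest for a local archive can be reproduced with hashlib alone. A small sketch follows; the helper name and example path are illustrative only.

import hashlib

def hash_option_for(path: str, algorithm: str = "sha256") -> str:
    # Format a local file's digest the way --generate-hashes emits it.
    with open(path, "rb") as fh:
        digest = hashlib.new(algorithm, fh.read()).hexdigest()
    return f"--hash={algorithm}:{digest}"

# e.g. hash_option_for("wheels/six-1.16.0-123-py3-none-any.whl") should match
# the dummy_six_wheel_digest entry in the compiled output above.
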
103 changes: 92 additions & 11 deletions tests/test_repository_pypi.py
@@ -4,6 +4,7 @@
from unittest import mock

import pytest
from pip._internal.models.candidate import InstallationCandidate
from pip._internal.models.link import Link
from pip._internal.utils.urls import path_to_url
from pip._vendor.requests import HTTPError, Session
@@ -177,11 +178,12 @@ def test_pip_cache_dir_is_empty(from_line, tmpdir):
{
"packagetype": "bdist_wheel",
"digests": {"sha256": "fake-hash"},
"url": "https://link",
}
]
}
},
{"sha256:fake-hash"},
{"https://link": "sha256:fake-hash"},
id="return single hash",
),
pytest.param(
@@ -191,15 +193,20 @@ def test_pip_cache_dir_is_empty(from_line, tmpdir):
{
"packagetype": "bdist_wheel",
"digests": {"sha256": "fake-hash-number1"},
"url": "https://link1",
},
{
"packagetype": "sdist",
"digests": {"sha256": "fake-hash-number2"},
"url": "https://link2",
},
]
}
},
{"sha256:fake-hash-number1", "sha256:fake-hash-number2"},
{
"https://link1": "sha256:fake-hash-number1",
"https://link2": "sha256:fake-hash-number2",
},
id="return multiple hashes",
),
pytest.param(
@@ -209,39 +216,55 @@ def test_pip_cache_dir_is_empty(from_line, tmpdir):
{
"packagetype": "bdist_wheel",
"digests": {"sha256": "fake-hash-number1"},
"url": "https://link1",
},
{
"packagetype": "sdist",
"digests": {"sha256": "fake-hash-number2"},
"url": "https://link2",
},
{
"packagetype": "bdist_eggs",
"digests": {"sha256": "fake-hash-number3"},
"url": "https://link3",
},
]
}
},
{"sha256:fake-hash-number1", "sha256:fake-hash-number2"},
{
"https://link1": "sha256:fake-hash-number1",
"https://link2": "sha256:fake-hash-number2",
},
id="return only bdist_wheel and sdist hashes",
),
pytest.param(None, None, id="not found project data"),
pytest.param({}, None, id="not found releases key"),
pytest.param({"releases": {}}, None, id="not found version"),
pytest.param({"releases": {"0.1": [{}]}}, None, id="not found digests"),
pytest.param(None, {}, id="not found project data"),
pytest.param({}, {}, id="not found releases key"),
pytest.param({"releases": {}}, {}, id="not found version"),
pytest.param({"releases": {"0.1": [{}]}}, {}, id="not found digests"),
pytest.param(
{"releases": {"0.1": [{"packagetype": "bdist_wheel", "digests": {}}]}},
None,
{
"releases": {
"0.1": [
{"packagetype": "bdist_wheel", "digests": {}, "url": "link"}
]
}
},
{},
id="digests are empty",
),
pytest.param(
{
"releases": {
"0.1": [
{"packagetype": "bdist_wheel", "digests": {"md5": "fake-hash"}}
{
"packagetype": "bdist_wheel",
"digests": {"md5": "fake-hash"},
"url": "https://link",
}
]
}
},
None,
{},
id="not found sha256 algo",
),
),
@@ -264,6 +287,64 @@ def _get_project(self, ireq):
assert actual_hashes == expected_hashes


def test_get_hashes_from_mixed(pip_conf, from_line, tmpdir):
"""
Test PyPIRepository.get_hashes() returns hashes from both PyPI and extra indexes/links
"""

package_name = "small-fake-multi-arch"
package_version = "0.1"

# One candidate from PyPI and the rest from find-links / extra indexes
extra_index_link1 = Link("https://extra-index-link1")
extra_index_link2 = Link("https://extra-index-link2")
pypi_link = Link("https://pypi-link")

all_candidates = [
InstallationCandidate(package_name, package_version, extra_index_link1),
InstallationCandidate(package_name, package_version, extra_index_link2),
InstallationCandidate(package_name, package_version, pypi_link),
]

# Pre-computed hashes for the extra-index candidates so we don't spend time computing them
file_hashes = {
extra_index_link1: "sha256:hash-link1",
extra_index_link2: "sha256:hash-link2",
}
pypi_hash = "pypi-hash"

class MockPyPIRepository(PyPIRepository):
def _get_project(self, ireq):
return {
"releases": {
package_version: [
{
"packagetype": "bdist_wheel",
"digests": {"sha256": pypi_hash},
"url": str(pypi_link),
},
]
}
}

def find_all_candidates(self, req_name):
return all_candidates

def _get_file_hash(self, link):
return file_hashes[link]

pypi_repository = MockPyPIRepository(
["--no-cache-dir"], cache_dir=(tmpdir / "pypi-repo-cache")
)

ireq = from_line(f"{package_name}=={package_version}")

expected_hashes = {"sha256:" + pypi_hash} | set(file_hashes.values())

actual_hashes = pypi_repository.get_hashes(ireq)
assert actual_hashes == expected_hashes


def test_get_project__returns_data(from_line, tmpdir, monkeypatch, pypi_repository):
"""
Test PyPIRepository._get_project() returns expected project data.