Skip to content

Commit

Permalink
feat: new cache methods: hardlink and symlink_individual (#2425)
Browse files Browse the repository at this point in the history
Signed-off-by: Frost Ming <me@frostming.com>
  • Loading branch information
frostming committed Dec 1, 2023
1 parent 48d8082 commit ba703d1
Show file tree
Hide file tree
Showing 8 changed files with 180 additions and 74 deletions.
86 changes: 43 additions & 43 deletions docs/docs/reference/configuration.md

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions docs/docs/usage/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,14 @@ It can be enabled on a per-project basis by adding the `--local` option to the c

The caches are located in `$(pdm config cache_dir)/packages`. You can view the cache usage with `pdm cache info`. Note that the cached installs are managed automatically -- they will be deleted if they are not linked to any projects. Manually deleting the caches from disk may break some projects on the system.

In addition, several different ways of linking to cache entries are supported:

- `symlink` (default), create symlinks to the package directories, or to their children if the parent is a namespace package.
- `symlink_individual`, create a symlink to each individual file in the package directory.
- `hardlink`, create hard links to the package files of the cache entry.

You can switch between them by running `pdm config [-l] install.cache_method <method>`.

!!! note
Only the installation of _named requirements_ resolved from PyPI can be cached.

Expand Down
1 change: 1 addition & 0 deletions news/2425.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
New cache methods: `symlink_individual` for creating a symlink for each individual package file and `hardlink` for creating hardlinks.
4 changes: 3 additions & 1 deletion src/pdm/cli/commands/init.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,12 +109,14 @@ def ask_project(self, project: Project) -> str:
def get_metadata_from_input(self, project: Project, options: argparse.Namespace) -> dict[str, Any]:
from pdm.formats.base import array_of_inline_tables, make_array, make_inline_table

name = self.ask_project(project)
version = self.ask("Project version", "0.1.0")
is_library = options.lib or bool(options.backend)
if not is_library and self.interactive:
is_library = termui.confirm(
"Is the project a library that is installable?\n"
"If yes, we will need to ask a few more questions to include "
"the project name and build backend"
"the build backend"
)
build_backend: type[BuildBackend] | None = None
python = project.python
Expand Down
72 changes: 54 additions & 18 deletions src/pdm/installers/installers.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,19 @@
from pdm.exceptions import PDMWarning
from pdm.installers.packages import CachedPackage
from pdm.termui import logger
from pdm.utils import fs_supports_symlink

if TYPE_CHECKING:
from typing import Any, BinaryIO, Callable, Iterable
from typing import Any, BinaryIO, Callable, Iterable, Protocol

from installer.destinations import Scheme
from installer.sources import WheelContentElement

from pdm.environments import BaseEnvironment

class LinkMethod(Protocol):
    """Structural type of a link-creating callable.

    Both ``os.symlink`` and the hard-link wrapper built on ``os.link`` are
    passed around under this type.
    """

    def __call__(
        self,
        src: str | Path,
        dst: str | Path,
        target_is_directory: bool = False,
    ) -> None:
        """Create a link at ``dst`` that refers to ``src``."""


@lru_cache
def _is_python_package(root: str | Path) -> bool:
Expand Down Expand Up @@ -71,9 +74,11 @@ def _is_namespace_package(root: str) -> bool:
return not _namespace_package_lines.isdisjoint(init_py_lines)


def _create_symlinks_recursively(source: str, destination: str) -> Iterable[str]:
"""Create symlinks recursively from source to destination. In the following ways:
package <-- link
def _create_links_recursively(
source: str, destination: str, link_method: LinkMethod, link_individual: bool
) -> Iterable[str]:
"""Create links recursively from source to destination using ``link_method``.
package(if not individual) <-- link
__init__.py
namespace_package <-- mkdir
foo.py <-- link
Expand All @@ -89,7 +94,7 @@ def _create_symlinks_recursively(source: str, destination: str) -> Iterable[str]
destination_root = os.path.join(destination, relpath)
if is_top:
is_top = False
elif not _is_namespace_package(root):
elif not _is_namespace_package(root) and not link_individual:
# A package, create link for the parent dir and don't proceed
# for child directories
if os.path.exists(destination_root):
Expand All @@ -98,7 +103,7 @@ def _create_symlinks_recursively(source: str, destination: str) -> Iterable[str]
shutil.rmtree(destination_root)
else:
os.remove(destination_root)
os.symlink(root, destination_root, True)
link_method(root, destination_root, True)
yield relpath
dirs[:] = []
continue
Expand All @@ -113,7 +118,7 @@ def _create_symlinks_recursively(source: str, destination: str) -> Iterable[str]
destination_path = os.path.join(destination_root, f)
if os.path.exists(destination_path):
os.remove(destination_path)
os.symlink(source_path, destination_path, False)
link_method(source_path, destination_path, False)
yield os.path.join(relpath, f)


Expand Down Expand Up @@ -141,9 +146,18 @@ def get_contents(self) -> Iterator[WheelContentElement]:


class InstallDestination(SchemeDictionaryDestination):
def __init__(self, *args: Any, symlink_to: str | None = None, **kwargs: Any) -> None:
def __init__(
self,
*args: Any,
link_to: str | None = None,
link_method: LinkMethod | None = None,
link_individual: bool = False,
**kwargs: Any,
) -> None:
super().__init__(*args, **kwargs)
self.symlink_to = symlink_to
self.link_to = link_to
self.link_method = link_method
self.link_individual = link_individual

def write_to_fs(self, scheme: Scheme, path: str | Path, stream: BinaryIO, is_executable: bool) -> RecordEntry:
target_path = os.path.join(self.scheme_dict[scheme], path)
Expand All @@ -157,13 +171,17 @@ def finalize_installation(
record_file_path: str | Path,
records: Iterable[tuple[Scheme, RecordEntry]],
) -> None:
if self.symlink_to:
if self.link_method is not None:
# Create links (via the configured link method) to the cached location
def _symlink_files(symlink_to: str) -> Iterator[tuple[Scheme, RecordEntry]]:
for relpath in _create_symlinks_recursively(symlink_to, self.scheme_dict[scheme]):
def _link_files() -> Iterator[tuple[Scheme, RecordEntry]]:
assert self.link_method is not None
assert self.link_to is not None
for relpath in _create_links_recursively(
self.link_to, self.scheme_dict[scheme], self.link_method, self.link_individual
):
yield (scheme, RecordEntry(relpath.replace("\\", "/"), None, None))

records = itertools.chain(records, _symlink_files(self.symlink_to))
records = itertools.chain(records, _link_files())
return super().finalize_installation(scheme, record_file_path, records)


Expand All @@ -180,6 +198,20 @@ def install_wheel(wheel: str, environment: BaseEnvironment, direct_url: dict[str
return _install_wheel(wheel=wheel, destination=destination, additional_metadata=additional_metadata)


def _get_link_method_and_individual(cache_method: str) -> tuple[LinkMethod | None, bool]:
    """Pick the link callable for the configured ``install.cache_method``.

    Returns a ``(link_method, link_individual)`` pair:

    * ``(os.symlink, <"individual" in cache_method>)`` when the method name
      contains ``"symlink"`` and the filesystem supports symlinks;
    * ``(<hard-link wrapper>, True)`` when the method name contains ``"link"``
      and the filesystem supports ``os.link``. Because ``"symlink"`` also
      contains ``"link"``, symlink-based methods fall back to hard links when
      symlinks are unavailable;
    * ``(None, False)`` otherwise, i.e. no link method is usable.
    """
    from pdm import utils

    def _hardlink(src: str | Path, dst: str | Path, target_is_directory: bool = False) -> None:
        # ``target_is_directory`` only exists to match the os.symlink signature;
        # it is ignored here.
        os.link(src, dst)

    wants_symlink = "symlink" in cache_method
    if wants_symlink and utils.fs_supports_link_method("symlink"):
        per_file = "individual" in cache_method
        return os.symlink, per_file

    wants_any_link = "link" in cache_method
    if wants_any_link and utils.fs_supports_link_method("link"):
        # Hard links are always made per file
        # (presumably because directories cannot be hard-linked -- TODO confirm).
        return _hardlink, True

    return None, False


def install_wheel_with_cache(wheel: str, environment: BaseEnvironment, direct_url: dict[str, Any] | None = None) -> str:
"""Only create .pth files referring to the cached package.
If the cache doesn't exist, create one.
Expand All @@ -189,7 +221,9 @@ def install_wheel_with_cache(wheel: str, environment: BaseEnvironment, direct_ur
package_cache = CachedPackage(cache_path)
interpreter = str(environment.interpreter.executable)
script_kind = _get_kind(environment)
use_symlink = environment.project.config["install.cache_method"] == "symlink" and fs_supports_symlink()
# the cache_method can be any of "symlink", "hardlink", "symlink_individual" and "pth"
cache_method: str = environment.project.config["install.cache_method"]
link_method, link_individual = _get_link_method_and_individual(cache_method)
if not cache_path.is_dir():
logger.info("Installing wheel into cached location %s", cache_path)
cache_path.mkdir(exist_ok=True)
Expand All @@ -213,14 +247,14 @@ def skip_files(source: WheelFile, element: WheelContentElement) -> bool:
or path.split("/")[0].endswith(".dist-info")
# We need to skip the *-nspkg.pth files generated by setuptools'
# namespace_packages mechanism. See issue#623 for details
or not use_symlink
or link_method is None
and path.endswith(".pth")
and not path.endswith("-nspkg.pth")
)

additional_contents: list[WheelContentElement] = []
lib_path = package_cache.scheme()["purelib"]
if not use_symlink:
if link_method is None:
# HACK: Prefix with aaa_ to make it processed as early as possible
filename = "aaa_" + wheel_stem.split("-")[0] + ".pth"
# use site.addsitedir() rather than a plain path to properly process .pth files
Expand All @@ -231,7 +265,9 @@ def skip_files(source: WheelFile, element: WheelContentElement) -> bool:
scheme_dict=environment.get_paths(),
interpreter=interpreter,
script_kind=script_kind,
symlink_to=lib_path if use_symlink else None,
link_to=lib_path,
link_method=link_method,
link_individual=link_individual,
)

dist_info_dir = _install_wheel(
Expand Down
2 changes: 1 addition & 1 deletion src/pdm/project/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ class Config(MutableMapping[str, str]):
coerce=ensure_boolean,
),
"install.cache_method": ConfigItem(
"`symlink` or `pth` to create links to the cached installation",
"Specify how to create links to the caches(`symlink/symlink_individual/hardlink/pth`)",
"symlink",
),
"python.providers": ConfigItem(
Expand Down
13 changes: 7 additions & 6 deletions src/pdm/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,15 +395,16 @@ def is_url(url: str) -> bool:


@functools.lru_cache
def fs_supports_symlink() -> bool:
if not hasattr(os, "symlink"):
def fs_supports_link_method(method: str) -> bool:
if not hasattr(os, method):
return False
if sys.platform == "win32":
with tempfile.NamedTemporaryFile(prefix="TmP") as tmp_file:
temp_dir = os.path.dirname(tmp_file.name)
dest = os.path.join(temp_dir, "{}-{}".format(tmp_file.name, "b"))
with tempfile.TemporaryDirectory(prefix="TmP") as temp_dir:
with open(src := os.path.join(temp_dir, "a"), "w") as tmp_file:
tmp_file.write("foo")
dest = f"{src}-link"
try:
os.symlink(tmp_file.name, dest)
getattr(os, method)(src, dest)
return True
except (OSError, NotImplementedError):
return False
Expand Down
68 changes: 63 additions & 5 deletions tests/test_installer.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,36 @@
from __future__ import annotations

import logging
import os
from typing import Callable

import pytest
from unearth import Link

from pdm import utils
from pdm.installers import InstallManager
from pdm.models.candidates import Candidate
from pdm.models.requirements import parse_requirement
from pdm.utils import fs_supports_symlink
from tests import FIXTURES

pytestmark = pytest.mark.usefixtures("local_finder")


@pytest.fixture()
def supports_link(preferred: str | None, monkeypatch: pytest.MonkeyPatch) -> Callable[[str], bool]:
    """Patch ``utils.fs_supports_link_method`` according to ``preferred``.

    * ``preferred is None``: every link method is reported as unsupported.
    * ``preferred == "hardlink"``: ``"symlink"`` is reported as unsupported;
      other methods defer to the real check.
    * anything else: defer to the real check.

    The patched checker is also returned so tests can query it directly.
    """
    real_check = utils.fs_supports_link_method

    def fake_check(linker: str) -> bool:
        symlink_disabled = preferred == "hardlink" and linker == "symlink"
        if preferred is None or symlink_disabled:
            return False
        return real_check(linker)

    monkeypatch.setattr(utils, "fs_supports_link_method", fake_check)
    return fake_check


def test_install_wheel_with_inconsistent_dist_info(project):
req = parse_requirement("pyfunctional")
candidate = Candidate(
Expand Down Expand Up @@ -97,8 +115,8 @@ def test_uninstall_with_console_scripts(project, use_install_cache):
assert not os.path.exists(celery_script)


def test_install_wheel_with_cache(project, pdm):
supports_symlink = fs_supports_symlink()
@pytest.mark.parametrize("preferred", ["symlink", "hardlink", None])
def test_install_wheel_with_cache(project, pdm, supports_link):
req = parse_requirement("future-fstrings")
candidate = Candidate(
req,
Expand All @@ -108,9 +126,12 @@ def test_install_wheel_with_cache(project, pdm):
installer.install(candidate)

lib_path = project.environment.get_paths()["purelib"]
if supports_symlink:
if supports_link("symlink"):
assert os.path.islink(os.path.join(lib_path, "future_fstrings.py"))
assert os.path.islink(os.path.join(lib_path, "aaaaa_future_fstrings.pth"))
elif supports_link("link"):
assert os.path.isfile(os.path.join(lib_path, "future_fstrings.py"))
assert os.path.isfile(os.path.join(lib_path, "aaaaa_future_fstrings.pth"))
else:
assert os.path.isfile(os.path.join(lib_path, "aaa_future_fstrings.pth"))
assert os.path.isfile(os.path.join(lib_path, "aaaaa_future_fstrings.pth"))
Expand All @@ -123,7 +144,7 @@ def test_install_wheel_with_cache(project, pdm):

dist = project.environment.get_working_set()["future-fstrings"]
installer.uninstall(dist)
if supports_symlink:
if supports_link("symlink") or supports_link("link"):
assert not os.path.exists(os.path.join(lib_path, "future_fstrings.py"))
assert not os.path.exists(os.path.join(lib_path, "aaaaa_future_fstrings.pth"))
else:
Expand Down Expand Up @@ -183,3 +204,40 @@ def test_compress_file_list_for_rename():
}
abs_paths = {os.path.join(project_root, path) for path in paths}
assert sorted(compress_for_rename(abs_paths)) == [os.path.join(project_root, "test-removal" + os.sep)]


@pytest.mark.parametrize("preferred", ["symlink", "hardlink"])
def test_install_cache_namespace_package(project, supports_link):
    """A cached install keeps the namespace dir real and links what is below it.

    ``pdm`` (the namespace package) must be a real directory; ``pdm/backend``
    is either a symlink (symlink support) or a directory holding the linked
    ``__init__.py`` file (hard-link fallback).
    """
    can_symlink = supports_link("symlink")
    if not can_symlink and not supports_link("link"):
        pytest.skip("This test requires symlink or hardlink support")

    candidate = Candidate(
        parse_requirement("pdm-backend"),
        link=Link("http://fixtures.test/artifacts/pdm_backend-2.1.4-py3-none-any.whl"),
    )
    InstallManager(project.environment, use_install_cache=True).install(candidate)

    purelib = project.environment.get_paths()["purelib"]
    namespace_dir = os.path.join(purelib, "pdm")
    assert os.path.isdir(namespace_dir)
    assert not os.path.islink(namespace_dir)

    backend_dir = os.path.join(namespace_dir, "backend")
    assert os.path.isdir(backend_dir)
    if can_symlink:
        assert os.path.islink(backend_dir)
    else:
        assert os.path.isfile(os.path.join(backend_dir, "__init__.py"))


@pytest.mark.skipif(not utils.fs_supports_link_method("symlink"), reason="This test requires symlink support")
def test_install_cache_symlink_individual(project):
    """With ``symlink_individual``, directories are real and files are symlinks."""
    project.project_config["install.cache_method"] = "symlink_individual"
    candidate = Candidate(
        parse_requirement("pdm-backend"),
        link=Link("http://fixtures.test/artifacts/pdm_backend-2.1.4-py3-none-any.whl"),
    )
    InstallManager(project.environment, use_install_cache=True).install(candidate)

    purelib = project.environment.get_paths()["purelib"]
    # Every package directory must exist as a real directory entry, not a link.
    for package_dir in ("pdm", "pdm/backend"):
        installed_dir = os.path.join(purelib, package_dir)
        assert os.path.exists(installed_dir)
        assert not os.path.islink(installed_dir)
    # ...while the files inside are individually symlinked.
    assert os.path.islink(os.path.join(purelib, "pdm/backend/__init__.py"))

0 comments on commit ba703d1

Please sign in to comment.