From dc1d3e880aa2f0779e8ad18de6afc5543f5a3947 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Saugat=20Pachhai=20=28=E0=A4=B8=E0=A5=8C=E0=A4=97=E0=A4=BE?= =?UTF-8?q?=E0=A4=A4=29?= Date: Fri, 13 May 2022 10:20:14 +0545 Subject: [PATCH] dvc.objects.cache: set type in a more compatible way It turns out that diskcache.Disk saves passed arguments to the database and uses that all the time. So this means that we are not forward compatible as it breaks moving from newer version of DVC to an older one. We don't promise this, but this breaks our benchmarks which are probably not isolated enough. Regarding the use of cache, it is a bit iffy to extend it this way. We only need this type for error messages. We could consider removing this, but since the error message is documented and it's not too hard to extend this :), I decided to preserve the message. --- dvc/data/db/index.py | 4 +--- dvc/objects/cache.py | 10 +++++++--- tests/unit/objects/test_cache.py | 2 +- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/dvc/data/db/index.py b/dvc/data/db/index.py index 60c095c8cd..21bb3b12fb 100644 --- a/dvc/data/db/index.py +++ b/dvc/data/db/index.py @@ -95,9 +95,7 @@ def __init__( self.index_dir = os.path.join(tmp_dir, self.INDEX_DIR, name) makedirs(self.index_dir, exist_ok=True) self.fs = LocalFileSystem() - cache = Cache( - self.index_dir, eviction_policy="none", disk_type="index" - ) + cache = Cache(self.index_dir, eviction_policy="none", type="index") self.index = Index.fromcache(cache) def __iter__(self): diff --git a/dvc/objects/cache.py b/dvc/objects/cache.py index 0aa92d2c47..5f61535ca8 100644 --- a/dvc/objects/cache.py +++ b/dvc/objects/cache.py @@ -35,9 +35,8 @@ def wrapped(self, *args, **kwargs): class Disk(disk): """Reraise pickle-related errors as DiskError.""" - def __init__(self, *args, type=None, **kwargs): - super().__init__(*args, **kwargs) - self._type = type or os.path.basename(self._directory) + # we need type to differentiate cache for better error messages + _type: str put = translate_pickle_error(disk.put) get = translate_pickle_error(disk.get) @@ -53,9 +52,14 @@ def __init__( directory: str = None, timeout: int = 60, disk: disk = Disk, # pylint: disable=redefined-outer-name + type: str = None, **settings: Any, ) -> None: settings.setdefault("disk_pickle_protocol", 4) super().__init__( directory=directory, timeout=timeout, disk=disk, **settings ) + self.disk._type = self._type = type or os.path.basename(self.directory) + + def __getstate__(self): + return (*super().__getstate__(), self._type) diff --git a/tests/unit/objects/test_cache.py b/tests/unit/objects/test_cache.py index 39d59d65c5..5bcb0bd7cc 100644 --- a/tests/unit/objects/test_cache.py +++ b/tests/unit/objects/test_cache.py @@ -17,7 +17,7 @@ def test_pickle_protocol_error(tmp_dir, disk_type): cache = Cache( directory, disk_pickle_protocol=pickle.HIGHEST_PROTOCOL + 1, - disk_type=disk_type, + type=disk_type, ) with pytest.raises(DiskError) as exc, cache as cache: set_value(cache, "key", ("value1", "value2"))