Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

deps: bump dvc-data to 0.0.6; fix imports #7895

Merged
merged 1 commit into from Jun 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion dvc/cli/__init__.py
Expand Up @@ -107,7 +107,7 @@ def _log_exceptions(exc: Exception) -> Optional[int]:
)
return 251

from dvc_objects.hashfile.cache import DiskError
from dvc_data.hashfile.cache import DiskError

if isinstance(exc, DiskError):
from dvc.utils import relpath
Expand Down
4 changes: 2 additions & 2 deletions dvc/commands/dag.py
Expand Up @@ -70,9 +70,9 @@ def _collect_targets(repo, target, outs):
targets.extend([str(out) for out in stage.outs])
continue

for out in outs_trie.itervalues(
for out in outs_trie.itervalues( # noqa: B301
prefix=repo.fs.path.parts(path)
): # noqa: B301
):
targets.extend(str(out))

return targets
Expand Down
16 changes: 8 additions & 8 deletions dvc/data_cloud.py
Expand Up @@ -6,9 +6,9 @@
from dvc_data.db import get_index

if TYPE_CHECKING:
from dvc_data.hashfile.db import HashFileDB
Copy link
Member Author

@skshetry skshetry Jun 14, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed the ObjectDB type to HashFileDB here, as this code requires odb.verify. I didn't change it elsewhere because it seems we'll have a few more iterations on this, and it's better to use the stricter version (i.e. ObjectDB) where possible.

from dvc_data.hashfile.hash_info import HashInfo
from dvc_data.status import CompareStatusResult
from dvc_objects.db import ObjectDB
from dvc_objects.hashfile.hash_info import HashInfo

logger = logging.getLogger(__name__)

Expand All @@ -31,7 +31,7 @@ def get_remote_odb(
self,
name: Optional[str] = None,
command: str = "<command>",
) -> "ObjectDB":
) -> "HashFileDB":
from dvc.config import NoRemoteError

if not name:
Expand Down Expand Up @@ -76,8 +76,8 @@ def _log_missing(self, status: "CompareStatusResult"):

def transfer(
self,
src_odb: "ObjectDB",
dest_odb: "ObjectDB",
src_odb: "HashFileDB",
dest_odb: "HashFileDB",
objs: Iterable["HashInfo"],
**kwargs,
):
Expand All @@ -94,7 +94,7 @@ def push(
objs: Iterable["HashInfo"],
jobs: Optional[int] = None,
remote: Optional[str] = None,
odb: Optional["ObjectDB"] = None,
odb: Optional["HashFileDB"] = None,
):
"""Push data items in a cloud-agnostic way.

Expand All @@ -121,7 +121,7 @@ def pull(
objs: Iterable["HashInfo"],
jobs: Optional[int] = None,
remote: Optional[str] = None,
odb: Optional["ObjectDB"] = None,
odb: Optional["HashFileDB"] = None,
):
"""Pull data items in a cloud-agnostic way.

Expand Down Expand Up @@ -149,7 +149,7 @@ def status(
objs: Iterable["HashInfo"],
jobs: Optional[int] = None,
remote: Optional[str] = None,
odb: Optional["ObjectDB"] = None,
odb: Optional["HashFileDB"] = None,
log_missing: bool = True,
):
"""Check status of data items in a cloud-agnostic way.
Expand Down
2 changes: 1 addition & 1 deletion dvc/dependency/param.py
Expand Up @@ -9,7 +9,7 @@

from dvc.exceptions import DvcException
from dvc.utils.serialize import LOADERS, ParseError
from dvc_objects.hashfile.hash_info import HashInfo
from dvc_data.hashfile.hash_info import HashInfo

from .base import Dependency

Expand Down
4 changes: 2 additions & 2 deletions dvc/dependency/repo.py
Expand Up @@ -10,9 +10,9 @@
from .base import Dependency

if TYPE_CHECKING:
from dvc_data.hashfile.file import HashFile
from dvc_data.hashfile.hash_info import HashInfo
from dvc_objects.db import ObjectDB
from dvc_objects.hashfile.file import HashFile
from dvc_objects.hashfile.hash_info import HashInfo


class RepoDependency(Dependency):
Expand Down
4 changes: 2 additions & 2 deletions dvc/fs/data.py
Expand Up @@ -117,12 +117,12 @@ def isdvc(self, path, recursive=False, strict=True):
return bool(info.get("outs") if recurse else info.get("isout"))

def info(self, path, **kwargs):
from dvc_objects.hashfile.meta import Meta
from dvc_data.hashfile.meta import Meta

key = self._get_key(path)

try:
outs = list(self.repo.index.tree.iteritems(key))
outs = list(self.repo.index.tree.iteritems(key)) # noqa: B301
except KeyError as exc:
raise FileNotFoundError from exc

Expand Down
6 changes: 3 additions & 3 deletions dvc/output.py
Expand Up @@ -21,12 +21,12 @@
from dvc_data import check as ocheck
from dvc_data import load as oload
from dvc_data.checkout import checkout
from dvc_data.hashfile.hash_info import HashInfo
from dvc_data.hashfile.istextfile import istextfile
from dvc_data.hashfile.meta import Meta
from dvc_data.stage import stage as ostage
from dvc_data.transfer import transfer as otransfer
from dvc_objects.errors import ObjectFormatError
from dvc_objects.hashfile.hash_info import HashInfo
from dvc_objects.hashfile.istextfile import istextfile
from dvc_objects.hashfile.meta import Meta

from .fs import (
HDFSFileSystem,
Expand Down
18 changes: 3 additions & 15 deletions dvc/repo/__init__.py
Expand Up @@ -3,7 +3,7 @@
from collections import defaultdict
from contextlib import contextmanager
from functools import wraps
from typing import TYPE_CHECKING, Callable, Optional, Set
from typing import TYPE_CHECKING, Callable, Optional

from funcy import cached_property

Expand All @@ -18,7 +18,6 @@
from dvc.fs import FileSystem
from dvc.repo.scm_context import SCMContext
from dvc.scm import Base
from dvc_objects.hashfile.file import HashFile

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -165,7 +164,7 @@ def __init__(
from dvc.repo.stage import StageLoad
from dvc.scm import SCM
from dvc.stage.cache import StageCache
from dvc_objects.hashfile.state import State, StateNoop
from dvc_data.hashfile.state import State, StateNoop

self.url = url
self._fs_conf = {"repo_factory": repo_factory}
Expand Down Expand Up @@ -411,16 +410,7 @@ def used_objs(
"""
used = defaultdict(set)

def _add_suffix(objs: Set["HashFile"], suffix: str) -> None:
from itertools import chain

from dvc_data import iterobjs

for obj in chain.from_iterable(map(iterobjs, objs)):
if obj.name is not None:
obj.name += suffix

Comment on lines -414 to -422
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Gets rid of this functionality for setting the name. :)

for branch in self.brancher(
for _ in self.brancher(
revs=revs,
all_branches=all_branches,
all_tags=all_tags,
Expand All @@ -435,8 +425,6 @@ def _add_suffix(objs: Set["HashFile"], suffix: str) -> None:
recursive=recursive,
with_deps=with_deps,
).items():
if branch:
_add_suffix(objs, f" ({branch})")
used[odb].update(objs)

if used_run_cache:
Expand Down
2 changes: 1 addition & 1 deletion dvc/repo/index.py
Expand Up @@ -26,8 +26,8 @@
from dvc.stage import Stage
from dvc.types import StrPath, TargetType
from dvc_data import Tree
from dvc_data.hashfile.hash_info import HashInfo
from dvc_objects.db import ObjectDB
from dvc_objects.hashfile.hash_info import HashInfo


ObjectContainer = Dict[Optional["ObjectDB"], Set["HashInfo"]]
Expand Down
2 changes: 1 addition & 1 deletion dvc/schema.py
Expand Up @@ -7,7 +7,7 @@
from dvc.parsing import DO_KWD, FOREACH_KWD, VARS_KWD
from dvc.parsing.versions import SCHEMA_KWD, lockfile_version_schema
from dvc.stage.params import StageParams
from dvc_objects.hashfile.meta import Meta
from dvc_data.hashfile.meta import Meta

STAGES = "stages"
SINGLE_STAGE_SCHEMA = {
Expand Down
2 changes: 1 addition & 1 deletion dvc/stage/__init__.py
Expand Up @@ -36,8 +36,8 @@

if TYPE_CHECKING:
from dvc.dvcfile import DVCFile
from dvc_data.hashfile.hash_info import HashInfo
from dvc_objects.db.base import ObjectDB
from dvc_objects.hashfile.hash_info import HashInfo

logger = logging.getLogger(__name__)
# Disallow all punctuation characters except hyphen and underscore
Expand Down
2 changes: 1 addition & 1 deletion dvc/stage/cache.py
Expand Up @@ -27,7 +27,7 @@ class RunCacheNotSupported(DvcException):


def _get_cache_hash(cache, key=False):
from dvc_objects.hashfile.meta import Meta
from dvc_data.hashfile.meta import Meta

if key:
cache["outs"] = [out["path"] for out in cache.get("outs", [])]
Expand Down
4 changes: 2 additions & 2 deletions dvc/stage/loader.py
Expand Up @@ -8,8 +8,8 @@
from dvc import dependency, output
from dvc.parsing import FOREACH_KWD, JOIN, DataResolver, EntryNotFound
from dvc.parsing.versions import LOCKFILE_VERSION
from dvc_objects.hashfile.hash_info import HashInfo
from dvc_objects.hashfile.meta import Meta
from dvc_data.hashfile.hash_info import HashInfo
from dvc_data.hashfile.meta import Meta

from . import PipelineStage, Stage, loads_from
from .exceptions import StageNameUnspecified, StageNotFound
Expand Down
2 changes: 1 addition & 1 deletion dvc/stage/utils.py
Expand Up @@ -5,7 +5,7 @@
from funcy import concat, first, lsplit, rpartial, without

from dvc.exceptions import InvalidArgumentError
from dvc_objects.hashfile.meta import Meta
from dvc_data.hashfile.meta import Meta

from .exceptions import (
MissingDataSource,
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Expand Up @@ -69,7 +69,7 @@ install_requires =
scmrepo==0.0.24
dvc-render==0.0.6
dvclive>=0.7.3
dvc-data==0.0.5
dvc-data==0.0.6

[options.extras_require]
all =
Expand Down
9 changes: 4 additions & 5 deletions tests/func/test_add.py
Expand Up @@ -12,7 +12,6 @@

import dvc as dvc_module
import dvc_data
import dvc_objects
from dvc.cli import main
from dvc.dvcfile import DVC_FILE_SUFFIX
from dvc.exceptions import (
Expand All @@ -38,8 +37,8 @@
from dvc.utils import LARGE_DIR_SIZE, relpath
from dvc.utils.fs import path_isin
from dvc.utils.serialize import YAMLFileCorruptedError, load_yaml
from dvc_objects.hashfile.hash import file_md5
from dvc_objects.hashfile.hash_info import HashInfo
from dvc_data.hashfile.hash import file_md5
from dvc_data.hashfile.hash_info import HashInfo
from tests.basic_env import TestDvc
from tests.utils import get_gitignore_content

Expand Down Expand Up @@ -380,7 +379,7 @@ def test_dir(self):


def test_should_update_state_entry_for_file_after_add(mocker, dvc, tmp_dir):
file_md5_counter = mocker.spy(dvc_objects.hashfile.hash, "file_md5")
file_md5_counter = mocker.spy(dvc_data.hashfile.hash, "file_md5")
tmp_dir.gen("foo", "foo")

ret = main(["config", "cache.type", "copy"])
Expand Down Expand Up @@ -411,7 +410,7 @@ def test_should_update_state_entry_for_file_after_add(mocker, dvc, tmp_dir):
def test_should_update_state_entry_for_directory_after_add(
mocker, dvc, tmp_dir
):
file_md5_counter = mocker.spy(dvc_objects.hashfile.hash, "file_md5")
file_md5_counter = mocker.spy(dvc_data.hashfile.hash, "file_md5")

tmp_dir.gen({"data/data": "foo", "data/data_sub/sub_data": "foo"})

Expand Down
8 changes: 4 additions & 4 deletions tests/func/test_data_cloud.py
Expand Up @@ -5,7 +5,7 @@
import pytest
from flaky.flaky_decorator import flaky

import dvc_objects
import dvc_data
from dvc.cli import main
from dvc.external_repo import clean_repos
from dvc.stage.exceptions import StageNotFound
Expand All @@ -14,7 +14,7 @@
)
from dvc.utils.fs import remove
from dvc_data.db.local import LocalHashFileDB
from dvc_objects.hashfile.db import HashFileDB
from dvc_data.hashfile.db import HashFileDB


def test_cloud_cli(tmp_dir, dvc, remote, mocker):
Expand Down Expand Up @@ -148,7 +148,7 @@ def test_warn_on_outdated_stage(tmp_dir, dvc, local_remote, caplog):

def test_hash_recalculation(mocker, dvc, tmp_dir, local_remote):
tmp_dir.gen({"foo": "foo"})
test_file_md5 = mocker.spy(dvc_objects.hashfile.hash, "file_md5")
test_file_md5 = mocker.spy(dvc_data.hashfile.hash, "file_md5")
ret = main(["config", "cache.type", "hardlink"])
assert ret == 0
ret = main(["add", "foo"])
Expand Down Expand Up @@ -210,7 +210,7 @@ def test_verify_hashes(
remove("dir")
remove(dvc.odb.local.cache_dir)

hash_spy = mocker.spy(dvc_objects.hashfile.hash, "file_md5")
hash_spy = mocker.spy(dvc_data.hashfile.hash, "file_md5")

dvc.pull()
assert hash_spy.call_count == 0
Expand Down
2 changes: 1 addition & 1 deletion tests/func/test_ignore.py
Expand Up @@ -10,8 +10,8 @@
from dvc.repo import Repo
from dvc.testing.tmp_dir import TmpDir
from dvc.types import List
from dvc_data.hashfile.utils import get_mtime_and_size
from dvc_data.stage import IgnoreInCollectedDirError
from dvc_objects.hashfile.utils import get_mtime_and_size


def _to_pattern_info_list(str_list: List):
Expand Down
2 changes: 1 addition & 1 deletion tests/func/test_install.py
Expand Up @@ -6,7 +6,7 @@
from git import GitCommandError

from dvc.exceptions import DvcException
from dvc_objects.hashfile.hash import file_md5
from dvc_data.hashfile.hash import file_md5
from tests.func.parsing.test_errors import escape_ansi


Expand Down
2 changes: 1 addition & 1 deletion tests/func/test_lockfile.py
Expand Up @@ -10,7 +10,7 @@
from dvc.utils.fs import remove
from dvc.utils.serialize import dumps_yaml, parse_yaml_for_update
from dvc.utils.strictyaml import YAMLValidationError, make_relpath
from dvc_objects.hashfile.hash_info import HashInfo
from dvc_data.hashfile.hash_info import HashInfo
from tests.func.test_run_multistage import supported_params

FS_STRUCTURE = {
Expand Down
2 changes: 1 addition & 1 deletion tests/func/test_odb.py
Expand Up @@ -7,8 +7,8 @@
from dvc.cli import main
from dvc.odbmgr import ODBManager
from dvc.utils import relpath
from dvc_data.hashfile.hash_info import HashInfo
from dvc_objects.errors import ObjectFormatError
from dvc_objects.hashfile.hash_info import HashInfo


def test_cache(tmp_dir, dvc):
Expand Down
2 changes: 1 addition & 1 deletion tests/func/test_repo_index.py
Expand Up @@ -258,7 +258,7 @@ def get_index(dvc, rev):

@pytest.mark.parametrize("rev", ["workspace", "HEAD"])
def test_used_objs(tmp_dir, scm, dvc, run_copy, rev):
from dvc_objects.hashfile.hash_info import HashInfo
from dvc_data.hashfile.hash_info import HashInfo

dvc.config["core"]["autostage"] = True
tmp_dir.dvc_gen({"dir": {"subdir": {"file": "file"}}, "foo": "foo"})
Expand Down
4 changes: 2 additions & 2 deletions tests/func/test_repro.py
Expand Up @@ -21,7 +21,7 @@
from dvc.utils import relpath
from dvc.utils.fs import remove
from dvc.utils.serialize import dump_yaml, load_yaml
from dvc_objects.hashfile.hash import file_md5
from dvc_data.hashfile.hash import file_md5
from tests.basic_env import TestDvc


Expand Down Expand Up @@ -817,7 +817,7 @@ def test(self):
),
]

from dvc_objects.hashfile.state import StateNoop
from dvc_data.hashfile.state import StateNoop

self.dvc.state = StateNoop()

Expand Down
2 changes: 1 addition & 1 deletion tests/func/test_run_single_stage.py
Expand Up @@ -31,7 +31,7 @@
StagePathOutsideError,
)
from dvc.utils.serialize import load_yaml
from dvc_objects.hashfile.hash import file_md5
from dvc_data.hashfile.hash import file_md5
from tests.basic_env import TestDvc, TestDvcGit


Expand Down