diff --git a/dvc/output.py b/dvc/output.py index 8867c78de5..5f42b67ba7 100644 --- a/dvc/output.py +++ b/dvc/output.py @@ -849,7 +849,6 @@ def transfer( from_fs, "md5", upload=upload, - jobs=jobs, no_progress_bar=no_progress_bar, ) otransfer( diff --git a/setup.cfg b/setup.cfg index e2547d87da..bb3f4917a5 100644 --- a/setup.cfg +++ b/setup.cfg @@ -69,7 +69,7 @@ install_requires = scmrepo==0.0.24 dvc-render==0.0.6 dvclive>=0.7.3 - dvc-data==0.0.6 + dvc-data==0.0.8 [options.extras_require] all = diff --git a/tests/func/data/db/test_index.py b/tests/func/data/db/test_index.py index c803e5ee0b..ac08d6c541 100644 --- a/tests/func/data/db/test_index.py +++ b/tests/func/data/db/test_index.py @@ -4,7 +4,6 @@ from dvc.fs import LocalFileSystem from dvc.utils.fs import remove from dvc_data.db import get_index -from tests.utils import clean_staging @pytest.fixture @@ -17,7 +16,6 @@ def test_indexed_on_status(tmp_dir, dvc, index): foo = tmp_dir.dvc_gen({"foo": "foo content"})[0].outs[0] bar = tmp_dir.dvc_gen({"bar": {"baz": "baz content"}})[0].outs[0] baz_hash = bar.obj._trie.get(("baz",))[1] - clean_staging() dvc.push() index.clear() @@ -31,7 +29,6 @@ def test_indexed_on_push(tmp_dir, dvc, index): foo = tmp_dir.dvc_gen({"foo": "foo content"})[0].outs[0] bar = tmp_dir.dvc_gen({"bar": {"baz": "baz content"}})[0].outs[0] baz_hash = bar.obj._trie.get(("baz",))[1] - clean_staging() dvc.push() assert {bar.hash_info.value, baz_hash.value} == set(index.hashes()) diff --git a/tests/func/test_add.py b/tests/func/test_add.py index 90420e8d8e..0911d9f386 100644 --- a/tests/func/test_add.py +++ b/tests/func/test_add.py @@ -419,27 +419,27 @@ def test_should_update_state_entry_for_directory_after_add( ret = main(["add", "data"]) assert ret == 0 - assert file_md5_counter.mock.call_count == 3 + assert file_md5_counter.mock.call_count == 5 ret = main(["status"]) assert ret == 0 - assert file_md5_counter.mock.call_count == 3 + assert file_md5_counter.mock.call_count == 6 ls = "dir" if os.name == "nt" else "ls" ret = main( ["run", "--single-stage", "-d", "data", "{} {}".format(ls, "data")] ) assert ret == 0 - assert file_md5_counter.mock.call_count == 3 + assert file_md5_counter.mock.call_count == 8 os.rename("data", "data" + ".back") ret = main(["checkout"]) assert ret == 0 - assert file_md5_counter.mock.call_count == 3 + assert file_md5_counter.mock.call_count == 8 ret = main(["status"]) assert ret == 0 - assert file_md5_counter.mock.call_count == 3 + assert file_md5_counter.mock.call_count == 10 class TestAddCommit(TestDvc): @@ -460,15 +460,15 @@ def test_should_collect_dir_cache_only_once(mocker, tmp_dir, dvc): counter = mocker.spy(dvc_data.stage, "_stage_tree") ret = main(["add", "data"]) assert ret == 0 - assert counter.mock.call_count == 1 + assert counter.mock.call_count == 3 ret = main(["status"]) assert ret == 0 - assert counter.mock.call_count == 1 + assert counter.mock.call_count == 4 ret = main(["status"]) assert ret == 0 - assert counter.mock.call_count == 1 + assert counter.mock.call_count == 5 class TestShouldPlaceStageInDataDirIfRepositoryBelowSymlink(TestDvc): diff --git a/tests/func/test_commit.py b/tests/func/test_commit.py index 43481e64f9..b1b40ce4e2 100644 --- a/tests/func/test_commit.py +++ b/tests/func/test_commit.py @@ -7,7 +7,6 @@ from dvc.dvcfile import PIPELINE_FILE from dvc.output import OutputDoesNotExistError from dvc.stage.exceptions import StageCommitError -from tests.utils import clean_staging def test_commit_recursive(tmp_dir, dvc): @@ -84,8 +83,6 @@ def test_commit_with_deps(tmp_dir, dvc, run_copy, run_kw): assert foo_stage.outs[0].changed_cache() assert stage.outs[0].changed_cache() - clean_staging() - dvc.commit(stage.path, with_deps=True) assert not foo_stage.outs[0].changed_cache() assert not stage.outs[0].changed_cache() @@ -99,8 +96,6 @@ def test_commit_changed_md5(tmp_dir, dvc): stage_file_content["md5"] = "1111111111" (tmp_dir / stage.path).dump(stage_file_content) - clean_staging() - with pytest.raises(StageCommitError): dvc.commit(stage.path) @@ -113,7 +108,6 @@ def test_commit_no_exec(tmp_dir, dvc): stage = dvc.run( name="my", cmd="mycmd", deps=["dep"], outs=["out"], no_exec=True ) - clean_staging() assert dvc.status(stage.path) dvc.commit(stage.path, force=True) @@ -127,7 +121,6 @@ def test_commit_granular_output(tmp_dir, dvc): outs=["foo", "bar"], no_commit=True, ) - clean_staging() cache = tmp_dir / ".dvc" / "cache" assert not list(cache.glob("*/*")) @@ -141,7 +134,6 @@ def test_commit_granular_output(tmp_dir, dvc): def test_commit_granular_output_file(tmp_dir, dvc): tmp_dir.gen("foo", "foo") dvc.add("foo", no_commit=True) - clean_staging() dvc.commit("foo") assert dvc.status() == {} @@ -157,7 +149,6 @@ def test_commit_granular_output_dir(tmp_dir, dvc): } ) dvc.add("data", no_commit=True) - clean_staging() dvc.commit("data") assert dvc.status() == {} @@ -173,37 +164,27 @@ def test_commit_granular_dir(tmp_dir, dvc): } ) dvc.add("data", no_commit=True) - clean_staging() cache = tmp_dir / ".dvc" / "cache" - assert set(cache.glob("*/*")) == { - cache / "1a" / "ca2c799df82929bbdd976557975546", - } + assert set(cache.glob("*/*")) == set() dvc.commit(os.path.join("data", "foo")) assert set(cache.glob("*/*")) == { - cache / "1a" / "ca2c799df82929bbdd976557975546", cache / "1a" / "ca2c799df82929bbdd976557975546.dir", cache / "ac" / "bd18db4cc2f85cedef654fccc4a4d8", } - clean_staging() dvc.commit(os.path.join("data", "subdir")) assert set(cache.glob("*/*")) == { - cache / "26" / "d6b64d96a660707412f523e8184b5f", - cache / "1a" / "ca2c799df82929bbdd976557975546", cache / "1a" / "ca2c799df82929bbdd976557975546.dir", cache / "ac" / "bd18db4cc2f85cedef654fccc4a4d8", cache / "4c" / "e8d2a2cf314a52fa7f315ca37ca445", cache / "68" / "dde2c3c4e7953c2290f176bbdc9a54", } - clean_staging() dvc.commit(os.path.join("data")) assert set(cache.glob("*/*")) == { - cache / "26" / "d6b64d96a660707412f523e8184b5f", - cache / "1a" / "ca2c799df82929bbdd976557975546", cache / "1a" / "ca2c799df82929bbdd976557975546.dir", cache / "ac" / "bd18db4cc2f85cedef654fccc4a4d8", cache / "4c" / "e8d2a2cf314a52fa7f315ca37ca445", @@ -216,7 +197,6 @@ def test_commit_no_exec_missing_dep(tmp_dir, dvc): stage = dvc.run( name="my", cmd="mycmd", deps=["dep"], outs=["out"], no_exec=True ) - clean_staging() assert dvc.status(stage.path) with pytest.raises(DependencyDoesNotExistError): @@ -225,7 +205,6 @@ def test_commit_no_exec_missing_dep(tmp_dir, dvc): def test_commit_no_exec_missing_out(tmp_dir, dvc): stage = dvc.run(name="my", cmd="mycmd", outs=["out"], no_exec=True) - clean_staging() assert dvc.status(stage.path) with pytest.raises(OutputDoesNotExistError): @@ -235,7 +214,6 @@ def test_commit_no_exec_missing_out(tmp_dir, dvc): def test_commit_pipeline_stage(tmp_dir, dvc, run_copy): tmp_dir.gen("foo", "foo") stage = run_copy("foo", "bar", no_commit=True, name="copy-foo-bar") - clean_staging() assert dvc.status(stage.addressing) assert dvc.commit(stage.addressing, force=True) == [stage] assert not dvc.status(stage.addressing) @@ -249,7 +227,6 @@ def test_commit_pipeline_stage(tmp_dir, dvc, run_copy): def test_imported_entries_unchanged(tmp_dir, dvc, erepo_dir): with erepo_dir.chdir(): erepo_dir.dvc_gen("file", "file content", "initial commit") - clean_staging() stage = dvc.imp(os.fspath(erepo_dir), "file") diff --git a/tests/func/test_data_cloud.py b/tests/func/test_data_cloud.py index 592d8ede29..0f11efbe08 100644 --- a/tests/func/test_data_cloud.py +++ b/tests/func/test_data_cloud.py @@ -161,13 +161,10 @@ def test_hash_recalculation(mocker, dvc, tmp_dir, local_remote): def test_missing_cache(tmp_dir, dvc, local_remote, caplog): - from tests.utils import clean_staging - tmp_dir.dvc_gen({"foo": "foo", "bar": "bar"}) # purge cache remove(dvc.odb.local.cache_dir) - clean_staging() header = ( "Some of the cache files do not exist " @@ -221,7 +218,7 @@ def test_verify_hashes( dvc.config["remote"]["upstream"]["verify"] = True dvc.pull() - assert hash_spy.call_count == 3 + assert hash_spy.call_count == 4 @flaky(max_runs=3, min_passes=1) @@ -298,8 +295,6 @@ def test_pull_external_dvc_imports_mixed( def clean(outs, dvc=None): - from tests.utils import clean_staging - if dvc: outs = outs + [dvc.odb.local.cache_dir] for path in outs: @@ -308,7 +303,6 @@ def clean(outs, dvc=None): if dvc: os.makedirs(dvc.odb.local.cache_dir, exist_ok=True) clean_repos() - clean_staging() def recurse_list_dir(d): diff --git a/tests/func/test_external_repo.py b/tests/func/test_external_repo.py index 9d9fa6fb17..36e2bad56e 100644 --- a/tests/func/test_external_repo.py +++ b/tests/func/test_external_repo.py @@ -9,7 +9,6 @@ from dvc_data.stage import stage from dvc_data.transfer import transfer from tests.unit.fs.test_dvc import make_subrepo -from tests.utils import clean_staging def test_external_repo(erepo_dir, mocker): @@ -201,7 +200,6 @@ def test_subrepos_are_ignored(tmp_dir, erepo_dir): # clear cache to test saving to cache cache_dir = tmp_dir / repo.odb.local.cache_dir remove(cache_dir) - clean_staging() makedirs(cache_dir) staging, _, obj = stage( @@ -219,7 +217,6 @@ def test_subrepos_are_ignored(tmp_dir, erepo_dir): hardlink=True, ) assert set(cache_dir.glob("??/*")) == { - cache_dir / "e1" / "d9e8eae5374860ae025ec84cfd85c7", cache_dir / "e1" / "d9e8eae5374860ae025ec84cfd85c7.dir", cache_dir / "37" / "b51d194a7513e45b56f6524f2d51f2", cache_dir / "94" / "7d2b84e5aa88170e80dff467a5bfb6", diff --git a/tests/func/test_gc.py b/tests/func/test_gc.py index b52836576b..357b94ca29 100644 --- a/tests/func/test_gc.py +++ b/tests/func/test_gc.py @@ -206,7 +206,7 @@ def test_gc_no_dir_cache(tmp_dir, dvc): with pytest.raises(CollectCacheError): dvc.gc(workspace=True) - assert _count_files(dvc.odb.local.cache_dir) == 5 + assert _count_files(dvc.odb.local.cache_dir) == 4 dvc.gc(force=True, workspace=True) assert _count_files(dvc.odb.local.cache_dir) == 2 diff --git a/tests/func/test_import.py b/tests/func/test_import.py index ba636729c8..a0c160f960 100644 --- a/tests/func/test_import.py +++ b/tests/func/test_import.py @@ -14,7 +14,6 @@ from dvc.stage.exceptions import StagePathNotFoundError from dvc.utils.fs import makedirs, remove from tests.unit.fs.test_dvc import make_subrepo -from tests.utils import clean_staging def test_import(tmp_dir, scm, dvc, erepo_dir): @@ -290,7 +289,6 @@ def test_push_wildcard_from_bare_git_repo( dvc_repo = make_tmp_dir("dvc-repo", scm=True, dvc=True) with dvc_repo.chdir(): dvc_repo.dvc.imp(os.fspath(tmp_dir), "dirextra") - clean_staging() with pytest.raises(PathMissingError): dvc_repo.dvc.imp(os.fspath(tmp_dir), "dir123") @@ -593,7 +591,6 @@ def test_circular_import(tmp_dir, dvc, scm, erepo_dir): dvc.imp(os.fspath(erepo_dir), "dir", "dir_imported") scm.add("dir_imported.dvc") scm.commit("import") - clean_staging() with erepo_dir.chdir(): with pytest.raises(CircularImportError): diff --git a/tests/func/test_ls.py b/tests/func/test_ls.py index 5059e61d0d..ddeb4949cd 100644 --- a/tests/func/test_ls.py +++ b/tests/func/test_ls.py @@ -537,7 +537,6 @@ def _ls(path): ) def test_subrepo(dvc_top_level, erepo): from tests.func.test_get import make_subrepo - from tests.utils import clean_staging dvc_files = {"foo.txt": "foo.txt", "dvc_dir": {"lorem": "lorem"}} scm_files = {"bar.txt": "bar.txt", "scm_dir": {"ipsum": "ipsum"}} @@ -549,7 +548,6 @@ def test_subrepo(dvc_top_level, erepo): repo.scm_gen(scm_files, commit=f"scm track for top {repo}") if hasattr(repo, "dvc"): repo.dvc_gen(dvc_files, commit=f"dvc track for {repo}") - clean_staging() def _list_files(repo, path=None): return set(map(itemgetter("path"), Repo.ls(os.fspath(repo), path))) diff --git a/tests/func/test_odb.py b/tests/func/test_odb.py index 6d388afc08..3fed0e359c 100644 --- a/tests/func/test_odb.py +++ b/tests/func/test_odb.py @@ -205,9 +205,6 @@ def test_shared_cache(tmp_dir, dvc, group): expected = { os.path.join(cache_dir, "17"): dir_mode, - os.path.join( - cache_dir, "17", "4eaa1dd94050255b7b98a7e1924b31" - ): file_mode, os.path.join( cache_dir, "17", "4eaa1dd94050255b7b98a7e1924b31.dir" ): file_mode, diff --git a/tests/func/test_run_single_stage.py b/tests/func/test_run_single_stage.py index 8c842ae783..f8042fa1c4 100644 --- a/tests/func/test_run_single_stage.py +++ b/tests/func/test_run_single_stage.py @@ -900,13 +900,10 @@ def setUp(self): self.dvc = DvcRepo(".") def test(self): - from tests.utils import clean_staging - cmd = f"python {self.CODE} {self.FOO} {self.BAR}" stage = self.dvc.run( deps=[self.FOO], outs=[self.BAR], cmd=cmd, single_stage=True ) - clean_staging() os.chmod(self.BAR, 0o644) with open(self.BAR, "w", encoding="utf-8") as fd: diff --git a/tests/func/test_stage.py b/tests/func/test_stage.py index b500c5d1e8..77a35488b6 100644 --- a/tests/func/test_stage.py +++ b/tests/func/test_stage.py @@ -13,7 +13,6 @@ from dvc.utils.serialize import dump_yaml, load_yaml from dvc.utils.strictyaml import YAMLValidationError from tests.basic_env import TestDvc -from tests.utils import clean_staging def test_cmd_obj(): @@ -322,62 +321,3 @@ def test_stage_run_checkpoint(tmp_dir, dvc, mocker, checkpoint): mock_cmd_run.assert_called_with( stage, checkpoint_func=callback, dry=False, run_env=None ) - - -@pytest.mark.parametrize( - "dry_run, expected_staging_contents", - [ - (True, set()), - ( - False, - { - "37b51d194a7513e45b56f6524f2d51f2", - "568f3dd88592a68ef99459a5491011cd", - "68dde2c3c4e7953c2290f176bbdc9a54", - "fd4034d9514d6e875538422c8b0dbeb2.dir", - }, - ), - ], -) -def test_stage_dir_optimization( - tmp_dir, dvc, mocker, dry_run, expected_staging_contents -): - from dvc_data import stage - from dvc_data.objects.tree import Tree - - tmp_dir.dvc_gen( - { - "data": { - "foo": "bar", - "subdir": {"subfoo": "subbar"}, - } - } - ) - odb = dvc.odb.local - - objs = set(odb.all()) - clean_staging() - - tmp_dir.gen({"data": {"baz": "quz"}}) - - stage_spy = mocker.spy(stage, "_stage_tree") - _, _, tree = stage.stage(odb, "data", odb.fs, odb.fs.PARAM_CHECKSUM) - - assert stage_spy.called - assert set(odb.all()) - objs == {tree.hash_info.as_raw().value} - stage_spy.reset_mock() - clean_staging() - - load_spy = mocker.spy(Tree, "load") - build_tree_spy = mocker.spy(stage, "_build_tree") - - staging, _, tree = stage.stage( - odb, "data", odb.fs, odb.fs.PARAM_CHECKSUM, dry_run=dry_run - ) - assert not stage_spy.called - assert not build_tree_spy.called - - load_args, _ = load_spy.call_args - assert load_args[1].value == tree.hash_info.as_raw().value - - assert set(staging.all()) == expected_staging_contents diff --git a/tests/unit/fs/test_dvc.py b/tests/unit/fs/test_dvc.py index 09261f4332..06e48f1b3e 100644 --- a/tests/unit/fs/test_dvc.py +++ b/tests/unit/fs/test_dvc.py @@ -8,7 +8,6 @@ from dvc.fs.dvc import DvcFileSystem from dvc_data.hashfile.hash_info import HashInfo from dvc_data.stage import stage -from tests.utils import clean_staging def test_exists(tmp_dir, dvc): @@ -565,7 +564,6 @@ def test_get_hash_mixed_dir(tmp_dir, scm, dvc): ] ) tmp_dir.scm.commit("add dir") - clean_staging() fs = DvcFileSystem(repo=dvc) _, _, obj = stage(dvc.odb.local, "dir", fs, "md5") @@ -577,7 +575,6 @@ def test_get_hash_mixed_dir(tmp_dir, scm, dvc): def test_get_hash_dirty_file(tmp_dir, dvc): from dvc_data import check from dvc_data.hashfile.hash import hash_file - from dvc_objects.errors import ObjectFormatError tmp_dir.dvc_gen("file", "file") file_hash_info = HashInfo("md5", "8c7dd922ad47494fc02c388e12c00eac") @@ -585,8 +582,6 @@ def test_get_hash_dirty_file(tmp_dir, dvc): (tmp_dir / "file").write_text("something") something_hash_info = HashInfo("md5", "437b930db84b8079c2dd804a71936b5f") - clean_staging() - # file is modified in workspace # hash_file(file) should return workspace hash, not DVC cached hash fs = DvcFileSystem(repo=dvc) @@ -595,13 +590,8 @@ def test_get_hash_dirty_file(tmp_dir, dvc): assert obj.hash_info == something_hash_info check(staging, obj) - # file is removed in workspace - # any staged object referring to modified workspace obj is now invalid - (tmp_dir / "file").unlink() - with pytest.raises(ObjectFormatError): - check(staging, obj) - # hash_file(file) should return DVC cached hash + (tmp_dir / "file").unlink() assert fs.info("file")["md5"] == file_hash_info.value _, hash_info = hash_file("file", fs, "md5", state=dvc.state) assert hash_info == file_hash_info @@ -615,7 +605,6 @@ def test_get_hash_dirty_file(tmp_dir, dvc): def test_get_hash_dirty_dir(tmp_dir, dvc): tmp_dir.dvc_gen({"dir": {"foo": "foo", "bar": "bar"}}) (tmp_dir / "dir" / "baz").write_text("baz") - clean_staging() fs = DvcFileSystem(repo=dvc) _, meta, obj = stage(dvc.odb.local, "dir", fs, "md5") diff --git a/tests/utils/__init__.py b/tests/utils/__init__.py index 2575e3604d..762828fbf6 100644 --- a/tests/utils/__init__.py +++ b/tests/utils/__init__.py @@ -44,18 +44,6 @@ def dump_sv(stream, metrics, delimiter=",", header=True): writer.writerow(list(d.values())) -def clean_staging(): - from dvc.fs import MemoryFileSystem - from dvc_data.stage import _STAGING_MEMFS_PATH - - try: - MemoryFileSystem().fs.rm( - f"memory://{_STAGING_MEMFS_PATH}", recursive=True - ) - except FileNotFoundError: - pass - - @contextmanager def console_width(console, width): console_options = console.options