Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

exp: show: Include additional info in --json. #7690

Merged
merged 1 commit into from May 25, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
9 changes: 5 additions & 4 deletions dvc/repo/experiments/show.py
Expand Up @@ -7,7 +7,7 @@
from dvc.repo.metrics.show import _gather_metrics
from dvc.repo.params.show import _gather_params
from dvc.scm import iter_revs
from dvc.utils import error_handler, onerror_collect
from dvc.utils import error_handler, onerror_collect, relpath

if TYPE_CHECKING:
from dvc.repo import Repo
Expand Down Expand Up @@ -45,20 +45,21 @@ def _collect_experiment_commit(
res["params"] = params

res["deps"] = {
dep.def_path: {
relpath(dep.fs_path, repo.root_dir): {
"hash": dep.hash_info.value,
"size": dep.meta.size,
"nfiles": dep.meta.nfiles,
}
for dep in repo.index.deps
if not isinstance(dep, (ParamsDependency, RepoDependency))
}

res["outs"] = {
out.def_path: {
relpath(out.fs_path, repo.root_dir): {
"hash": out.hash_info.value,
"size": out.meta.size,
"nfiles": out.meta.nfiles,
"use_cache": out.use_cache,
"is_data_source": out.stage.is_data_source,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Q] Do I have everything I need to work this out:

for dep in res[“deps”].values():
    for out in res[“outs”].values():
        if dep[“fs_path”] == out[“fs_path”]:
            # THIS IS AN INTERMEDIATE ARTIFACT
            dep["intermediate"] = True
            out["intermediate"] = True
            if out[“use_cache”]:
                # DEP IS DVC TRACKED
                dep["use_cache"] = True
            else:
                # DEP IS GIT-TRACKED 
                dep["use_cache"] = False

The answer is yes

Given that the rules about deciding what files should be included appear to be currently unclear and product-dependent I would prefer not to include this logic in DVC.

This does make sense to me. However, there will be much less chance of providing consistency across dependencies if we push this logic out to the consumers.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mattseddon where does this snippet come from?

I see that is_data_source is included which is what we need I believe, @daavoo could you remind please what is the definition of the out.stage.is_data_source?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct code is in the description?

}
for out in repo.index.outs
if not (out.is_metric or out.is_plot)
Expand Down
62 changes: 61 additions & 1 deletion tests/func/experiments/test_show.py
Expand Up @@ -34,6 +34,7 @@ def make_executor_info(**kwargs):
return ExecutorInfo(**kwargs)


@pytest.mark.vscode
def test_show_simple(tmp_dir, scm, dvc, exp_stage):
assert dvc.experiments.show()["workspace"] == {
"baseline": {
Expand All @@ -57,6 +58,7 @@ def test_show_simple(tmp_dir, scm, dvc, exp_stage):
}


@pytest.mark.vscode
@pytest.mark.parametrize("workspace", [True, False])
def test_show_experiment(tmp_dir, scm, dvc, exp_stage, workspace):
baseline_rev = scm.get_rev()
Expand Down Expand Up @@ -101,6 +103,7 @@ def test_show_experiment(tmp_dir, scm, dvc, exp_stage, workspace):
assert exp["data"]["params"]["params.yaml"] == expected_params


@pytest.mark.vscode
def test_show_queued(tmp_dir, scm, dvc, exp_stage):
baseline_rev = scm.get_rev()

Expand Down Expand Up @@ -132,6 +135,7 @@ def test_show_queued(tmp_dir, scm, dvc, exp_stage):
assert exp["params"]["params.yaml"] == {"data": {"foo": 3}}


@pytest.mark.vscode
@pytest.mark.parametrize("workspace", [True, False])
def test_show_checkpoint(
tmp_dir, scm, dvc, checkpoint_stage, capsys, workspace
Expand Down Expand Up @@ -169,6 +173,7 @@ def test_show_checkpoint(
assert f"{fs} {name}" in cap.out


@pytest.mark.vscode
@pytest.mark.parametrize("workspace", [True, False])
def test_show_checkpoint_branch(
tmp_dir, scm, dvc, checkpoint_stage, capsys, workspace
Expand Down Expand Up @@ -281,6 +286,7 @@ def test_show_filter(
assert "Experiment" not in cap.out


@pytest.mark.vscode
def test_show_multiple_commits(tmp_dir, scm, dvc, exp_stage):
init_rev = scm.get_rev()
tmp_dir.scm_gen("file", "file", "commit")
Expand Down Expand Up @@ -318,6 +324,7 @@ def test_show_sort(tmp_dir, scm, dvc, exp_stage, caplog):
)


@pytest.mark.vscode
def test_show_running_workspace(tmp_dir, scm, dvc, exp_stage, capsys):
pid_dir = os.path.join(dvc.tmp_dir, EXEC_TMP_DIR, EXEC_PID_DIR)
info = make_executor_info(location=BaseExecutor.DEFAULT_LOCATION)
Expand Down Expand Up @@ -626,7 +633,8 @@ def test_show_parallel_coordinates(tmp_dir, dvc, scm, mocker, capsys):
assert '"label": "Experiment"' not in html_text


def test_show_outs(tmp_dir, dvc, scm):
@pytest.mark.vscode
def test_show_outs(tmp_dir, dvc, scm, erepo_dir):
tmp_dir.gen("copy.py", COPY_SCRIPT)
params_file = tmp_dir / "params.yaml"
params_data = {
Expand All @@ -652,9 +660,61 @@ def test_show_outs(tmp_dir, dvc, scm):
"hash": ANY,
"size": ANY,
"nfiles": None,
"use_cache": True,
"is_data_source": False,
}
}

tmp_dir.dvc_gen("out_add", "foo", commit="dvc add output")

outs = dvc.experiments.show()["workspace"]["baseline"]["data"]["outs"]
assert outs == {
"out": {
"hash": ANY,
"size": ANY,
"nfiles": None,
"use_cache": True,
"is_data_source": False,
},
"out_add": {
"hash": ANY,
"size": ANY,
"nfiles": None,
"use_cache": True,
"is_data_source": True,
},
}

with erepo_dir.chdir():
erepo_dir.dvc_gen("out", "out content", commit="create out")

dvc.imp(os.fspath(erepo_dir), "out", "out_imported")

outs = dvc.experiments.show()["workspace"]["baseline"]["data"]["outs"]
assert outs == {
"out": {
"hash": ANY,
"size": ANY,
"nfiles": None,
"use_cache": True,
"is_data_source": False,
},
"out_add": {
"hash": ANY,
"size": ANY,
"nfiles": None,
"use_cache": True,
"is_data_source": True,
},
"out_imported": {
"hash": ANY,
"size": ANY,
"nfiles": None,
"use_cache": True,
"is_data_source": True,
},
}


def test_metrics_renaming(tmp_dir, dvc, scm, capsys):
tmp_dir.gen("copy.py", COPY_SCRIPT)
Expand Down