Skip to content

Commit

Permalink
pull/fetch/push: add stats (iterative#3735)
Browse files Browse the repository at this point in the history
* pull/fetch/push: add stats

* fix typo: s/logging/logger

* pull: make stats detailed, with summary at the bottom

* tests: mock properly, set return_value

* add a line space
  • Loading branch information
skshetry authored May 5, 2020
1 parent 795f001 commit d00bd4d
Show file tree
Hide file tree
Showing 9 changed files with 149 additions and 72 deletions.
32 changes: 7 additions & 25 deletions dvc/command/checkout.py
Original file line number Diff line number Diff line change
@@ -1,39 +1,17 @@
import argparse
import logging
import operator

import colorama

from dvc.command.base import append_doc_link
from dvc.command.base import CmdBase
from dvc.exceptions import CheckoutError
from dvc.utils.humanize import get_summary

logger = logging.getLogger(__name__)


def _human_join(words):
words = list(words)
if not words:
return ""

return (
"{before} and {after}".format(
before=", ".join(words[:-1]), after=words[-1],
)
if len(words) > 1
else words[0]
)


def log_summary(stats):
states = ("added", "deleted", "modified")
summary = (
"{} {}".format(len(stats[state]), state)
for state in states
if stats.get(state)
)
logger.info(_human_join(summary) or "No changes.")


def log_changes(stats):
colors = [
("modified", colorama.Fore.YELLOW,),
Expand Down Expand Up @@ -75,7 +53,11 @@ def run(self):
stats = exc.stats

if self.args.summary:
log_summary(stats)
default_message = "No changes."
msg = get_summary(
sorted(stats.items(), key=operator.itemgetter(0))
)
logger.info(msg or default_message)
else:
log_changes(stats)

Expand Down
23 changes: 13 additions & 10 deletions dvc/command/data_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,25 @@

from dvc.command.base import append_doc_link
from dvc.command.base import CmdBase
from dvc.command.checkout import log_summary
from dvc.command.checkout import log_changes
from dvc.utils.humanize import get_summary
from dvc.exceptions import DvcException, CheckoutError


logger = logging.getLogger(__name__)


class CmdDataBase(CmdBase):
@classmethod
def check_up_to_date(cls, processed_files_count):
if processed_files_count == 0:
logger.info("Everything is up to date.")
def log_summary(self, stats):
default_msg = "Everything is up to date."
logger.info(get_summary(stats.items()) or default_msg)


class CmdDataPull(CmdDataBase):
def log_summary(self, stats):
log_changes(stats)
super().log_summary(stats)

def run(self):
try:
stats = self.repo.pull(
Expand All @@ -31,13 +35,12 @@ def run(self):
force=self.args.force,
recursive=self.args.recursive,
)
log_summary(stats)
self.log_summary(stats)
except (CheckoutError, DvcException) as exc:
log_summary(getattr(exc, "stats", {}))
self.log_summary(getattr(exc, "stats", {}))
logger.exception("failed to pull data from the cloud")
return 1

self.check_up_to_date(stats["downloaded"])
return 0


Expand All @@ -54,10 +57,10 @@ def run(self):
with_deps=self.args.with_deps,
recursive=self.args.recursive,
)
self.log_summary({"pushed": processed_files_count})
except DvcException:
logger.exception("failed to push data to the cloud")
return 1
self.check_up_to_date(processed_files_count)
return 0


Expand All @@ -74,10 +77,10 @@ def run(self):
with_deps=self.args.with_deps,
recursive=self.args.recursive,
)
self.log_summary({"fetched": processed_files_count})
except DvcException:
logger.exception("failed to fetch data from the cloud")
return 1
self.check_up_to_date(processed_files_count)
return 0


Expand Down
2 changes: 1 addition & 1 deletion dvc/repo/pull.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,5 @@ def pull(
targets=targets, with_deps=with_deps, force=force, recursive=recursive
)

stats["downloaded"] = processed_files_count
stats["fetched"] = processed_files_count
return stats
27 changes: 27 additions & 0 deletions dvc/utils/humanize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from funcy import is_seq


def join(words):
words = list(words)
if not words:
return ""

return (
"{before} and {after}".format(
before=", ".join(words[:-1]), after=words[-1],
)
if len(words) > 1
else words[0]
)


def get_summary(stats):
status = (
(state, len(data) if is_seq(data) else data)
for state, data in stats
if data
)
return join(
"{} file{} {}".format(num, "s" if num > 1 else "", state)
for state, num in status
)
2 changes: 1 addition & 1 deletion tests/func/test_checkout.py
Original file line number Diff line number Diff line change
Expand Up @@ -692,7 +692,7 @@ def test_stats_does_not_show_changes_by_default(tmp_dir, dvc, scm, caplog):
with caplog.at_level(logging.INFO, logger="dvc"):
caplog.clear()
assert main(["checkout", "--summary"]) == 0
assert "2 deleted" in caplog.text
assert "2 files deleted" in caplog.text
assert "dir" not in caplog.text
assert "other" not in caplog.text

Expand Down
62 changes: 57 additions & 5 deletions tests/func/test_data_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -726,14 +726,14 @@ def test_pull_git_imports(request, tmp_dir, dvc, scm, erepo):
dvc.imp(fspath(erepo), "foo")
dvc.imp(fspath(erepo), "dir", out="new_dir", rev="HEAD~")

assert dvc.pull()["downloaded"] == 0
assert dvc.pull()["fetched"] == 0

for item in ["foo", "new_dir", dvc.cache.local.cache_dir]:
remove(item)
os.makedirs(dvc.cache.local.cache_dir, exist_ok=True)
clean_repos()

assert dvc.pull(force=True)["downloaded"] == 2
assert dvc.pull(force=True)["fetched"] == 2

assert (tmp_dir / "foo").exists()
assert (tmp_dir / "foo").read_text() == "foo"
Expand All @@ -753,11 +753,11 @@ def test_pull_external_dvc_imports(tmp_dir, dvc, scm, erepo_dir):
dvc.imp(fspath(erepo_dir), "foo")
dvc.imp(fspath(erepo_dir), "dir", out="new_dir", rev="HEAD~")

assert dvc.pull()["downloaded"] == 0
assert dvc.pull()["fetched"] == 0

clean(["foo", "new_dir"], dvc)

assert dvc.pull(force=True)["downloaded"] == 2
assert dvc.pull(force=True)["fetched"] == 2

assert (tmp_dir / "foo").exists()
assert (tmp_dir / "foo").read_text() == "foo"
Expand Down Expand Up @@ -798,7 +798,7 @@ def test_dvc_pull_pipeline_stages(tmp_dir, dvc, local_remote, run_copy):
for target in [stage.addressing, out]:
clean(outs, dvc)
stats = dvc.pull([target])
assert stats["downloaded"] == 1
assert stats["fetched"] == 1
assert stats["added"] == [out]
assert os.path.exists(out)
assert not any(os.path.exists(out) for out in set(outs) - {out})
Expand Down Expand Up @@ -849,3 +849,55 @@ def test_pipeline_file_target_ops(tmp_dir, dvc, local_remote, run_copy):

with pytest.raises(StageNotFound):
dvc.pull(["dvc.yaml:StageThatDoesNotExist"])


@pytest.mark.parametrize(
"fs, msg",
[
({"foo": "foo", "bar": "bar"}, "2 files pushed"),
({"foo": "foo"}, "1 file pushed"),
({}, "Everything is up to date"),
],
)
def test_push_stats(tmp_dir, dvc, fs, msg, local_remote, caplog):
tmp_dir.dvc_gen(fs)
caplog.clear()
with caplog.at_level(level=logging.INFO, logger="dvc"):
main(["push"])
assert msg in caplog.text


@pytest.mark.parametrize(
"fs, msg",
[
({"foo": "foo", "bar": "bar"}, "2 files fetched"),
({"foo": "foo"}, "1 file fetched"),
({}, "Everything is up to date."),
],
)
def test_fetch_stats(tmp_dir, dvc, fs, msg, local_remote, caplog):
tmp_dir.dvc_gen(fs)
dvc.push()
clean(list(fs.keys()), dvc)
caplog.clear()
with caplog.at_level(level=logging.INFO, logger="dvc"):
main(["fetch"])
assert msg in caplog.text


def test_pull_stats(tmp_dir, dvc, local_remote, caplog):
tmp_dir.dvc_gen({"foo": "foo", "bar": "bar"})
dvc.push()
clean(["foo", "bar"], dvc)
(tmp_dir / "bar").write_text("foobar")
caplog.clear()
with caplog.at_level(level=logging.INFO, logger="dvc"):
main(["pull", "--force"])
assert "M\tbar" in caplog.text
assert "A\tfoo" in caplog.text
assert "2 files fetched" in caplog.text
assert "1 file added" in caplog.text
assert "1 file modified" in caplog.text
with caplog.at_level(level=logging.INFO, logger="dvc"):
main(["pull"])
assert "Everything is up to date." in caplog.text
29 changes: 1 addition & 28 deletions tests/unit/command/test_checkout.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging

from dvc.cli import parse_args
from dvc.command.checkout import CmdCheckout, log_summary, log_changes
from dvc.command.checkout import CmdCheckout, log_changes


def test_checkout(tmp_dir, dvc, mocker):
Expand All @@ -23,33 +23,6 @@ def test_checkout(tmp_dir, dvc, mocker):
)


def test_log_summary(caplog):
stats = {
"added": ["file1", "file2", "file3"],
"deleted": ["file4", "file5"],
"modified": ["file6", "file7"],
}

def _assert_output(stats, expected_text):
with caplog.at_level(logging.INFO, logger="dvc"):
caplog.clear()
log_summary(stats)
assert expected_text in caplog.text

_assert_output(stats, "3 added, 2 deleted and 2 modified")

del stats["deleted"][1]
_assert_output(stats, "3 added, 1 deleted and 2 modified")

del stats["deleted"][0]
_assert_output(stats, "3 added and 2 modified")

del stats["modified"]
_assert_output(stats, "3 added")

_assert_output({}, "No changes.")


def test_log_changes(caplog):
stats = {
"added": ["file1", "dir1/"],
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/command/test_data_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def test_fetch(mocker):
assert cli_args.func == CmdDataFetch

cmd = cli_args.func(cli_args)
m = mocker.patch.object(cmd.repo, "fetch", autospec=True)
m = mocker.patch.object(cmd.repo, "fetch", autospec=True, return_value=0)

assert cmd.run() == 0

Expand Down Expand Up @@ -96,7 +96,7 @@ def test_push(mocker):
assert cli_args.func == CmdDataPush

cmd = cli_args.func(cli_args)
m = mocker.patch.object(cmd.repo, "push", autospec=True)
m = mocker.patch.object(cmd.repo, "push", autospec=True, return_value=0)

assert cmd.run() == 0

Expand Down
40 changes: 40 additions & 0 deletions tests/unit/utils/test_humanize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from collections import OrderedDict

from dvc.utils.humanize import get_summary


def test_get_summary():
# dict, so that we could delete from it easily
stats = OrderedDict(
[
("fetched", 3),
("added", ["file1", "file2", "file3"]),
("deleted", ["file4", "file5"]),
("modified", ["file6", "file7"]),
]
)

assert get_summary(stats.items()) == (
"3 files fetched, "
"3 files added, "
"2 files deleted "
"and "
"2 files modified"
)

del stats["fetched"]
del stats["deleted"][1]
assert (
get_summary(stats.items())
== "3 files added, 1 file deleted and 2 files modified"
)

del stats["deleted"][0]
assert get_summary(stats.items()) == "3 files added and 2 files modified"

del stats["modified"]
assert get_summary(stats.items()) == "3 files added"

assert get_summary([]) == ""
assert get_summary([("x", 0), ("y", [])]) == ""
assert get_summary([("x", 1), ("y", [])]) == "1 file x"

0 comments on commit d00bd4d

Please sign in to comment.