From a4f302750431aa762ca874bd76f6eaed6b606445 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Tue, 14 Jun 2022 11:41:41 +0200 Subject: [PATCH 1/8] Log the number and size of files to be read --- python/publish_unit_test_results.py | 21 +++++++++++++++++++-- python/requirements.txt | 3 ++- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/python/publish_unit_test_results.py b/python/publish_unit_test_results.py index b8d2f8ef..9e689ffd 100644 --- a/python/publish_unit_test_results.py +++ b/python/publish_unit_test_results.py @@ -9,10 +9,11 @@ from typing import List, Optional, Union import github +import humanize from urllib3.util.retry import Retry import publish.github_action -from publish import hide_comments_modes, none_list, available_annotations, default_annotations, \ +from publish import hide_comments_modes, available_annotations, default_annotations, \ pull_request_build_modes, fail_on_modes, fail_on_mode_errors, fail_on_mode_failures, \ comment_mode_off, comment_mode_update, comment_modes, punctuation_space from publish.github_action import GithubAction @@ -56,6 +57,22 @@ def get_files(multiline_files_globs: str) -> List[str]: return list(included - excluded) +def get_files_size(files: List[str]) -> str: + try: + size = sum([os.path.getsize(file) for file in files]) + return humanize.naturalsize(size, binary=True) + except BaseException as e: + logger.warning(f'failed to obtain file size of {len(files)} files', exc_info=e) + return 'unknown size' + + +def get_number_of_files(files: List[str]) -> str: + number_of_files = '{number:,} file{s}'.format( + number=len(files), s='s' if len(files) > 1 else '' + ).replace(',', punctuation_space) + return number_of_files + + def main(settings: Settings, gha: GithubAction) -> None: # we cannot create a check run or pull request comment when running on pull_request event from a fork # when event_file is given we assume proper setup as in README.md#support-fork-repositories-and-dependabot-branches @@ -74,7 +91,7 @@ def main(settings: Settings, gha: GithubAction) -> None: if len(files) == 0: gha.warning(f'Could not find any files for {settings.files_glob}') else: - logger.info(f'reading {settings.files_glob}') + logger.info(f'Reading {settings.files_glob} ({get_number_of_files(files)}, {get_files_size(files)})') logger.debug(f'reading {list(files)}') # get the unit test results diff --git a/python/requirements.txt b/python/requirements.txt index 1fa9ee4b..e74ee0ae 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -1,6 +1,7 @@ # dataclasses does not exist before Python3.7, needed as dependency dataclasses;python_version<"3.7" +humanize==4.1.0 junitparser==2.5.0 PyGithub==1.55 +requests==2.27.1 urllib3==1.26.9 -requests==2.27.1 \ No newline at end of file From f7525382ecaaec30fe9ef40fe1dda0fb8c93b0a7 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Tue, 14 Jun 2022 11:57:27 +0200 Subject: [PATCH 2/8] Log available memory --- python/publish_unit_test_results.py | 7 +++++++ python/requirements.txt | 1 + 2 files changed, 8 insertions(+) diff --git a/python/publish_unit_test_results.py b/python/publish_unit_test_results.py index 9e689ffd..7b39f7b8 100644 --- a/python/publish_unit_test_results.py +++ b/python/publish_unit_test_results.py @@ -10,6 +10,7 @@ import github import humanize +import psutil from urllib3.util.retry import Retry import publish.github_action @@ -94,6 +95,12 @@ def main(settings: Settings, gha: GithubAction) -> None: logger.info(f'Reading {settings.files_glob} ({get_number_of_files(files)}, {get_files_size(files)})') logger.debug(f'reading {list(files)}') + # log the available RAM to help spot OOM issues: + # https://github.com/EnricoMi/publish-unit-test-result-action/issues/231 + # https://github.com/EnricoMi/publish-unit-test-result-action/issues/304 + avail_mem = humanize.naturalsize(psutil.virtual_memory().available, binary=True) + logger.info(f'Available memory to load files: {avail_mem}') + # get the unit test results parsed = parse_junit_xml_files(files, settings.time_factor, settings.ignore_runs).with_commit(settings.commit) [gha.error(message=f'Error processing result file: {error.message}', file=error.file, line=error.line, column=error.column) diff --git a/python/requirements.txt b/python/requirements.txt index e74ee0ae..cbff60d6 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -2,6 +2,7 @@ dataclasses;python_version<"3.7" humanize==4.1.0 junitparser==2.5.0 +psutil==5.9.1 PyGithub==1.55 requests==2.27.1 urllib3==1.26.9 From 3143a013aa568c48193c29477c5f454026ebe0e7 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Tue, 14 Jun 2022 12:10:41 +0200 Subject: [PATCH 3/8] Minor rewording --- python/publish_unit_test_results.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/publish_unit_test_results.py b/python/publish_unit_test_results.py index 7b39f7b8..aef28026 100644 --- a/python/publish_unit_test_results.py +++ b/python/publish_unit_test_results.py @@ -99,7 +99,7 @@ def main(settings: Settings, gha: GithubAction) -> None: # https://github.com/EnricoMi/publish-unit-test-result-action/issues/231 # https://github.com/EnricoMi/publish-unit-test-result-action/issues/304 avail_mem = humanize.naturalsize(psutil.virtual_memory().available, binary=True) - logger.info(f'Available memory to load files: {avail_mem}') + logger.info(f'Available memory to read files: {avail_mem}') # get the unit test results parsed = parse_junit_xml_files(files, settings.time_factor, settings.ignore_runs).with_commit(settings.commit) From bd02e9a49e7e9f6ace0f986d3f595190b3aa6e2f Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Tue, 14 Jun 2022 12:14:25 +0200 Subject: [PATCH 4/8] Pin humanize to latest version available for py3.6 --- python/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/requirements.txt b/python/requirements.txt index cbff60d6..2e7fca82 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -1,6 +1,6 @@ # dataclasses does not exist before Python3.7, needed as dependency dataclasses;python_version<"3.7" -humanize==4.1.0 +humanize==3.14.0 junitparser==2.5.0 psutil==5.9.1 PyGithub==1.55 From 2cac4453826e90b440a9c6293ba64dd51e656635 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Tue, 14 Jun 2022 13:15:06 +0200 Subject: [PATCH 5/8] Log progress when reading files --- python/publish/junit.py | 12 +++-- python/publish/progress.py | 81 +++++++++++++++++++++++++++++ python/publish_unit_test_results.py | 15 ++++-- python/test/test_progress.py | 65 +++++++++++++++++++++++ 4 files changed, 166 insertions(+), 7 deletions(-) create mode 100644 python/publish/progress.py create mode 100644 python/test/test_progress.py diff --git a/python/publish/junit.py b/python/publish/junit.py index e6100e65..50aaa061 100644 --- a/python/publish/junit.py +++ b/python/publish/junit.py @@ -1,6 +1,6 @@ import os from collections import defaultdict -from typing import Optional, Iterable, Union, Any, List, Dict +from typing import Optional, Iterable, Union, Any, List, Dict, Callable, TypeVar import junitparser from junitparser import Element, JUnitXml, TestCase, TestSuite, Skipped @@ -98,7 +98,13 @@ def end(self, tag: Union[str, bytes]) -> Element: self._stack.pop() -def parse_junit_xml_files(files: Iterable[str], time_factor: float = 1.0, drop_testcases: bool = False) -> ParsedUnitTestResults: +T = TypeVar("T") + + +def parse_junit_xml_files(files: Iterable[str], + time_factor: float = 1.0, + drop_testcases: bool = False, + progress: Callable[[T], T] = lambda x: x) -> ParsedUnitTestResults: """Parses junit xml files and returns aggregated statistics as a ParsedUnitTestResults.""" def parse(path: str) -> Union[str, Any]: if not os.path.exists(path): @@ -114,7 +120,7 @@ def parse(path: str) -> Union[str, Any]: except BaseException as e: return e - parsed_files = [(result_file, parse(result_file)) + parsed_files = [progress((result_file, parse(result_file))) for result_file in files] junits = [(result_file, junit) for result_file, junit in parsed_files diff --git a/python/publish/progress.py b/python/publish/progress.py new file mode 100644 index 00000000..d4d78c5f --- /dev/null +++ b/python/publish/progress.py @@ -0,0 +1,81 @@ +from datetime import datetime +from logging import Logger +from threading import Timer +from typing import Generic, TypeVar, Optional, Callable +import contextlib + +import humanize + +T = TypeVar('T') + + +@contextlib.contextmanager +def progress_logger(items: int, + interval_seconds: int, + progress_template: str, + finish_template: Optional[str], + logger: Logger) -> Callable[[T], T]: + progress = Progress(items) + plogger = ProgressLogger(progress, interval_seconds, progress_template, logger).start() + try: + yield progress.observe + finally: + plogger.finish(finish_template) + + +class Progress(Generic[T]): + def __init__(self, items: int): + self.items = items + self.observations = 0 + + def observe(self, observation: T) -> T: + self.observations = self.observations + 1 + return observation + + def get_progress(self) -> str: + return f'{self.observations} of {self.items}' + + +class ProgressLogger: + def __init__(self, progress: Progress, interval_seconds: int, template: str, logger: Logger): + self._progress = progress + self._interval_seconds = interval_seconds + self._template = template + self._logger = logger + + self._start = None + self._duration = None + self._timer = self._get_progress_timer() + + def start(self) -> 'ProgressLogger': + self._start = datetime.utcnow() + self._timer.start() + return self + + def finish(self, template: Optional[str] = None): + self._duration = datetime.utcnow() - self._start + self._start = None + self._timer.cancel() + + if template: + self._logger.info(template.format(items=self._progress.items, + observations=self._progress.observations, + duration=self.duration)) + + @property + def duration(self) -> str: + return humanize.precisedelta(self._duration) + + def _get_progress_timer(self): + timer = Timer(self._interval_seconds, self._log_progress) + timer.setDaemon(daemonic=True) + return timer + + def _log_progress(self): + if self._start is None: + return + + delta = datetime.utcnow() - self._start + self._logger.info(self._template.format(progress=self._progress.get_progress(), time=humanize.precisedelta(delta))) + self._timer = self._get_progress_timer() + self._timer.start() diff --git a/python/publish_unit_test_results.py b/python/publish_unit_test_results.py index aef28026..0277667d 100644 --- a/python/publish_unit_test_results.py +++ b/python/publish_unit_test_results.py @@ -19,6 +19,7 @@ comment_mode_off, comment_mode_update, comment_modes, punctuation_space from publish.github_action import GithubAction from publish.junit import parse_junit_xml_files +from publish.progress import progress_logger from publish.publisher import Publisher, Settings from publish.retry import GitHubRetry from publish.unittestresults import get_test_results, get_stats, ParsedUnitTestResults @@ -101,10 +102,16 @@ def main(settings: Settings, gha: GithubAction) -> None: avail_mem = humanize.naturalsize(psutil.virtual_memory().available, binary=True) logger.info(f'Available memory to read files: {avail_mem}') - # get the unit test results - parsed = parse_junit_xml_files(files, settings.time_factor, settings.ignore_runs).with_commit(settings.commit) - [gha.error(message=f'Error processing result file: {error.message}', file=error.file, line=error.line, column=error.column) - for error in parsed.errors] + # log the progress + with progress_logger(items=len(files), + interval_seconds=10, + progress_template='Read {progress} files in {time}', + finish_template='Finished reading {observations} files in {duration}', + logger=logger) as progress: + # get the unit test results + parsed = parse_junit_xml_files(files, settings.time_factor, settings.ignore_runs, progress).with_commit(settings.commit) + [gha.error(message=f'Error processing result file: {error.message}', file=error.file, line=error.line, column=error.column) + for error in parsed.errors] # process the parsed results results = get_test_results(parsed, settings.dedup_classes_by_file_name) diff --git a/python/test/test_progress.py b/python/test/test_progress.py new file mode 100644 index 00000000..7f035556 --- /dev/null +++ b/python/test/test_progress.py @@ -0,0 +1,65 @@ +import unittest +from datetime import datetime + +import mock + +from publish.progress import Progress, ProgressLogger + + +class TestProgress(unittest.TestCase): + def test_get_progress(self): + progress = Progress(10) + self.assertEqual('0 of 10', progress.get_progress()) + self.assertEqual('0 of 10', progress.get_progress()) + self.assertEqual('item', progress.observe('item')) + self.assertEqual('1 of 10', progress.get_progress()) + self.assertEqual('1 of 10', progress.get_progress()) + self.assertEqual(1, progress.observe(1)) + self.assertEqual('2 of 10', progress.get_progress()) + self.assertEqual('2 of 10', progress.get_progress()) + self.assertEqual(1.2, progress.observe(1.2)) + self.assertEqual('3 of 10', progress.get_progress()) + self.assertEqual('3 of 10', progress.get_progress()) + obj = object() + self.assertEqual(obj, progress.observe(obj)) + self.assertEqual('4 of 10', progress.get_progress()) + self.assertEqual('4 of 10', progress.get_progress()) + + +class TestProgressLogger(unittest.TestCase): + def test(self): + progress = Progress(10) + logger = mock.MagicMock(info=mock.Mock()) + plogger = ProgressLogger(progress, 60, 'progress: {progress} in {time}', logger) + try: + ts = datetime.fromisoformat('2022-06-01 12:34:56') + with mock.patch('publish.progress.datetime', utcnow=mock.Mock(return_value=ts)): + plogger.start() + logger.info.assert_not_called() + + progress.observe('item') + logger.info.assert_not_called() + + ts = datetime.fromisoformat('2022-06-01 12:35:00') + with mock.patch('publish.progress.datetime', utcnow=mock.Mock(return_value=ts)): + plogger._log_progress() + self.assertEqual([mock.call('progress: 1 of 10 in 4 seconds')], logger.info.call_args_list) + logger.info.reset_mock() + + progress.observe('item') + progress.observe('item') + logger.info.assert_not_called() + + ts = datetime.fromisoformat('2022-06-01 12:40:00') + with mock.patch('publish.progress.datetime', utcnow=mock.Mock(return_value=ts)): + plogger._log_progress() + self.assertEqual([mock.call('progress: 3 of 10 in 5 minutes and 4 seconds')], logger.info.call_args_list) + logger.info.reset_mock() + finally: + ts = datetime.fromisoformat('2022-06-01 12:41:23') + with mock.patch('publish.progress.datetime', utcnow=mock.Mock(return_value=ts)): + plogger.finish('finished: {observations} of {items} in {duration}') + self.assertEqual([mock.call('finished: 3 of 10 in 6 minutes and 27 seconds')], logger.info.call_args_list) + logger.info.reset_mock() + + self.assertEqual('6 minutes and 27 seconds', plogger.duration) From 07bd34a8147ffda5dd5d95aeabdb4df094ed586b Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Tue, 14 Jun 2022 14:10:20 +0200 Subject: [PATCH 6/8] Use the same logger everywhere --- python/publish/retry.py | 2 +- python/publish_unit_test_results.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/publish/retry.py b/python/publish/retry.py index 5fe71c4b..2029684e 100644 --- a/python/publish/retry.py +++ b/python/publish/retry.py @@ -11,7 +11,7 @@ from publish.github_action import GithubAction -logger = logging.getLogger('publish-unit-test-results') +logger = logging.getLogger('publish') class GitHubRetry(Retry): diff --git a/python/publish_unit_test_results.py b/python/publish_unit_test_results.py index 0277667d..8b00e293 100644 --- a/python/publish_unit_test_results.py +++ b/python/publish_unit_test_results.py @@ -24,7 +24,7 @@ from publish.retry import GitHubRetry from publish.unittestresults import get_test_results, get_stats, ParsedUnitTestResults -logger = logging.getLogger('publish-unit-test-results') +logger = logging.getLogger('publish') def get_conclusion(parsed: ParsedUnitTestResults, fail_on_failures, fail_on_errors) -> str: From 4621b0043f1fdf8cd5d5b174097f89d24f8a5625 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Tue, 14 Jun 2022 14:21:54 +0200 Subject: [PATCH 7/8] Fix test_progress for Python 3.6 --- python/publish_unit_test_results.py | 1 + python/test/test_progress.py | 10 +++++----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/python/publish_unit_test_results.py b/python/publish_unit_test_results.py index 8b00e293..a4ff8ee8 100644 --- a/python/publish_unit_test_results.py +++ b/python/publish_unit_test_results.py @@ -103,6 +103,7 @@ def main(settings: Settings, gha: GithubAction) -> None: logger.info(f'Available memory to read files: {avail_mem}') # log the progress + # https://github.com/EnricoMi/publish-unit-test-result-action/issues/304 with progress_logger(items=len(files), interval_seconds=10, progress_template='Read {progress} files in {time}', diff --git a/python/test/test_progress.py b/python/test/test_progress.py index 7f035556..2b331add 100644 --- a/python/test/test_progress.py +++ b/python/test/test_progress.py @@ -1,5 +1,5 @@ import unittest -from datetime import datetime +from datetime import datetime, timezone import mock @@ -32,7 +32,7 @@ def test(self): logger = mock.MagicMock(info=mock.Mock()) plogger = ProgressLogger(progress, 60, 'progress: {progress} in {time}', logger) try: - ts = datetime.fromisoformat('2022-06-01 12:34:56') + ts = datetime(2022, 6, 1, 12, 34, 56, tzinfo=timezone.utc) with mock.patch('publish.progress.datetime', utcnow=mock.Mock(return_value=ts)): plogger.start() logger.info.assert_not_called() @@ -40,7 +40,7 @@ def test(self): progress.observe('item') logger.info.assert_not_called() - ts = datetime.fromisoformat('2022-06-01 12:35:00') + ts = datetime(2022, 6, 1, 12, 35, 00, tzinfo=timezone.utc) with mock.patch('publish.progress.datetime', utcnow=mock.Mock(return_value=ts)): plogger._log_progress() self.assertEqual([mock.call('progress: 1 of 10 in 4 seconds')], logger.info.call_args_list) @@ -50,13 +50,13 @@ def test(self): progress.observe('item') logger.info.assert_not_called() - ts = datetime.fromisoformat('2022-06-01 12:40:00') + ts = datetime(2022, 6, 1, 12, 40, 00, tzinfo=timezone.utc) with mock.patch('publish.progress.datetime', utcnow=mock.Mock(return_value=ts)): plogger._log_progress() self.assertEqual([mock.call('progress: 3 of 10 in 5 minutes and 4 seconds')], logger.info.call_args_list) logger.info.reset_mock() finally: - ts = datetime.fromisoformat('2022-06-01 12:41:23') + ts = datetime(2022, 6, 1, 12, 41, 23, tzinfo=timezone.utc) with mock.patch('publish.progress.datetime', utcnow=mock.Mock(return_value=ts)): plogger.finish('finished: {observations} of {items} in {duration}') self.assertEqual([mock.call('finished: 3 of 10 in 6 minutes and 27 seconds')], logger.info.call_args_list) From f28d9a8deaf4e9a76d1ad6c71e401ad270a4ce23 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Tue, 14 Jun 2022 15:02:55 +0200 Subject: [PATCH 8/8] Fix callable typing --- python/publish/junit.py | 7 ++----- python/publish/progress.py | 7 ++++--- python/publish_unit_test_results.py | 3 ++- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/python/publish/junit.py b/python/publish/junit.py index 50aaa061..1c950820 100644 --- a/python/publish/junit.py +++ b/python/publish/junit.py @@ -1,6 +1,6 @@ import os from collections import defaultdict -from typing import Optional, Iterable, Union, Any, List, Dict, Callable, TypeVar +from typing import Optional, Iterable, Union, Any, List, Dict, Callable, Tuple import junitparser from junitparser import Element, JUnitXml, TestCase, TestSuite, Skipped @@ -98,13 +98,10 @@ def end(self, tag: Union[str, bytes]) -> Element: self._stack.pop() -T = TypeVar("T") - - def parse_junit_xml_files(files: Iterable[str], time_factor: float = 1.0, drop_testcases: bool = False, - progress: Callable[[T], T] = lambda x: x) -> ParsedUnitTestResults: + progress: Callable[[Tuple[str, Any]], Tuple[str, Any]] = lambda x: x) -> ParsedUnitTestResults: """Parses junit xml files and returns aggregated statistics as a ParsedUnitTestResults.""" def parse(path: str) -> Union[str, Any]: if not os.path.exists(path): diff --git a/python/publish/progress.py b/python/publish/progress.py index d4d78c5f..c2c51a29 100644 --- a/python/publish/progress.py +++ b/python/publish/progress.py @@ -1,7 +1,7 @@ from datetime import datetime from logging import Logger from threading import Timer -from typing import Generic, TypeVar, Optional, Callable +from typing import Generic, TypeVar, Optional, Callable, Type, Any import contextlib import humanize @@ -14,8 +14,9 @@ def progress_logger(items: int, interval_seconds: int, progress_template: str, finish_template: Optional[str], - logger: Logger) -> Callable[[T], T]: - progress = Progress(items) + logger: Logger, + progress_item_type: Type[T] = Any) -> Callable[[T], T]: + progress = Progress[progress_item_type](items) plogger = ProgressLogger(progress, interval_seconds, progress_template, logger).start() try: yield progress.observe diff --git a/python/publish_unit_test_results.py b/python/publish_unit_test_results.py index a4ff8ee8..b8d26ca2 100644 --- a/python/publish_unit_test_results.py +++ b/python/publish_unit_test_results.py @@ -6,7 +6,7 @@ from collections import defaultdict from datetime import datetime from glob import glob -from typing import List, Optional, Union +from typing import List, Optional, Union, Tuple, Any import github import humanize @@ -108,6 +108,7 @@ def main(settings: Settings, gha: GithubAction) -> None: interval_seconds=10, progress_template='Read {progress} files in {time}', finish_template='Finished reading {observations} files in {duration}', + progress_item_type=Tuple[str, Any], logger=logger) as progress: # get the unit test results parsed = parse_junit_xml_files(files, settings.time_factor, settings.ignore_runs, progress).with_commit(settings.commit)