From ca6e0bdd538469a438eaf05b5b4aedfbfe7f2e98 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Tue, 14 Jun 2022 15:55:11 +0100 Subject: [PATCH] Log file sizes, free mem and progress reading files (#305) --- python/publish/junit.py | 9 ++-- python/publish/progress.py | 82 +++++++++++++++++++++++++++++ python/publish/retry.py | 2 +- python/publish_unit_test_results.py | 49 ++++++++++++++--- python/requirements.txt | 4 +- python/test/test_progress.py | 65 +++++++++++++++++++++++ 6 files changed, 198 insertions(+), 13 deletions(-) create mode 100644 python/publish/progress.py create mode 100644 python/test/test_progress.py diff --git a/python/publish/junit.py b/python/publish/junit.py index e6100e65..1c950820 100644 --- a/python/publish/junit.py +++ b/python/publish/junit.py @@ -1,6 +1,6 @@ import os from collections import defaultdict -from typing import Optional, Iterable, Union, Any, List, Dict +from typing import Optional, Iterable, Union, Any, List, Dict, Callable, Tuple import junitparser from junitparser import Element, JUnitXml, TestCase, TestSuite, Skipped @@ -98,7 +98,10 @@ def end(self, tag: Union[str, bytes]) -> Element: self._stack.pop() -def parse_junit_xml_files(files: Iterable[str], time_factor: float = 1.0, drop_testcases: bool = False) -> ParsedUnitTestResults: +def parse_junit_xml_files(files: Iterable[str], + time_factor: float = 1.0, + drop_testcases: bool = False, + progress: Callable[[Tuple[str, Any]], Tuple[str, Any]] = lambda x: x) -> ParsedUnitTestResults: """Parses junit xml files and returns aggregated statistics as a ParsedUnitTestResults.""" def parse(path: str) -> Union[str, Any]: if not os.path.exists(path): @@ -114,7 +117,7 @@ def parse(path: str) -> Union[str, Any]: except BaseException as e: return e - parsed_files = [(result_file, parse(result_file)) + parsed_files = [progress((result_file, parse(result_file))) for result_file in files] junits = [(result_file, junit) for result_file, junit in parsed_files diff --git a/python/publish/progress.py b/python/publish/progress.py new file mode 100644 index 00000000..c2c51a29 --- /dev/null +++ b/python/publish/progress.py @@ -0,0 +1,82 @@ +from datetime import datetime +from logging import Logger +from threading import Timer +from typing import Generic, TypeVar, Optional, Callable, Type, Any +import contextlib + +import humanize + +T = TypeVar('T') + + +@contextlib.contextmanager +def progress_logger(items: int, + interval_seconds: int, + progress_template: str, + finish_template: Optional[str], + logger: Logger, + progress_item_type: Type[T] = Any) -> Callable[[T], T]: + progress = Progress[progress_item_type](items) + plogger = ProgressLogger(progress, interval_seconds, progress_template, logger).start() + try: + yield progress.observe + finally: + plogger.finish(finish_template) + + +class Progress(Generic[T]): + def __init__(self, items: int): + self.items = items + self.observations = 0 + + def observe(self, observation: T) -> T: + self.observations = self.observations + 1 + return observation + + def get_progress(self) -> str: + return f'{self.observations} of {self.items}' + + +class ProgressLogger: + def __init__(self, progress: Progress, interval_seconds: int, template: str, logger: Logger): + self._progress = progress + self._interval_seconds = interval_seconds + self._template = template + self._logger = logger + + self._start = None + self._duration = None + self._timer = self._get_progress_timer() + + def start(self) -> 'ProgressLogger': + self._start = datetime.utcnow() + self._timer.start() + return self + + def finish(self, template: Optional[str] = None): + self._duration = datetime.utcnow() - self._start + self._start = None + self._timer.cancel() + + if template: + self._logger.info(template.format(items=self._progress.items, + observations=self._progress.observations, + duration=self.duration)) + + @property + def duration(self) -> str: + return humanize.precisedelta(self._duration) + + def _get_progress_timer(self): + timer = Timer(self._interval_seconds, self._log_progress) + timer.setDaemon(daemonic=True) + return timer + + def _log_progress(self): + if self._start is None: + return + + delta = datetime.utcnow() - self._start + self._logger.info(self._template.format(progress=self._progress.get_progress(), time=humanize.precisedelta(delta))) + self._timer = self._get_progress_timer() + self._timer.start() diff --git a/python/publish/retry.py b/python/publish/retry.py index 5fe71c4b..2029684e 100644 --- a/python/publish/retry.py +++ b/python/publish/retry.py @@ -11,7 +11,7 @@ from publish.github_action import GithubAction -logger = logging.getLogger('publish-unit-test-results') +logger = logging.getLogger('publish') class GitHubRetry(Retry): diff --git a/python/publish_unit_test_results.py b/python/publish_unit_test_results.py index b8d2f8ef..b8d26ca2 100644 --- a/python/publish_unit_test_results.py +++ b/python/publish_unit_test_results.py @@ -6,22 +6,25 @@ from collections import defaultdict from datetime import datetime from glob import glob -from typing import List, Optional, Union +from typing import List, Optional, Union, Tuple, Any import github +import humanize +import psutil from urllib3.util.retry import Retry import publish.github_action -from publish import hide_comments_modes, none_list, available_annotations, default_annotations, \ +from publish import hide_comments_modes, available_annotations, default_annotations, \ pull_request_build_modes, fail_on_modes, fail_on_mode_errors, fail_on_mode_failures, \ comment_mode_off, comment_mode_update, comment_modes, punctuation_space from publish.github_action import GithubAction from publish.junit import parse_junit_xml_files +from publish.progress import progress_logger from publish.publisher import Publisher, Settings from publish.retry import GitHubRetry from publish.unittestresults import get_test_results, get_stats, ParsedUnitTestResults -logger = logging.getLogger('publish-unit-test-results') +logger = logging.getLogger('publish') def get_conclusion(parsed: ParsedUnitTestResults, fail_on_failures, fail_on_errors) -> str: @@ -56,6 +59,22 @@ def get_files(multiline_files_globs: str) -> List[str]: return list(included - excluded) +def get_files_size(files: List[str]) -> str: + try: + size = sum([os.path.getsize(file) for file in files]) + return humanize.naturalsize(size, binary=True) + except BaseException as e: + logger.warning(f'failed to obtain file size of {len(files)} files', exc_info=e) + return 'unknown size' + + +def get_number_of_files(files: List[str]) -> str: + number_of_files = '{number:,} file{s}'.format( + number=len(files), s='s' if len(files) > 1 else '' + ).replace(',', punctuation_space) + return number_of_files + + def main(settings: Settings, gha: GithubAction) -> None: # we cannot create a check run or pull request comment when running on pull_request event from a fork # when event_file is given we assume proper setup as in README.md#support-fork-repositories-and-dependabot-branches @@ -74,13 +93,27 @@ def main(settings: Settings, gha: GithubAction) -> None: if len(files) == 0: gha.warning(f'Could not find any files for {settings.files_glob}') else: - logger.info(f'reading {settings.files_glob}') + logger.info(f'Reading {settings.files_glob} ({get_number_of_files(files)}, {get_files_size(files)})') logger.debug(f'reading {list(files)}') - # get the unit test results - parsed = parse_junit_xml_files(files, settings.time_factor, settings.ignore_runs).with_commit(settings.commit) - [gha.error(message=f'Error processing result file: {error.message}', file=error.file, line=error.line, column=error.column) - for error in parsed.errors] + # log the available RAM to help spot OOM issues: + # https://github.com/EnricoMi/publish-unit-test-result-action/issues/231 + # https://github.com/EnricoMi/publish-unit-test-result-action/issues/304 + avail_mem = humanize.naturalsize(psutil.virtual_memory().available, binary=True) + logger.info(f'Available memory to read files: {avail_mem}') + + # log the progress + # https://github.com/EnricoMi/publish-unit-test-result-action/issues/304 + with progress_logger(items=len(files), + interval_seconds=10, + progress_template='Read {progress} files in {time}', + finish_template='Finished reading {observations} files in {duration}', + progress_item_type=Tuple[str, Any], + logger=logger) as progress: + # get the unit test results + parsed = parse_junit_xml_files(files, settings.time_factor, settings.ignore_runs, progress).with_commit(settings.commit) + [gha.error(message=f'Error processing result file: {error.message}', file=error.file, line=error.line, column=error.column) + for error in parsed.errors] # process the parsed results results = get_test_results(parsed, settings.dedup_classes_by_file_name) diff --git a/python/requirements.txt b/python/requirements.txt index 1fa9ee4b..2e7fca82 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -1,6 +1,8 @@ # dataclasses does not exist before Python3.7, needed as dependency dataclasses;python_version<"3.7" +humanize==3.14.0 junitparser==2.5.0 +psutil==5.9.1 PyGithub==1.55 +requests==2.27.1 urllib3==1.26.9 -requests==2.27.1 \ No newline at end of file diff --git a/python/test/test_progress.py b/python/test/test_progress.py new file mode 100644 index 00000000..2b331add --- /dev/null +++ b/python/test/test_progress.py @@ -0,0 +1,65 @@ +import unittest +from datetime import datetime, timezone + +import mock + +from publish.progress import Progress, ProgressLogger + + +class TestProgress(unittest.TestCase): + def test_get_progress(self): + progress = Progress(10) + self.assertEqual('0 of 10', progress.get_progress()) + self.assertEqual('0 of 10', progress.get_progress()) + self.assertEqual('item', progress.observe('item')) + self.assertEqual('1 of 10', progress.get_progress()) + self.assertEqual('1 of 10', progress.get_progress()) + self.assertEqual(1, progress.observe(1)) + self.assertEqual('2 of 10', progress.get_progress()) + self.assertEqual('2 of 10', progress.get_progress()) + self.assertEqual(1.2, progress.observe(1.2)) + self.assertEqual('3 of 10', progress.get_progress()) + self.assertEqual('3 of 10', progress.get_progress()) + obj = object() + self.assertEqual(obj, progress.observe(obj)) + self.assertEqual('4 of 10', progress.get_progress()) + self.assertEqual('4 of 10', progress.get_progress()) + + +class TestProgressLogger(unittest.TestCase): + def test(self): + progress = Progress(10) + logger = mock.MagicMock(info=mock.Mock()) + plogger = ProgressLogger(progress, 60, 'progress: {progress} in {time}', logger) + try: + ts = datetime(2022, 6, 1, 12, 34, 56, tzinfo=timezone.utc) + with mock.patch('publish.progress.datetime', utcnow=mock.Mock(return_value=ts)): + plogger.start() + logger.info.assert_not_called() + + progress.observe('item') + logger.info.assert_not_called() + + ts = datetime(2022, 6, 1, 12, 35, 00, tzinfo=timezone.utc) + with mock.patch('publish.progress.datetime', utcnow=mock.Mock(return_value=ts)): + plogger._log_progress() + self.assertEqual([mock.call('progress: 1 of 10 in 4 seconds')], logger.info.call_args_list) + logger.info.reset_mock() + + progress.observe('item') + progress.observe('item') + logger.info.assert_not_called() + + ts = datetime(2022, 6, 1, 12, 40, 00, tzinfo=timezone.utc) + with mock.patch('publish.progress.datetime', utcnow=mock.Mock(return_value=ts)): + plogger._log_progress() + self.assertEqual([mock.call('progress: 3 of 10 in 5 minutes and 4 seconds')], logger.info.call_args_list) + logger.info.reset_mock() + finally: + ts = datetime(2022, 6, 1, 12, 41, 23, tzinfo=timezone.utc) + with mock.patch('publish.progress.datetime', utcnow=mock.Mock(return_value=ts)): + plogger.finish('finished: {observations} of {items} in {duration}') + self.assertEqual([mock.call('finished: 3 of 10 in 6 minutes and 27 seconds')], logger.info.call_args_list) + logger.info.reset_mock() + + self.assertEqual('6 minutes and 27 seconds', plogger.duration)