Skip to content

Commit

Permalink
Log file sizes, free mem and progress reading files (#305)
Browse files Browse the repository at this point in the history
  • Loading branch information
EnricoMi committed Jun 14, 2022
1 parent 3281b65 commit ca6e0bd
Show file tree
Hide file tree
Showing 6 changed files with 198 additions and 13 deletions.
9 changes: 6 additions & 3 deletions python/publish/junit.py
@@ -1,6 +1,6 @@
import os
from collections import defaultdict
from typing import Optional, Iterable, Union, Any, List, Dict
from typing import Optional, Iterable, Union, Any, List, Dict, Callable, Tuple

import junitparser
from junitparser import Element, JUnitXml, TestCase, TestSuite, Skipped
Expand Down Expand Up @@ -98,7 +98,10 @@ def end(self, tag: Union[str, bytes]) -> Element:
self._stack.pop()


def parse_junit_xml_files(files: Iterable[str], time_factor: float = 1.0, drop_testcases: bool = False) -> ParsedUnitTestResults:
def parse_junit_xml_files(files: Iterable[str],
time_factor: float = 1.0,
drop_testcases: bool = False,
progress: Callable[[Tuple[str, Any]], Tuple[str, Any]] = lambda x: x) -> ParsedUnitTestResults:
"""Parses junit xml files and returns aggregated statistics as a ParsedUnitTestResults."""
def parse(path: str) -> Union[str, Any]:
if not os.path.exists(path):
Expand All @@ -114,7 +117,7 @@ def parse(path: str) -> Union[str, Any]:
except BaseException as e:
return e

parsed_files = [(result_file, parse(result_file))
parsed_files = [progress((result_file, parse(result_file)))
for result_file in files]
junits = [(result_file, junit)
for result_file, junit in parsed_files
Expand Down
82 changes: 82 additions & 0 deletions python/publish/progress.py
@@ -0,0 +1,82 @@
from datetime import datetime
from logging import Logger
from threading import Timer
from typing import Generic, TypeVar, Optional, Callable, Type, Any
import contextlib

import humanize

T = TypeVar('T')


@contextlib.contextmanager
def progress_logger(items: int,
interval_seconds: int,
progress_template: str,
finish_template: Optional[str],
logger: Logger,
progress_item_type: Type[T] = Any) -> Callable[[T], T]:
progress = Progress[progress_item_type](items)
plogger = ProgressLogger(progress, interval_seconds, progress_template, logger).start()
try:
yield progress.observe
finally:
plogger.finish(finish_template)


class Progress(Generic[T]):
def __init__(self, items: int):
self.items = items
self.observations = 0

def observe(self, observation: T) -> T:
self.observations = self.observations + 1
return observation

def get_progress(self) -> str:
return f'{self.observations} of {self.items}'


class ProgressLogger:
def __init__(self, progress: Progress, interval_seconds: int, template: str, logger: Logger):
self._progress = progress
self._interval_seconds = interval_seconds
self._template = template
self._logger = logger

self._start = None
self._duration = None
self._timer = self._get_progress_timer()

def start(self) -> 'ProgressLogger':
self._start = datetime.utcnow()
self._timer.start()
return self

def finish(self, template: Optional[str] = None):
self._duration = datetime.utcnow() - self._start
self._start = None
self._timer.cancel()

if template:
self._logger.info(template.format(items=self._progress.items,
observations=self._progress.observations,
duration=self.duration))

@property
def duration(self) -> str:
return humanize.precisedelta(self._duration)

def _get_progress_timer(self):
timer = Timer(self._interval_seconds, self._log_progress)
timer.setDaemon(daemonic=True)
return timer

def _log_progress(self):
if self._start is None:
return

delta = datetime.utcnow() - self._start
self._logger.info(self._template.format(progress=self._progress.get_progress(), time=humanize.precisedelta(delta)))
self._timer = self._get_progress_timer()
self._timer.start()
2 changes: 1 addition & 1 deletion python/publish/retry.py
Expand Up @@ -11,7 +11,7 @@

from publish.github_action import GithubAction

logger = logging.getLogger('publish-unit-test-results')
logger = logging.getLogger('publish')


class GitHubRetry(Retry):
Expand Down
49 changes: 41 additions & 8 deletions python/publish_unit_test_results.py
Expand Up @@ -6,22 +6,25 @@
from collections import defaultdict
from datetime import datetime
from glob import glob
from typing import List, Optional, Union
from typing import List, Optional, Union, Tuple, Any

import github
import humanize
import psutil
from urllib3.util.retry import Retry

import publish.github_action
from publish import hide_comments_modes, none_list, available_annotations, default_annotations, \
from publish import hide_comments_modes, available_annotations, default_annotations, \
pull_request_build_modes, fail_on_modes, fail_on_mode_errors, fail_on_mode_failures, \
comment_mode_off, comment_mode_update, comment_modes, punctuation_space
from publish.github_action import GithubAction
from publish.junit import parse_junit_xml_files
from publish.progress import progress_logger
from publish.publisher import Publisher, Settings
from publish.retry import GitHubRetry
from publish.unittestresults import get_test_results, get_stats, ParsedUnitTestResults

logger = logging.getLogger('publish-unit-test-results')
logger = logging.getLogger('publish')


def get_conclusion(parsed: ParsedUnitTestResults, fail_on_failures, fail_on_errors) -> str:
Expand Down Expand Up @@ -56,6 +59,22 @@ def get_files(multiline_files_globs: str) -> List[str]:
return list(included - excluded)


def get_files_size(files: List[str]) -> str:
try:
size = sum([os.path.getsize(file) for file in files])
return humanize.naturalsize(size, binary=True)
except BaseException as e:
logger.warning(f'failed to obtain file size of {len(files)} files', exc_info=e)
return 'unknown size'


def get_number_of_files(files: List[str]) -> str:
number_of_files = '{number:,} file{s}'.format(
number=len(files), s='s' if len(files) > 1 else ''
).replace(',', punctuation_space)
return number_of_files


def main(settings: Settings, gha: GithubAction) -> None:
# we cannot create a check run or pull request comment when running on pull_request event from a fork
# when event_file is given we assume proper setup as in README.md#support-fork-repositories-and-dependabot-branches
Expand All @@ -74,13 +93,27 @@ def main(settings: Settings, gha: GithubAction) -> None:
if len(files) == 0:
gha.warning(f'Could not find any files for {settings.files_glob}')
else:
logger.info(f'reading {settings.files_glob}')
logger.info(f'Reading {settings.files_glob} ({get_number_of_files(files)}, {get_files_size(files)})')
logger.debug(f'reading {list(files)}')

# get the unit test results
parsed = parse_junit_xml_files(files, settings.time_factor, settings.ignore_runs).with_commit(settings.commit)
[gha.error(message=f'Error processing result file: {error.message}', file=error.file, line=error.line, column=error.column)
for error in parsed.errors]
# log the available RAM to help spot OOM issues:
# https://github.com/EnricoMi/publish-unit-test-result-action/issues/231
# https://github.com/EnricoMi/publish-unit-test-result-action/issues/304
avail_mem = humanize.naturalsize(psutil.virtual_memory().available, binary=True)
logger.info(f'Available memory to read files: {avail_mem}')

# log the progress
# https://github.com/EnricoMi/publish-unit-test-result-action/issues/304
with progress_logger(items=len(files),
interval_seconds=10,
progress_template='Read {progress} files in {time}',
finish_template='Finished reading {observations} files in {duration}',
progress_item_type=Tuple[str, Any],
logger=logger) as progress:
# get the unit test results
parsed = parse_junit_xml_files(files, settings.time_factor, settings.ignore_runs, progress).with_commit(settings.commit)
[gha.error(message=f'Error processing result file: {error.message}', file=error.file, line=error.line, column=error.column)
for error in parsed.errors]

# process the parsed results
results = get_test_results(parsed, settings.dedup_classes_by_file_name)
Expand Down
4 changes: 3 additions & 1 deletion python/requirements.txt
@@ -1,6 +1,8 @@
# dataclasses does not exist before Python3.7, needed as dependency
dataclasses;python_version<"3.7"
humanize==3.14.0
junitparser==2.5.0
psutil==5.9.1
PyGithub==1.55
requests==2.27.1
urllib3==1.26.9
requests==2.27.1
65 changes: 65 additions & 0 deletions python/test/test_progress.py
@@ -0,0 +1,65 @@
import unittest
from datetime import datetime, timezone

import mock

from publish.progress import Progress, ProgressLogger


class TestProgress(unittest.TestCase):
def test_get_progress(self):
progress = Progress(10)
self.assertEqual('0 of 10', progress.get_progress())
self.assertEqual('0 of 10', progress.get_progress())
self.assertEqual('item', progress.observe('item'))
self.assertEqual('1 of 10', progress.get_progress())
self.assertEqual('1 of 10', progress.get_progress())
self.assertEqual(1, progress.observe(1))
self.assertEqual('2 of 10', progress.get_progress())
self.assertEqual('2 of 10', progress.get_progress())
self.assertEqual(1.2, progress.observe(1.2))
self.assertEqual('3 of 10', progress.get_progress())
self.assertEqual('3 of 10', progress.get_progress())
obj = object()
self.assertEqual(obj, progress.observe(obj))
self.assertEqual('4 of 10', progress.get_progress())
self.assertEqual('4 of 10', progress.get_progress())


class TestProgressLogger(unittest.TestCase):
def test(self):
progress = Progress(10)
logger = mock.MagicMock(info=mock.Mock())
plogger = ProgressLogger(progress, 60, 'progress: {progress} in {time}', logger)
try:
ts = datetime(2022, 6, 1, 12, 34, 56, tzinfo=timezone.utc)
with mock.patch('publish.progress.datetime', utcnow=mock.Mock(return_value=ts)):
plogger.start()
logger.info.assert_not_called()

progress.observe('item')
logger.info.assert_not_called()

ts = datetime(2022, 6, 1, 12, 35, 00, tzinfo=timezone.utc)
with mock.patch('publish.progress.datetime', utcnow=mock.Mock(return_value=ts)):
plogger._log_progress()
self.assertEqual([mock.call('progress: 1 of 10 in 4 seconds')], logger.info.call_args_list)
logger.info.reset_mock()

progress.observe('item')
progress.observe('item')
logger.info.assert_not_called()

ts = datetime(2022, 6, 1, 12, 40, 00, tzinfo=timezone.utc)
with mock.patch('publish.progress.datetime', utcnow=mock.Mock(return_value=ts)):
plogger._log_progress()
self.assertEqual([mock.call('progress: 3 of 10 in 5 minutes and 4 seconds')], logger.info.call_args_list)
logger.info.reset_mock()
finally:
ts = datetime(2022, 6, 1, 12, 41, 23, tzinfo=timezone.utc)
with mock.patch('publish.progress.datetime', utcnow=mock.Mock(return_value=ts)):
plogger.finish('finished: {observations} of {items} in {duration}')
self.assertEqual([mock.call('finished: 3 of 10 in 6 minutes and 27 seconds')], logger.info.call_args_list)
logger.info.reset_mock()

self.assertEqual('6 minutes and 27 seconds', plogger.duration)

0 comments on commit ca6e0bd

Please sign in to comment.