Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move to lxml #286

Merged
merged 3 commits into from May 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
27 changes: 23 additions & 4 deletions python/publish/junit.py
@@ -1,13 +1,19 @@
import os
from collections import defaultdict
from typing import Optional, Iterable, Union, Any, List, Dict
from typing import Optional, Iterable, Union, Tuple, List, Dict

import junitparser
from junitparser import Element, JUnitXml, TestCase, TestSuite, Skipped
from junitparser.junitparser import etree

from publish.unittestresults import ParsedUnitTestResults, UnitTestCase, ParseError

try:
import lxml
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

blacklist: Using lxml to parse untrusted XML data is known to be vulnerable to XML attacks. Replace lxml with the equivalent defusedxml package.

(at-me in a reply with help or ignore)


Was this a good recommendation?
[ 🙁 Not relevant ] - [ 😕 Won't fix ] - [ 😑 Not critical, will fix ] - [ 🙂 Critical, will fix ] - [ 😊 Critical, fixing now ]

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

defusedxml.lxml is deprecated

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sonatype-lift ignore

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've recorded this as ignored for this pull request. If you change your mind, just comment @sonatype-lift unignore.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

opt.semgrep.python.lang.security.use-defused-xml.use-defused-xml: Found use of the native Python XML libraries, which is vulnerable to XML external entity (XXE)
attacks. The Python documentation recommends the 'defusedxml' library instead if the XML being
loaded is untrusted.

(at-me in a reply with help or ignore)


Was this a good recommendation?
[ 🙁 Not relevant ] - [ 😕 Won't fix ] - [ 😑 Not critical, will fix ] - [ 🙂 Critical, will fix ] - [ 😊 Critical, fixing now ]

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

defusedxml.lxml is deprecated

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sonatype-lift ignore

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've recorded this as ignored for this pull request. If you change your mind, just comment @sonatype-lift unignore.

lxml_available = True
except ImportError:
lxml_available = False


def get_results(results: Union[Element, List[Element]], status: Optional[str] = None) -> List[Element]:
"""
Expand Down Expand Up @@ -97,10 +103,24 @@ def end(self, tag: Union[str, bytes]) -> Element:
if self._stack:
self._stack.pop()

def close(self) -> Element:
# when lxml is around, we have to return an ElementTree here, otherwise
# XMLParser(target=...).parse(..., parser=...)
# returns an Element, not a ElementTree, but junitparser expects an ElementTree
#
# https://lxml.de/parsing.html:
# Note that the parser does not build a tree when using a parser target. The result of the parser run is
# whatever the target object returns from its .close() method. If you want to return an XML tree here, you
# have to create it programmatically in the target object.
if lxml_available:
return lxml.etree.ElementTree(super().close())
else:
return super().close()


def parse_junit_xml_files(files: Iterable[str], time_factor: float = 1.0, drop_testcases: bool = False) -> ParsedUnitTestResults:
"""Parses junit xml files and returns aggregated statistics as a ParsedUnitTestResults."""
def parse(path: str) -> Union[str, Any]:
def parse(path: str) -> Union[JUnitXml, BaseException]:
if not os.path.exists(path):
return FileNotFoundError(f'File does not exist.')
if os.stat(path).st_size == 0:
Expand All @@ -114,8 +134,7 @@ def parse(path: str) -> Union[str, Any]:
except BaseException as e:
return e

parsed_files = [(result_file, parse(result_file))
for result_file in files]
parsed_files = [(result_file, parse(result_file)) for result_file in files]
junits = [(result_file, junit)
for result_file, junit in parsed_files
if not isinstance(junit, BaseException)]
Expand Down
6 changes: 3 additions & 3 deletions python/publish/unittestresults.py
Expand Up @@ -29,8 +29,8 @@ def __init__(self, items=None):
class ParseError:
file: str
message: str
line: Optional[int]
column: Optional[int]
line: Optional[int] = None
column: Optional[int] = None

@staticmethod
def from_exception(file: str, exception: BaseException):
Expand All @@ -45,7 +45,7 @@ def from_exception(file: str, exception: BaseException):
elif msg.startswith('Invalid format.'):
msg = f'File is not a valid JUnit file:\n{msg}'
return ParseError(file=file, message=msg, line=line, column=column)
return ParseError(file=file, message=str(exception), line=None, column=None)
return ParseError(file=file, message=str(exception))


@dataclass(frozen=True)
Expand Down
3 changes: 2 additions & 1 deletion python/requirements.txt
Expand Up @@ -3,4 +3,5 @@ dataclasses;python_version<"3.7"
junitparser==2.5.0
PyGithub==1.55
urllib3==1.26.9
requests==2.27.1
requests==2.27.1
lxml==4.8.0
2 changes: 1 addition & 1 deletion python/test/test_action_script.py
Expand Up @@ -96,7 +96,7 @@ def test_get_conclusion_parse_errors(self):
with self.subTest(fail_on_errors=fail_on_errors, fail_on_failures=fail_on_failures):
actual = get_conclusion(ParsedUnitTestResults(
files=2,
errors=[ParseError(file='file', message='error', line=None, column=None)],
errors=[ParseError(file='file', message='error')],
suites=1,
suite_tests=4,
suite_skipped=1,
Expand Down
24 changes: 14 additions & 10 deletions python/test/test_junit.py
@@ -1,4 +1,3 @@
import os
import pathlib
import unittest
from distutils.version import LooseVersion
Expand All @@ -11,6 +10,7 @@

test_files_path = pathlib.Path(__file__).parent / 'files'


class TestElement(Element):
__test__ = False

Expand Down Expand Up @@ -384,7 +384,7 @@ def test_parse_junit_xml_files_with_empty_file(self):
ParsedUnitTestResults(
cases=[],
files=1,
errors=[ParseError(result_file, 'File is empty.', None, None)],
errors=[ParseError(result_file, 'File is empty.')],
suite_errors=0,
suite_failures=0,
suite_skipped=0,
Expand All @@ -394,12 +394,14 @@ def test_parse_junit_xml_files_with_empty_file(self):
))

def test_parse_junit_xml_files_with_non_xml_file(self):
result_file = str(test_files_path / 'non-xml.xml')
result_file = test_files_path / 'non-xml.xml'
result_filename = str(result_file)
expected_filename = ('file:/' + result_file.absolute().as_posix()) if result_file.drive else result_file.name
self.assertEqual(
parse_junit_xml_files([result_file]),
parse_junit_xml_files([result_filename]),
ParsedUnitTestResults(
files=1,
errors=[ParseError(file=result_file, message='File is not a valid XML file:\nsyntax error: line 1, column 0', line=1, column=0)],
errors=[ParseError(file=result_filename, message=f"Start tag expected, '<' not found, line 1, column 1 ({expected_filename}, line 1)")],
suites=0,
suite_tests=0,
suite_skipped=0,
Expand All @@ -410,12 +412,14 @@ def test_parse_junit_xml_files_with_non_xml_file(self):
))

def test_parse_junit_xml_files_with_corrupt_xml_file(self):
result_file = str(test_files_path / 'corrupt-xml.xml')
result_file = test_files_path / 'corrupt-xml.xml'
result_filename = str(result_file)
expected_filename = ('file:/' + result_file.absolute().as_posix()) if result_file.drive else result_file.name
self.assertEqual(
parse_junit_xml_files([result_file]),
parse_junit_xml_files([result_filename]),
ParsedUnitTestResults(
files=1,
errors=[ParseError(file=result_file, message='File is not a valid XML file:\nno element found: line 11, column 21', line=11, column=21)],
errors=[ParseError(file=result_filename, message=f'Premature end of data in tag skipped line 9, line 11, column 22 ({expected_filename}, line 11)')],
suites=0,
suite_tests=0,
suite_skipped=0,
Expand All @@ -431,7 +435,7 @@ def test_parse_junit_xml_files_with_non_junit_file(self):
parse_junit_xml_files([result_file]),
ParsedUnitTestResults(
files=1,
errors=[ParseError(file=result_file, message='Invalid format.', line=None, column=None)],
errors=[ParseError(file=result_file, message='Invalid format.')],
suites=0,
suite_tests=0,
suite_skipped=0,
Expand All @@ -447,7 +451,7 @@ def test_parse_junit_xml_files_with_non_existing_file(self):
ParsedUnitTestResults(
cases=[],
files=1,
errors=[ParseError('files/does_not_exist.xml', 'File does not exist.', None, None)],
errors=[ParseError('files/does_not_exist.xml', 'File does not exist.')],
suite_errors=0,
suite_failures=0,
suite_skipped=0,
Expand Down
8 changes: 4 additions & 4 deletions python/test/test_unittestresults.py
Expand Up @@ -8,7 +8,7 @@
UnitTestRunResults, UnitTestRunDeltaResults, ParseError
from test import d, n

errors = [ParseError('file', 'error', None, None)]
errors = [ParseError('file', 'error')]
errors_dict = [dataclasses.asdict(e) for e in errors]


Expand All @@ -26,12 +26,12 @@ def test_parse_error_from_file_not_found(self):
error = FileNotFoundError(2, 'No such file or directory')
error.filename = 'some file path'
actual = ParseError.from_exception('file', error)
expected = ParseError('file', "[Errno 2] No such file or directory: 'some file path'", None, None)
expected = ParseError('file', "[Errno 2] No such file or directory: 'some file path'")
self.assertEqual(expected, actual)

def test_parse_error_from_error(self):
actual = ParseError.from_exception('file', ValueError('error'))
expected = ParseError('file', 'error', None, None)
expected = ParseError('file', 'error')
self.assertEqual(expected, actual)

def test_parsed_unit_test_results_with_commit(self):
Expand Down Expand Up @@ -390,7 +390,7 @@ def test_get_stats_delta(self):
commit='commit'
), UnitTestRunResults(
files=3,
errors=[ParseError('other file', 'other error', None, None)],
errors=[ParseError('other file', 'other error')],
suites=5,
duration=7,

Expand Down