Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lcov testing #1289

Merged
merged 3 commits into from Jan 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
26 changes: 26 additions & 0 deletions coverage/cmdline.py
Expand Up @@ -127,6 +127,11 @@ class Opts:
'', '--pretty-print', action='store_true',
help="Format the JSON for human readers.",
)
# Output-file switch for the "lcov" command.  Only the short form "-o" is
# defined (the long option string is left empty); the parsed value is stored
# on the options object as `outfile`.
lcov = optparse.make_option(
    "-o", "",
    dest="outfile",
    action="store",
    metavar="OUTFILE",
    help=(
        "Write the LCOV report to this file. "
        "Defaults to 'coverage.lcov'"
    ),
)
parallel_mode = optparse.make_option(
'-p', '--parallel-mode', action='store_true',
help=(
Expand Down Expand Up @@ -473,6 +478,20 @@ def get_prog_name(self):
usage="[options] [modules]",
description="Generate an XML report of coverage results."
),

'lcov': CmdOptionParser(
"lcov",
[
Opts.fail_under,
Opts.ignore_errors,
Opts.include,
Opts.lcov,
Opts.omit,
Opts.quiet,
] + GLOBAL_ARGS,
usage="[options] [modules]",
description="Generate an LCOV report of coverage results."
)
}


Expand Down Expand Up @@ -657,6 +676,12 @@ def command_line(self, argv):
show_contexts=options.show_contexts,
**report_args
)
elif options.action == "lcov":
total = self.coverage.lcov_report(
outfile=options.outfile,
**report_args
)

else:
# There are no other possible actions.
raise AssertionError
Expand Down Expand Up @@ -854,6 +879,7 @@ def unglob_args(args):
report Report coverage stats on modules.
run Run a Python program and measure code execution.
xml Create an XML report of coverage results.
lcov Create an LCOV report of coverage results.

Use "{program_name} help <command>" for detailed help on any command.
""",
Expand Down
3 changes: 3 additions & 0 deletions coverage/config.py
Expand Up @@ -227,6 +227,9 @@ def __init__(self):
self.json_pretty_print = False
self.json_show_contexts = False

# Default output filename for lcov_reporter
self.lcov_output = "coverage.lcov"

# Defaults for [paths]
self.paths = collections.OrderedDict()

Expand Down
20 changes: 20 additions & 0 deletions coverage/control.py
Expand Up @@ -26,6 +26,7 @@
from coverage.html import HtmlReporter
from coverage.inorout import InOrOut
from coverage.jsonreport import JsonReporter
from coverage.lcovreport import LcovReporter
from coverage.misc import bool_or_none, join_regex, human_sorted, human_sorted_items
from coverage.misc import DefaultValue, ensure_dir_for_file, isolate_module
from coverage.plugin import FileReporter
Expand Down Expand Up @@ -1049,6 +1050,25 @@ def json_report(
):
return render_report(self.config.json_output, JsonReporter(self), morfs, self._message)

def lcov_report(
    self, morfs=None, outfile=None, ignore_errors=None,
    omit=None, include=None, contexts=None,
):
    """Generate an LCOV report of coverage results.

    Each module in `morfs` is included in the report. `outfile` is the
    path to write the file to, "-" will write to stdout.

    See :meth:`report` for other arguments.

    Returns the value produced by `render_report` for the LCOV reporter.

    .. versionadded:: 6.3
    """
    # The arguments are handed to override_config as configuration settings,
    # and the output path is then read back from self.config.lcov_output
    # rather than taken from `outfile` directly.
    # NOTE(review): presumably override_config applies these settings only for
    # the duration of the `with` block and leaves settings passed as None
    # untouched -- confirm against override_config.
    with override_config(self,
        ignore_errors=ignore_errors, report_omit=omit, report_include=include,
        lcov_output=outfile, report_contexts=contexts,
    ):
        return render_report(self.config.lcov_output, LcovReporter(self), morfs, self._message)

def sys_info(self):
"""Return a list of (key, value) pairs showing internal information."""

Expand Down
106 changes: 106 additions & 0 deletions coverage/lcovreport.py
@@ -0,0 +1,106 @@
# Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
# For details: https://github.com/nedbat/coveragepy/blob/master/NOTICE.txt

"""LCOV reporting for coverage.py."""

import sys
import base64
from hashlib import md5

from coverage.report import get_analysis_to_report


class LcovReporter:
    """A reporter for writing LCOV coverage reports."""

    report_type = "LCOV report"

    def __init__(self, coverage):
        """Remember the `coverage` object and its configuration."""
        self.coverage = coverage
        self.config = self.coverage.config

    def report(self, morfs, outfile=None):
        """Render the full LCOV report.

        `morfs` is a list of modules or filenames.

        `outfile` is the file object to write the report into; when it is
        missing (or falsy) the report goes to `sys.stdout`.
        """
        self.coverage.get_data()
        outfile = outfile or sys.stdout

        # NOTE(review): nothing is returned from here.  If callers expect a
        # coverage total back from a reporter (the command line stores the
        # result of lcov_report() as `total`), --fail-under may have no
        # effect for this report -- confirm.
        for fr, analysis in get_analysis_to_report(self.coverage, morfs):
            self.get_lcov(fr, analysis, outfile)

    @staticmethod
    def _line_checksum(source_lines, lineno):
        """Return the LCOV checksum for 1-based line `lineno` of `source_lines`.

        The checksum is the base64-encoded MD5 digest of the line's UTF-8
        text, with the trailing "=" padding removed so that genhtml accepts
        the resulting file.  A line that has no source text -- because the
        source is empty, or because `lineno` lies outside it -- is hashed as
        an empty line instead of raising IndexError.
        """
        if source_lines and 1 <= lineno <= len(source_lines):
            text = source_lines[lineno - 1].encode("utf-8")
        else:
            text = b""

        # The checksum is an optional part of a DA record; consumers such as
        # genhtml use it only to notice source that changed after the report
        # was made.  This is not a security use of MD5, so say so: that lets
        # FIPS-restricted Python builds still compute it.
        try:
            digest = md5(text, usedforsecurity=False).digest()
        except TypeError:
            # Python before 3.9 does not accept `usedforsecurity`.
            digest = md5(text).digest()
        return base64.b64encode(digest).decode("ascii").rstrip("=")

    def get_lcov(self, fr, analysis, outfile=None):
        """Write the LCOV record for a single file.

        `fr` supplies the file's relative name and source text, `analysis`
        supplies its line and branch data, and `outfile` is the file object
        to write to (`sys.stdout` when None).

        Line and branch coverage are both reported; function coverage is not
        supported.
        """
        if outfile is None:
            outfile = sys.stdout

        outfile.write("TN:\n")
        outfile.write(f"SF:{fr.relative_filename()}\n")
        source_lines = fr.source().splitlines()

        # Coverage.py only records *whether* a line ran, not how many times,
        # so executed lines are given a count of 1 even though that is not
        # technically the true execution count.
        for covered in sorted(analysis.executed):
            checksum = self._line_checksum(source_lines, covered)
            outfile.write(f"DA:{covered},1,{checksum}\n")
        for missed in sorted(analysis.missing):
            checksum = self._line_checksum(source_lines, missed)
            outfile.write(f"DA:{missed},0,{checksum}\n")
        outfile.write(f"LF:{len(analysis.statements)}\n")
        outfile.write(f"LH:{len(analysis.executed)}\n")

        # More information-dense branch coverage data: one BRDA line per arc
        # leaving each branch line.  The "block" is the index of the branch
        # line among all branch lines of the file.
        # NOTE(review): the first BRDA field written here is the arc's
        # destination line; LCOV appears to expect the line holding the
        # branch -- confirm against the tracefile format.
        missing_arcs = analysis.missing_branch_arcs()
        executed_arcs = analysis.executed_branch_arcs()
        branch_stats = analysis.branch_stats()
        for block_number, block_line in enumerate(sorted(branch_stats)):
            missing_dests = sorted(missing_arcs.get(block_line, ()))
            executed_dests = sorted(executed_arcs.get(block_line, ()))

            for branch_number, dest in enumerate(missing_dests):
                # Exit arcs have a negative destination, which is not valid
                # LCOV; clamping to 0 keeps the record valid while still
                # reporting the arc.
                dest = max(dest, 0)
                outfile.write(f"BRDA:{dest},{block_number},{branch_number},-\n")

            # Start counting after the missing arcs so branch numbers stay
            # unique within the block instead of restarting at 0.
            for branch_number, dest in enumerate(executed_dests, start=len(missing_dests)):
                dest = max(dest, 0)
                outfile.write(f"BRDA:{dest},{block_number},{branch_number},1\n")

        # Summary of the branch coverage: branches found and branches hit.
        if analysis.has_arcs():
            brf = sum(total for total, _ in branch_stats.values())
            brh = sum(taken for _, taken in branch_stats.values())
            outfile.write(f"BRF:{brf}\n")
            outfile.write(f"BRH:{brh}\n")

        outfile.write("end_of_record\n")
15 changes: 15 additions & 0 deletions coverage/results.py
Expand Up @@ -136,6 +136,21 @@ def missing_branch_arcs(self):
mba[l1].append(l2)
return mba

@contract(returns='dict(int: list(int))')
def executed_branch_arcs(self):
    """Group the executed arcs that leave branch lines by their source line.

    Returns {l1:[l2a,l2b,...], ...}: each key l1 is one of the lines given
    by `_branch_lines()`, and its list holds the destination of every
    executed arc that starts at l1.  Lines with no executed arcs get no key.

    """
    ran = self.arcs_executed()
    branching = set(self._branch_lines())
    by_source = collections.defaultdict(list)
    for origin, dest in ran:
        # Arcs that start on a non-branch line are not interesting here.
        if origin not in branching:
            continue
        by_source[origin].append(dest)
    return by_source

@contract(returns='dict(int: tuple(int, int))')
def branch_stats(self):
"""Get stats about branches.
Expand Down
42 changes: 40 additions & 2 deletions doc/cmd.rst
Expand Up @@ -58,6 +58,8 @@ Coverage.py has a number of commands:

* **json** -- :ref:`Produce a JSON report with coverage results <cmd_json>`.

* **lcov** -- :ref:`Produce an LCOV report with coverage results <cmd_lcov>`.

* **annotate** --
:ref:`Annotate source files with coverage results <cmd_annotate>`.

Expand Down Expand Up @@ -430,8 +432,8 @@ Reporting
---------

Coverage.py provides a few styles of reporting, with the **report**, **html**,
**annotate**, **json**, and **xml** commands. They share a number of common
options.
**annotate**, **json**, **lcov**, and **xml** commands. They share a number
of common options.

The command-line arguments are module or file names to report on, if you'd like
to report on a subset of the data collected.
Expand Down Expand Up @@ -785,6 +787,42 @@ The **json** command writes coverage data to a "coverage.json" file.
You can specify the name of the output file with the ``-o`` switch. The JSON
can be nicely formatted by specifying the ``--pretty-print`` switch.

.. _cmd_lcov:

LCOV reporting: ``coverage lcov``
---------------------------------

The **lcov** command writes coverage data to a "coverage.lcov" file.

.. [[[cog show_help("lcov") ]]]
.. code::

$ coverage lcov --help
Usage: coverage lcov [options] [modules]

Generate an LCOV report of coverage results.

Options:
--fail-under=MIN Exit with a status of 2 if the total coverage is less
than MIN.
-i, --ignore-errors Ignore errors while reading source files.
--include=PAT1,PAT2,...
Include only files whose paths match one of these
patterns. Accepts shell-style wildcards, which must be
quoted.
-o OUTFILE Write the LCOV report to this file. Defaults to
'coverage.lcov'
--omit=PAT1,PAT2,... Omit files whose paths match one of these patterns.
Accepts shell-style wildcards, which must be quoted.
-q, --quiet Don't print messages about what is happening.
--debug=OPTS Debug options, separated by commas. [env:
COVERAGE_DEBUG]
-h, --help Get help on this command.
--rcfile=RCFILE Specify configuration file. By default '.coveragerc',
'setup.cfg', 'tox.ini', and 'pyproject.toml' are
tried. [env: COVERAGE_RCFILE]
.. [[[end]]] (checksum: 4d078e4637e5b507cbb997803a0d4758)

Other common reporting options are described above in :ref:`cmd_reporting`.


Expand Down
1 change: 1 addition & 0 deletions doc/dict.txt
Expand Up @@ -107,6 +107,7 @@ jquery
json
jython
kwargs
lcov
Mako
matcher
matchers
Expand Down
3 changes: 2 additions & 1 deletion doc/index.rst
Expand Up @@ -152,7 +152,8 @@ Coverage.py can do a number of things:
- It can tell you :ref:`what tests ran which lines <dynamic_contexts>`.

- It can produce reports in a number of formats: :ref:`text <cmd_report>`,
:ref:`HTML <cmd_html>`, :ref:`XML <cmd_xml>`, and :ref:`JSON <cmd_json>`.
:ref:`HTML <cmd_html>`, :ref:`XML <cmd_xml>`, :ref:`LCOV <cmd_lcov>`,
and :ref:`JSON <cmd_json>`.

- For advanced uses, there's an :ref:`API <api>`, and the result data is
available in a :ref:`SQLite database <dbschema>`.
Expand Down
28 changes: 28 additions & 0 deletions doc/python-coverage.1.txt
Expand Up @@ -67,6 +67,9 @@ COMMAND OVERVIEW
|command| **xml**
Create an XML report of coverage results.

|command| **lcov**
Create an LCOV report of coverage results.


GLOBAL OPTIONS
==============
Expand Down Expand Up @@ -229,6 +232,31 @@ COMMAND REFERENCE
\--show-contexts
Include information about the contexts that executed each line.

**lcov** [ `option` ... ] [ `MODULE` ... ]

Create an LCOV report of the coverage results.

Options:

\--fail-under `MIN`
Exit with a status of 2 if the total coverage is less than `MIN`.

\-i, --ignore-errors
Ignore errors while reading source files.

\-o `OUTFILE`
Write the LCOV report to `OUTFILE`. Defaults to ``coverage.lcov``.

\--include `PATTERN` [ , ... ]
Include only files whose paths match one of these
PATTERNs. Accepts shell-style wildcards, which must be quoted.

\--omit `PATTERN` [ , ... ]
Omit files when their file name matches one of these PATTERNs.
Usually needs quoting on the command line.

\-q, --quiet
Don't print messages about what is happening.

**report** [ `option` ... ] [ `MODULE` ... ]

Expand Down
3 changes: 2 additions & 1 deletion doc/source.rst
Expand Up @@ -88,7 +88,8 @@ reported. Usually you want to see all the code that was measured, but if you
are measuring a large project, you may want to get reports for just certain
parts.

The report commands (``report``, ``html``, ``json``, ``annotate``, and ``xml``)
The report commands (``report``, ``html``, ``json``, ``lcov``, ``annotate``,
and ``xml``)
all take optional ``modules`` arguments, and ``--include`` and ``--omit``
switches. The ``modules`` arguments specify particular modules to report on.
The ``include`` and ``omit`` values are lists of file name patterns, just as
Expand Down