pantsbuild · TansyArron · Mar 21, 2023 · Mar 23, 2023 · Mar 30, 2023 · Apr 20, 2023
diff --git a/pants.toml b/pants.toml
@@ -8,6 +8,7 @@ backend_packages.add = [
   "pants.backend.build_files.fmt.black",
   "pants.backend.python",
   "pants.backend.experimental.python.packaging.pyoxidizer",
+  "pants.backend.experimental.audit",
   "pants.backend.python.lint.autoflake",
   "pants.backend.python.lint.black",
   "pants.backend.python.lint.docformatter",
@@ -240,3 +241,6 @@ args = ["-Yrangepos", "-Xlint:unused"]
 
 [scala-infer]
 force_add_siblings_as_dependencies = false
+
+[pypi-audit]
+lockfile_vulnerability_excludes = { "python-default" = ["GHSA-w596-4wvx-j9j6"] }
-lockfile_vulnerability_excludes = { "python-default" = ["GHSA-w596-4wvx-j9j6"] }
+lockfile_vulnerability_excludes = { "python-default" = [{ id = "GHSA-w596-4wvx-j9j6", note = "This is N/A for reasons a, b and c." }] }
-lockfile_vulnerability_excludes = { "python-default" = ["GHSA-w596-4wvx-j9j6"] }
+lockfile_vulnerability_excludes = { "python-default" = [{ id = "GHSA-w596-4wvx-j9j6", note = "This is N/A for reasons a, b and c." }] }
diff --git a/src/python/pants/backend/audit/BUILD b/src/python/pants/backend/audit/BUILD
@@ -0,0 +1,7 @@
+# Copyright 2024 Pants project contributors (see CONTRIBUTORS.md).
+# Licensed under the Apache License, Version 2.0 (see LICENSE).
+# Target for this directory's Python source files (file matching is left to Pants' defaults).
+python_sources()
+
+# Target for this directory's Python tests, addressable by the explicit name `tests`.
+python_tests(
+    name="tests",
+)
diff --git a/src/python/pants/backend/audit/__init__.py b/src/python/pants/backend/audit/__init__.py
diff --git a/src/python/pants/backend/audit/audit.py b/src/python/pants/backend/audit/audit.py
@@ -0,0 +1,131 @@
+# Copyright 2024 Pants project contributors (see CONTRIBUTORS.md).
+# Licensed under the Apache License, Version 2.0 (see LICENSE).
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass
+from typing import Any, ClassVar, Generic, Iterable, TypeVar
+
+from pants.core.goals.lint import REPORT_DIR as REPORT_DIR  # noqa: F401
+from pants.core.goals.multi_tool_goal_helper import determine_specified_tool_ids
+from pants.engine.collection import Collection
+from pants.engine.console import Console
+from pants.engine.engine_aware import EngineAwareParameter, EngineAwareReturnType
+from pants.engine.goal import Goal, GoalSubsystem
+from pants.engine.internals.selectors import Get, MultiGet
+from pants.engine.rules import collect_rules, goal_rule
+from pants.engine.target import FieldSet, FilteredTargets
+from pants.engine.unions import UnionMembership, union
+
+# Module-level logger, named after this module.
+logger = logging.getLogger(__name__)
+# Type variable for the concrete `FieldSet` subclass an `AuditRequest` is parameterized over.
+_FS = TypeVar("_FS", bound=FieldSet)
+
+
+@dataclass(frozen=True)
+class AuditResult:
+    """The outcome of auditing a single resolve.
+
+    The `audit` goal prints `resolve_name` and `lockfile` as a heading and `report` beneath it.
+    """
+
+    resolve_name: str
+    lockfile: str
+    # Text shown under the heading. The `audit` goal treats an empty string as "nothing found"
+    # (success sigil) and any non-empty string as a failure.
+    report: str
+
+
+@dataclass(unsafe_hash=True)
+class AuditResults(EngineAwareReturnType):
+    """Zero or more AuditResult objects for a single auditor.
+
+    `results` is always stored as a tuple; `auditor_name` names the tool that produced them.
+    """
+
+    results: tuple[AuditResult, ...]
+    auditor_name: str
+
+    # Hand-written so callers may pass any iterable, which is materialized into a tuple here.
+    # `auditor_name` must be passed by keyword.
+    def __init__(self, results: Iterable[AuditResult], *, auditor_name: str) -> None:
+        self.results = tuple(results)
+        self.auditor_name = auditor_name
+
+    def cacheable(self) -> bool:
+        """Is marked uncacheable to ensure that it always renders."""
+        return False
+
+
+@union
+@dataclass(unsafe_hash=True)
+class AuditRequest(Generic[_FS], EngineAwareParameter):
+    """A union for targets that should be audited.
+
+    Subclass and install a member of this type to provide an auditor. Subclasses set the two
+    class-level attributes below; instances carry the field sets to audit.
+    """
+
+    # The FieldSet subclass this auditor operates on. The `audit` goal calls its
+    # `is_applicable(target)` and `create(target)` to build `field_sets`.
+    field_set_type: ClassVar[type[_FS]]  # type: ignore[misc]
+    # Identifier of the auditing tool. The `audit` goal compares it against the selected tool
+    # ids, and it doubles as this request's debug hint.
+    tool_id: ClassVar[str]
+
+    field_sets: Collection[_FS]
+
+    # Hand-written so callers may pass any iterable; it is wrapped in a `Collection` here.
+    def __init__(self, field_sets: Iterable[_FS]) -> None:
+        self.field_sets = Collection[_FS](field_sets)
+
+    def debug_hint(self) -> str:
+        """Return the tool id as the hint for this request."""
+        return self.tool_id
+
+    def metadata(self) -> dict[str, Any]:
+        """Return the address specs of all field sets under the `addresses` key."""
+        return {"addresses": [fs.address.spec for fs in self.field_sets]}
+
+
+class AuditSubsystem(GoalSubsystem):
+    """Options scope and help text for the `audit` goal."""
+
+    name = "audit"
+    help = "Run third party dependency audit tools."
+
+    @classmethod
+    def activated(cls, union_membership: UnionMembership) -> bool:
+        """Report whether `AuditRequest` is present in the given union membership.
+
+        NOTE(review): presumably this is true only once some backend registers an auditor --
+        confirm against `UnionMembership.__contains__`.
+        """
+        return AuditRequest in union_membership
+
+
+class Audit(Goal):
+    """The `audit` goal; its options are provided by `AuditSubsystem`."""
+
+    subsystem_cls = AuditSubsystem
+    # NOTE(review): presumably restricts the goal to the local environment -- confirm against
+    # the `Goal.EnvironmentBehavior` documentation.
+    environment_behavior = Goal.EnvironmentBehavior.LOCAL_ONLY
+
+
+@goal_rule
+async def audit(
+    console: Console,
+    targets: FilteredTargets,
+    union_membership: UnionMembership,
+) -> Audit:
+    """Run the registered auditors over the given targets and print one section per result.
+
+    Each section has a heading with a success/failure sigil, the resolve name and its lockfile,
+    followed by either the auditor's report or "No vulnerabilities reported.".
+    """
+    request_types = union_membership[AuditRequest]
+    # NOTE(review): the list of selected tools is hard-coded here rather than read from an
+    # option on the subsystem.
+    specified_ids = determine_specified_tool_ids("audit", ["pypi-audit"], request_types)
+
+    # One request is built per registered auditor type. An auditor that was not selected (or
+    # that no target applies to) still gets a request, just with no field sets.
+    requests = tuple(
+        request_type(
+            request_type.field_set_type.create(target)  # type: ignore[misc]
+            for target in targets
+            if (
+                request_type.tool_id in specified_ids
+                and request_type.field_set_type.is_applicable(target)  # type: ignore[misc]
+            )
+        )
+        for request_type in request_types
+    )
+
+    all_results = await MultiGet(Get(AuditResults, {request: AuditRequest}) for request in requests)
+    for auditor_results in all_results:
+        for result in auditor_results.results:
+            # A non-empty report means the auditor found something to complain about.
+            has_findings = bool(result.report)
+            sigil = console.sigil_failed() if has_findings else console.sigil_succeeded()
+            console.print_stdout(
+                f"\n\n{sigil} Resolve: {result.resolve_name} (from {result.lockfile})"
+            )
+            console.print_stdout(result.report if has_findings else "No vulnerabilities reported.")
+
+    # NOTE(review): the exit code is 0 even when a report was printed with the failure sigil,
+    # so CI will not fail on findings -- confirm this is intended.
+    return Audit(0)
+
+
+def rules():
+    """Return, as a list, the rules gathered by `collect_rules()` for this module."""
+    return list(collect_rules())
diff --git a/src/python/pants/backend/audit/format_results.py b/src/python/pants/backend/audit/format_results.py
@@ -0,0 +1,59 @@
+# Copyright 2024 Pants project contributors (see CONTRIBUTORS.md).
+# Licensed under the Apache License, Version 2.0 (see LICENSE).
+"""Functionality for formatting vulnerability results as a set of human-readable columns."""
+
+from __future__ import annotations
+
+from itertools import zip_longest
+from typing import Any, Iterable
+
+from pants.backend.audit.pip_audit import VulnerabilityData
+
+
+def tabulate(rows: Iterable[Iterable[Any]]) -> tuple[list[str], list[int]]:
+    """Format rows as left-aligned columns; return the formatted lines and the column widths.
+
+    Every cell is converted with `str()`. Rows may have different lengths; a column's width is
+    that of its widest cell. For example::
+
+    >>> tabulate([['foobar', 2000], [0xdeadbeef]])
+    (['foobar     2000', '3735928559'], [10, 4])
+    """
+    cells = [tuple(str(cell) for cell in row) for row in rows]
+    # Pad short rows with "" so that every column can be measured across all rows.
+    sizes = [
+        max(len(cell) for cell in column) for column in zip_longest(*cells, fillvalue="")
+    ]
+    # Trailing padding of the last cell is stripped so lines carry no trailing whitespace.
+    table = [
+        " ".join(cell.ljust(width) for cell, width in zip(row, sizes)).rstrip() for row in cells
+    ]
+    return table, sizes
+
+
+def generate_header(sizes: Iterable[int]) -> str:
+    """Return one run of dashes per column width, with the runs separated by single spaces."""
+    return " ".join("-" * column_size for column_size in sizes)
+
+
+def format_results(
+    result: dict[str, list[VulnerabilityData]],
+) -> str:
+    """Returns a column formatted string for a given mapping of dependencies to vulnerability
+    results.
+
+    The output is a title row, a dashed rule, and one row per vulnerability, joined by
+    newlines. If the mapping contains no vulnerabilities at all, the empty string is returned.
+    """
+    # `fixed_in` is a list and is rendered through `str()` by `tabulate`.
+    rows: list[list[Any]] = [
+        [dep, vuln.vuln_id, vuln.fixed_in, vuln.link]
+        for dep, vulns in result.items()
+        for vuln in vulns
+    ]
+
+    # If there would only be a header, don't bother producing any output.
+    if not rows:
+        return ""
+
+    lines, sizes = tabulate([["Dependency", "ID", "Fix Versions", "Link"], *rows])
+    # The dashed rule goes directly beneath the title row.
+    lines.insert(1, generate_header(sizes))
+    return "\n".join(lines)
diff --git a/src/python/pants/backend/audit/pip_audit.py b/src/python/pants/backend/audit/pip_audit.py
@@ -0,0 +1,77 @@
+# Copyright 2024 Pants project contributors (see CONTRIBUTORS.md).
+# Licensed under the Apache License, Version 2.0 (see LICENSE).
+import logging
+from dataclasses import dataclass
+from typing import Dict, List, Optional
+
+import requests
+from packaging.requirements import Requirement
+
+# Module-level logger, named after this module.
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class VulnerabilityData:
+    """Represents a vulnerability in some python package."""
+
+    # A service-provided identifier for the vulnerability.
+    vuln_id: str
+    # A human-readable description of the vulnerability. Can be extremely long.
+    details: str
+    # A list of versions that can be upgraded to that resolve the vulnerability.
+    fixed_in: List[str]
+    # A set of aliases (alternative identifiers) for this result.
+    aliases: List[str]
+    # A link to the vulnerability info.
+    link: str
+    # An optional short form human readable description.
+    summary: Optional[str]
+    # Represents whether the vulnerability has been withdrawn.
+    withdrawn: Optional[str]
+
+    @classmethod
+    def from_raw_data(cls, data) -> "VulnerabilityData":
+        """Build an instance from one raw vulnerability mapping.
+
+        `data` must contain the keys `id`, `details`, `fixed_in`, `aliases` and `link`; a
+        missing one raises `KeyError`. The optional `summary` and `withdrawn` keys default to
+        `None` when absent.
+        """
+        # Construct via `cls` so that subclasses get instances of their own type.
+        return cls(
+            vuln_id=data["id"],
+            details=data["details"],
+            fixed_in=data["fixed_in"],
+            aliases=data["aliases"],
+            link=data["link"],
+            summary=data.get("summary"),
+            withdrawn=data.get("withdrawn"),
+        )
+
+
+def audit_constraints_strings(
+    constraints_strings, session, excludes_ids
+) -> Dict[str, List[VulnerabilityData]]:
+    """Retrieve security warnings for the given constraints from the Pypi json API.
+
+    Each constraint string must carry exactly one version specifier (as a pinned lockfile
+    entry does); that version is the one looked up. Vulnerabilities whose id is in
+    `excludes_ids` are dropped.
+
+    Returns a mapping from the requirement string to its remaining vulnerabilities.
+    Requirements with nothing left to report are omitted.
+
+    Raises `ValueError` if a constraint does not have exactly one version specifier.
+    """
+    vulnerabilities: Dict[str, List[VulnerabilityData]] = {}
+    for constraint_string in constraints_strings:
+        requirement = Requirement(constraint_string)
+        specifiers = list(requirement.specifier)
+        if len(specifiers) != 1:
+            found = ", ".join(str(specifier) for specifier in specifiers) or "<none>"
+            raise ValueError(
+                f"Unexpected specifier from a lockfile (not exactly one): {found} "
+                f"(constraint: {constraint_string!r})"
+            )
+        specifier = specifiers[0]
+        results = audit_constraints_string(
+            package_name=requirement.name,
+            version=specifier.version,
+            session=session,
+        )
+        # Apply the excludes before deciding whether there is anything to report, so that a
+        # fully-excluded requirement is treated the same as one with no vulnerabilities.
+        remaining = [result for result in results if result.vuln_id not in excludes_ids]
+        if remaining:
+            vulnerabilities[str(requirement)] = remaining
+    return vulnerabilities
+
+
+def audit_constraints_string(
+    package_name: str, version: str, session: requests.Session, timeout: float = 30.0
+) -> List[VulnerabilityData]:
+    """Fetch the known vulnerabilities for one exact package version from the Pypi json API.
+
+    Issues a GET for `https://pypi.org/pypi/{package_name}/{version}/json` on `session` and
+    converts each entry of the response's `vulnerabilities` list. Returns an empty list when
+    that key is missing or empty.
+
+    `timeout` is the number of seconds passed to `session.get`.
+
+    Raises whatever `response.raise_for_status()` raises for an unsuccessful response.
+    """
+    url = f"https://pypi.org/pypi/{package_name}/{version}/json"
+    # requests applies no timeout unless told to, so a stalled connection would otherwise
+    # block the caller indefinitely.
+    response = session.get(url=url, timeout=timeout)
+    response.raise_for_status()
+    raw_vulnerabilities = response.json().get("vulnerabilities") or []
+    return [VulnerabilityData.from_raw_data(vuln_data) for vuln_data in raw_vulnerabilities]