From cb3ec99926e143c8d36079e4442db6a184949f47 Mon Sep 17 00:00:00 2001
From: Richard Si <63936253+ichard26@users.noreply.github.com>
Date: Thu, 23 Dec 2021 16:34:01 -0500
Subject: [PATCH 1/7] Add diff-shades integration

Hopefully this makes it much easier to gauge the impacts of future
changes!
---
 .github/workflows/diff_shades.yml         | 139 ++++++++++++
 .github/workflows/diff_shades_comment.yml |  59 +++++
 .pre-commit-config.yaml                   |   2 +-
 docs/contributing/gauging_changes.md      |  53 +++++
 scripts/diff_shades_gha_helper.py         | 262 ++++++++++++++++++++++
 5 files changed, 514 insertions(+), 1 deletion(-)
 create mode 100644 .github/workflows/diff_shades.yml
 create mode 100644 .github/workflows/diff_shades_comment.yml
 create mode 100644 scripts/diff_shades_gha_helper.py

diff --git a/.github/workflows/diff_shades.yml b/.github/workflows/diff_shades.yml
new file mode 100644
index 00000000000..fd9372f9e72
--- /dev/null
+++ b/.github/workflows/diff_shades.yml
@@ -0,0 +1,139 @@
+name: diff-shades
+
+on:
+  push:
+    branches: [main]
+    paths-ignore: ["docs/**", "*.md"]
+
+  pull_request:
+    path-ignore: ["docs/**", "*.md"]
+
+  workflow_dispatch:
+    inputs:
+      baseline:
+        description: >
+          The baseline revision. Pro-tip, use `.pypi` to use the latest version
+          on PyPI or `.XXX` to use a PR.
+        required: true
+        default: main
+      baseline-args:
+        description: "Custom Black arguments (eg. -l 79)"
+        required: false
+      target:
+        description: >
+          The target revision to compare against the baseline. Same tip applies here.
+        required: true
+      target-args:
+        description: "Custom Black arguments (eg. -S)"
+        required: false
+
+jobs:
+  analysis:
+    name: analysis / linux
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout this repository (full clone)
+        uses: actions/checkout@v2
+        with:
+          fetch-depth: 0
+
+      - uses: actions/setup-python@v2
+
+      - name: Install diff-shades and support dependencies
+        run: |
+          python -m pip install pip --upgrade
+          python -m pip install https://github.com/ichard26/diff-shades/archive/stable.zip
+          python -m pip install click packaging urllib3
+          python -m diff_shades --version
+
+      - name: Calculate run configuration & metadata
+        id: config
+        env:
+          GITHUB_TOKEN: ${{ github.token }}
+        run: |
+          python scripts/diff_shades_gha_helper.py config ${{ github.event_name }} \
+            ${{ github.event.inputs.baseline }} ${{ github.event.inputs.target }}
+          # After analyzing old revisions of Black, this might not exist so we'll
+          # use a copy.
+          cat scripts/diff_shades_gha_helper.py > helper.py
+
+      - name: Calculate baseline analysis cache key
+        id: cache-keys
+        run: >
+          python scripts/diff_shades_gha_helper.py cache-key baseline
+          ${{ steps.config.outputs.baseline-analysis }} -- ${{ github.event.inputs.baseline-args }}
+
+      - name: Attempt to use cached baseline analysis
+        id: baseline-cache
+        uses: actions/cache@v2.1.7
+        with:
+          path: ${{ steps.config.outputs.baseline-analysis }}
+          key: ${{ steps.cache-keys.outputs.baseline-cache-key }}
+
+      - name: Install baseline revision
+        if: steps.baseline-cache.outputs.cache-hit != 'true'
+        env:
+          GITHUB_TOKEN: ${{ github.token }}
+        run: ${{ steps.config.outputs.baseline-setup-cmd }} && python -m pip install .
+
+      - name: Analyze baseline revision
+        if: steps.baseline-cache.outputs.cache-hit != 'true'
+        run: >
+          diff-shades analyze --verbose --work-dir projects-cache/
+          ${{ steps.config.outputs.baseline-analysis }} -- ${{ github.event.inputs.baseline-args }}
+
+      - name: Install target revision
+        env:
+          GITHUB_TOKEN: ${{ github.token }}
+        run: ${{ steps.config.outputs.target-setup-cmd }} && python -m pip install .
+
+      - name: Analyze target revision
+        run: >
+          diff-shades analyze --verbose --work-dir projects-cache/
+          ${{ steps.config.outputs.target-analysis }} --repeat-projects-from
+          ${{ steps.config.outputs.baseline-analysis }} -- ${{ github.event.inputs.target-args }}
+
+      - name: Generate HTML diff report
+        run: >
+          diff-shades --dump-html diff.html compare --diff --quiet
+          ${{ steps.config.outputs.baseline-analysis }} ${{ steps.config.outputs.target-analysis }}
+
+      - name: Upload diff report
+        uses: actions/upload-artifact@v2
+        with:
+          name: diff.html
+          path: diff.html
+
+      - name: Upload baseline analysis
+        uses: actions/upload-artifact@v2
+        with:
+          name: ${{ steps.config.outputs.baseline-analysis }}
+          path: ${{ steps.config.outputs.baseline-analysis }}
+
+      - name: Upload target analysis
+        uses: actions/upload-artifact@v2
+        with:
+          name: ${{ steps.config.outputs.target-analysis }}
+          path: ${{ steps.config.outputs.target-analysis }}
+
+      - name: Generate summary file (PR only)
+        if: github.event_name == 'pull_request'
+        run: >
+          python helper.py comment-body
+          ${{ steps.config.outputs.baseline-analysis }} ${{ steps.config.outputs.target-analysis }}
+          ${{ steps.config.outputs.baseline-sha }} ${{ steps.config.outputs.target-sha }}
+
+      - name: Upload summary file (PR only)
+        if: github.event_name == 'pull_request'
+        uses: actions/upload-artifact@v2
+        with:
+          name: .pr-comment-body.md
+          path: .pr-comment-body.md
+
+        # This is last so the diff-shades-comment workflow can still work even if we
+        # end up detecting failed files and failing the run.
+      - name: Check for failed files in both analyses
+        run: >
+          diff-shades show-failed --check ${{ steps.config.outputs.baseline-analysis }};
+          diff-shades show-failed --check ${{ steps.config.outputs.target-analysis }}
diff --git a/.github/workflows/diff_shades_comment.yml b/.github/workflows/diff_shades_comment.yml
new file mode 100644
index 00000000000..4597fc0bb56
--- /dev/null
+++ b/.github/workflows/diff_shades_comment.yml
@@ -0,0 +1,59 @@
+name: diff-shades-comment
+
+on:
+  workflow_run:
+    workflows: [diff-shades]
+    types: [completed]
+
+permissions:
+  pull-requests: write
+
+jobs:
+  comment:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions/setup-python@v2
+
+      - name: Install support dependencies
+        run: |
+          python -m pip install pip --upgrade
+          python -m pip install click packaging urllib3
+
+      - name: Get details from initial workflow run
+        id: metadata
+        env:
+          GITHUB_TOKEN: ${{ github.token }}
+        run:
+          python scripts/diff_shades_gha_helper.py comment-details ${{
+          github.event.workflow_run.id }}
+
+      - name: Try to find pre-existing PR comment
+        if: steps.metadata.outputs.needs-comment == 'true'
+        id: find-comment
+        uses: peter-evans/find-comment@v1
+        with:
+          issue-number: ${{ steps.metadata.outputs.pr-number }}
+          comment-author: "github-actions[bot]"
+          body-includes: "diff-shades-comment"
+
+      - name: Read comment body from helper script
+        id: body
+        if: steps.metadata.outputs.needs-comment == 'true'
+        run: |
+          # The contents have to be escaped to preserve newlines ...
+          # https://github.community/t/set-output-truncates-multiline-strings/16852/3
+          body="$(cat .pr-comment-body.md)"
+          body="${body//'%'/'%25'}"
+          body="${body//$'\n'/'%0A'}"
+          body="${body//$'\r'/'%0D'}"
+          echo "::set-output name=body::$body"
+
+      - name: Create or update PR comment
+        if: steps.metadata.outputs.needs-comment == 'true'
+        uses: peter-evans/create-or-update-comment@v1
+        with:
+          comment-id: ${{ steps.find-comment.outputs.comment-id }}
+          issue-number: ${{ steps.metadata.outputs.pr-number }}
+          body: ${{ steps.body.outputs.body }}
+          edit-mode: replace
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 52a18623612..af3c5c2b96e 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -57,7 +57,7 @@ repos:
     rev: v2.5.1
     hooks:
       - id: prettier
-        exclude: ^Pipfile\.lock
+        exclude: \.github/workflows/diff_shades\.yml
 
   - repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v4.0.1
diff --git a/docs/contributing/gauging_changes.md b/docs/contributing/gauging_changes.md
index b41c7a35dda..82e57af0f65 100644
--- a/docs/contributing/gauging_changes.md
+++ b/docs/contributing/gauging_changes.md
@@ -40,3 +40,56 @@ If you're running locally yourself to test black on lots of code try:
 ```{program-output} black-primer --help
 
 ```
+
+## diff-shades
+
+diff-shades is a tool similar to black-primer, it also runs _Black_ across a list of Git
+cloneable OSS projects recording the results. The intention is to eventually fully
+replace black-primer with diff-shades as it's much more feature complete and supports
+our needs better.
+
+The main highlight feature of diff-shades is being able to compare two revisions of
+_Black_. This is incredibly useful as it allows us to see what exact changes will occur,
+say merging a certain PR. Black-primer's results would usually be filled with changes
+caused by pre-existing code in Black drowning out the (new) changes we want to see. It
+operates similarly to black-primer but crucially it saves the results as a JSON file
+which allows for the rich comparison features alluded to above.
+
+For more information, please see the [diff-shades documentation][diff-shades].
+
+### CI integration
+
+diff-shades is also the tool behind the "diff-shades results comparing ..." /
+"diff-shades reports zero changes ..." comments on PRs. The project has a GitHub Actions
+workflow which runs diff-shades twice against two revisions of _Black_ according to
+these rules:
+
+|                       | Baseline revision       | Target revision              |
+| --------------------- | ----------------------- | ---------------------------- |
+| On PRs                | latest commit on `main` | PR commit with `main` merged |
+| On pushes (main only) | latest PyPI version     | the pushed commit            |
+
+Once finished, a PR comment will posted embedding a summary of the changes and links to
+further information. If there's a pre-existing diff-shades comment, it'll be updated
+instead the next time the workflow is triggered on the same PR.
+
+The workflow uploads 3-4 artifacts upon completion: the two generated analyses (they
+have the .json file extension), `diff.html`, and `.pr-comment-body.md` if triggered by a
+PR. The last one is downloaded by the `diff-shades-comment` workflow and shouldn't be
+downloaded locally. `diff.html` comes in handy for push-based or manually triggered
+runs. And the analyses exist just in case you want to do further analysis using the
+collected data locally.
+
+Note that the workflow will only fail intentionally if while analyzing a file failed to
+format. Otherwise it's mostly likely a bug of the workflow.
+
+```{tip}
+Maintainers with write access or higher can trigger the workflow manually from the
+Actions tab using the `workflow_dispatch` event. Simply select "diff-shades"
+from the workflows list on the left, press "Run workflow", and fill in which revisions
+and command line arguments to use.
+
+Once finished, check the logs or download the artifacts for local use.
+```
+
+[diff-shades]: https://github.com/ichard26/diff-shades#readme
diff --git a/scripts/diff_shades_gha_helper.py b/scripts/diff_shades_gha_helper.py
new file mode 100644
index 00000000000..376f27618f2
--- /dev/null
+++ b/scripts/diff_shades_gha_helper.py
@@ -0,0 +1,262 @@
+import json
+import os
+import platform
+import pprint
+import subprocess
+import sys
+import zipfile
+from io import BytesIO
+from pathlib import Path
+from typing import Any, Dict, Optional, Tuple
+
+import click
+import urllib3
+from packaging.version import Version
+
+if sys.version_info >= (3, 8):
+    from typing import Final, Literal
+else:
+    from typing_extensions import Final, Literal
+
+COMMENT_BODY_FILE: Final = ".pr-comment-body.md"
+DOCS_URL: Final = (
+    "https://black.readthedocs.io/en/latest/"
+    "contributing/gauging_changes.html#diff-shades"
+)
+SHA_LENGTH: Final = 10
+GH_API_TOKEN: Final = os.getenv("GITHUB_TOKEN")
+REPO: Final = os.getenv("GITHUB_REPOSITORY", default="psf/black")
+http = urllib3.PoolManager()
+
+
+def set_output(name: str, value: str) -> None:
+    if len(value) < 200:
+        print(f"[INFO]: setting '{name}' to '{value}'")
+    else:
+        print(f"[INFO]: setting '{name}' to [{len(value)} chars]")
+    print(f"::set-output name={name}::{value}")
+
+
+def http_get(
+    url: str,
+    is_json: bool = True,
+    headers: Optional[Dict[str, str]] = None,
+    **kwargs: Any,
+) -> Any:
+    headers = headers or {}
+    if GH_API_TOKEN:
+        headers["Authorization"] = f"token {GH_API_TOKEN}"
+    if "github" in url:
+        headers["Accept"] = "application/vnd.github.v3+json"
+    headers["User-Agent"] = "psf/black diff-shades workflow via urllib3"
+    r = http.request("GET", url, headers=headers, **kwargs)
+    if is_json:
+        data = json.loads(r.data.decode("utf-8"))
+    else:
+        data = r.data
+    print(f"[INFO]: issued GET request for {r.geturl()}")
+    if not (200 <= r.status < 300):
+        pprint.pprint(dict(r.info()))
+        pprint.pprint(data)
+        raise RuntimeError(f"unexpected status code: {r.status}")
+
+    return data
+
+
+def get_branch_or_tag_revision(sha: str = "main") -> str:
+    data = http_get(
+        f"https://api.github.com/repos/{REPO}/commits",
+        fields={"per_page": "1", "sha": sha},
+    )
+    assert isinstance(data[0]["sha"], str)
+    return data[0]["sha"]
+
+
+def get_pr_revision(pr: int) -> str:
+    data = http_get(f"https://api.github.com/repos/{REPO}/pulls/{pr}")
+    assert isinstance(data["head"]["sha"], str)
+    return data["head"]["sha"]
+
+
+def get_pypi_version() -> Version:
+    data = http_get("https://pypi.org/pypi/black/json")
+    versions = [Version(v) for v in data["releases"]]
+    sorted_versions = sorted(versions, reverse=True)
+    return sorted_versions[0]
+
+
+def resolve_custom_ref(ref: str) -> Tuple[str, str]:
+    if ref == ".pypi":
+        # Special value to get latest PyPI version.
+        version = str(get_pypi_version())
+        return version, f"git checkout {version}"
+
+    if ref.startswith(".") and ref[1:].isnumeric():
+        # Special format to get a PR.
+        number = int(ref[1:])
+        revision = get_pr_revision(number)
+        return f"pr-{number}-{revision[:SHA_LENGTH]}", f"gh pr checkout {number}"
+
+    # Alright, it's probably a branch, tag, or a commit SHA, let's find out!
+    revision = get_branch_or_tag_revision(ref)
+    # We're cutting the revision short as we might be operating on a short commit SHA.
+    if revision == ref or revision[: len(ref)] == ref:
+        # It's *probably* a commit as the resolved SHA isn't different from the REF.
+        return revision[:SHA_LENGTH], f"git checkout {revision}"
+
+    # It's *probably* a pre-existing branch or tag, yay!
+    return f"{ref}-{revision[:SHA_LENGTH]}", f"git checkout {revision}"
+
+
+@click.group()
+def main() -> None:
+    pass
+
+
+@main.command("config", help="Acquire run configuration and metadata.")
+@click.argument(
+    "event", type=click.Choice(["push", "pull_request", "workflow_dispatch"])
+)
+@click.argument("custom_baseline", required=False)
+@click.argument("custom_target", required=False)
+def config(
+    event: Literal["push", "pull_request", "workflow_dispatch"],
+    custom_baseline: Optional[str],
+    custom_target: Optional[str],
+) -> None:
+    if event == "push":
+        # Push on main, let's use PyPI Black as the baseline.
+        baseline_name = str(get_pypi_version())
+        baseline_cmd = f"git checkout {baseline_name}"
+        target_rev = os.getenv("GITHUB_SHA")
+        assert target_rev is not None
+        target_name = "main-" + target_rev[:SHA_LENGTH]
+        target_cmd = f"git checkout {target_rev}"
+
+    elif event == "pull_request":
+        # PR, let's use main as the baseline.
+        baseline_rev = get_branch_or_tag_revision()
+        assert baseline_rev is not None, "main should exist ..."
+        baseline_name = "main-" + baseline_rev[:SHA_LENGTH]
+        baseline_cmd = f"git checkout {baseline_rev}"
+
+        pr_ref = os.getenv("GITHUB_REF")
+        assert pr_ref is not None
+        pr_num = int(pr_ref[10:-6])
+        pr_rev = get_pr_revision(pr_num)
+        target_name = f"pr-{pr_num}-{pr_rev[:SHA_LENGTH]}"
+        target_cmd = f"gh pr checkout {pr_num} && git merge origin/main"
+
+        # These are only needed for the PR comment.
+        set_output("baseline-sha", baseline_rev)
+        set_output("target-sha", pr_rev)
+    else:
+        assert custom_baseline is not None and custom_target is not None
+        baseline_name, baseline_cmd = resolve_custom_ref(custom_baseline)
+        target_name, target_cmd = resolve_custom_ref(custom_target)
+        if baseline_name == target_name:
+            # Alright we're using the same revisions but we're (hopefully) using
+            # different command line arguments, let's support that too.
+            baseline_name += "-1"
+            target_name += "-2"
+
+    set_output("baseline-analysis", baseline_name + ".json")
+    set_output("baseline-setup-cmd", baseline_cmd)
+    set_output("target-analysis", target_name + ".json")
+    set_output("target-setup-cmd", target_cmd)
+
+
+@main.command("cache-key", help="Generate a cache-key for an analysis.")
+@click.argument("type", type=click.Choice(["baseline", "target"]))
+@click.argument("name")
+@click.argument("black-args", nargs=-1, type=click.UNPROCESSED)
+def cache_key(
+    type: Literal["baseline", "target"], name: str, black_args: Tuple[str, ...]
+) -> None:
+    import diff_shades
+
+    args_digest = "".join(black_args).encode("utf-8").hex()
+    key = f"{platform.system()}-{platform.python_version()}-{diff_shades.__version__}"
+    key += f"-{name}-{args_digest}"
+    set_output(f"{type}-cache-key", key)
+
+
+@main.command("comment-body", help="Generate the body for a summary PR comment.")
+@click.argument("baseline", type=click.Path(exists=True, path_type=Path))
+@click.argument("target", type=click.Path(exists=True, path_type=Path))
+@click.argument("baseline-sha")
+@click.argument("target-sha")
+def comment_body(
+    baseline: Path, target: Path, baseline_sha: str, target_sha: str
+) -> None:
+    # fmt: off
+    cmd = [
+        sys.executable, "-m", "diff_shades", "--no-color",
+        "compare", str(baseline), str(target), "--quiet", "--check"
+    ]
+    # fmt: on
+    proc = subprocess.run(cmd, stdout=subprocess.PIPE, encoding="utf-8")
+    if not proc.returncode:
+        body = (
+            f"**diff-shades** reports zero changes comparing this PR ({target_sha}) to"
+            f" main ({baseline_sha}).\n\n---\n\n"
+        )
+    else:
+        body = (
+            f"**diff-shades** results comparing this PR ({target_sha}) to main"
+            f" ({baseline_sha}). The full diff is [available in the logs]"
+            '($job-diff-url) under the "Generate HTML diff report" step.'
+        )
+        body += "\n```text\n" + proc.stdout.strip() + "\n```\n"
+    body += (
+        f"[**What is this?**]({DOCS_URL}) | [Workflow run]($workflow-run-url) |"
+        " [diff-shades documentation](https://github.com/ichard26/diff-shades#readme) |"
+        # This is used by the comment workflow to discover a pre-existing comment.
+        " id: diff-shades-comment"
+    )
+    print(f"[INFO]: writing half-completed comment body to {COMMENT_BODY_FILE}")
+    with open(COMMENT_BODY_FILE, "w", encoding="utf-8") as f:
+        f.write(body)
+
+
+@main.command(
+    "comment-details", help="Get comment details from an analysis workflow run."
+)
+@click.argument("run-id")
+def comment_details(run_id: str) -> None:
+    data = http_get(f"https://api.github.com/repos/{REPO}/actions/runs/{run_id}")
+    if data["event"] != "pull_request":
+        set_output("needs-comment", "false")
+        return
+
+    set_output("needs-comment", "true")
+    pulls = data["pull_requests"]
+    assert len(pulls) == 1
+    pr_number = pulls[0]["number"]
+    set_output("pr-number", str(pr_number))
+
+    jobs_data = http_get(data["jobs_url"])
+    job = jobs_data["jobs"][0]
+    steps = {s["name"]: s["number"] for s in job["steps"]}
+    diff_step = steps["Generate HTML diff report"]
+    diff_url = job["html_url"] + f"#step:{diff_step}:1"
+
+    artifacts_data = http_get(data["artifacts_url"])["artifacts"]
+    artifacts = {a["name"]: a["archive_download_url"] for a in artifacts_data}
+    body_url = artifacts[COMMENT_BODY_FILE]
+    body_zip = BytesIO(http_get(body_url, is_json=False))
+    with zipfile.ZipFile(body_zip) as zfile:
+        with zfile.open(COMMENT_BODY_FILE) as rf:
+            body = rf.read().decode("utf-8")
+    # It's more convenient to fill these fields after the first workflow is done
+    # since this command can access the workflows API.
+    body = body.replace("$workflow-run-url", data["html_url"])
+    body = body.replace("$job-diff-url", diff_url)
+
+    print(f"[INFO]: writing comment body to {COMMENT_BODY_FILE}")
+    with open(COMMENT_BODY_FILE, "w", encoding="utf-8") as wf:
+        wf.write(body)
+
+
+if __name__ == "__main__":
+    main()

From 6b87f8666a9cbfb667c96f43c73e5c138ede1565 Mon Sep 17 00:00:00 2001
From: Richard Si <63936253+ichard26@users.noreply.github.com>
Date: Thu, 23 Dec 2021 17:27:18 -0500
Subject: [PATCH 2/7] Turn down the verbosity

---
 .github/workflows/diff_shades.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/diff_shades.yml b/.github/workflows/diff_shades.yml
index fd9372f9e72..86fa7ac5ad5 100644
--- a/.github/workflows/diff_shades.yml
+++ b/.github/workflows/diff_shades.yml
@@ -80,7 +80,7 @@ jobs:
       - name: Analyze baseline revision
         if: steps.baseline-cache.outputs.cache-hit != 'true'
         run: >
-          diff-shades analyze --verbose --work-dir projects-cache/
+          diff-shades analyze --show-project-revision --work-dir projects-cache/
           ${{ steps.config.outputs.baseline-analysis }} -- ${{ github.event.inputs.baseline-args }}
 
       - name: Install target revision
@@ -90,7 +90,7 @@ jobs:
 
       - name: Analyze target revision
         run: >
-          diff-shades analyze --verbose --work-dir projects-cache/
+          diff-shades analyze --show-project-revision --work-dir projects-cache/
           ${{ steps.config.outputs.target-analysis }} --repeat-projects-from
           ${{ steps.config.outputs.baseline-analysis }} -- ${{ github.event.inputs.target-args }}
 

From b797ca490334ece0bed409fc277e6620a38022a9 Mon Sep 17 00:00:00 2001
From: Richard Si <63936253+ichard26@users.noreply.github.com>
Date: Thu, 23 Dec 2021 18:28:43 -0500
Subject: [PATCH 3/7] Tests only changes don't matter too

---
 .github/workflows/diff_shades.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/diff_shades.yml b/.github/workflows/diff_shades.yml
index 86fa7ac5ad5..c708933cf53 100644
--- a/.github/workflows/diff_shades.yml
+++ b/.github/workflows/diff_shades.yml
@@ -3,10 +3,10 @@ name: diff-shades
 on:
   push:
     branches: [main]
-    paths-ignore: ["docs/**", "*.md"]
+    paths-ignore: ["docs/**", "tests/**", "*.md"]
 
   pull_request:
-    path-ignore: ["docs/**", "*.md"]
+    path-ignore: ["docs/**", "tests/**", "*.md"]
 
   workflow_dispatch:
     inputs:

From 4f45d2fdd89f6e6531faaea7d3e22917f7973208 Mon Sep 17 00:00:00 2001
From: Richard Si <63936253+ichard26@users.noreply.github.com>
Date: Sun, 9 Jan 2022 12:51:45 -0500
Subject: [PATCH 4/7] Use latest diff-shades features + so much cleanup

---
 .github/workflows/diff_shades.yml         | 27 ++++++-----------
 .github/workflows/diff_shades_comment.yml | 20 +++---------
 docs/contributing/gauging_changes.md      |  4 +--
 scripts/diff_shades_gha_helper.py         | 37 +++++++++--------------
 4 files changed, 30 insertions(+), 58 deletions(-)

diff --git a/.github/workflows/diff_shades.yml b/.github/workflows/diff_shades.yml
index c708933cf53..228575ec9d1 100644
--- a/.github/workflows/diff_shades.yml
+++ b/.github/workflows/diff_shades.yml
@@ -45,31 +45,24 @@ jobs:
           python -m pip install pip --upgrade
           python -m pip install https://github.com/ichard26/diff-shades/archive/stable.zip
           python -m pip install click packaging urllib3
-          python -m diff_shades --version
+          # After checking out old revisions, this might not exist so we'll use a copy.
+          cat scripts/diff_shades_gha_helper.py > helper.py
 
       - name: Calculate run configuration & metadata
         id: config
         env:
           GITHUB_TOKEN: ${{ github.token }}
-        run: |
-          python scripts/diff_shades_gha_helper.py config ${{ github.event_name }} \
-            ${{ github.event.inputs.baseline }} ${{ github.event.inputs.target }}
-          # After analyzing old revisions of Black, this might not exist so we'll
-          # use a copy.
-          cat scripts/diff_shades_gha_helper.py > helper.py
-
-      - name: Calculate baseline analysis cache key
-        id: cache-keys
         run: >
-          python scripts/diff_shades_gha_helper.py cache-key baseline
-          ${{ steps.config.outputs.baseline-analysis }} -- ${{ github.event.inputs.baseline-args }}
+          python helper.py config ${{ github.event_name }}
+          ${{ github.event.inputs.baseline }} ${{ github.event.inputs.target }}
+          --baseline-args "${{ github.event.inputs.baseline-args }}"
 
       - name: Attempt to use cached baseline analysis
         id: baseline-cache
         uses: actions/cache@v2.1.7
         with:
           path: ${{ steps.config.outputs.baseline-analysis }}
-          key: ${{ steps.cache-keys.outputs.baseline-cache-key }}
+          key: ${{ steps.config.outputs.baseline-cache-key }}
 
       - name: Install baseline revision
         if: steps.baseline-cache.outputs.cache-hit != 'true'
@@ -80,7 +73,7 @@ jobs:
       - name: Analyze baseline revision
         if: steps.baseline-cache.outputs.cache-hit != 'true'
         run: >
-          diff-shades analyze --show-project-revision --work-dir projects-cache/
+          diff-shades analyze -v --work-dir projects-cache/
           ${{ steps.config.outputs.baseline-analysis }} -- ${{ github.event.inputs.baseline-args }}
 
       - name: Install target revision
@@ -90,7 +83,7 @@ jobs:
 
       - name: Analyze target revision
         run: >
-          diff-shades analyze --show-project-revision --work-dir projects-cache/
+          diff-shades analyze -v --work-dir projects-cache/
           ${{ steps.config.outputs.target-analysis }} --repeat-projects-from
           ${{ steps.config.outputs.baseline-analysis }} -- ${{ github.event.inputs.target-args }}
 
@@ -135,5 +128,5 @@ jobs:
         # end up detecting failed files and failing the run.
       - name: Check for failed files in both analyses
         run: >
-          diff-shades show-failed --check ${{ steps.config.outputs.baseline-analysis }};
-          diff-shades show-failed --check ${{ steps.config.outputs.target-analysis }}
+          diff-shades show-failed --check --show-log ${{ steps.config.outputs.baseline-analysis }};
+          diff-shades show-failed --check --show-log ${{ steps.config.outputs.target-analysis }}
diff --git a/.github/workflows/diff_shades_comment.yml b/.github/workflows/diff_shades_comment.yml
index 4597fc0bb56..ff3a87307c7 100644
--- a/.github/workflows/diff_shades_comment.yml
+++ b/.github/workflows/diff_shades_comment.yml
@@ -24,9 +24,9 @@ jobs:
         id: metadata
         env:
           GITHUB_TOKEN: ${{ github.token }}
-        run:
-          python scripts/diff_shades_gha_helper.py comment-details ${{
-          github.event.workflow_run.id }}
+        run: >
+          python scripts/diff_shades_gha_helper.py comment-details
+          ${{github.event.workflow_run.id }}
 
       - name: Try to find pre-existing PR comment
         if: steps.metadata.outputs.needs-comment == 'true'
@@ -37,23 +37,11 @@ jobs:
           comment-author: "github-actions[bot]"
           body-includes: "diff-shades-comment"
 
-      - name: Read comment body from helper script
-        id: body
-        if: steps.metadata.outputs.needs-comment == 'true'
-        run: |
-          # The contents have to be escaped to preserve newlines ...
-          # https://github.community/t/set-output-truncates-multiline-strings/16852/3
-          body="$(cat .pr-comment-body.md)"
-          body="${body//'%'/'%25'}"
-          body="${body//$'\n'/'%0A'}"
-          body="${body//$'\r'/'%0D'}"
-          echo "::set-output name=body::$body"
-
       - name: Create or update PR comment
         if: steps.metadata.outputs.needs-comment == 'true'
         uses: peter-evans/create-or-update-comment@v1
         with:
           comment-id: ${{ steps.find-comment.outputs.comment-id }}
           issue-number: ${{ steps.metadata.outputs.pr-number }}
-          body: ${{ steps.body.outputs.body }}
+          body: ${{ steps.metadata.outputs.comment-body }}
           edit-mode: replace
diff --git a/docs/contributing/gauging_changes.md b/docs/contributing/gauging_changes.md
index 82e57af0f65..580c10429b3 100644
--- a/docs/contributing/gauging_changes.md
+++ b/docs/contributing/gauging_changes.md
@@ -69,8 +69,8 @@ these rules:
 | On PRs                | latest commit on `main` | PR commit with `main` merged |
 | On pushes (main only) | latest PyPI version     | the pushed commit            |
 
-Once finished, a PR comment will posted embedding a summary of the changes and links to
-further information. If there's a pre-existing diff-shades comment, it'll be updated
+Once finished, a PR comment will be posted embedding a summary of the changes and links
+to further information. If there's a pre-existing diff-shades comment, it'll be updated
 instead the next time the workflow is triggered on the same PR.
 
 The workflow uploads 3-4 artifacts upon completion: the two generated analyses (they
diff --git a/scripts/diff_shades_gha_helper.py b/scripts/diff_shades_gha_helper.py
index 376f27618f2..c155db9f475 100644
--- a/scripts/diff_shades_gha_helper.py
+++ b/scripts/diff_shades_gha_helper.py
@@ -23,6 +23,7 @@
     "https://black.readthedocs.io/en/latest/"
     "contributing/gauging_changes.html#diff-shades"
 )
+USER_AGENT: Final = f"psf/black diff-shades workflow via urllib3/{urllib3.__version__}"
 SHA_LENGTH: Final = 10
 GH_API_TOKEN: Final = os.getenv("GITHUB_TOKEN")
 REPO: Final = os.getenv("GITHUB_REPOSITORY", default="psf/black")
@@ -44,11 +45,11 @@ def http_get(
     **kwargs: Any,
 ) -> Any:
     headers = headers or {}
-    if GH_API_TOKEN:
-        headers["Authorization"] = f"token {GH_API_TOKEN}"
+    headers["User-Agent"] = USER_AGENT
     if "github" in url:
+        if GH_API_TOKEN:
+            headers["Authorization"] = f"token {GH_API_TOKEN}"
         headers["Accept"] = "application/vnd.github.v3+json"
-    headers["User-Agent"] = "psf/black diff-shades workflow via urllib3"
     r = http.request("GET", url, headers=headers, **kwargs)
     if is_json:
         data = json.loads(r.data.decode("utf-8"))
@@ -119,11 +120,15 @@ def main() -> None:
 )
 @click.argument("custom_baseline", required=False)
 @click.argument("custom_target", required=False)
+@click.option("--baseline-args", default="")
 def config(
     event: Literal["push", "pull_request", "workflow_dispatch"],
     custom_baseline: Optional[str],
     custom_target: Optional[str],
+    baseline_args: str,
 ) -> None:
+    import diff_shades
+
     if event == "push":
         # Push on main, let's use PyPI Black as the baseline.
         baseline_name = str(get_pypi_version())
@@ -165,20 +170,9 @@ def config(
     set_output("target-analysis", target_name + ".json")
     set_output("target-setup-cmd", target_cmd)
 
-
-@main.command("cache-key", help="Generate a cache-key for an analysis.")
-@click.argument("type", type=click.Choice(["baseline", "target"]))
-@click.argument("name")
-@click.argument("black-args", nargs=-1, type=click.UNPROCESSED)
-def cache_key(
-    type: Literal["baseline", "target"], name: str, black_args: Tuple[str, ...]
-) -> None:
-    import diff_shades
-
-    args_digest = "".join(black_args).encode("utf-8").hex()
     key = f"{platform.system()}-{platform.python_version()}-{diff_shades.__version__}"
-    key += f"-{name}-{args_digest}"
-    set_output(f"{type}-cache-key", key)
+    key += f"-{baseline_name}-{baseline_args.encode('utf-8').hex()}"
+    set_output("baseline-cache-key", key)
 
 
 @main.command("comment-body", help="Generate the body for a summary PR comment.")
@@ -219,9 +213,7 @@ def comment_body(
         f.write(body)
 
 
-@main.command(
-    "comment-details", help="Get comment details from an analysis workflow run."
-)
+@main.command("comment-details", help="Get PR comment resources from a workflow run.")
 @click.argument("run-id")
 def comment_details(run_id: str) -> None:
     data = http_get(f"https://api.github.com/repos/{REPO}/actions/runs/{run_id}")
@@ -252,10 +244,9 @@ def comment_details(run_id: str) -> None:
     # since this command can access the workflows API.
     body = body.replace("$workflow-run-url", data["html_url"])
     body = body.replace("$job-diff-url", diff_url)
-
-    print(f"[INFO]: writing comment body to {COMMENT_BODY_FILE}")
-    with open(COMMENT_BODY_FILE, "w", encoding="utf-8") as wf:
-        wf.write(body)
+    # # https://github.community/t/set-output-truncates-multiline-strings/16852/3
+    escaped = body.replace("%", "%25").replace("\n", "%0A").replace("\r", "%0D")
+    set_output("comment-body", escaped)
 
 
 if __name__ == "__main__":

From 5e8b18bd5bb243571ff5cda1bab2b2a17e8f4dbd Mon Sep 17 00:00:00 2001
From: Richard Si <63936253+ichard26@users.noreply.github.com>
Date: Sun, 9 Jan 2022 18:33:25 -0500
Subject: [PATCH 5/7] Configure git user ID so merges work

---
 .github/workflows/diff_shades.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/diff_shades.yml b/.github/workflows/diff_shades.yml
index 228575ec9d1..b6a9b3355fb 100644
--- a/.github/workflows/diff_shades.yml
+++ b/.github/workflows/diff_shades.yml
@@ -47,6 +47,8 @@ jobs:
           python -m pip install click packaging urllib3
           # After checking out old revisions, this might not exist so we'll use a copy.
           cat scripts/diff_shades_gha_helper.py > helper.py
+          git config user.name "diff-shades-gha"
+          git config user.email "diff-shades-gha@example.com"
 
       - name: Calculate run configuration & metadata
         id: config

From a963292a1c14c964f101776fe16df9046fc1feaa Mon Sep 17 00:00:00 2001
From: Richard Si <63936253+ichard26@users.noreply.github.com>
Date: Wed, 12 Jan 2022 22:38:31 -0500
Subject: [PATCH 6/7] Add even more docs and minor cleanup

---
 .github/workflows/diff_shades_comment.yml |  2 +-
 scripts/diff_shades_gha_helper.py         | 37 +++++++++++++++++------
 2 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/diff_shades_comment.yml b/.github/workflows/diff_shades_comment.yml
index ff3a87307c7..bdd90321800 100644
--- a/.github/workflows/diff_shades_comment.yml
+++ b/.github/workflows/diff_shades_comment.yml
@@ -35,7 +35,7 @@ jobs:
         with:
           issue-number: ${{ steps.metadata.outputs.pr-number }}
           comment-author: "github-actions[bot]"
-          body-includes: "diff-shades-comment"
+          body-includes: "diff-shades"
 
       - name: Create or update PR comment
         if: steps.metadata.outputs.needs-comment == 'true'
diff --git a/scripts/diff_shades_gha_helper.py b/scripts/diff_shades_gha_helper.py
index c155db9f475..21e04a590a1 100644
--- a/scripts/diff_shades_gha_helper.py
+++ b/scripts/diff_shades_gha_helper.py
@@ -1,3 +1,19 @@
+"""Helper script for psf/black's diff-shades Github Actions integration.
+
+diff-shades is a tool for analyzing what happens when you run Black on
+OSS code capturing it for comparisons or other usage. It's used here to
+help measure the impact of a change *before* landing it (in particular
+posting a comment on completion for PRs).
+
+This script exists as a more maintainable alternative to using inline
+Javascript in the workflow YAML files. The revision configuration and
+resolving, caching, and PR comment logic is contained here.
+
+For more information, please see the developer docs:
+
+https://black.readthedocs.io/en/latest/contributing/gauging_changes.html#diff-shades
+"""
+
 import json
 import os
 import platform
@@ -19,6 +35,7 @@
     from typing_extensions import Final, Literal
 
 COMMENT_BODY_FILE: Final = ".pr-comment-body.md"
+DIFF_STEP_NAME: Final = "Generate HTML diff report"
 DOCS_URL: Final = (
     "https://black.readthedocs.io/en/latest/"
     "contributing/gauging_changes.html#diff-shades"
@@ -96,7 +113,10 @@ def resolve_custom_ref(ref: str) -> Tuple[str, str]:
         # Special format to get a PR.
         number = int(ref[1:])
         revision = get_pr_revision(number)
-        return f"pr-{number}-{revision[:SHA_LENGTH]}", f"gh pr checkout {number}"
+        return (
+            f"pr-{number}-{revision[:SHA_LENGTH]}",
+            f"gh pr checkout {number} && git merge origin/main",
+        )
 
     # Alright, it's probably a branch, tag, or a commit SHA, let's find out!
     revision = get_branch_or_tag_revision(ref)
@@ -141,7 +161,6 @@ def config(
     elif event == "pull_request":
         # PR, let's use main as the baseline.
         baseline_rev = get_branch_or_tag_revision()
-        assert baseline_rev is not None, "main should exist ..."
         baseline_name = "main-" + baseline_rev[:SHA_LENGTH]
         baseline_cmd = f"git checkout {baseline_rev}"
 
@@ -199,14 +218,12 @@ def comment_body(
         body = (
             f"**diff-shades** results comparing this PR ({target_sha}) to main"
             f" ({baseline_sha}). The full diff is [available in the logs]"
-            '($job-diff-url) under the "Generate HTML diff report" step.'
+            f'($job-diff-url) under the "{DIFF_STEP_NAME}" step.'
         )
         body += "\n```text\n" + proc.stdout.strip() + "\n```\n"
     body += (
         f"[**What is this?**]({DOCS_URL}) | [Workflow run]($workflow-run-url) |"
-        " [diff-shades documentation](https://github.com/ichard26/diff-shades#readme) |"
-        # This is used by the comment workflow to discover a pre-existing comment.
-        " id: diff-shades-comment"
+        " [diff-shades documentation](https://github.com/ichard26/diff-shades#readme)"
     )
     print(f"[INFO]: writing half-completed comment body to {COMMENT_BODY_FILE}")
     with open(COMMENT_BODY_FILE, "w", encoding="utf-8") as f:
@@ -228,9 +245,10 @@ def comment_details(run_id: str) -> None:
     set_output("pr-number", str(pr_number))
 
     jobs_data = http_get(data["jobs_url"])
+    assert len(jobs_data["jobs"]) == 1, "multiple jobs not supported nor tested"
     job = jobs_data["jobs"][0]
     steps = {s["name"]: s["number"] for s in job["steps"]}
-    diff_step = steps["Generate HTML diff report"]
+    diff_step = steps[DIFF_STEP_NAME]
     diff_url = job["html_url"] + f"#step:{diff_step}:1"
 
     artifacts_data = http_get(data["artifacts_url"])["artifacts"]
@@ -240,8 +258,9 @@ def comment_details(run_id: str) -> None:
     with zipfile.ZipFile(body_zip) as zfile:
         with zfile.open(COMMENT_BODY_FILE) as rf:
             body = rf.read().decode("utf-8")
-    # It's more convenient to fill these fields after the first workflow is done
-    # since this command can access the workflows API.
+    # It's more convenient to fill in these fields after the first workflow is done
+    # since this command can access the workflows API (doing it in the main workflow
+    # while it's still in progress seems impossible).
     body = body.replace("$workflow-run-url", data["html_url"])
     body = body.replace("$job-diff-url", diff_url)
     # # https://github.community/t/set-output-truncates-multiline-strings/16852/3

From aab8f28826aa8fee3900c342bf5cd893bad842bb Mon Sep 17 00:00:00 2001
From: Jelle Zijlstra <jelle.zijlstra@gmail.com>
Date: Thu, 13 Jan 2022 18:11:23 -0800
Subject: [PATCH 7/7] Update docs/contributing/gauging_changes.md

---
 docs/contributing/gauging_changes.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/contributing/gauging_changes.md b/docs/contributing/gauging_changes.md
index 580c10429b3..3cfa98b3df8 100644
--- a/docs/contributing/gauging_changes.md
+++ b/docs/contributing/gauging_changes.md
@@ -81,7 +81,7 @@ runs. And the analyses exist just in case you want to do further analysis using
 collected data locally.
 
 Note that the workflow will only fail intentionally if while analyzing a file failed to
-format. Otherwise it's mostly likely a bug of the workflow.
+format. Otherwise a failure indicates a bug in the workflow.
 
 ```{tip}
 Maintainers with write access or higher can trigger the workflow manually from the