diff --git a/Dockerfile b/Dockerfile index b9b60e4..6f3132d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,10 +1,14 @@ -FROM ruby:3.0-alpine +FROM alpine:3.17 -RUN apk --no-cache add \ - build-base \ - curl \ - ruby-dev \ - && gem install html-proofer -v 3.19.4 +RUN apk --no-cache add build-base ruby-dev +RUN apk --no-cache add curl +RUN gem install html-proofer -v 5.0.2 + +RUN apk --no-cache add python3 py3-pip +RUN apk --no-cache add openjdk8 +RUN pip install html5validator + +RUN apk --no-cache add bash COPY entrypoint.sh proof-html.rb / diff --git a/README.md b/README.md index cbdfa8b..0e1c18f 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,16 @@ # proof-html proof-html is a [GitHub Action](https://github.com/features/actions) to -validate HTML using [HTMLProofer](https://github.com/gjtorikian/html-proofer). +validate HTML using the [Nu HTML +Validator](https://github.com/validator/validator) through +[html5validator](https://github.com/svenkreiss/html5validator), and check +links, images, and more using +[HTMLProofer](https://github.com/gjtorikian/html-proofer). ## Usage ```yaml -- uses: anishathalye/proof-html@v1 +- uses: anishathalye/proof-html@v2 with: directory: ./site ``` @@ -21,14 +25,12 @@ See below for a [full example](#full-example). | `check_external_hash` | Check whether external anchors exist | true | | `check_favicon` | Check whether favicons are valid | true | | `check_html` | Validate HTML | true | -| `check_img_http` | Enforce that images use HTTPS | true | | `check_opengraph` | Check images and URLs in Open Graph metadata | true | -| `empty_alt_ignore` | Allow images with empty alt tags | false | +| `ignore_empty_alt` | Allow images with empty alt tags | false | | `enforce_https` | Require that links use HTTPS | true | -| `external_only` | Only check external links | false | -| `internal_domains` | Newline-separated list of domains to treat as internal URLs | (empty) | -| `url_ignore` | Newline-separated list of URLs to ignore | (empty) | -| `url_ignore_re` | Newline-separated list of URL regexes to ignore | (empty) | +| `swap_urls` | JSON-encoded map of URL rewrite rules | (empty) | +| `ignore_url` | Newline-separated list of URLs to ignore | (empty) | +| `ignore_url_re` | Newline-separated list of URL regexes to ignore | (empty) | | `connect_timeout` | HTTP connection timeout | 30 | | `tokens` | JSON-encoded map of domains to authorization tokens | (empty) | | `max_concurrency` | Maximum number of concurrent requests | 50 | @@ -38,6 +40,8 @@ See below for a [full example](#full-example). Most of the options correspond directly to [configuration options for HTMLProofer](https://github.com/gjtorikian/html-proofer#configuration). +**tokens** + `tokens` is a _JSON-encoded_ map of domains to authorization tokens. So it's "doubly encoded": the workflow file is written in YAML and `tokens` is a string (not a map!), a JSON encoding of the data. This option can be used to provide @@ -55,6 +59,16 @@ tokens: | You can also see the full example below for how to pass on the `GITHUB_TOKEN` supplied by the workflow runner. +**swap_urls** + +`swap_urls` is a _JSON-encoded_ map, mapping regexes to strings. This can be +useful to strip a base path for an internal domain. For example: + +```yaml +swap_urls: | + {"^https://example.com/": "/"} +``` + ## Full Example This is the entire `.github/workflows/build.yml` file for a GitHub Pages / @@ -84,19 +98,19 @@ jobs: bundle config path vendor/bundle bundle install --jobs 4 --retry 3 - run: bundle exec jekyll build - - uses: anishathalye/proof-html@v1 + - uses: anishathalye/proof-html@v2 with: directory: ./_site enforce_https: false tokens: | {"https://github.com": "${{ secrets.GITHUB_TOKEN }}"} - url_ignore: | + ignore_url: | http://www.example.com/ https://en.wikipedia.org/wiki/Main_Page - url_ignore_re: | + ignore_url_re: | ^https://twitter.com/ - internal_domains: | - www.anishathalye.com + swap_urls: | + {"^https://www.anishathalye.com/": "/"} ``` ### Real-world examples diff --git a/action.yml b/action.yml index 992fef5..110d520 100644 --- a/action.yml +++ b/action.yml @@ -12,30 +12,24 @@ inputs: check_html: description: Validate HTML required: false - check_img_http: - description: Enforce that images use HTTPS - required: false check_opengraph: description: Check images and URLs in Open Graph metadata required: false check_favicon: description: Check whether favicons are valid required: false - empty_alt_ignore: + ignore_empty_alt: description: Allow images with empty alt tags required: false enforce_https: description: Require that links use HTTPS required: false - external_only: - description: Only check external links - required: false - internal_domains: - description: Newline-separated list of domains to treat as internal URLs - required: false tokens: description: JSON-encoded map of domains to authorization tokens required: false + swap_urls: + description: JSON-encoded map of URL rewrite rules + required: false max_concurrency: description: Maximum number of concurrent requests required: false @@ -45,10 +39,10 @@ inputs: timeout: description: HTTP request timeout required: false - url_ignore: + ignore_url: description: Newline-separated list of URLs to ignore required: false - url_ignore_re: + ignore_url_re: description: Newline-separated list of URL regexes to ignore required: false retries: @@ -57,7 +51,7 @@ inputs: runs: using: docker - image: docker://anishathalye/proof-html:1.4.1 + image: docker://anishathalye/proof-html:2.0.0 branding: icon: check-square diff --git a/entrypoint.sh b/entrypoint.sh index 309ac94..641182e 100755 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -1,18 +1,27 @@ -#!/bin/sh +#!/usr/bin/env bash + +failed=0 + +check_html="${INPUT_CHECK_HTML:-true}" +if [[ "$check_html" =~ ^t.*|^T.*|^y.*|^Y.*|^1.* ]]; then + if ! html5validator --also-check-css --log INFO --root "${INPUT_DIRECTORY}"; then + failed=1 + fi +fi tries="${INPUT_RETRIES:-3}" -code=1 while [ "$tries" -ge 1 ]; do tries=$((tries-1)) - ruby /proof-html.rb - code="$?" - if [ "$code" -eq 0 ]; then + if ruby /proof-html.rb; then break fi if [ "$tries" -ge 1 ]; then sleep 5 fi + if [ "$tries" -eq 0 ]; then + failed=1 + fi done -exit "$code" +exit $failed diff --git a/proof-html.rb b/proof-html.rb index 72f0e49..66c8951 100644 --- a/proof-html.rb +++ b/proof-html.rb @@ -30,26 +30,35 @@ def get_str(name) s.nil? ? "" : s end -url_ignore_re = get_str("URL_IGNORE_RE").split("\n").map { |s| Regexp.new s } -url_ignore = get_str("URL_IGNORE").split("\n").concat url_ignore_re +ignore_url_re = get_str("IGNORE_URL_RE").split("\n").map { |s| Regexp.new s } +ignore_url = get_str("IGNORE_URL").split("\n").concat ignore_url_re tokens_str = get_str("TOKENS") tokens = JSON.parse (tokens_str == "" ? "{}" : tokens_str) -internal_domains = get_str("INTERNAL_DOMAINS").split("\n") + +swap_urls_str = get_str("SWAP_URLS") +swap_urls = JSON.parse (swap_urls_str == "" ? "{}" : swap_urls_str) +swap_urls.transform_keys! { |k| Regexp.new k } + +checks = ["Links", "Scripts", "Images"] +if get_bool("CHECK_FAVICON", true) + checks.push("Favicon") +end +if get_bool("CHECK_OPENGRAPH", true) + checks.push("OpenGraph") +end options = { - :cache => { :timeframe => "1d" }, + :checks => checks, + :cache => { :timeframe => { + :internal => "1d", + :external => "1d", + } }, :check_external_hash => get_bool("CHECK_EXTERNAL_HASH", true), - :check_html => get_bool("CHECK_HTML", true), - :check_img_http => get_bool("CHECK_IMG_HTTP", true), - :check_opengraph => get_bool("CHECK_OPENGRAPH", true), - :check_favicon => get_bool("CHECK_FAVICON", true), - :empty_alt_ignore => get_bool("EMPTY_ALT_IGNORE", false), + :ignore_empty_alt => get_bool("IGNORE_EMPTY_ALT", false), :enforce_https => get_bool("ENFORCE_HTTPS", true), - :external_only => get_bool("EXTERNAL_ONLY", false), :hydra => { :max_concurrency => get_int("MAX_CONCURRENCY", 50), }, - :internal_domains => internal_domains, :typhoeus => { :connecttimeout => get_int("CONNECT_TIMEOUT", 30), :followlocation => true, @@ -58,7 +67,8 @@ def get_str(name) }, :timeout => get_int("TIMEOUT", 120), }, - :url_ignore => url_ignore, + :ignore_urls => ignore_url, + :swap_urls => swap_urls, } begin