diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
new file mode 100644
index 0000000000..36e8cfb78c
--- /dev/null
+++ b/.github/workflows/windows.yml
@@ -0,0 +1,50 @@
+# this is a work in progress!
+name: windows
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    types: [opened, synchronize]
+    branches:
+      - '*'
+
+jobs:
+  windows:
+    name: "windows, sys: ${{ matrix.sys }}, ${{ matrix.ruby }}"
+
+    env:
+      MAKEFLAGS: -j2
+
+    runs-on: windows-latest
+
+    strategy:
+      fail-fast: false
+      matrix:
+        sys: [ enable, disable ]
+        ruby: [ "2.5", "2.6", "2.7", "3.0", "mingw" ]
+
+    steps:
+      - name: configure git crlf on windows
+        run: |
+          git config --system core.autocrlf false
+          git config --system core.eol lf
+      - name: checkout
+        uses: actions/checkout@v2
+      - name: load Ruby and bundle install
+        uses: MSP-Greg/setup-ruby-pkgs@v1
+        with:
+          ruby-version: ${{ matrix.ruby }}
+          mingw: libxml2 libxslt
+          bundler-cache: true
+      - uses: actions/cache@v2 # cache ports/archives; only used for --disable-system-libraries builds
+        if: matrix.sys == 'disable'
+        with: # keys use runner.os because this matrix has no "os" dimension
+          path: ports/archives
+          key: ${{ runner.os }}-${{ matrix.ruby }}-tarballs-${{ hashFiles('**/dependencies.yml') }}
+          restore-keys: ${{ runner.os }}-${{ matrix.ruby }}-tarballs-
+      - name: bundle exec rake compile
+        run: |
+          bundle exec rake compile -- --${{ matrix.sys }}-system-libraries
+      - name: bundle exec rake test
+        run:  bundle exec rake test
diff --git a/.gitignore b/.gitignore
index 79baf6a716..5a6b71bc58 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,6 +9,8 @@
 /gems/
 /lib/nokogiri/**/nokogiri.bundle
 /lib/nokogiri/**/nokogiri.so
+/lib/nokogumbo/**/nokogumbo.bundle
+/lib/nokogumbo/**/nokogumbo.so
 /lib/nokogiri/nokogiri.jar
 /pkg/
 /ports/
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8a710b1c6f..db1d50c3bb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,11 @@ Nokogiri follows [Semantic Versioning](https://semver.org/), please see the [REA
 
 ## next / unreleased
 
+### Dependencies
+
+* [MRI] Upgrade mini_portile2 dependency from `~> 2.5.0` to `~> 2.5.1`.
+
+
 ### Changed
 
 * Introduce `Nokogiri::XML::ParseOptions::DEFAULT_XSLT` which adds the libxslt-preferred options of `NOENT | DTDLOAD | DTDATTR | NOCDATA` to `ParseOptions::DEFAULT_XML`.
diff --git a/ext/java/nokogiri/XmlDocument.java b/ext/java/nokogiri/XmlDocument.java
index 139659bad2..1942cb4891 100644
--- a/ext/java/nokogiri/XmlDocument.java
+++ b/ext/java/nokogiri/XmlDocument.java
@@ -443,7 +443,7 @@ private static class DocumentBuilderFactoryHolder
       return new_root;
     }
     if (!(new_root instanceof XmlNode)) {
-        throw context.runtime.newArgumentError("expected Nokogiri::XML::Node but received " + new_root.getType());
+      throw context.runtime.newArgumentError("expected Nokogiri::XML::Node but received " + new_root.getType());
     }
     XmlNode newRoot = asXmlNode(context, new_root);
 
diff --git a/ext/nokogiri/extconf.rb b/ext/nokogiri/extconf.rb
index fa4d189c77..38c93e2ff6 100644
--- a/ext/nokogiri/extconf.rb
+++ b/ext/nokogiri/extconf.rb
@@ -14,7 +14,7 @@
 
 # The gem version constraint in the Rakefile is not respected at install time.
 # Keep this version in sync with the one in the Rakefile !
-REQUIRED_MINI_PORTILE_VERSION = "~> 2.5.0"
+REQUIRED_MINI_PORTILE_VERSION = "~> 2.5.1"
 REQUIRED_PKG_CONFIG_VERSION = "~> 1.1"
 
 # Keep track of what versions of what libraries we build against
@@ -402,7 +402,7 @@ def process_recipe(name, version, static_p, cross_p)
   require 'mini_portile2'
   message("Using mini_portile version #{MiniPortile::VERSION}\n")
 
-  if name != "libxml2" && name != "libxslt"
+  unless ["libxml2", "libxslt"].include?(name)
     OTHER_LIBRARY_VERSIONS[name] = version
   end
 
@@ -486,7 +486,7 @@ def process_recipe(name, version, static_p, cross_p)
         end
       end
 
-      message(<<~EOM)
+      message(<<~EOM) if name != "libgumbo"
 
         The Nokogiri maintainers intend to provide timely security updates, but if
         this is a concern for you and want to use your OS/distro system library
@@ -498,7 +498,7 @@ def process_recipe(name, version, static_p, cross_p)
       EOM
 
       message(<<~EOM) if name == 'libxml2'
-        Note, however, that nokogiri cannot guarantee compatiblity with every
+        Note, however, that nokogiri cannot guarantee compatibility with every
         version of libxml2 that may be provided by OS/package vendors.
 
       EOM
@@ -868,6 +868,56 @@ def compile
   ensure_func("exsltFuncRegister", "libexslt/exslt.h")
 end
 
+libgumbo_recipe = process_recipe("libgumbo", "1.0.0-nokogiri", static_p, cross_build_p) do |recipe|
+  recipe.configure_options = []
+
+  class << recipe # the overrides below are defined on this one recipe object only
+    def downloaded? # sources are copied by #extract — presumably this skips the download step
+      true
+    end
+
+    def extract # copy gumbo-parser/src/* from PACKAGE_ROOT_DIR into tmp_path/gumbo-parser
+      target = File.join(tmp_path, "gumbo-parser")
+      output "Copying gumbo-parser files into #{target}..."
+      FileUtils.mkdir_p target
+      FileUtils.cp Dir.glob(File.join(PACKAGE_ROOT_DIR, "gumbo-parser/src/*")), target
+    end
+
+    def configured? # presumably skips the configure step — TODO confirm against mini_portile2
+      true
+    end
+
+    def install # copy libgumbo.a and the *.h headers from work_path into port_path/lib and port_path/include
+      lib_dir = File.join(port_path, "lib")
+      inc_dir = File.join(port_path, "include")
+      FileUtils.mkdir_p([lib_dir, inc_dir])
+      FileUtils.cp File.join(work_path, "libgumbo.a"), lib_dir
+      FileUtils.cp Dir.glob(File.join(work_path, "*.h")), inc_dir
+    end
+
+    def compile # run make_cmd with CC/CFLAGS (plus AR/ARFLAGS/RANLIB when cross-building) in the environment
+      cflags = concat_flags(ENV["CFLAGS"], "-fPIC", "-g")
+
+      env = {"CC" => gcc_cmd, "CFLAGS" => cflags}
+      if config_cross_build?
+        if host =~ /darwin/ # darwin hosts archive with "<host>-libtool -o" instead of "<host>-ar"
+          env["AR"] = "#{host}-libtool"
+          env["ARFLAGS"] = "-o"
+        else
+          env["AR"] = "#{host}-ar"
+        end
+        env["RANLIB"] = "#{host}-ranlib"
+      end
+
+      execute("compile", make_cmd, {env: env})
+    end
+  end
+end
+append_cppflags("-I#{File.join(libgumbo_recipe.path, "include")}") # make gumbo.h visible to the extension
+$libs = $libs + " " + File.join(libgumbo_recipe.path, "lib", "libgumbo.a") # link the static archive by full path
+$LIBPATH = $LIBPATH | [File.join(libgumbo_recipe.path, "lib")]
+ensure_func("gumbo_parse_with_options", "gumbo.h")
+
 have_func('xmlHasFeature') || abort("xmlHasFeature() is missing.") # introduced in libxml 2.6.21
 have_func('xmlFirstElementChild') # introduced in libxml 2.7.3
 have_func('xmlRelaxNGSetParserStructuredErrors') # introduced in libxml 2.6.24
diff --git a/ext/nokogiri/gumbo.c b/ext/nokogiri/gumbo.c
new file mode 100644
index 0000000000..7a3b42a878
--- /dev/null
+++ b/ext/nokogiri/gumbo.c
@@ -0,0 +1,606 @@
+//
+//  Copyright 2013-2021 Sam Ruby, Stephen Checkoway
+//
+//  Licensed under the Apache License, Version 2.0 (the "License");
+//  you may not use this file except in compliance with the License.
+//  You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+//  Unless required by applicable law or agreed to in writing, software
+//  distributed under the License is distributed on an "AS IS" BASIS,
+//  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//  See the License for the specific language governing permissions and
+//  limitations under the License.
+//
+
+//
+// gumbo.c defines the following:
+//
+//   module Nokogiri::Gumbo
+//     def self.parse(input, url, max_attributes, max_errors, max_depth) # returns Nokogiri::HTML5::Document
+//   end
+//
+// Processing starts by calling gumbo_parse_with_options. The resulting document tree
+// is then walked, a parallel libxml2 tree is constructed, and the final document is
+// then wrapped using Nokogiri_wrap_xml_document. This approach reduces memory and CPU
+// requirements as Ruby objects are only built when necessary.
+//
+
+#include <nokogiri.h>
+
+#include "gumbo.h"
+
+VALUE cNokogiriHtml5Document;
+
+// Interned symbols
+static ID internal_subset;
+static ID parent;
+
+/* Backwards compatibility to Ruby 2.1.0: define the rb_utf8_str_new* helpers when RUBY_API_VERSION_CODE < 20200. */
+#if RUBY_API_VERSION_CODE < 20200
+#define ONIG_ESCAPE_UCHAR_COLLISION 1
+#include <ruby/encoding.h>
+
+static VALUE
+rb_utf8_str_new(const char *str, long length) // UTF-8 Ruby string from a (pointer, length) pair
+{
+  return rb_enc_str_new(str, length, rb_utf8_encoding());
+}
+
+static VALUE
+rb_utf8_str_new_cstr(const char *str) // UTF-8 Ruby string from a NUL-terminated C string
+{
+  return rb_enc_str_new_cstr(str, rb_utf8_encoding());
+}
+
+static VALUE
+rb_utf8_str_new_static(const char *str, long length) // delegates to rb_enc_str_new, same as rb_utf8_str_new
+{
+  return rb_enc_str_new(str, length, rb_utf8_encoding());
+}
+#endif /* RUBY_API_VERSION_CODE < 20200 */
+
+#include <nokogiri.h>
+#include <libxml/tree.h>
+#include <libxml/HTMLtree.h>
+
+// Create an empty HTML document; when dtd_name is non-NULL, also create its internal subset.
+// Naming used by libxml2: URI = system id, external id = public id.
+static xmlDocPtr
+new_html_doc(const char *dtd_name, const char *system, const char *public)
+{
+  // These two libxml2 functions take the public and system ids in
+  // opposite orders.
+  htmlDocPtr doc = htmlNewDocNoDtD(/* URI */ NULL, /* ExternalID */NULL);
+  assert(doc);
+  if (dtd_name) {
+    xmlCreateIntSubset(doc, BAD_CAST dtd_name, BAD_CAST public, BAD_CAST system); // public id first, then system id
+  }
+  return doc;
+}
+
+static xmlNodePtr
+get_parent(xmlNodePtr node) // returns node->parent; node must not be NULL
+{
+  return node->parent;
+}
+
+static GumboOutput *
+perform_parse(const GumboOptions *options, VALUE input) // run Gumbo on a Ruby String; raises for non-OK statuses
+{
+  assert(RTEST(input));
+  Check_Type(input, T_STRING);
+  GumboOutput *output = gumbo_parse_with_options(
+                          options,
+                          RSTRING_PTR(input),
+                          RSTRING_LEN(input)
+                        );
+
+  const char *status_string = gumbo_status_to_string(output->status); // fetched before output may be destroyed below
+  switch (output->status) {
+  case GUMBO_STATUS_OK:
+    break;
+  case GUMBO_STATUS_TOO_MANY_ATTRIBUTES:
+  case GUMBO_STATUS_TREE_TOO_DEEP:
+    gumbo_destroy_output(output); // release the output before raising ArgumentError
+    rb_raise(rb_eArgError, "%s", status_string);
+  case GUMBO_STATUS_OUT_OF_MEMORY:
+    gumbo_destroy_output(output); // release the output before raising NoMemoryError
+    rb_raise(rb_eNoMemError, "%s", status_string);
+  }
+  return output; // the caller is responsible for gumbo_destroy_output()
+}
+
+static xmlNsPtr
+lookup_or_add_ns( // find the namespace bound to prefix as seen from root, or declare href/prefix on root
+  xmlDocPtr doc,
+  xmlNodePtr root,
+  const char *href,
+  const char *prefix
+)
+{
+  xmlNsPtr ns = xmlSearchNs(doc, root, BAD_CAST prefix); // the search is by prefix only; href is not compared
+  if (ns) {
+    return ns;
+  }
+  return xmlNewNs(root, BAD_CAST href, BAD_CAST prefix);
+}
+
+static void
+set_line(xmlNodePtr node, size_t line) // record line on node; values of 65535 and above are left unset
+{
+  // libxml2 uses 65535 to mean look elsewhere for the line number on some
+  // nodes.
+  if (line < 65535) {
+    node->line = (unsigned short)line;
+  }
+}
+
+// Construct an XML tree rooted at xml_output_node from the Gumbo tree rooted
+// at gumbo_node. The walk is iterative (no recursion) and depth-first.
+static void
+build_tree(
+  xmlDocPtr doc,
+  xmlNodePtr xml_output_node,
+  const GumboNode *gumbo_node
+)
+{
+  xmlNodePtr xml_root = NULL; // first element created under xml_output_node; namespaces are declared on it
+  xmlNodePtr xml_node = xml_output_node; // libxml2 node currently receiving children
+  size_t child_index = 0; // index of the next child of gumbo_node to convert
+
+  while (true) {
+    assert(gumbo_node != NULL);
+    const GumboVector *children = gumbo_node->type == GUMBO_NODE_DOCUMENT ?
+                                  &gumbo_node->v.document.children : &gumbo_node->v.element.children;
+    if (child_index >= children->length) {
+      // Move up the tree and to the next child.
+      if (xml_node == xml_output_node) {
+        // We've built as much of the tree as we can.
+        return;
+      }
+      child_index = gumbo_node->index_within_parent + 1;
+      gumbo_node = gumbo_node->parent;
+      xml_node = get_parent(xml_node);
+      // Children of fragments don't share the same root, so reset it and
+      // it'll be set below. In the non-fragment case, this will only happen
+      // after the html element has been finished at which point there are no
+      // further elements.
+      if (xml_node == xml_output_node) {
+        xml_root = NULL;
+      }
+      continue;
+    }
+    const GumboNode *gumbo_child = children->data[child_index++];
+    xmlNodePtr xml_child;
+
+    switch (gumbo_child->type) {
+    case GUMBO_NODE_DOCUMENT:
+      abort(); // Bug in Gumbo.
+
+    case GUMBO_NODE_TEXT:
+    case GUMBO_NODE_WHITESPACE: // whitespace nodes become ordinary text nodes
+      xml_child = xmlNewDocText(doc, BAD_CAST gumbo_child->v.text.text);
+      set_line(xml_child, gumbo_child->v.text.start_pos.line);
+      xmlAddChild(xml_node, xml_child);
+      break;
+
+    case GUMBO_NODE_CDATA:
+      xml_child = xmlNewCDataBlock(doc, BAD_CAST gumbo_child->v.text.text,
+                                   (int) strlen(gumbo_child->v.text.text));
+      set_line(xml_child, gumbo_child->v.text.start_pos.line);
+      xmlAddChild(xml_node, xml_child);
+      break;
+
+    case GUMBO_NODE_COMMENT:
+      xml_child = xmlNewDocComment(doc, BAD_CAST gumbo_child->v.text.text);
+      set_line(xml_child, gumbo_child->v.text.start_pos.line);
+      xmlAddChild(xml_node, xml_child);
+      break;
+
+    case GUMBO_NODE_TEMPLATE:
+    // XXX: Should create a template element and a new DocumentFragment
+    case GUMBO_NODE_ELEMENT: { // templates currently fall through and are built as plain elements
+      xml_child = xmlNewDocNode(doc, NULL, BAD_CAST gumbo_child->v.element.name, NULL);
+      set_line(xml_child, gumbo_child->v.element.start_pos.line);
+      if (xml_root == NULL) {
+        xml_root = xml_child;
+      }
+      xmlNsPtr ns = NULL;
+      switch (gumbo_child->v.element.tag_namespace) {
+      case GUMBO_NAMESPACE_HTML: // HTML elements get no xmlNs
+        break;
+      case GUMBO_NAMESPACE_SVG:
+        ns = lookup_or_add_ns(doc, xml_root, "http://www.w3.org/2000/svg", "svg");
+        break;
+      case GUMBO_NAMESPACE_MATHML:
+        ns = lookup_or_add_ns(doc, xml_root, "http://www.w3.org/1998/Math/MathML", "math");
+        break;
+      }
+      if (ns != NULL) {
+        xmlSetNs(xml_child, ns);
+      }
+      xmlAddChild(xml_node, xml_child);
+
+      // Add the attributes.
+      const GumboVector *attrs = &gumbo_child->v.element.attributes;
+      for (size_t i = 0; i < attrs->length; i++) {
+        const GumboAttribute *attr = attrs->data[i];
+
+        switch (attr->attr_namespace) { // ns is reused here for the attribute's namespace
+        case GUMBO_ATTR_NAMESPACE_XLINK:
+          ns = lookup_or_add_ns(doc, xml_root, "http://www.w3.org/1999/xlink", "xlink");
+          break;
+
+        case GUMBO_ATTR_NAMESPACE_XML:
+          ns = lookup_or_add_ns(doc, xml_root, "http://www.w3.org/XML/1998/namespace", "xml");
+          break;
+
+        case GUMBO_ATTR_NAMESPACE_XMLNS:
+          ns = lookup_or_add_ns(doc, xml_root, "http://www.w3.org/2000/xmlns/", "xmlns");
+          break;
+
+        default:
+          ns = NULL;
+        }
+        xmlNewNsProp(xml_child, ns, BAD_CAST attr->name, BAD_CAST attr->value);
+      }
+
+      // Add children for this element: descend so the next iteration starts at its first child.
+      child_index = 0;
+      gumbo_node = gumbo_child;
+      xml_node = xml_child;
+    }
+    }
+  }
+}
+
+static void
+add_errors(const GumboOutput *output, VALUE rdoc, VALUE input, VALUE url) // url is stored as each error's @file
+{
+  const char *input_str = RSTRING_PTR(input);
+  size_t input_len = RSTRING_LEN(input);
+
+  // Convert each Gumbo error into a cNokogiriXmlSyntaxError and store the array in rdoc's @errors (only if any exist).
+  if (output->errors.length) {
+    const GumboVector *errors = &output->errors;
+    VALUE rerrors = rb_ary_new2(errors->length);
+
+    for (size_t i = 0; i < errors->length; i++) {
+      GumboError *err = errors->data[i];
+      GumboSourcePosition position = gumbo_error_position(err);
+      char *msg;
+      size_t size = gumbo_caret_diagnostic_to_string(err, input_str, input_len, &msg);
+      VALUE err_str = rb_utf8_str_new(msg, size);
+      free(msg); // msg is released here once it has been turned into a Ruby string
+      VALUE syntax_error = rb_class_new_instance(1, &err_str, cNokogiriXmlSyntaxError);
+      const char *error_code = gumbo_error_code(err);
+      VALUE str1 = error_code ? rb_utf8_str_new_static(error_code, strlen(error_code)) : Qnil;
+      rb_iv_set(syntax_error, "@domain", INT2NUM(1)); // XML_FROM_PARSER
+      rb_iv_set(syntax_error, "@code", INT2NUM(1));   // XML_ERR_INTERNAL_ERROR
+      rb_iv_set(syntax_error, "@level", INT2NUM(2));  // XML_ERR_ERROR
+      rb_iv_set(syntax_error, "@file", url);
+      rb_iv_set(syntax_error, "@line", INT2NUM(position.line));
+      rb_iv_set(syntax_error, "@str1", str1); // Gumbo error code string, or nil when there is none
+      rb_iv_set(syntax_error, "@str2", Qnil);
+      rb_iv_set(syntax_error, "@str3", Qnil);
+      rb_iv_set(syntax_error, "@int1", INT2NUM(0));
+      rb_iv_set(syntax_error, "@column", INT2NUM(position.column));
+      rb_ary_push(rerrors, syntax_error);
+    }
+    rb_iv_set(rdoc, "@errors", rerrors);
+  }
+}
+
+typedef struct { // state shared between parse/fragment, their *_continue bodies, and parse_cleanup
+  GumboOutput *output; // destroyed by parse_cleanup
+  VALUE input;         // source string; marked by parse_args_mark
+  VALUE url_or_frag;   // url for parse, the document fragment for fragment
+  xmlDocPtr doc;       // freed by parse_cleanup when still non-NULL
+} ParseArgs;
+
+static void
+parse_args_mark(void *parse_args) // GC mark callback: marks the two VALUE fields of a ParseArgs
+{
+  ParseArgs *args = parse_args;
+  rb_gc_mark_maybe(args->input);
+  rb_gc_mark_maybe(args->url_or_frag);
+}
+
+// Wrap a ParseArgs pointer in a Ruby object so it can be passed through rb_ensure.
+// The underlying ParseArgs must outlive the wrapper.
+static VALUE
+wrap_parse_args(ParseArgs *args)
+{
+  return Data_Wrap_Struct(rb_cData, parse_args_mark, RUBY_NEVER_FREE, args); // RUBY_NEVER_FREE: the wrapper never frees args
+}
+
+// Returns the underlying ParseArgs wrapped by wrap_parse_args.
+static ParseArgs *
+unwrap_parse_args(VALUE obj)
+{
+  ParseArgs *args;
+  Data_Get_Struct(obj, ParseArgs, args);
+  return args;
+}
+
+static VALUE
+parse_cleanup(VALUE parse_args) // ensure-handler passed to rb_ensure by parse and fragment
+{
+  ParseArgs *args = unwrap_parse_args(parse_args);
+  gumbo_destroy_output(args->output);
+  // Make sure garbage collection doesn't mark the objects as being live based
+  // on references from the ParseArgs. This may be unnecessary.
+  args->input = Qnil;
+  args->url_or_frag = Qnil;
+  if (args->doc != NULL) { // still set only if ownership was not handed off before we got here
+    xmlFreeDoc(args->doc);
+  }
+  return Qnil;
+}
+
+static VALUE parse_continue(VALUE parse_args);
+
+// Parse input via perform_parse (gumbo_parse_with_options) and return the document built by parse_continue.
+static VALUE
+parse(VALUE self, VALUE input, VALUE url, VALUE max_attributes, VALUE max_errors, VALUE max_depth)
+{
+  GumboOptions options = kGumboDefaultOptions;
+  options.max_attributes = NUM2INT(max_attributes);
+  options.max_errors = NUM2INT(max_errors);
+  options.max_tree_depth = NUM2INT(max_depth);
+
+  GumboOutput *output = perform_parse(&options, input);
+  ParseArgs args = { // stack-allocated; it outlives the wrapper's use inside rb_ensure below
+    .output = output,
+    .input = input,
+    .url_or_frag = url,
+    .doc = NULL,
+  };
+  VALUE parse_args = wrap_parse_args(&args);
+
+  return rb_ensure(parse_continue, parse_args, parse_cleanup, parse_args); // cleanup runs even if parse_continue raises
+}
+
+static VALUE
+parse_continue(VALUE parse_args) // body run under rb_ensure by parse: builds and wraps the libxml2 document
+{
+  ParseArgs *args = unwrap_parse_args(parse_args);
+  GumboOutput *output = args->output;
+  xmlDocPtr doc;
+  if (output->document->v.document.has_doctype) {
+    const char *name   = output->document->v.document.name;
+    const char *public = output->document->v.document.public_identifier;
+    const char *system = output->document->v.document.system_identifier;
+    public = public[0] ? public : NULL; // empty identifiers are passed on as NULL
+    system = system[0] ? system : NULL;
+    doc = new_html_doc(name, system, public);
+  } else {
+    doc = new_html_doc(NULL, NULL, NULL);
+  }
+  args->doc = doc; // Make sure doc gets cleaned up if an error is thrown.
+  build_tree(doc, (xmlNodePtr)doc, output->document);
+  VALUE rdoc = Nokogiri_wrap_xml_document(cNokogiriHtml5Document, doc);
+  args->doc = NULL; // The Ruby runtime now owns doc so don't delete it.
+  add_errors(output, rdoc, args->input, args->url_or_frag);
+  return rdoc;
+}
+
+static int
+lookup_namespace(VALUE node, bool require_known_ns) // maps node.namespace.href to a GUMBO_NAMESPACE_* value, or -1
+{
+  ID namespace, href;
+  CONST_ID(namespace, "namespace");
+  CONST_ID(href, "href");
+  VALUE ns = rb_funcall(node, namespace, 0);
+
+  if (NIL_P(ns)) { // a node without a namespace is treated as HTML
+    return GUMBO_NAMESPACE_HTML;
+  }
+  ns = rb_funcall(ns, href, 0);
+  assert(RTEST(ns));
+  Check_Type(ns, T_STRING);
+
+  const char *href_ptr = RSTRING_PTR(ns);
+  size_t href_len = RSTRING_LEN(ns);
+#define NAMESPACE_P(uri) (href_len == sizeof uri - 1 && !memcmp(href_ptr, uri, href_len))
+  if (NAMESPACE_P("http://www.w3.org/1999/xhtml")) {
+    return GUMBO_NAMESPACE_HTML;
+  }
+  if (NAMESPACE_P("http://www.w3.org/1998/Math/MathML")) {
+    return GUMBO_NAMESPACE_MATHML;
+  }
+  if (NAMESPACE_P("http://www.w3.org/2000/svg")) {
+    return GUMBO_NAMESPACE_SVG;
+  }
+#undef NAMESPACE_P
+  if (require_known_ns) { // "%.*s": the precision limits the output to href_len bytes ("%*s" is only a minimum width)
+    rb_raise(rb_eArgError, "Unexpected namespace URI \"%.*s\"", (int)href_len, href_ptr);
+  }
+  return -1; // unknown namespace, and the caller did not ask for an exception
+}
+
+static xmlNodePtr
+extract_xml_node(VALUE node) // unwraps the xmlNode pointer stored in a wrapped Ruby object
+{
+  xmlNodePtr xml_node;
+  Data_Get_Struct(node, xmlNode, xml_node);
+  return xml_node;
+}
+
+static VALUE fragment_continue(VALUE parse_args);
+
+static VALUE
+fragment( // parse `tags` as an HTML fragment in context `ctx`, adding the nodes to doc_fragment; returns nil
+  VALUE self,
+  VALUE doc_fragment,
+  VALUE tags,
+  VALUE ctx, // nil (defaults to "body"), a String like "svg:path", or a node object
+  VALUE max_attributes,
+  VALUE max_errors,
+  VALUE max_depth
+)
+{
+  ID name = rb_intern_const("name");
+  const char *ctx_tag;
+  GumboNamespaceEnum ctx_ns;
+  GumboQuirksModeEnum quirks_mode;
+  bool form = false;
+  const char *encoding = NULL;
+
+  if (NIL_P(ctx)) {
+    ctx_tag = "body";
+    ctx_ns = GUMBO_NAMESPACE_HTML;
+  } else if (TYPE(ctx) == T_STRING) {
+    ctx_tag = StringValueCStr(ctx);
+    ctx_ns = GUMBO_NAMESPACE_HTML;
+    size_t len = RSTRING_LEN(ctx);
+    const char *colon = memchr(ctx_tag, ':', len);
+    if (colon) { // "prefix:tag" form: the prefix (svg, html or math, case-insensitive) selects the namespace
+      switch (colon - ctx_tag) {
+      case 3:
+        if (st_strncasecmp(ctx_tag, "svg", 3) != 0) {
+          goto error;
+        }
+        ctx_ns = GUMBO_NAMESPACE_SVG;
+        break;
+      case 4:
+        if (st_strncasecmp(ctx_tag, "html", 4) == 0) {
+          ctx_ns = GUMBO_NAMESPACE_HTML;
+        } else if (st_strncasecmp(ctx_tag, "math", 4) == 0) {
+          ctx_ns = GUMBO_NAMESPACE_MATHML;
+        } else {
+          goto error;
+        }
+        break;
+      default:
+error: // "%.*s": the precision prints only the prefix before the colon ("%*s" would print the whole string)
+        rb_raise(rb_eArgError, "Invalid context namespace '%.*s'", (int)(colon - ctx_tag), ctx_tag);
+      }
+      ctx_tag = colon + 1;
+    } else {
+      // For convenience, put 'svg' and 'math' in their namespaces.
+      if (len == 3 && st_strncasecmp(ctx_tag, "svg", 3) == 0) {
+        ctx_ns = GUMBO_NAMESPACE_SVG;
+      } else if (len == 4 && st_strncasecmp(ctx_tag, "math", 4) == 0) {
+        ctx_ns = GUMBO_NAMESPACE_MATHML;
+      }
+    }
+
+    // Check if it's a form.
+    form = ctx_ns == GUMBO_NAMESPACE_HTML && st_strcasecmp(ctx_tag, "form") == 0;
+  } else {
+    ID element_ = rb_intern_const("element?");
+
+    // Context fragment name.
+    VALUE tag_name = rb_funcall(ctx, name, 0);
+    assert(RTEST(tag_name));
+    Check_Type(tag_name, T_STRING);
+    ctx_tag = StringValueCStr(tag_name);
+
+    // Context fragment namespace.
+    ctx_ns = lookup_namespace(ctx, true);
+
+    // Check for a form ancestor, including self.
+    for (VALUE node = ctx;
+         !NIL_P(node);
+         node = rb_respond_to(node, parent) ? rb_funcall(node, parent, 0) : Qnil) {
+      if (!RTEST(rb_funcall(node, element_, 0))) {
+        continue;
+      }
+      VALUE element_name = rb_funcall(node, name, 0);
+      if (RSTRING_LEN(element_name) == 4
+          && !st_strcasecmp(RSTRING_PTR(element_name), "form")
+          && lookup_namespace(node, false) == GUMBO_NAMESPACE_HTML) {
+        form = true;
+        break;
+      }
+    }
+
+    // Encoding.
+    if (RSTRING_LEN(tag_name) == 14
+        && !st_strcasecmp(ctx_tag, "annotation-xml")) {
+      VALUE enc = rb_funcall(ctx, rb_intern_const("[]"), 1, // argc = 1: rb_funcall requires the argument count
+                             rb_utf8_str_new_static("encoding", 8));
+      if (RTEST(enc)) {
+        Check_Type(enc, T_STRING);
+        encoding = StringValueCStr(enc);
+      }
+    }
+  }
+
+  // Quirks mode.
+  VALUE doc = rb_funcall(doc_fragment, rb_intern_const("document"), 0);
+  VALUE dtd = rb_funcall(doc, internal_subset, 0);
+  if (NIL_P(dtd)) {
+    quirks_mode = GUMBO_DOCTYPE_NO_QUIRKS;
+  } else {
+    VALUE dtd_name = rb_funcall(dtd, name, 0);
+    VALUE pubid = rb_funcall(dtd, rb_intern_const("external_id"), 0);
+    VALUE sysid = rb_funcall(dtd, rb_intern_const("system_id"), 0);
+    quirks_mode = gumbo_compute_quirks_mode(
+                    NIL_P(dtd_name) ? NULL : StringValueCStr(dtd_name),
+                    NIL_P(pubid) ? NULL : StringValueCStr(pubid),
+                    NIL_P(sysid) ? NULL : StringValueCStr(sysid)
+                  );
+  }
+
+  // Perform a fragment parse.
+  int depth = NUM2INT(max_depth);
+  GumboOptions options = kGumboDefaultOptions;
+  options.max_attributes = NUM2INT(max_attributes);
+  options.max_errors = NUM2INT(max_errors);
+  // Add one to account for the HTML element.
+  options.max_tree_depth = depth < 0 ? -1 : (depth + 1);
+  options.fragment_context = ctx_tag;
+  options.fragment_namespace = ctx_ns;
+  options.fragment_encoding = encoding;
+  options.quirks_mode = quirks_mode;
+  options.fragment_context_has_form_ancestor = form;
+
+  GumboOutput *output = perform_parse(&options, tags);
+  ParseArgs args = {
+    .output = output,
+    .input = tags,
+    .url_or_frag = doc_fragment,
+    .doc = (xmlDocPtr)extract_xml_node(doc), // existing document; fragment_continue clears this so cleanup won't free it
+  };
+  VALUE parse_args = wrap_parse_args(&args);
+  rb_ensure(fragment_continue, parse_args, parse_cleanup, parse_args);
+  return Qnil;
+}
+
+static VALUE
+fragment_continue(VALUE parse_args) // body run under rb_ensure by fragment: builds nodes under the document fragment
+{
+  ParseArgs *args = unwrap_parse_args(parse_args);
+  GumboOutput *output = args->output;
+  VALUE doc_fragment = args->url_or_frag;
+  xmlDocPtr xml_doc = args->doc;
+
+  args->doc = NULL; // The Ruby runtime owns doc so make sure we don't delete it.
+  xmlNodePtr xml_frag = extract_xml_node(doc_fragment);
+  build_tree(xml_doc, xml_frag, output->root);
+  add_errors(output, doc_fragment, args->input, rb_utf8_str_new_static("#fragment", 9)); // errors report "#fragment" as @file
+  return Qnil;
+}
+
+// Define the HTML5 Document class, intern the IDs used in this file, and register the Gumbo singleton methods.
+void
+noko_init_gumbo()
+{
+  // Class constants: "Document" under mNokogiriHtml5, subclassing cNokogiriHtmlDocument.
+  cNokogiriHtml5Document = rb_define_class_under(mNokogiriHtml5, "Document", cNokogiriHtmlDocument);
+  rb_gc_register_mark_object(cNokogiriHtml5Document);
+
+  // Interned symbols.
+  internal_subset = rb_intern_const("internal_subset");
+  parent = rb_intern_const("parent");
+
+  // Register parse (5 arguments) and fragment (6 arguments) as singleton methods on mNokogiriGumbo.
+  rb_define_singleton_method(mNokogiriGumbo, "parse", parse, 5);
+  rb_define_singleton_method(mNokogiriGumbo, "fragment", fragment, 6);
+}
+
+// vim: set shiftwidth=2 softtabstop=2 tabstop=8 expandtab:
diff --git a/ext/nokogiri/nokogiri.c b/ext/nokogiri/nokogiri.c
index fb255f03e3..4db653981c 100644
--- a/ext/nokogiri/nokogiri.c
+++ b/ext/nokogiri/nokogiri.c
@@ -1,8 +1,10 @@
 #include <nokogiri.h>
 
 VALUE mNokogiri ;
+VALUE mNokogiriGumbo ;
 VALUE mNokogiriHtml ;
 VALUE mNokogiriHtmlSax ;
+VALUE mNokogiriHtml5 ;
 VALUE mNokogiriXml ;
 VALUE mNokogiriXmlSax ;
 VALUE mNokogiriXmlXpath ;
@@ -44,6 +46,7 @@ void noko_init_html_element_description();
 void noko_init_html_entity_lookup();
 void noko_init_html_sax_parser_context();
 void noko_init_html_sax_push_parser();
+void noko_init_gumbo();
 void noko_init_test_global_handlers();
 
 static ID id_read, id_write;
@@ -152,12 +155,14 @@ void
 Init_nokogiri()
 {
   mNokogiri         = rb_define_module("Nokogiri");
-  mNokogiriXml      = rb_define_module_under(mNokogiri, "XML");
+  mNokogiriGumbo    = rb_define_module_under(mNokogiri, "Gumbo");
   mNokogiriHtml     = rb_define_module_under(mNokogiri, "HTML");
-  mNokogiriXslt     = rb_define_module_under(mNokogiri, "XSLT");
-  mNokogiriXmlXpath = rb_define_module_under(mNokogiriXml, "XPath");
-  mNokogiriXmlSax   = rb_define_module_under(mNokogiriXml, "SAX");
   mNokogiriHtmlSax  = rb_define_module_under(mNokogiriHtml, "SAX");
+  mNokogiriHtml5    = rb_define_module_under(mNokogiri, "HTML5");
+  mNokogiriXml      = rb_define_module_under(mNokogiri, "XML");
+  mNokogiriXmlSax   = rb_define_module_under(mNokogiriXml, "SAX");
+  mNokogiriXmlXpath = rb_define_module_under(mNokogiriXml, "XPath");
+  mNokogiriXslt     = rb_define_module_under(mNokogiri, "XSLT");
 
   rb_const_set(mNokogiri, rb_intern("LIBXML_COMPILED_VERSION"), NOKOGIRI_STR_NEW2(LIBXML_DOTTED_VERSION));
   rb_const_set(mNokogiri, rb_intern("LIBXML_LOADED_VERSION"), NOKOGIRI_STR_NEW2(xmlParserVersion));
@@ -238,6 +243,7 @@ Init_nokogiri()
   noko_init_xml_document_fragment();
   noko_init_xml_document();
   noko_init_html_document();
+  noko_init_gumbo();
 
   noko_init_test_global_handlers();
 
diff --git a/ext/nokogiri/nokogiri.h b/ext/nokogiri/nokogiri.h
index 8b3cc9e845..bb93a1b053 100644
--- a/ext/nokogiri/nokogiri.h
+++ b/ext/nokogiri/nokogiri.h
@@ -69,6 +69,7 @@ xmlNodePtr xmlLastElementChild(xmlNodePtr parent);
 #include <ruby/st.h>
 #include <ruby/encoding.h>
 #include <ruby/util.h>
+#include <ruby/version.h>
 
 #define NOKOGIRI_STR_NEW2(str) NOKOGIRI_STR_NEW(str, strlen((const char *)(str)))
 #define NOKOGIRI_STR_NEW(str, len) rb_external_str_new_with_enc((const char *)(str), (long)(len), rb_utf8_encoding())
@@ -92,10 +93,13 @@ xmlNodePtr xmlLastElementChild(xmlNodePtr parent);
 
 
 NOKOPUBVAR VALUE mNokogiri ;
+NOKOPUBVAR VALUE mNokogiriGumbo ;
 NOKOPUBVAR VALUE mNokogiriHtml ;
 NOKOPUBVAR VALUE mNokogiriHtmlSax ;
+NOKOPUBVAR VALUE mNokogiriHtml5 ;
 NOKOPUBVAR VALUE mNokogiriXml ;
 NOKOPUBVAR VALUE mNokogiriXmlSax ;
+NOKOPUBVAR VALUE mNokogiriXmlXpath ;
 NOKOPUBVAR VALUE mNokogiriXslt ;
 
 NOKOPUBVAR VALUE cNokogiriSyntaxError;
@@ -129,6 +133,7 @@ NOKOPUBVAR VALUE cNokogiriXmlXpathSyntaxError;
 NOKOPUBVAR VALUE cNokogiriXsltStylesheet ;
 
 NOKOPUBVAR VALUE cNokogiriHtmlDocument ;
+NOKOPUBVAR VALUE cNokogiriHtml5Document ;
 NOKOPUBVAR VALUE cNokogiriHtmlSaxPushParser ;
 NOKOPUBVAR VALUE cNokogiriHtmlElementDescription ;
 NOKOPUBVAR VALUE cNokogiriHtmlSaxParserContext;
@@ -177,7 +182,8 @@ VALUE noko_xml_node_set_wrap(xmlNodeSetPtr node_set, VALUE document) ;
 
 VALUE noko_xml_document_wrap_with_init_args(VALUE klass, xmlDocPtr doc, int argc, VALUE *argv);
 VALUE noko_xml_document_wrap(VALUE klass, xmlDocPtr doc);
-NOKOPUBFUN VALUE Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc); /* deprecated. use noko_xml_document_wrap() instead. */
+NOKOPUBFUN VALUE Nokogiri_wrap_xml_document(VALUE klass,
+    xmlDocPtr doc); /* deprecated. use noko_xml_document_wrap() instead. */
 
 #define DOC_RUBY_OBJECT_TEST(x) ((nokogiriTuplePtr)(x->_private))
 #define DOC_RUBY_OBJECT(x) (((nokogiriTuplePtr)(x->_private))->doc)
diff --git a/gumbo-parser/.gitignore b/gumbo-parser/.gitignore
new file mode 100644
index 0000000000..13b0a3e962
--- /dev/null
+++ b/gumbo-parser/.gitignore
@@ -0,0 +1,2 @@
+build
+src/*.o
diff --git a/nokogumbo-import/gumbo-parser/CHANGES.md b/gumbo-parser/CHANGES.md
similarity index 100%
rename from nokogumbo-import/gumbo-parser/CHANGES.md
rename to gumbo-parser/CHANGES.md
diff --git a/nokogumbo-import/gumbo-parser/Makefile b/gumbo-parser/Makefile
similarity index 100%
rename from nokogumbo-import/gumbo-parser/Makefile
rename to gumbo-parser/Makefile
diff --git a/nokogumbo-import/gumbo-parser/THANKS b/gumbo-parser/THANKS
similarity index 100%
rename from nokogumbo-import/gumbo-parser/THANKS
rename to gumbo-parser/THANKS
diff --git a/gumbo-parser/src/Makefile b/gumbo-parser/src/Makefile
new file mode 100644
index 0000000000..3a50bc96fa
--- /dev/null
+++ b/gumbo-parser/src/Makefile
@@ -0,0 +1,17 @@
+# this Makefile is used by ext/nokogiri/extconf.rb
+# to enable a mini_portile2 recipe to build the gumbo parser
+.PHONY: clean
+
+override CFLAGS += -std=c99 -Wall
+
+# allow the ENV var to override this
+RANLIB ?= ranlib
+
+gumbo_objs := $(patsubst %.c,%.o,$(wildcard *.c))
+
+libgumbo.a: $(gumbo_objs)
+	$(AR) $(ARFLAGS) $@ $^
+	- ($(RANLIB) $@ || true) >/dev/null 2>&1
+
+clean:
+	rm -f $(gumbo_objs) libgumbo.a
diff --git a/nokogumbo-import/gumbo-parser/src/README.md b/gumbo-parser/src/README.md
similarity index 100%
rename from nokogumbo-import/gumbo-parser/src/README.md
rename to gumbo-parser/src/README.md
diff --git a/nokogumbo-import/gumbo-parser/src/ascii.c b/gumbo-parser/src/ascii.c
similarity index 100%
rename from nokogumbo-import/gumbo-parser/src/ascii.c
rename to gumbo-parser/src/ascii.c
diff --git a/nokogumbo-import/gumbo-parser/src/ascii.h b/gumbo-parser/src/ascii.h
similarity index 100%
rename from nokogumbo-import/gumbo-parser/src/ascii.h
rename to gumbo-parser/src/ascii.h
diff --git a/nokogumbo-import/gumbo-parser/src/attribute.c b/gumbo-parser/src/attribute.c
similarity index 100%
rename from nokogumbo-import/gumbo-parser/src/attribute.c
rename to gumbo-parser/src/attribute.c
diff --git a/nokogumbo-import/gumbo-parser/src/attribute.h b/gumbo-parser/src/attribute.h
similarity index 100%
rename from nokogumbo-import/gumbo-parser/src/attribute.h
rename to gumbo-parser/src/attribute.h
diff --git a/nokogumbo-import/gumbo-parser/src/char_ref.c b/gumbo-parser/src/char_ref.c
similarity index 100%
rename from nokogumbo-import/gumbo-parser/src/char_ref.c
rename to gumbo-parser/src/char_ref.c
diff --git a/nokogumbo-import/gumbo-parser/src/char_ref.h b/gumbo-parser/src/char_ref.h
similarity index 100%
rename from nokogumbo-import/gumbo-parser/src/char_ref.h
rename to gumbo-parser/src/char_ref.h
diff --git a/nokogumbo-import/gumbo-parser/src/char_ref.rl b/gumbo-parser/src/char_ref.rl
similarity index 100%
rename from nokogumbo-import/gumbo-parser/src/char_ref.rl
rename to gumbo-parser/src/char_ref.rl
diff --git a/nokogumbo-import/gumbo-parser/src/error.c b/gumbo-parser/src/error.c
similarity index 100%
rename from nokogumbo-import/gumbo-parser/src/error.c
rename to gumbo-parser/src/error.c
diff --git a/nokogumbo-import/gumbo-parser/src/error.h b/gumbo-parser/src/error.h
similarity index 100%
rename from nokogumbo-import/gumbo-parser/src/error.h
rename to gumbo-parser/src/error.h
diff --git a/nokogumbo-import/gumbo-parser/src/foreign_attrs.c b/gumbo-parser/src/foreign_attrs.c
similarity index 100%
rename from nokogumbo-import/gumbo-parser/src/foreign_attrs.c
rename to gumbo-parser/src/foreign_attrs.c
diff --git a/nokogumbo-import/gumbo-parser/src/foreign_attrs.gperf b/gumbo-parser/src/foreign_attrs.gperf
similarity index 100%
rename from nokogumbo-import/gumbo-parser/src/foreign_attrs.gperf
rename to gumbo-parser/src/foreign_attrs.gperf
diff --git a/nokogumbo-import/gumbo-parser/src/gumbo.h b/gumbo-parser/src/gumbo.h
similarity index 100%
rename from nokogumbo-import/gumbo-parser/src/gumbo.h
rename to gumbo-parser/src/gumbo.h
diff --git a/nokogumbo-import/gumbo-parser/src/insertion_mode.h b/gumbo-parser/src/insertion_mode.h
similarity index 100%
rename from nokogumbo-import/gumbo-parser/src/insertion_mode.h
rename to gumbo-parser/src/insertion_mode.h
diff --git a/nokogumbo-import/gumbo-parser/src/macros.h b/gumbo-parser/src/macros.h
similarity index 100%
rename from nokogumbo-import/gumbo-parser/src/macros.h
rename to gumbo-parser/src/macros.h
diff --git a/nokogumbo-import/gumbo-parser/src/parser.c b/gumbo-parser/src/parser.c
similarity index 100%
rename from nokogumbo-import/gumbo-parser/src/parser.c
rename to gumbo-parser/src/parser.c
diff --git a/nokogumbo-import/gumbo-parser/src/parser.h b/gumbo-parser/src/parser.h
similarity index 100%
rename from nokogumbo-import/gumbo-parser/src/parser.h
rename to gumbo-parser/src/parser.h
diff --git a/nokogumbo-import/gumbo-parser/src/replacement.h b/gumbo-parser/src/replacement.h
similarity index 100%
rename from nokogumbo-import/gumbo-parser/src/replacement.h
rename to gumbo-parser/src/replacement.h
diff --git a/nokogumbo-import/gumbo-parser/src/string_buffer.c b/gumbo-parser/src/string_buffer.c
similarity index 100%
rename from nokogumbo-import/gumbo-parser/src/string_buffer.c
rename to gumbo-parser/src/string_buffer.c
diff --git a/nokogumbo-import/gumbo-parser/src/string_buffer.h b/gumbo-parser/src/string_buffer.h
similarity index 100%
rename from nokogumbo-import/gumbo-parser/src/string_buffer.h
rename to gumbo-parser/src/string_buffer.h
diff --git a/nokogumbo-import/gumbo-parser/src/string_piece.c b/gumbo-parser/src/string_piece.c
similarity index 100%
rename from nokogumbo-import/gumbo-parser/src/string_piece.c
rename to gumbo-parser/src/string_piece.c
diff --git a/nokogumbo-import/gumbo-parser/src/svg_attrs.c b/gumbo-parser/src/svg_attrs.c
similarity index 100%
rename from nokogumbo-import/gumbo-parser/src/svg_attrs.c
rename to gumbo-parser/src/svg_attrs.c
diff --git a/nokogumbo-import/gumbo-parser/src/svg_attrs.gperf b/gumbo-parser/src/svg_attrs.gperf
similarity index 100%
rename from nokogumbo-import/gumbo-parser/src/svg_attrs.gperf
rename to gumbo-parser/src/svg_attrs.gperf
diff --git a/nokogumbo-import/gumbo-parser/src/svg_tags.c b/gumbo-parser/src/svg_tags.c
similarity index 100%
rename from nokogumbo-import/gumbo-parser/src/svg_tags.c
rename to gumbo-parser/src/svg_tags.c
diff --git a/nokogumbo-import/gumbo-parser/src/svg_tags.gperf b/gumbo-parser/src/svg_tags.gperf
similarity index 100%
rename from nokogumbo-import/gumbo-parser/src/svg_tags.gperf
rename to gumbo-parser/src/svg_tags.gperf
diff --git a/nokogumbo-import/gumbo-parser/src/tag.c b/gumbo-parser/src/tag.c
similarity index 100%
rename from nokogumbo-import/gumbo-parser/src/tag.c
rename to gumbo-parser/src/tag.c
diff --git a/nokogumbo-import/gumbo-parser/src/tag_lookup.c b/gumbo-parser/src/tag_lookup.c
similarity index 100%
rename from nokogumbo-import/gumbo-parser/src/tag_lookup.c
rename to gumbo-parser/src/tag_lookup.c
diff --git a/nokogumbo-import/gumbo-parser/src/tag_lookup.gperf b/gumbo-parser/src/tag_lookup.gperf
similarity index 100%
rename from nokogumbo-import/gumbo-parser/src/tag_lookup.gperf
rename to gumbo-parser/src/tag_lookup.gperf
diff --git a/nokogumbo-import/gumbo-parser/src/tag_lookup.h b/gumbo-parser/src/tag_lookup.h
similarity index 100%
rename from nokogumbo-import/gumbo-parser/src/tag_lookup.h
rename to gumbo-parser/src/tag_lookup.h
diff --git a/nokogumbo-import/gumbo-parser/src/token_buffer.c b/gumbo-parser/src/token_buffer.c
similarity index 100%
rename from nokogumbo-import/gumbo-parser/src/token_buffer.c
rename to gumbo-parser/src/token_buffer.c
diff --git a/nokogumbo-import/gumbo-parser/src/token_buffer.h b/gumbo-parser/src/token_buffer.h
similarity index 100%
rename from nokogumbo-import/gumbo-parser/src/token_buffer.h
rename to gumbo-parser/src/token_buffer.h
diff --git a/nokogumbo-import/gumbo-parser/src/token_type.h b/gumbo-parser/src/token_type.h
similarity index 100%
rename from nokogumbo-import/gumbo-parser/src/token_type.h
rename to gumbo-parser/src/token_type.h
diff --git a/nokogumbo-import/gumbo-parser/src/tokenizer.c b/gumbo-parser/src/tokenizer.c
similarity index 100%
rename from nokogumbo-import/gumbo-parser/src/tokenizer.c
rename to gumbo-parser/src/tokenizer.c
diff --git a/nokogumbo-import/gumbo-parser/src/tokenizer.h b/gumbo-parser/src/tokenizer.h
similarity index 100%
rename from nokogumbo-import/gumbo-parser/src/tokenizer.h
rename to gumbo-parser/src/tokenizer.h
diff --git a/nokogumbo-import/gumbo-parser/src/tokenizer_states.h b/gumbo-parser/src/tokenizer_states.h
similarity index 100%
rename from nokogumbo-import/gumbo-parser/src/tokenizer_states.h
rename to gumbo-parser/src/tokenizer_states.h
diff --git a/nokogumbo-import/gumbo-parser/src/utf8.c b/gumbo-parser/src/utf8.c
similarity index 100%
rename from nokogumbo-import/gumbo-parser/src/utf8.c
rename to gumbo-parser/src/utf8.c
diff --git a/nokogumbo-import/gumbo-parser/src/utf8.h b/gumbo-parser/src/utf8.h
similarity index 100%
rename from nokogumbo-import/gumbo-parser/src/utf8.h
rename to gumbo-parser/src/utf8.h
diff --git a/nokogumbo-import/gumbo-parser/src/util.c b/gumbo-parser/src/util.c
similarity index 100%
rename from nokogumbo-import/gumbo-parser/src/util.c
rename to gumbo-parser/src/util.c
diff --git a/nokogumbo-import/gumbo-parser/src/util.h b/gumbo-parser/src/util.h
similarity index 100%
rename from nokogumbo-import/gumbo-parser/src/util.h
rename to gumbo-parser/src/util.h
diff --git a/nokogumbo-import/gumbo-parser/src/vector.c b/gumbo-parser/src/vector.c
similarity index 100%
rename from nokogumbo-import/gumbo-parser/src/vector.c
rename to gumbo-parser/src/vector.c
diff --git a/nokogumbo-import/gumbo-parser/src/vector.h b/gumbo-parser/src/vector.h
similarity index 100%
rename from nokogumbo-import/gumbo-parser/src/vector.h
rename to gumbo-parser/src/vector.h
diff --git a/nokogumbo-import/gumbo-parser/test/attribute.cc b/gumbo-parser/test/attribute.cc
similarity index 100%
rename from nokogumbo-import/gumbo-parser/test/attribute.cc
rename to gumbo-parser/test/attribute.cc
diff --git a/nokogumbo-import/gumbo-parser/test/parser.cc b/gumbo-parser/test/parser.cc
similarity index 100%
rename from nokogumbo-import/gumbo-parser/test/parser.cc
rename to gumbo-parser/test/parser.cc
diff --git a/nokogumbo-import/gumbo-parser/test/string_buffer.cc b/gumbo-parser/test/string_buffer.cc
similarity index 100%
rename from nokogumbo-import/gumbo-parser/test/string_buffer.cc
rename to gumbo-parser/test/string_buffer.cc
diff --git a/nokogumbo-import/gumbo-parser/test/string_piece.cc b/gumbo-parser/test/string_piece.cc
similarity index 100%
rename from nokogumbo-import/gumbo-parser/test/string_piece.cc
rename to gumbo-parser/test/string_piece.cc
diff --git a/nokogumbo-import/gumbo-parser/test/test_utils.cc b/gumbo-parser/test/test_utils.cc
similarity index 100%
rename from nokogumbo-import/gumbo-parser/test/test_utils.cc
rename to gumbo-parser/test/test_utils.cc
diff --git a/nokogumbo-import/gumbo-parser/test/test_utils.h b/gumbo-parser/test/test_utils.h
similarity index 100%
rename from nokogumbo-import/gumbo-parser/test/test_utils.h
rename to gumbo-parser/test/test_utils.h
diff --git a/nokogumbo-import/gumbo-parser/test/token_buffer.cc b/gumbo-parser/test/token_buffer.cc
similarity index 100%
rename from nokogumbo-import/gumbo-parser/test/token_buffer.cc
rename to gumbo-parser/test/token_buffer.cc
diff --git a/nokogumbo-import/gumbo-parser/test/tokenizer.cc b/gumbo-parser/test/tokenizer.cc
similarity index 100%
rename from nokogumbo-import/gumbo-parser/test/tokenizer.cc
rename to gumbo-parser/test/tokenizer.cc
diff --git a/nokogumbo-import/gumbo-parser/test/utf8.cc b/gumbo-parser/test/utf8.cc
similarity index 100%
rename from nokogumbo-import/gumbo-parser/test/utf8.cc
rename to gumbo-parser/test/utf8.cc
diff --git a/nokogumbo-import/gumbo-parser/test/vector.cc b/gumbo-parser/test/vector.cc
similarity index 100%
rename from nokogumbo-import/gumbo-parser/test/vector.cc
rename to gumbo-parser/test/vector.cc
diff --git a/lib/nokogiri.rb b/lib/nokogiri.rb
index bf5663dad4..5921c75995 100644
--- a/lib/nokogiri.rb
+++ b/lib/nokogiri.rb
@@ -19,6 +19,8 @@
 require 'nokogiri/css'
 require 'nokogiri/html/builder'
 
+require 'nokogiri/html5' if Nokogiri.uses_gumbo?
+
 # Nokogiri parses and searches XML/HTML very quickly, and also has
 # correctly implemented CSS3 selector support as well as XPath 1.0
 # support.
diff --git a/lib/nokogiri/gumbo.rb b/lib/nokogiri/gumbo.rb
new file mode 100644
index 0000000000..d6f7403ec3
--- /dev/null
+++ b/lib/nokogiri/gumbo.rb
@@ -0,0 +1,14 @@
+# frozen_string_literal: true
+module Nokogiri
+  module Gumbo
+    # The default maximum number of attributes per element.
+    DEFAULT_MAX_ATTRIBUTES = 400
+
+    # The default maximum number of errors for parsing a document or a fragment.
+    DEFAULT_MAX_ERRORS = 0
+
+    # The default maximum depth of the DOM tree produced by parsing a document
+    # or fragment.
+    DEFAULT_MAX_TREE_DEPTH = 400
+  end
+end
diff --git a/nokogumbo-import/lib/nokogumbo/html5.rb b/lib/nokogiri/html5.rb
similarity index 98%
rename from nokogumbo-import/lib/nokogumbo/html5.rb
rename to lib/nokogiri/html5.rb
index 4cdd474068..306a29175e 100644
--- a/nokogumbo-import/lib/nokogumbo/html5.rb
+++ b/lib/nokogiri/html5.rb
@@ -1,3 +1,4 @@
+# frozen_string_literal: true
 #
 #  Copyright 2013-2021 Sam Ruby, Stephen Checkoway
 #
@@ -14,9 +15,9 @@
 #  limitations under the License.
 #
 
-require 'nokogumbo/html5/document'
-require 'nokogumbo/html5/document_fragment'
-require 'nokogumbo/html5/node'
+require_relative 'html5/document'
+require_relative 'html5/document_fragment'
+require_relative 'html5/node'
 
 module Nokogiri
   # Parse an HTML 5 document. Convenience method for Nokogiri::HTML5::Document.parse
@@ -266,3 +267,5 @@ def self.prepend_newline?(node)
     end
   end
 end
+
+require_relative 'gumbo'
diff --git a/nokogumbo-import/lib/nokogumbo/html5/document.rb b/lib/nokogiri/html5/document.rb
similarity index 87%
rename from nokogumbo-import/lib/nokogumbo/html5/document.rb
rename to lib/nokogiri/html5/document.rb
index cc3ee25388..4fa49f2192 100644
--- a/nokogumbo-import/lib/nokogumbo/html5/document.rb
+++ b/lib/nokogiri/html5/document.rb
@@ -1,3 +1,4 @@
+# frozen_string_literal: true
 #
 #  Copyright 2013-2021 Sam Ruby, Stephen Checkoway
 #
@@ -57,10 +58,10 @@ def to_xml(options = {}, &block)
       private
       def self.do_parse(string_or_io, url, encoding, options)
         string = HTML5.read_and_encode(string_or_io, encoding)
-        max_attributes = options[:max_attributes] || Nokogumbo::DEFAULT_MAX_ATTRIBUTES
-        max_errors = options[:max_errors] || options[:max_parse_errors] || Nokogumbo::DEFAULT_MAX_ERRORS
-        max_depth = options[:max_tree_depth] || Nokogumbo::DEFAULT_MAX_TREE_DEPTH
-        doc = Nokogumbo.parse(string, url, max_attributes, max_errors, max_depth)
+        max_attributes = options[:max_attributes] || Nokogiri::Gumbo::DEFAULT_MAX_ATTRIBUTES
+        max_errors = options[:max_errors] || options[:max_parse_errors] || Nokogiri::Gumbo::DEFAULT_MAX_ERRORS
+        max_depth = options[:max_tree_depth] || Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH
+        doc = Nokogiri::Gumbo.parse(string, url, max_attributes, max_errors, max_depth)
         doc.encoding = 'UTF-8'
         doc
       end
diff --git a/nokogumbo-import/lib/nokogumbo/html5/document_fragment.rb b/lib/nokogiri/html5/document_fragment.rb
similarity index 83%
rename from nokogumbo-import/lib/nokogumbo/html5/document_fragment.rb
rename to lib/nokogiri/html5/document_fragment.rb
index 45afc36791..574b16e3f7 100644
--- a/nokogumbo-import/lib/nokogumbo/html5/document_fragment.rb
+++ b/lib/nokogiri/html5/document_fragment.rb
@@ -1,3 +1,4 @@
+# frozen_string_literal: true
 #
 #  Copyright 2013-2021 Sam Ruby, Stephen Checkoway
 #
@@ -14,7 +15,7 @@
 #  limitations under the License.
 #
 
-require 'nokogiri'
+require 'nokogiri/html/document_fragment'
 
 module Nokogiri
   module HTML5
@@ -28,11 +29,11 @@ def initialize(doc, tags = nil, ctx = nil, options = {})
         self.errors = []
         return self unless tags
 
-        max_attributes = options[:max_attributes] || Nokogumbo::DEFAULT_MAX_ATTRIBUTES
-        max_errors = options[:max_errors] || Nokogumbo::DEFAULT_MAX_ERRORS
-        max_depth = options[:max_tree_depth] || Nokogumbo::DEFAULT_MAX_TREE_DEPTH
+        max_attributes = options[:max_attributes] || Nokogiri::Gumbo::DEFAULT_MAX_ATTRIBUTES
+        max_errors = options[:max_errors] || Nokogiri::Gumbo::DEFAULT_MAX_ERRORS
+        max_depth = options[:max_tree_depth] || Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH
         tags = Nokogiri::HTML5.read_and_encode(tags, nil)
-        Nokogumbo.fragment(self, tags, ctx, max_attributes, max_errors, max_depth)
+        Nokogiri::Gumbo.fragment(self, tags, ctx, max_attributes, max_errors, max_depth)
       end
 
       def serialize(options = {}, &block)
diff --git a/nokogumbo-import/lib/nokogumbo/html5/node.rb b/lib/nokogiri/html5/node.rb
similarity index 97%
rename from nokogumbo-import/lib/nokogumbo/html5/node.rb
rename to lib/nokogiri/html5/node.rb
index 34af272651..d1f76d68e7 100644
--- a/nokogumbo-import/lib/nokogumbo/html5/node.rb
+++ b/lib/nokogiri/html5/node.rb
@@ -1,3 +1,4 @@
+# frozen_string_literal: true
 #
 #  Copyright 2013-2021 Sam Ruby, Stephen Checkoway
 #
@@ -14,7 +15,7 @@
 #  limitations under the License.
 #
 
-require 'nokogiri'
+require 'nokogiri/xml/node'
 
 module Nokogiri
   module HTML5
@@ -40,7 +41,7 @@ def add_child_node_and_reparent_attrs(node)
 
       def inner_html(options = {})
         return super(options) unless document.is_a?(HTML5::Document)
-        result = options[:preserve_newline] && HTML5.prepend_newline?(self) ? "\n" : ""
+        result = options[:preserve_newline] && HTML5.prepend_newline?(self) ? String.new("\n") : String.new
         result << children.map { |child| child.to_html(options) }.join
         result
       end
diff --git a/lib/nokogiri/version/info.rb b/lib/nokogiri/version/info.rb
index 1bfea42908..8011525667 100644
--- a/lib/nokogiri/version/info.rb
+++ b/lib/nokogiri/version/info.rb
@@ -190,6 +190,10 @@ def self.uses_libxml?(requirement = nil) # :nodoc:
     Gem::Requirement.new(requirement).satisfied_by?(VersionInfo.instance.loaded_libxml_version)
   end
 
+  def self.uses_gumbo?
+    uses_libxml? # TODO: replace with Gumbo functionality
+  end
+
   def self.jruby? # :nodoc:
     VersionInfo.instance.jruby?
   end
diff --git a/nokogiri.gemspec b/nokogiri.gemspec
index 7a414ab1be..7e00470e20 100644
--- a/nokogiri.gemspec
+++ b/nokogiri.gemspec
@@ -181,6 +181,51 @@ Gem::Specification.new do |spec|
     "ext/nokogiri/xml_text.c",
     "ext/nokogiri/xml_xpath_context.c",
     "ext/nokogiri/xslt_stylesheet.c",
+    "gumbo-parser/CHANGES.md",
+    "gumbo-parser/Makefile",
+    "gumbo-parser/THANKS",
+    "gumbo-parser/src/Makefile",
+    "gumbo-parser/src/README.md",
+    "gumbo-parser/src/ascii.c",
+    "gumbo-parser/src/ascii.h",
+    "gumbo-parser/src/attribute.c",
+    "gumbo-parser/src/attribute.h",
+    "gumbo-parser/src/char_ref.c",
+    "gumbo-parser/src/char_ref.h",
+    "gumbo-parser/src/char_ref.rl",
+    "gumbo-parser/src/error.c",
+    "gumbo-parser/src/error.h",
+    "gumbo-parser/src/foreign_attrs.c",
+    "gumbo-parser/src/foreign_attrs.gperf",
+    "gumbo-parser/src/gumbo.h",
+    "gumbo-parser/src/insertion_mode.h",
+    "gumbo-parser/src/macros.h",
+    "gumbo-parser/src/parser.c",
+    "gumbo-parser/src/parser.h",
+    "gumbo-parser/src/replacement.h",
+    "gumbo-parser/src/string_buffer.c",
+    "gumbo-parser/src/string_buffer.h",
+    "gumbo-parser/src/string_piece.c",
+    "gumbo-parser/src/svg_attrs.c",
+    "gumbo-parser/src/svg_attrs.gperf",
+    "gumbo-parser/src/svg_tags.c",
+    "gumbo-parser/src/svg_tags.gperf",
+    "gumbo-parser/src/tag.c",
+    "gumbo-parser/src/tag_lookup.c",
+    "gumbo-parser/src/tag_lookup.gperf",
+    "gumbo-parser/src/tag_lookup.h",
+    "gumbo-parser/src/token_buffer.c",
+    "gumbo-parser/src/token_buffer.h",
+    "gumbo-parser/src/token_type.h",
+    "gumbo-parser/src/tokenizer.c",
+    "gumbo-parser/src/tokenizer.h",
+    "gumbo-parser/src/tokenizer_states.h",
+    "gumbo-parser/src/utf8.c",
+    "gumbo-parser/src/utf8.h",
+    "gumbo-parser/src/util.c",
+    "gumbo-parser/src/util.h",
+    "gumbo-parser/src/vector.c",
+    "gumbo-parser/src/vector.h",
     "lib/isorelax.jar",
     "lib/jing.jar",
     "lib/nekodtd.jar",
@@ -197,6 +242,7 @@ Gem::Specification.new do |spec|
     "lib/nokogiri/css/xpath_visitor.rb",
     "lib/nokogiri/decorators/slop.rb",
     "lib/nokogiri/extension.rb",
+    "lib/nokogiri/gumbo.rb",
     "lib/nokogiri/html.rb",
     "lib/nokogiri/html/builder.rb",
     "lib/nokogiri/html/document.rb",
@@ -207,6 +253,10 @@ Gem::Specification.new do |spec|
     "lib/nokogiri/html/sax/parser.rb",
     "lib/nokogiri/html/sax/parser_context.rb",
     "lib/nokogiri/html/sax/push_parser.rb",
+    "lib/nokogiri/html5.rb",
+    "lib/nokogiri/html5/document.rb",
+    "lib/nokogiri/html5/document_fragment.rb",
+    "lib/nokogiri/html5/node.rb",
     "lib/nokogiri/jruby/dependencies.rb",
     "lib/nokogiri/syntax_error.rb",
     "lib/nokogiri/version.rb",
@@ -266,7 +316,7 @@ Gem::Specification.new do |spec|
   spec.rdoc_options = ["--main", "README.md"]
 
   spec.add_runtime_dependency("racc", "~> 1.4")
-  spec.add_runtime_dependency("mini_portile2", "~> 2.5.0") unless java_p # keep version in sync with extconf.rb
+  spec.add_runtime_dependency("mini_portile2", "~> 2.5.1") unless java_p # keep version in sync with extconf.rb
 
   spec.add_development_dependency("bundler", "~> 2.2")
   spec.add_development_dependency("concourse", "~> 0.41")
diff --git a/nokogumbo-import/Rakefile b/nokogumbo-import/Rakefile
index 94200bedbe..c04ca8de0c 100644
--- a/nokogumbo-import/Rakefile
+++ b/nokogumbo-import/Rakefile
@@ -12,10 +12,10 @@ task default: :test
 task test: :compile
 task gem: :test
 
-ext = Rake::ExtensionTask.new 'nokogumbo' do |e|
-  e.lib_dir = 'lib/nokogumbo'
-  e.source_pattern = '{,../../gumbo-parser/src/}*.[hc]'
-end
+# ext = Rake::ExtensionTask.new 'nokogumbo' do |e|
+#   e.lib_dir = 'lib/nokogumbo'
+#   e.source_pattern = '{,../../gumbo-parser/src/}*.[hc]'
+# end
 
 Rake::TestTask.new(:test) do |t|
   t.libs << 'test'
diff --git a/nokogumbo-import/ext/nokogumbo/extconf.rb b/nokogumbo-import/ext/nokogumbo/extconf.rb
deleted file mode 100644
index d0a81d0dfd..0000000000
--- a/nokogumbo-import/ext/nokogumbo/extconf.rb
+++ /dev/null
@@ -1,160 +0,0 @@
-#
-#  Copyright 2013-2021 Sam Ruby, Stephen Checkoway, Mike Dalessio
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-#
-
-require 'rubygems'
-require 'fileutils'
-require 'mkmf'
-require 'nokogiri'
-
-$CFLAGS += " -std=c99"
-$LDFLAGS.gsub!('-Wl,--no-undefined', '')
-$DLDFLAGS.gsub!('-Wl,--no-undefined', '')
-$warnflags = CONFIG['warnflags'] = '-Wall'
-
-NG_SPEC = Gem::Specification.find_by_name('nokogiri', "= #{Nokogiri::VERSION}")
-
-def download_headers
-  begin
-    require 'yaml'
-
-    dependencies = YAML.load_file(File.join(NG_SPEC.gem_dir, 'dependencies.yml'))
-    version = dependencies['libxml2']['version']
-    host = RbConfig::CONFIG["host_alias"].empty? ? RbConfig::CONFIG["host"] : RbConfig::CONFIG["host_alias"]
-    path = File.join('ports', host, 'libxml2', version, 'include/libxml2')
-    return path if File.directory?(path)
-
-    # Make sure we're using the same version Nokogiri uses
-    dep_index = NG_SPEC.dependencies.index { |dep| dep.name == 'mini_portile2' and dep.type == :runtime }
-    return nil if dep_index.nil?
-    requirement = NG_SPEC.dependencies[dep_index].requirement.to_s
-
-    gem 'mini_portile2', requirement
-    require 'mini_portile2'
-    p = MiniPortile::new('libxml2', version).tap do |r|
-      r.host = RbConfig::CONFIG["host_alias"].empty? ? RbConfig::CONFIG["host"] : RbConfig::CONFIG["host_alias"]
-      r.files = [{
-        url: "http://xmlsoft.org/sources/libxml2-#{r.version}.tar.gz",
-        sha256: dependencies['libxml2']['sha256']
-      }]
-      r.configure_options += [
-        "--without-python",
-        "--without-readline",
-        "--with-c14n",
-        "--with-debug",
-        "--with-threads"
-      ]
-    end
-    p.download unless p.downloaded?
-    p.extract
-    p.configure unless p.configured?
-    system('make', '-C', "tmp/#{p.host}/ports/libxml2/#{version}/libxml2-#{version}/include/libxml", 'install-xmlincHEADERS')
-    path
-  rescue
-    puts 'failed to download/install headers'
-    nil
-  end
-end
-
-required = arg_config('--with-libxml2')
-prohibited = arg_config('--without-libxml2')
-if required and prohibited
-  abort "cannot use both --with-libxml2 and --without-libxml2"
-end
-
-have_libxml2 = false
-have_ng = false
-
-def windows?
-  ::RUBY_PLATFORM =~ /mingw|mswin/
-end
-
-def modern_nokogiri?
-  nokogiri_version = Gem::Version.new(Nokogiri::VERSION)
-  requirement = windows? ? ">= 1.11.2" : ">= 1.11.0.rc4"
-  Gem::Requirement.new(requirement).satisfied_by?(nokogiri_version)
-end
-
-if !prohibited
-  if modern_nokogiri?
-    append_cflags(Nokogiri::VERSION_INFO["nokogiri"]["cppflags"])
-    append_ldflags(Nokogiri::VERSION_INFO["nokogiri"]["ldflags"]) # may be nil for nokogiri pre-1.11.2
-    have_libxml2 = if Nokogiri::VERSION_INFO["nokogiri"]["ldflags"].empty?
-                     have_header('libxml/tree.h')
-                   else
-                     have_func("xmlNewDoc", "libxml/tree.h")
-                   end
-  end
-
-  if !have_libxml2
-    if Nokogiri::VERSION_INFO.include?('libxml') and
-       Nokogiri::VERSION_INFO['libxml']['source'] == 'packaged'
-      # Nokogiri has libxml2 built in. Find the headers.
-      libxml2_path = File.join(Nokogiri::VERSION_INFO['libxml']['libxml2_path'],
-                               'include/libxml2')
-      if find_header('libxml/tree.h', libxml2_path)
-        have_libxml2 = true
-      else
-        # Unfortunately, some versions of Nokogiri delete these files.
-        # https://github.com/sparklemotion/nokogiri/pull/1788
-        # Try to download them
-        libxml2_path = download_headers
-        unless libxml2_path.nil?
-          have_libxml2 = find_header('libxml/tree.h', libxml2_path)
-        end
-      end
-    else
-      # Nokogiri is compiled with system headers.
-      # Hack to work around broken mkmf on macOS
-      # (https://bugs.ruby-lang.org/issues/14992 fixed now)
-      if RbConfig::MAKEFILE_CONFIG['LIBPATHENV'] == 'DYLD_LIBRARY_PATH'
-        RbConfig::MAKEFILE_CONFIG['LIBPATHENV'] = 'DYLD_FALLBACK_LIBRARY_PATH'
-      end
-
-      pkg_config('libxml-2.0')
-      have_libxml2 = have_library('xml2', 'xmlNewDoc')
-    end
-  end
-
-  if required and !have_libxml2
-    abort "libxml2 required but could not be located"
-  end
-
-
-  if have_libxml2
-    have_ng = have_header('nokogiri.h') || find_header('nokogiri.h', File.join(NG_SPEC.gem_dir, 'ext/nokogiri'))
-  end
-end
-
-if have_libxml2 and have_ng
-  $CFLAGS += " -DNGLIB=1"
-end
-
-# Symlink gumbo-parser source files.
-ext_dir = File.dirname(__FILE__)
-
-Dir.chdir(ext_dir) do
-  $srcs = Dir['*.c', '../../gumbo-parser/src/*.c']
-  $hdrs = Dir['*.h', '../../gumbo-parser/src/*.h']
-end
-$INCFLAGS << ' -I$(srcdir)/../../gumbo-parser/src'
-$VPATH << '$(srcdir)/../../gumbo-parser/src'
-
-create_makefile('nokogumbo/nokogumbo') do |conf|
-  conf.map! do |chunk|
-    chunk.gsub(/^HDRS = .*$/, "HDRS = #{$hdrs.map { |h| File.join('$(srcdir)', h)}.join(' ')}")
-  end
-end
-# vim: set sw=2 sts=2 ts=8 et:
diff --git a/nokogumbo-import/ext/nokogumbo/nokogumbo.c b/nokogumbo-import/ext/nokogumbo/nokogumbo.c
deleted file mode 100644
index 1ee7d70178..0000000000
--- a/nokogumbo-import/ext/nokogumbo/nokogumbo.c
+++ /dev/null
@@ -1,809 +0,0 @@
-//
-//  Copyright 2013-2021 Sam Ruby, Stephen Checkoway
-//
-//  Licensed under the Apache License, Version 2.0 (the "License");
-//  you may not use this file except in compliance with the License.
-//  You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-//  Unless required by applicable law or agreed to in writing, software
-//  distributed under the License is distributed on an "AS IS" BASIS,
-//  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-//  See the License for the specific language governing permissions and
-//  limitations under the License.
-//
-
-//
-// nokogumbo.c defines the following:
-//
-//   class Nokogumbo
-//     def parse(utf8_string) # returns Nokogiri::HTML5::Document
-//   end
-//
-// Processing starts by calling gumbo_parse_with_options.  The resulting
-// document tree is then walked:
-//
-//  * if Nokogiri and libxml2 headers are available at compile time,
-//    (if NGLIB) then a parallel libxml2 tree is constructed, and the
-//    final document is then wrapped using Nokogiri_wrap_xml_document.
-//    This approach reduces memory and CPU requirements as Ruby objects
-//    are only built when necessary.
-//
-//  * if the necessary headers are not available at compile time, Nokogiri
-//    methods are called instead, producing the equivalent functionality.
-//
-
-#include <assert.h>
-#include <ruby.h>
-#include <ruby/version.h>
-
-#include "gumbo.h"
-
-// class constants
-static VALUE Document;
-
-// Interned symbols
-static ID internal_subset;
-static ID parent;
-
-/* Backwards compatibility to Ruby 2.1.0 */
-#if RUBY_API_VERSION_CODE < 20200
-#define ONIG_ESCAPE_UCHAR_COLLISION 1
-#include <ruby/encoding.h>
-
-static VALUE rb_utf8_str_new(const char *str, long length) {
-  return rb_enc_str_new(str, length, rb_utf8_encoding());
-}
-
-static VALUE rb_utf8_str_new_cstr(const char *str) {
-  return rb_enc_str_new_cstr(str, rb_utf8_encoding());
-}
-
-static VALUE rb_utf8_str_new_static(const char *str, long length) {
-  return rb_enc_str_new(str, length, rb_utf8_encoding());
-}
-#endif
-
-#if NGLIB
-#include <nokogiri.h>
-#include <libxml/tree.h>
-#include <libxml/HTMLtree.h>
-
-#define NIL NULL
-#else
-#define NIL Qnil
-
-// These are defined by nokogiri.h
-static VALUE cNokogiriXmlSyntaxError;
-static VALUE cNokogiriXmlElement;
-static VALUE cNokogiriXmlText;
-static VALUE cNokogiriXmlCData;
-static VALUE cNokogiriXmlComment;
-
-// Interned symbols.
-static ID new;
-static ID node_name_;
-
-// Map libxml2 types to Ruby VALUE.
-typedef VALUE xmlNodePtr;
-typedef VALUE xmlDocPtr;
-typedef VALUE xmlNsPtr;
-typedef VALUE xmlDtdPtr;
-typedef char xmlChar;
-#define BAD_CAST
-
-// Redefine libxml2 API as Ruby function calls.
-static xmlNodePtr xmlNewDocNode(xmlDocPtr doc, xmlNsPtr ns, const xmlChar *name, const xmlChar *content) {
-  assert(ns == NIL && content == NULL);
-  return rb_funcall(cNokogiriXmlElement, new, 2, rb_utf8_str_new_cstr(name), doc);
-}
-
-static xmlNodePtr xmlNewDocText(xmlDocPtr doc, const xmlChar *content) {
-  VALUE str = rb_utf8_str_new_cstr(content);
-  return rb_funcall(cNokogiriXmlText, new, 2, str, doc);
-}
-
-static xmlNodePtr xmlNewCDataBlock(xmlDocPtr doc, const xmlChar *content, int len) {
-  VALUE str = rb_utf8_str_new(content, len);
-  // CDATA.new takes arguments in the opposite order from Text.new.
-  return rb_funcall(cNokogiriXmlCData, new, 2, doc, str);
-}
-
-static xmlNodePtr xmlNewDocComment(xmlDocPtr doc, const xmlChar *content) {
-  VALUE str = rb_utf8_str_new_cstr(content);
-  return rb_funcall(cNokogiriXmlComment, new, 2, doc, str);
-}
-
-static xmlNodePtr xmlAddChild(xmlNodePtr parent, xmlNodePtr cur) {
-  ID add_child;
-  CONST_ID(add_child, "add_child");
-  return rb_funcall(parent, add_child, 1, cur);
-}
-
-static void xmlSetNs(xmlNodePtr node, xmlNsPtr ns) {
-  ID namespace_;
-  CONST_ID(namespace_, "namespace=");
-  rb_funcall(node, namespace_, 1, ns);
-}
-
-static void xmlFreeDoc(xmlDocPtr doc) { }
-
-static VALUE Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc) {
-  return doc;
-}
-
-static VALUE find_dummy_key(VALUE collection) {
-  VALUE r_dummy = Qnil;
-  char dummy[5] = "a";
-  size_t len = 1;
-  ID key_;
-  CONST_ID(key_, "key?");
-  while (len < sizeof dummy) {
-    r_dummy = rb_utf8_str_new(dummy, len);
-    if (rb_funcall(collection, key_, 1, r_dummy) == Qfalse)
-      return r_dummy;
-    for (size_t i = 0; ; ++i) {
-      if (dummy[i] == 0) {
-        dummy[i] = 'a';
-        ++len;
-        break;
-      }
-      if (dummy[i] == 'z')
-        dummy[i] = 'a';
-      else {
-        ++dummy[i];
-        break;
-      }
-    }
-  }
-  // This collection has 475254 elements?? Give up.
-  rb_raise(rb_eArgError, "Failed to find a dummy key.");
-}
-
-// This should return an xmlAttrPtr, but we don't need it and it's easier to
-// not get the result.
-static void xmlNewNsProp (
-  xmlNodePtr node,
-  xmlNsPtr ns,
-  const xmlChar *name,
-  const xmlChar *value
-) {
-  ID set_attribute;
-  CONST_ID(set_attribute, "set_attribute");
-
-  VALUE rvalue = rb_utf8_str_new_cstr(value);
-
-  if (RTEST(ns)) {
-    // This is an easy case, we have a namespace so it's enough to do
-    // node["#{ns.prefix}:#{name}"] = value
-    ID prefix;
-    CONST_ID(prefix, "prefix");
-    VALUE ns_prefix = rb_funcall(ns, prefix, 0);
-    VALUE qname = rb_sprintf("%" PRIsVALUE ":%s", ns_prefix, name);
-    rb_funcall(node, set_attribute, 2, qname, rvalue);
-    return;
-  }
-
-  size_t len = strlen(name);
-  VALUE rname = rb_utf8_str_new(name, len);
-  if (memchr(name, ':', len) == NULL) {
-    // This is the easiest case. There's no colon so we can do
-    // node[name] = value.
-    rb_funcall(node, set_attribute, 2, rname, rvalue);
-    return;
-  }
-
-  // Nokogiri::XML::Node#set_attribute calls xmlSetProp(node, name, value)
-  // which behaves roughly as
-  // if name is a QName prefix:local
-  //   if node->doc has a namespace ns corresponding to prefix
-  //     return xmlSetNsProp(node, ns, local, value)
-  // return xmlSetNsProp(node, NULL, name, value)
-  //
-  // If the prefix is "xml", then the namespace lookup will create it.
-  //
-  // By contrast, xmlNewNsProp does not do this parsing and creates an attribute
-  // with the name and value exactly as given. This is the behavior that we
-  // want.
-  //
-  // Thus, for attribute names like "xml:lang", #set_attribute will create an
-  // attribute with namespace "xml" and name "lang". This is incorrect for
-  // html elements (but correct for foreign elements).
-  //
-  // Work around this by inserting a dummy attribute and then changing the
-  // name, if needed.
-
-  // Find a dummy attribute string that doesn't already exist.
-  VALUE dummy = find_dummy_key(node);
-  // Add the dummy attribute.
-  rb_funcall(node, set_attribute, 2, dummy, rvalue);
-
-  // Remove the old attribute, if it exists.
-  ID remove_attribute;
-  CONST_ID(remove_attribute, "remove_attribute");
-  rb_funcall(node, remove_attribute, 1, rname);
-
-  // Rename the dummy
-  ID attribute;
-  CONST_ID(attribute, "attribute");
-  VALUE attr = rb_funcall(node, attribute, 1, dummy);
-  rb_funcall(attr, node_name_, 1, rname);
-}
-#endif
-
-// URI = system id
-// external id = public id
-static xmlDocPtr new_html_doc(const char *dtd_name, const char *system, const char *public)
-{
-#if NGLIB
-  // These two libxml2 functions take the public and system ids in
-  // opposite orders.
-  htmlDocPtr doc = htmlNewDocNoDtD(/* URI */ NULL, /* ExternalID */NULL);
-  assert(doc);
-  if (dtd_name)
-    xmlCreateIntSubset(doc, BAD_CAST dtd_name, BAD_CAST public, BAD_CAST system);
-  return doc;
-#else
-  // remove internal subset from newly created documents
-  VALUE doc;
-  // If system and public are both NULL, Document#new is going to set default
-  // values for them so we're going to have to remove the internal subset
-  // which seems to leak memory in Nokogiri, so leak as little as possible.
-  if (system == NULL && public == NULL) {
-    ID remove;
-    CONST_ID(remove, "remove");
-    doc = rb_funcall(Document, new, 2, /* URI */ Qnil, /* external_id */ rb_utf8_str_new_static("", 0));
-    rb_funcall(rb_funcall(doc, internal_subset, 0), remove, 0);
-    if (dtd_name) {
-      // We need to create an internal subset now.
-      ID create_internal_subset;
-      CONST_ID(create_internal_subset, "create_internal_subset");
-      rb_funcall(doc, create_internal_subset, 3, rb_utf8_str_new_cstr(dtd_name), Qnil, Qnil);
-    }
-  } else {
-    assert(dtd_name);
-    // Rather than removing and creating the internal subset as we did above,
-    // just create and then rename one.
-    VALUE r_system = system ? rb_utf8_str_new_cstr(system) : Qnil;
-    VALUE r_public = public ? rb_utf8_str_new_cstr(public) : Qnil;
-    doc = rb_funcall(Document, new, 2, r_system, r_public);
-    rb_funcall(rb_funcall(doc, internal_subset, 0), node_name_, 1, rb_utf8_str_new_cstr(dtd_name));
-  }
-  return doc;
-#endif
-}
-
-static xmlNodePtr get_parent(xmlNodePtr node) {
-#if NGLIB
-  return node->parent;
-#else
-  if (!rb_respond_to(node, parent))
-    return Qnil;
-  return rb_funcall(node, parent, 0);
-#endif
-}
-
-static GumboOutput *perform_parse(const GumboOptions *options, VALUE input) {
-  assert(RTEST(input));
-  Check_Type(input, T_STRING);
-  GumboOutput *output = gumbo_parse_with_options (
-    options,
-    RSTRING_PTR(input),
-    RSTRING_LEN(input)
-  );
-
-  const char *status_string = gumbo_status_to_string(output->status);
-  switch (output->status) {
-  case GUMBO_STATUS_OK:
-    break;
-  case GUMBO_STATUS_TOO_MANY_ATTRIBUTES:
-  case GUMBO_STATUS_TREE_TOO_DEEP:
-    gumbo_destroy_output(output);
-    rb_raise(rb_eArgError, "%s", status_string);
-  case GUMBO_STATUS_OUT_OF_MEMORY:
-    gumbo_destroy_output(output);
-    rb_raise(rb_eNoMemError, "%s", status_string);
-  }
-  return output;
-}
-
-static xmlNsPtr lookup_or_add_ns (
-  xmlDocPtr doc,
-  xmlNodePtr root,
-  const char *href,
-  const char *prefix
-) {
-#if NGLIB
-  xmlNsPtr ns = xmlSearchNs(doc, root, BAD_CAST prefix);
-  if (ns)
-    return ns;
-  return xmlNewNs(root, BAD_CAST href, BAD_CAST prefix);
-#else
-  ID add_namespace_definition;
-  CONST_ID(add_namespace_definition, "add_namespace_definition");
-  VALUE rprefix = rb_utf8_str_new_cstr(prefix);
-  VALUE rhref = rb_utf8_str_new_cstr(href);
-  return rb_funcall(root, add_namespace_definition, 2, rprefix, rhref);
-#endif
-}
-
-static void set_line(xmlNodePtr node, size_t line) {
-#if NGLIB
-  // libxml2 uses 65535 to mean look elsewhere for the line number on some
-  // nodes.
-  if (line < 65535)
-    node->line = (unsigned short)line;
-#else
-  // XXX: If Nokogiri gets a `#line=` method, we'll use that.
-#endif
-}
-
-// Construct an XML tree rooted at xml_output_node from the Gumbo tree rooted
-// at gumbo_node.
-static void build_tree (
-  xmlDocPtr doc,
-  xmlNodePtr xml_output_node,
-  const GumboNode *gumbo_node
-) {
-  xmlNodePtr xml_root = NIL;
-  xmlNodePtr xml_node = xml_output_node;
-  size_t child_index = 0;
-
-  while (true) {
-    assert(gumbo_node != NULL);
-    const GumboVector *children = gumbo_node->type == GUMBO_NODE_DOCUMENT?
-      &gumbo_node->v.document.children : &gumbo_node->v.element.children;
-    if (child_index >= children->length) {
-      // Move up the tree and to the next child.
-      if (xml_node == xml_output_node) {
-        // We've built as much of the tree as we can.
-        return;
-      }
-      child_index = gumbo_node->index_within_parent + 1;
-      gumbo_node = gumbo_node->parent;
-      xml_node = get_parent(xml_node);
-      // Children of fragments don't share the same root, so reset it and
-      // it'll be set below. In the non-fragment case, this will only happen
-      // after the html element has been finished at which point there are no
-      // further elements.
-      if (xml_node == xml_output_node)
-        xml_root = NIL;
-      continue;
-    }
-    const GumboNode *gumbo_child = children->data[child_index++];
-    xmlNodePtr xml_child;
-
-    switch (gumbo_child->type) {
-      case GUMBO_NODE_DOCUMENT:
-        abort(); // Bug in Gumbo.
-
-      case GUMBO_NODE_TEXT:
-      case GUMBO_NODE_WHITESPACE:
-        xml_child = xmlNewDocText(doc, BAD_CAST gumbo_child->v.text.text);
-        set_line(xml_child, gumbo_child->v.text.start_pos.line);
-        xmlAddChild(xml_node, xml_child);
-        break;
-
-      case GUMBO_NODE_CDATA:
-        xml_child = xmlNewCDataBlock(doc, BAD_CAST gumbo_child->v.text.text,
-                                     (int) strlen(gumbo_child->v.text.text));
-        set_line(xml_child, gumbo_child->v.text.start_pos.line);
-        xmlAddChild(xml_node, xml_child);
-        break;
-
-      case GUMBO_NODE_COMMENT:
-        xml_child = xmlNewDocComment(doc, BAD_CAST gumbo_child->v.text.text);
-        set_line(xml_child, gumbo_child->v.text.start_pos.line);
-        xmlAddChild(xml_node, xml_child);
-        break;
-
-      case GUMBO_NODE_TEMPLATE:
-        // XXX: Should create a template element and a new DocumentFragment
-      case GUMBO_NODE_ELEMENT:
-      {
-        xml_child = xmlNewDocNode(doc, NIL, BAD_CAST gumbo_child->v.element.name, NULL);
-        set_line(xml_child, gumbo_child->v.element.start_pos.line);
-        if (xml_root == NIL)
-          xml_root = xml_child;
-        xmlNsPtr ns = NIL;
-        switch (gumbo_child->v.element.tag_namespace) {
-        case GUMBO_NAMESPACE_HTML:
-          break;
-        case GUMBO_NAMESPACE_SVG:
-          ns = lookup_or_add_ns(doc, xml_root, "http://www.w3.org/2000/svg", "svg");
-          break;
-        case GUMBO_NAMESPACE_MATHML:
-          ns = lookup_or_add_ns(doc, xml_root, "http://www.w3.org/1998/Math/MathML", "math");
-          break;
-        }
-        if (ns != NIL)
-          xmlSetNs(xml_child, ns);
-        xmlAddChild(xml_node, xml_child);
-
-        // Add the attributes.
-        const GumboVector* attrs = &gumbo_child->v.element.attributes;
-        for (size_t i=0; i < attrs->length; i++) {
-          const GumboAttribute *attr = attrs->data[i];
-
-          switch (attr->attr_namespace) {
-            case GUMBO_ATTR_NAMESPACE_XLINK:
-              ns = lookup_or_add_ns(doc, xml_root, "http://www.w3.org/1999/xlink", "xlink");
-              break;
-
-            case GUMBO_ATTR_NAMESPACE_XML:
-              ns = lookup_or_add_ns(doc, xml_root, "http://www.w3.org/XML/1998/namespace", "xml");
-              break;
-
-            case GUMBO_ATTR_NAMESPACE_XMLNS:
-              ns = lookup_or_add_ns(doc, xml_root, "http://www.w3.org/2000/xmlns/", "xmlns");
-              break;
-
-            default:
-              ns = NIL;
-          }
-          xmlNewNsProp(xml_child, ns, BAD_CAST attr->name, BAD_CAST attr->value);
-        }
-
-        // Add children for this element.
-        child_index = 0;
-        gumbo_node = gumbo_child;
-        xml_node = xml_child;
-      }
-    }
-  }
-}
-
-static void add_errors(const GumboOutput *output, VALUE rdoc, VALUE input, VALUE url) {
-  const char *input_str = RSTRING_PTR(input);
-  size_t input_len = RSTRING_LEN(input);
-
-  // Add parse errors to rdoc.
-  if (output->errors.length) {
-    const GumboVector *errors = &output->errors;
-    VALUE rerrors = rb_ary_new2(errors->length);
-
-    for (size_t i=0; i < errors->length; i++) {
-      GumboError *err = errors->data[i];
-      GumboSourcePosition position = gumbo_error_position(err);
-      char *msg;
-      size_t size = gumbo_caret_diagnostic_to_string(err, input_str, input_len, &msg);
-      VALUE err_str = rb_utf8_str_new(msg, size);
-      free(msg);
-      VALUE syntax_error = rb_class_new_instance(1, &err_str, cNokogiriXmlSyntaxError);
-      const char *error_code = gumbo_error_code(err);
-      VALUE str1 = error_code? rb_utf8_str_new_static(error_code, strlen(error_code)) : Qnil;
-      rb_iv_set(syntax_error, "@domain", INT2NUM(1)); // XML_FROM_PARSER
-      rb_iv_set(syntax_error, "@code", INT2NUM(1));   // XML_ERR_INTERNAL_ERROR
-      rb_iv_set(syntax_error, "@level", INT2NUM(2));  // XML_ERR_ERROR
-      rb_iv_set(syntax_error, "@file", url);
-      rb_iv_set(syntax_error, "@line", INT2NUM(position.line));
-      rb_iv_set(syntax_error, "@str1", str1);
-      rb_iv_set(syntax_error, "@str2", Qnil);
-      rb_iv_set(syntax_error, "@str3", Qnil);
-      rb_iv_set(syntax_error, "@int1", INT2NUM(0));
-      rb_iv_set(syntax_error, "@column", INT2NUM(position.column));
-      rb_ary_push(rerrors, syntax_error);
-    }
-    rb_iv_set(rdoc, "@errors", rerrors);
-  }
-}
-
-typedef struct {
-  GumboOutput *output;
-  VALUE input;
-  VALUE url_or_frag;
-  xmlDocPtr doc;
-} ParseArgs;
-
-static void parse_args_mark(void *parse_args) {
-  ParseArgs *args = parse_args;
-  rb_gc_mark_maybe(args->input);
-  rb_gc_mark_maybe(args->url_or_frag);
-}
-
-// Wrap a ParseArgs pointer. The underlying ParseArgs must outlive the
-// wrapper.
-static VALUE wrap_parse_args(ParseArgs *args) {
-  return Data_Wrap_Struct(rb_cData, parse_args_mark, RUBY_NEVER_FREE, args);
-}
-
-// Returnsd the underlying ParseArgs wrapped by wrap_parse_args.
-static ParseArgs *unwrap_parse_args(VALUE obj) {
-  ParseArgs *args;
-  Data_Get_Struct(obj, ParseArgs, args);
-  return args;
-}
-
-static VALUE parse_cleanup(VALUE parse_args) {
-  ParseArgs *args = unwrap_parse_args(parse_args);
-  gumbo_destroy_output(args->output);
-  // Make sure garbage collection doesn't mark the objects as being live based
-  // on references from the ParseArgs. This may be unnecessary.
-  args->input = Qnil;
-  args->url_or_frag = Qnil;
-  if (args->doc != NIL)
-    xmlFreeDoc(args->doc);
-  return Qnil;
-}
-
-static VALUE parse_continue(VALUE parse_args);
-
-// Parse a string using gumbo_parse into a Nokogiri document
-static VALUE parse(VALUE self, VALUE input, VALUE url, VALUE max_attributes, VALUE max_errors, VALUE max_depth) {
-  GumboOptions options = kGumboDefaultOptions;
-  options.max_attributes = NUM2INT(max_attributes);
-  options.max_errors = NUM2INT(max_errors);
-  options.max_tree_depth = NUM2INT(max_depth);
-
-  GumboOutput *output = perform_parse(&options, input);
-  ParseArgs args = {
-    .output = output,
-    .input = input,
-    .url_or_frag = url,
-    .doc = NIL,
-  };
-  VALUE parse_args = wrap_parse_args(&args);
-
-  return rb_ensure(parse_continue, parse_args, parse_cleanup, parse_args);
-}
-
-static VALUE parse_continue(VALUE parse_args) {
-  ParseArgs *args = unwrap_parse_args(parse_args);
-  GumboOutput *output = args->output;
-  xmlDocPtr doc;
-  if (output->document->v.document.has_doctype) {
-    const char *name   = output->document->v.document.name;
-    const char *public = output->document->v.document.public_identifier;
-    const char *system = output->document->v.document.system_identifier;
-    public = public[0] ? public : NULL;
-    system = system[0] ? system : NULL;
-    doc = new_html_doc(name, system, public);
-  } else {
-    doc = new_html_doc(NULL, NULL, NULL);
-  }
-  args->doc = doc; // Make sure doc gets cleaned up if an error is thrown.
-  build_tree(doc, (xmlNodePtr)doc, output->document);
-  VALUE rdoc = Nokogiri_wrap_xml_document(Document, doc);
-  args->doc = NIL; // The Ruby runtime now owns doc so don't delete it.
-  add_errors(output, rdoc, args->input, args->url_or_frag);
-  return rdoc;
-}
-
-static int lookup_namespace(VALUE node, bool require_known_ns) {
-  ID namespace, href;
-  CONST_ID(namespace, "namespace");
-  CONST_ID(href, "href");
-  VALUE ns = rb_funcall(node, namespace, 0);
-
-  if (NIL_P(ns))
-    return GUMBO_NAMESPACE_HTML;
-  ns = rb_funcall(ns, href, 0);
-  assert(RTEST(ns));
-  Check_Type(ns, T_STRING);
-
-  const char *href_ptr = RSTRING_PTR(ns);
-  size_t href_len = RSTRING_LEN(ns);
-#define NAMESPACE_P(uri) (href_len == sizeof uri - 1 && !memcmp(href_ptr, uri, href_len))
-  if (NAMESPACE_P("http://www.w3.org/1999/xhtml"))
-    return GUMBO_NAMESPACE_HTML;
-  if (NAMESPACE_P("http://www.w3.org/1998/Math/MathML"))
-    return GUMBO_NAMESPACE_MATHML;
-  if (NAMESPACE_P("http://www.w3.org/2000/svg"))
-    return GUMBO_NAMESPACE_SVG;
-#undef NAMESPACE_P
-  if (require_known_ns)
-    rb_raise(rb_eArgError, "Unexpected namespace URI \"%*s\"", (int)href_len, href_ptr);
-  return -1;
-}
-
-static xmlNodePtr extract_xml_node(VALUE node) {
-#if NGLIB
-  xmlNodePtr xml_node;
-  Data_Get_Struct(node, xmlNode, xml_node);
-  return xml_node;
-#else
-  return node;
-#endif
-}
-
-static VALUE fragment_continue(VALUE parse_args);
-
-static VALUE fragment (
-  VALUE self,
-  VALUE doc_fragment,
-  VALUE tags,
-  VALUE ctx,
-  VALUE max_attributes,
-  VALUE max_errors,
-  VALUE max_depth
-) {
-  ID name = rb_intern_const("name");
-  const char *ctx_tag;
-  GumboNamespaceEnum ctx_ns;
-  GumboQuirksModeEnum quirks_mode;
-  bool form = false;
-  const char *encoding = NULL;
-
-  if (NIL_P(ctx)) {
-    ctx_tag = "body";
-    ctx_ns = GUMBO_NAMESPACE_HTML;
-  } else if (TYPE(ctx) == T_STRING) {
-    ctx_tag = StringValueCStr(ctx);
-    ctx_ns = GUMBO_NAMESPACE_HTML;
-    size_t len = RSTRING_LEN(ctx);
-    const char *colon = memchr(ctx_tag, ':', len);
-    if (colon) {
-      switch (colon - ctx_tag) {
-      case 3:
-        if (st_strncasecmp(ctx_tag, "svg", 3) != 0)
-          goto error;
-        ctx_ns = GUMBO_NAMESPACE_SVG;
-        break;
-      case 4:
-        if (st_strncasecmp(ctx_tag, "html", 4) == 0)
-          ctx_ns = GUMBO_NAMESPACE_HTML;
-        else if (st_strncasecmp(ctx_tag, "math", 4) == 0)
-          ctx_ns = GUMBO_NAMESPACE_MATHML;
-        else
-          goto error;
-        break;
-      default:
-      error:
-        rb_raise(rb_eArgError, "Invalid context namespace '%*s'", (int)(colon - ctx_tag), ctx_tag);
-      }
-      ctx_tag = colon+1;
-    } else {
-      // For convenience, put 'svg' and 'math' in their namespaces.
-      if (len == 3 && st_strncasecmp(ctx_tag, "svg", 3) == 0)
-        ctx_ns = GUMBO_NAMESPACE_SVG;
-      else if (len == 4 && st_strncasecmp(ctx_tag, "math", 4) == 0)
-        ctx_ns = GUMBO_NAMESPACE_MATHML;
-    }
-
-    // Check if it's a form.
-    form = ctx_ns == GUMBO_NAMESPACE_HTML && st_strcasecmp(ctx_tag, "form") == 0;
-  } else {
-    ID element_ = rb_intern_const("element?");
-
-    // Context fragment name.
-    VALUE tag_name = rb_funcall(ctx, name, 0);
-    assert(RTEST(tag_name));
-    Check_Type(tag_name, T_STRING);
-    ctx_tag = StringValueCStr(tag_name);
-
-    // Context fragment namespace.
-    ctx_ns = lookup_namespace(ctx, true);
-
-    // Check for a form ancestor, including self.
-    for (VALUE node = ctx;
-         !NIL_P(node);
-         node = rb_respond_to(node, parent) ? rb_funcall(node, parent, 0) : Qnil) {
-      if (!RTEST(rb_funcall(node, element_, 0)))
-        continue;
-      VALUE element_name = rb_funcall(node, name, 0);
-      if (RSTRING_LEN(element_name) == 4
-          && !st_strcasecmp(RSTRING_PTR(element_name), "form")
-          && lookup_namespace(node, false) == GUMBO_NAMESPACE_HTML) {
-        form = true;
-        break;
-      }
-    }
-
-    // Encoding.
-    if (RSTRING_LEN(tag_name) == 14
-        && !st_strcasecmp(ctx_tag, "annotation-xml")) {
-      VALUE enc = rb_funcall(ctx, rb_intern_const("[]"),
-                             rb_utf8_str_new_static("encoding", 8));
-      if (RTEST(enc)) {
-        Check_Type(enc, T_STRING);
-        encoding = StringValueCStr(enc);
-      }
-    }
-  }
-
-  // Quirks mode.
-  VALUE doc = rb_funcall(doc_fragment, rb_intern_const("document"), 0);
-  VALUE dtd = rb_funcall(doc, internal_subset, 0);
-  if (NIL_P(dtd)) {
-    quirks_mode = GUMBO_DOCTYPE_NO_QUIRKS;
-  } else {
-    VALUE dtd_name = rb_funcall(dtd, name, 0);
-    VALUE pubid = rb_funcall(dtd, rb_intern_const("external_id"), 0);
-    VALUE sysid = rb_funcall(dtd, rb_intern_const("system_id"), 0);
-    quirks_mode = gumbo_compute_quirks_mode (
-      NIL_P(dtd_name)? NULL:StringValueCStr(dtd_name),
-      NIL_P(pubid)? NULL:StringValueCStr(pubid),
-      NIL_P(sysid)? NULL:StringValueCStr(sysid)
-    );
-  }
-
-  // Perform a fragment parse.
-  int depth = NUM2INT(max_depth);
-  GumboOptions options = kGumboDefaultOptions;
-  options.max_attributes = NUM2INT(max_attributes);
-  options.max_errors = NUM2INT(max_errors);
-  // Add one to account for the HTML element.
-  options.max_tree_depth = depth < 0 ? -1 : (depth + 1);
-  options.fragment_context = ctx_tag;
-  options.fragment_namespace = ctx_ns;
-  options.fragment_encoding = encoding;
-  options.quirks_mode = quirks_mode;
-  options.fragment_context_has_form_ancestor = form;
-
-  GumboOutput *output = perform_parse(&options, tags);
-  ParseArgs args = {
-    .output = output,
-    .input = tags,
-    .url_or_frag = doc_fragment,
-    .doc = (xmlDocPtr)extract_xml_node(doc),
-  };
-  VALUE parse_args = wrap_parse_args(&args);
-  rb_ensure(fragment_continue, parse_args, parse_cleanup, parse_args);
-  return Qnil;
-}
-
-static VALUE fragment_continue(VALUE parse_args) {
-  ParseArgs *args = unwrap_parse_args(parse_args);
-  GumboOutput *output = args->output;
-  VALUE doc_fragment = args->url_or_frag;
-  xmlDocPtr xml_doc = args->doc;
-
-  args->doc = NIL; // The Ruby runtime owns doc so make sure we don't delete it.
-  xmlNodePtr xml_frag = extract_xml_node(doc_fragment);
-  build_tree(xml_doc, xml_frag, output->root);
-  add_errors(output, doc_fragment, args->input, rb_utf8_str_new_static("#fragment", 9));
-  return Qnil;
-}
-
-// Initialize the Nokogumbo class and fetch constants we will use later.
-void Init_nokogumbo() {
-  rb_funcall(rb_mKernel, rb_intern_const("gem"), 1, rb_utf8_str_new_static("nokogiri", 8));
-  rb_require("nokogiri");
-
-  VALUE line_supported = Qtrue;
-
-#if !NGLIB
-  // Class constants.
-  VALUE mNokogiri = rb_const_get(rb_cObject, rb_intern_const("Nokogiri"));
-  VALUE mNokogiriXml = rb_const_get(mNokogiri, rb_intern_const("XML"));
-  cNokogiriXmlSyntaxError = rb_const_get(mNokogiriXml, rb_intern_const("SyntaxError"));
-  rb_gc_register_mark_object(cNokogiriXmlSyntaxError);
-  cNokogiriXmlElement = rb_const_get(mNokogiriXml, rb_intern_const("Element"));
-  rb_gc_register_mark_object(cNokogiriXmlElement);
-  cNokogiriXmlText = rb_const_get(mNokogiriXml, rb_intern_const("Text"));
-  rb_gc_register_mark_object(cNokogiriXmlText);
-  cNokogiriXmlCData = rb_const_get(mNokogiriXml, rb_intern_const("CDATA"));
-  rb_gc_register_mark_object(cNokogiriXmlCData);
-  cNokogiriXmlComment = rb_const_get(mNokogiriXml, rb_intern_const("Comment"));
-  rb_gc_register_mark_object(cNokogiriXmlComment);
-
-  // Interned symbols.
-  new = rb_intern_const("new");
-  node_name_ = rb_intern_const("node_name=");
-
-  // #line is not supported (returns 0)
-  line_supported = Qfalse;
-#endif
-
-  // Class constants.
-  VALUE HTML5 = rb_const_get(mNokogiri, rb_intern_const("HTML5"));
-  Document = rb_const_get(HTML5, rb_intern_const("Document"));
-  rb_gc_register_mark_object(Document);
-
-  // Interned symbols.
-  internal_subset = rb_intern_const("internal_subset");
-  parent = rb_intern_const("parent");
-
-  // Define Nokogumbo module with parse and fragment methods.
-  VALUE Gumbo = rb_define_module("Nokogumbo");
-  rb_define_singleton_method(Gumbo, "parse", parse, 5);
-  rb_define_singleton_method(Gumbo, "fragment", fragment, 6);
-
-  // Add private constant for testing.
-  rb_define_const(Gumbo, "LINE_SUPPORTED", line_supported);
-  rb_funcall(Gumbo, rb_intern_const("private_constant"), 1,
-             rb_utf8_str_new_cstr("LINE_SUPPORTED"));
-}
-
-// vim: set shiftwidth=2 softtabstop=2 tabstop=8 expandtab:
diff --git a/nokogumbo-import/lib/nokogumbo.rb b/nokogumbo-import/lib/nokogumbo.rb
deleted file mode 100644
index ca797294e0..0000000000
--- a/nokogumbo-import/lib/nokogumbo.rb
+++ /dev/null
@@ -1,43 +0,0 @@
-#
-#  Copyright 2013-2021 Sam Ruby, Stephen Checkoway
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-#
-
-require 'nokogiri'
-
-if ((defined?(Nokogiri::HTML5) && Nokogiri::HTML5.respond_to?(:parse)) &&
-    (defined?(Nokogiri::Gumbo) && Nokogiri::Gumbo.respond_to?(:parse)) &&
-    !(ENV.key?("NOKOGUMBO_IGNORE_NOKOGIRI_HTML5") && ENV["NOKOGUMBO_IGNORE_NOKOGIRI_HTML5"] != "false"))
-
-  warn "NOTE: nokogumbo: Using Nokogiri::HTML5 provided by Nokogiri. See https://github.com/sparklemotion/nokogiri/issues/2205 for more information."
-
-  ::Nokogumbo = ::Nokogiri::Gumbo
-else
-  require 'nokogumbo/html5'
-  require 'nokogumbo/nokogumbo'
-
-  module Nokogumbo
-    # The default maximum number of attributes per element.
-    DEFAULT_MAX_ATTRIBUTES = 400
-
-    # The default maximum number of errors for parsing a document or a fragment.
-    DEFAULT_MAX_ERRORS = 0
-
-    # The default maximum depth of the DOM tree produced by parsing a document
-    # or fragment.
-    DEFAULT_MAX_TREE_DEPTH = 400
-  end
-end
-
-require 'nokogumbo/version'
diff --git a/nokogumbo-import/lib/nokogumbo/version.rb b/nokogumbo-import/lib/nokogumbo/version.rb
deleted file mode 100644
index 84da549a1d..0000000000
--- a/nokogumbo-import/lib/nokogumbo/version.rb
+++ /dev/null
@@ -1,19 +0,0 @@
-#
-#  Copyright 2013-2021 Sam Ruby, Stephen Checkoway
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-#
-
-module Nokogumbo
-  VERSION = "2.0.5"
-end
diff --git a/nokogumbo-import/nokogumbo.gemspec b/nokogumbo-import/nokogumbo.gemspec
deleted file mode 100644
index 0529fa2932..0000000000
--- a/nokogumbo-import/nokogumbo.gemspec
+++ /dev/null
@@ -1,32 +0,0 @@
-require_relative 'lib/nokogumbo/version'
-
-Gem::Specification.new do |s|
-  s.name = 'nokogumbo'
-  s.version = Nokogumbo::VERSION
-
-  s.authors = ['Sam Ruby', 'Stephen Checkoway']
-  s.email = ['rubys@intertwingly.net', 's@pahtak.org']
-
-  s.license = 'Apache-2.0'
-  s.homepage = 'https://github.com/rubys/nokogumbo/#readme'
-  s.summary = 'Nokogiri interface to the Gumbo HTML5 parser'
-  s.description = 'Nokogumbo allows a Ruby program to invoke the Gumbo ' \
-    'HTML5 parser and access the result as a Nokogiri parsed document.'
-
-  s.metadata = {
-    'bug_tracker_uri' => 'https://github.com/rubys/nokogumbo/issues',
-    'changelog_uri'   => 'https://github.com/rubys/nokogumbo/blob/master/CHANGELOG.md',
-    'homepage_uri'    => s.homepage,
-    'source_code_uri' => 'https://github.com/rubys/nokogumbo'
-  }
-
-  s.extensions = %w[ ext/nokogumbo/extconf.rb ]
-
-  s.files = %w[ LICENSE.txt README.md ] +
-    Dir['lib/**/*.rb'] +
-    Dir['ext/nokogumbo/*.{rb,c}'] +
-    Dir['gumbo-parser/src/*.[hc]']
-
-  s.required_ruby_version = ">= 2.1"
-  s.add_runtime_dependency 'nokogiri', '~> 1.8', '>= 1.8.4'
-end
diff --git a/nokogumbo-import/test/test_encoding.rb b/nokogumbo-import/test/test_encoding.rb
deleted file mode 100644
index 7a94f1564d..0000000000
--- a/nokogumbo-import/test/test_encoding.rb
+++ /dev/null
@@ -1,208 +0,0 @@
-require 'nokogumbo'
-require 'minitest/autorun'
-
-class TestNokogumbo < Minitest::Test
-  if ''.respond_to? 'encoding'
-    def test_macroman_encoding
-      mac="<span>\xCA</span>".force_encoding('macroman')
-      doc = Nokogiri::HTML5(mac)
-      assert_equal "<span> </span>", doc.at("span").to_xml
-    end
-
-    def test_iso8859_encoding
-      iso8859="<span>Se\xF1or</span>".force_encoding(Encoding::ASCII_8BIT)
-      doc = Nokogiri::HTML5(iso8859)
-      assert_equal '<span>Señor</span>', doc.at('span').to_xml
-    end
-
-    def test_charset_encoding
-      utf8="<meta charset='utf-8'><span>Se\xC3\xB1or</span>".
-        force_encoding(Encoding::ASCII_8BIT)
-      doc = Nokogiri::HTML5(utf8)
-      assert_equal '<span>Señor</span>', doc.at('span').to_xml
-    end
-
-    def test_bogus_encoding
-      bogus="<meta charset='bogus'><span>Se\xF1or</span>".
-        force_encoding(Encoding::ASCII_8BIT)
-      doc = Nokogiri::HTML5(bogus)
-      assert_equal '<span>Señor</span>', doc.at('span').to_xml
-    end
-
-    def test_utf8_bom
-      utf8 = "\uFEFF<!DOCTYPE html><html></html>".encode('UTF-8')
-      doc = Nokogiri::HTML5(utf8, max_errors: 10)
-      assert_equal [], doc.errors
-    end
-
-    def test_utf16le_bom
-      utf16le = "\uFEFF<!DOCTYPE html><html></html>".encode('UTF-16LE')
-      doc = Nokogiri::HTML5(utf16le, max_errors: 10)
-      assert_equal [], doc.errors
-    end
-
-    def test_utf16be_bom
-      utf16be = "\uFEFF<!DOCTYPE html><html></html>".encode('UTF-16BE')
-      doc = Nokogiri::HTML5(utf16be, max_errors: 10)
-      assert_equal [], doc.errors
-    end
-
-    def test_utf8_bom_ascii
-      utf8 = "\uFEFF<!DOCTYPE html><html></html>".encode('UTF-8')
-      utf8.force_encoding(Encoding::ASCII_8BIT)
-      doc = Nokogiri::HTML5(utf8, max_errors: 10)
-      doc.errors.each { |err| puts(err) }
-      assert_equal [], doc.errors
-    end
-
-    def test_utf16le_bom_ascii
-      utf16le = "\uFEFF<!DOCTYPE html><html></html>".encode('UTF-16LE')
-      utf16le.force_encoding(Encoding::ASCII_8BIT)
-      doc = Nokogiri::HTML5(utf16le, max_errors: 10)
-      assert_equal [], doc.errors
-      doc.errors.each { |err| puts(err) }
-    end
-
-    def test_utf16be_bom_ascii
-      utf16be = "\uFEFF<!DOCTYPE html><html></html>".encode('UTF-16BE')
-      utf16be.force_encoding(Encoding::ASCII_8BIT)
-      doc = Nokogiri::HTML5(utf16be, max_errors: 10)
-      assert_equal [], doc.errors
-      doc.errors.each { |err| puts(err) }
-    end
-
-    def test_tag_after_utf8_bom
-      utf8 = "\uFEFF<b></b>".encode('UTF-8')
-      doc = Nokogiri::HTML5.fragment(utf8, max_errors: 10)
-      assert_equal [], doc.errors
-    end
-  end
-
-  # https://github.com/rubys/nokogumbo/issues/68
-  def test_charset_sniff_to_html
-    html = <<-EOF.gsub(/^      /, '')
-      <!DOCTYPE html>
-      <html>
-        <head>
-          <meta http-equiv="Content-Type" content="text/html; charset=utf-8; width=device-width">
-        </head>
-        <body>
-          Hello!
-        </body>
-      </html>
-    EOF
-    doc = Nokogiri::HTML5(html, max_errors: 10)
-    assert_equal 0, doc.errors.length
-    refute_equal '', doc.to_html
-  end
-
-  # https://encoding.spec.whatwg.org/#names-and-labels
-  # I chose these by looking at the Wikipedia page for each encoding, picked
-  # one of the languages it was supposed to encode, and then Googled for a
-  # proverb in the language. Apologies if these are ill-chosen or nonsensical.
-  # I'm happy to change them. I'm just pasting them in here so I'm pretty sure
-  # the right-to-left languages are backward. Corrections welcome.
-  ENCODINGS = [
-    ['UTF-8',          "Let's concatentate all of these for UTF-8"], # English
-    ['IBM866',         'А дело бывало -- и коза волка съедала'], # Russian
-    ['ISO-8859-2',     'Co můžeš udělat dnes, neodkládej na zítřek.'], # Czech
-    ['ISO-8859-3',     'Yukarda mavi gök, asağıda yağız yer yaratıldıkta'], # Turkish
-    ['ISO-8859-4',     'Ceļš uz elli ir bruģēts ar labiem nodomiem.'], # Latvian
-    ['ISO-8859-5',     'Каде има сила, нема правдина.'], # Macedonian
-    ['ISO-8859-6',     'أباد الله خضراءهم ابذل لصديقك دمك ومالك'], # Arabic
-    ['ISO-8859-7',     'Η καλύτερη άμυνα είναι η επίθεση.'], # Greek
-    ['ISO-8859-8',     'אין הנחתום מעיד על עיסתו'], # Hebrew
-    ['ISO-8859-8-I',   'אל תסתכל בקנקן, אלא במה שבתוכו'], # Hebrew
-    ['ISO-8859-10',    'Alla känner apan, men apan känner ingen.'], # Swedish
-    ['ISO-8859-13',    'Lašas po lašo ir akmenį pratašo.'], # Lithuanian
-    ['ISO-8859-14',    "ha bhòrd bòrd gun aran ach 's bòrd aran leis fhèin."], # Scottish Gaelic
-    ['ISO-8859-15',    'This is essentially ISO 8859-1 but with € Š š Ž ž Œ œ Ÿ'], # English
-    ['ISO-8859-16',    'Kiedy wszedłeś między wrony, musisz krakać jak i one.'], # Polish
-    ['KOI8-R',         'А дело бывало -- и коза волка съедала'], # Russian
-    ['KOI8-U',         'Яблуко від яблуньки не далеко. Ґ, Є, І, Ї'], # Ukrainian
-    ['macroman',       'Some good old Mac Roman œ∑´®†¥¨ˆøπåßƒ©'], # English
-    ['windows-874',    'กระต่ายหมายจันทร์'], # Thai
-    ['windows-1250',   'Addig nyújtózkodj, amíg a takaród ér.'], # Hungarian
-    ['windows-1251',   'Бързата работа - срам за майстора.'], # Bulgarian
-    ['windows-1252',   'Basically ISO 8859-1 with ‘differences’™ •'], # English
-    ['windows-1253',   'Και οι τοίχοι έχουν αυτιά.'], # Greek
-    ['windows-1254',   'Baban nasılsa oğlu da öyledir.'], # Turkish
-    ['windows-1255',   'אל תקנה חתול בשק; ₪'], # Hebrew
-    ['windows-1256',   'أبطأ من سلحفاة'], # Arabic
-    ['windows-1257',   'Hommikune töö kuld, õhtune muld.'], # Estonian
-    ['windows-1258',   'Ăn theo thuở, ở theo thời.'], # Vietnamese
-    ['macCyrillic', 'А дело бывало -- и коза волка съедала'], # Russian
-    ['GBK',            '不闻不若闻之，闻之不若见之，见之不若知之，知之不若行之；学至于行之而止矣'], # Simplified Chinese
-    ['gb18030',        '不聞不若聞之，聞之不若見之，見之不若知之，知之不若行之；學至於行之而止矣'], # Traditional Chinese
-    ['Big5',           '有其父必有其子'], # Traditional Chinese
-    ['EUC-JP',         '猿も木から落ちる'], # Japanese
-    ['ISO-2022-JP',    '井の中の蛙大海を知らず'], # Japanese
-    ['Shift_JIS',      '鳥なき里の蝙蝠'], # Japanese
-    ['EUC-KR',         '아는 길도 물어가라'], # Korean
-    ['replacement',    '콩 심은데 콩나고, 팥 심은데 팥난다'], # Korean
-    ['UTF-16BE',       'Everything had better be representable!'], # English
-    ['UTF-16LE',       'Same as with UTF-16BE'], # English
-    ['US-ASCII',       'Surprisingly not one of the required encodings'] # English
-  ].freeze
-
-  def encodings_html
-    @encodings_html ||=
-      "<!DOCTYPE html><html><head></head><body>" +
-      ENCODINGS.map { |enc| %(<span id="#{enc[0]}">#{enc[1]}</span>) }.join +
-      '</body></html>'
-  end
-
-  def encodings_doc
-    @encodings_doc ||= Nokogiri::HTML5(encodings_html)
-  end
-
-  def round_trip_through(str, enc)
-    begin
-      encoding = Encoding.find(enc)
-    rescue ArgumentError
-      skip "#{enc} not supported"
-    end
-    begin
-      encoded = str.encode(encoding)
-    rescue Encoding::ConverterNotFoundError
-      skip "Converting UTF-8 to #{enc} not supported"
-    end
-    begin
-      decoded = encoded.encode('UTF-8')
-    rescue Encoding::ConverterNotFoundError
-      skip "Converting #{enc} to UTF-8 not supported"
-    end
-    assert_equal str, decoded, "'#{str}' did not round trip through #{enc[0]}"
-    encoded
-  end
-
-  ENCODINGS.each do |enc|
-    define_method("test_parse_encoded_#{enc[0]}".to_sym) do
-      html = "<!DOCTYPE html><span>#{enc[1]}</span>"
-      encoded_html = round_trip_through(html, enc[0])
-      doc = Nokogiri::HTML5(encoded_html, encoding: enc[0])
-      span = doc.at('/html/body/span')
-      refute_nil span
-      assert_equal enc[1], span.content
-    end
-
-    define_method("test_inner_html_encoded_#{enc[0]}".to_sym) do
-      encoded = round_trip_through(enc[1], enc[0])
-      span = encodings_doc.at(%(/html/body/span[@id="#{enc[0]}"]))
-      refute_nil span
-      assert_equal encoded, span.inner_html(encoding: enc[0])
-    end
-
-    define_method("test_roundtrip_through_#{enc[0]}".to_sym) do
-      # https://bugs.ruby-lang.org/issues/15033
-      # Ruby has a bug with the `:fallback` parameter passed to `#encode` when
-      # multiple conversions have to happen. I'm not sure it's worth working
-      # around. It impacts this test though.
-      skip 'https://bugs.ruby-lang.org/issues/15033' if enc[0] == 'ISO-2022-JP'
-      round_trip_through(enc[1], enc[0])
-      encoded = encodings_doc.serialize(encoding: enc[0])
-      doc = Nokogiri::HTML5(encoded, encoding: enc[0])
-      assert_equal encodings_html, doc.serialize
-    end
-  end
-end
diff --git a/nokogumbo-import/test/test_tree-construction.rb b/nokogumbo-import/test/test_tree-construction.rb
deleted file mode 100644
index 5060d2137c..0000000000
--- a/nokogumbo-import/test/test_tree-construction.rb
+++ /dev/null
@@ -1,277 +0,0 @@
-# encoding: utf-8
-require 'nokogumbo'
-require 'minitest/autorun'
-
-def parse_test(test_data)
-  test = { script: :both }
-  index = /(?:^#errors\n|\n#errors\n)/ =~ test_data
-  abort "Expected #errors in\n#{test_data}" if index.nil?
-  skip_amount = $~[0].length
-  # Omit the final new line
-  test[:data] = test_data[0...index]
-
-  # Process the rest line by line
-  lines = test_data[index+skip_amount..-1].split("\n")
-  index = lines.find_index do |line|
-    line == '#document-fragment' ||
-      line == '#document' ||
-      line == '#script-off' ||
-      line == '#script-on' ||
-      line == '#new-errors'
-  end
-  abort 'Expected #document' if index.nil?
-  test[:errors] = lines[0...index]
-  test[:new_errors] = []
-  if lines[index] == '#new-errors'
-    index += 1
-    while !%w[#document-fragment #document #script-off #script-on].include?(lines[index])
-      test[:new_errors] << lines[index]
-      index += 1
-    end
-  end
-
-  if lines[index] == '#document-fragment'
-    test[:context] = lines[index+1].chomp.split(' ', 2)
-    index += 2
-  end
-  abort "failed to find fragment: #{index}: #{lines[index]}" if test_data.include?("#document-fragment") && test[:context].nil?
-
-  if lines[index] =~ /#script-(on|off)/
-    test[:script] = $~[1].to_sym
-    index += 1
-  end
-
-  abort "Expected #document, got #{lines[index]}" unless lines[index] == '#document'
-  index += 1
-
-  document = {
-    type: test[:context] ? :fragment : :document,
-    children: []
-  }
-  open_nodes = [document]
-  while index < lines.length
-    abort "Expected '| ' but got #{lines[index]}" unless /^\| ( *)([^ ].*$)/ =~ lines[index]
-    depth = $~[1].length
-    if depth.odd?
-      abort "Invalid nesting depth"
-    else
-      depth = depth / 2
-    end
-    abort "Too deep" if depth >= open_nodes.length
-
-    node = {}
-    node_text = $~[2]
-    if node_text[0] == '"'
-      if node_text == '"' || node_text[-1] != '"'
-        loop do
-          index += 1
-          node_text << "\n" + lines[index]
-          break if node_text[-1] == '"'
-        end
-      end
-      node[:type] = :text
-      node[:contents] = node_text[1..-2]
-    elsif /^<!DOCTYPE ([^ >]*)(?: "([^"]*)" "(.*)")?>$/ =~ node_text
-      node[:type] = :doctype
-      node[:name] = $~[1]
-      node[:public_id] = $~[2].nil? || $~[2].empty? ? nil : $~[2]
-      node[:system_id] = $~[3].nil? || $~[3].empty? ? nil : $~[3]
-    elsif /^<!-- (.*) -->$/ =~ node_text
-      node[:type] = :comment
-      node[:contents] = $~[1]
-    elsif /^<(svg |math )?(.+)>$/ =~ node_text
-      node[:type] = :element
-      node[:ns] = $~[1].nil? ? nil : $~[1].rstrip
-      node[:tag] = $~[2]
-      node[:attributes] = []
-      node[:children] = []
-    elsif /^([^ ]+ )?([^=]+)="(.*)"$/ =~ node_text
-      node[:type] = :attribute
-      node[:ns] = $~[1].nil? ? nil : $~[1].rstrip
-      node[:name] = $~[2]
-      node[:value] = $~[3]
-    elsif node_text == 'content'
-      node[:type] = :template
-    else
-      abort "Unexpected node_text: #{node_text}"
-    end
-
-    if node[:type] == :attribute
-      abort "depth #{depth} != #{open_nodes.length}" unless depth == open_nodes.length - 1
-      abort "type :#{open_nodes[-1][:type]} != :element" unless open_nodes[-1][:type] == :element
-      abort "element has children" unless open_nodes[-1][:children].empty?
-      open_nodes[-1][:attributes] << node
-    elsif node[:type] == :template
-      abort "depth #{depth} != #{open_nodes.length}" unless depth == open_nodes.length - 1
-      abort "type :#{open_nodes[-1][:type]} != :element" unless open_nodes[-1][:type] == :element
-      abort "tag :#{open_nodes[-1][:tag]} != template" unless open_nodes[-1][:tag] == 'template'
-      abort "template has children before the 'content'" unless open_nodes[-1][:children].empty?
-      # Hack. We want the children of this template node to be reparented as
-      # children of the template element.
-      # XXX: Template contents are _not_ supposed to be children of the
-      # template, but we currently mishandle this.
-      open_nodes << open_nodes[-1]
-    else
-      open_nodes[depth][:children] << node
-      open_nodes[depth+1..-1] = []
-      if node[:type] == :element
-        open_nodes << node
-      end
-    end
-    index += 1
-  end
-  test[:document] = document
-  test
-end
-
-class TestTreeConstructionBase < Minitest::Test
-  def assert_equal_or_nil(exp, act)
-    if exp.nil?
-      assert_nil act
-    else
-      assert_equal exp, act
-    end
-  end
-
-  def compare_nodes(node, ng_node)
-    case ng_node.type
-    when Nokogiri::XML::Node::ELEMENT_NODE
-      assert_equal node[:type], :element
-      if node[:ns]
-        refute_nil ng_node.namespace
-        assert_equal node[:ns], ng_node.namespace.prefix
-      end
-      assert_equal node[:tag], ng_node.name
-      attributes = ng_node.attributes
-      assert_equal node[:attributes].length, attributes.length
-      node[:attributes].each do |attr|
-        if attr[:ns]
-          value = ng_node["#{attr[:ns]}:#{attr[:name]}"]
-        else
-          value = attributes[attr[:name]].value
-        end
-        assert_equal attr[:value], value
-      end
-      assert_equal node[:children].length, ng_node.children.length,
-        "Element <#{node[:tag]}> has wrong number of children: #{ng_node.children.map { |c| c.name }}"
-    when Nokogiri::XML::Node::TEXT_NODE, Nokogiri::XML::Node::CDATA_SECTION_NODE
-      # We preserve the CDATA in the tree, but the tests represent it as text.
-      assert_equal node[:type], :text
-      assert_equal node[:contents], ng_node.content
-    when Nokogiri::XML::Node::COMMENT_NODE
-      assert_equal node[:type], :comment
-      assert_equal node[:contents], ng_node.content
-    when Nokogiri::XML::Node::HTML_DOCUMENT_NODE
-      assert_equal node[:type], :document
-      assert_equal node[:children].length, ng_node.children.length
-    when Nokogiri::XML::Node::DOCUMENT_FRAG_NODE
-      assert_equal node[:type], :fragment
-      assert_equal node[:children].length, ng_node.children.length
-    when Nokogiri::XML::Node::DTD_NODE
-      assert_equal node[:type], :doctype
-      assert_equal node[:name], ng_node.name
-      assert_equal_or_nil node[:public_id], ng_node.external_id
-      assert_equal_or_nil node[:system_id], ng_node.system_id
-    else
-      flunk "Unknown node type #{ng_node.type} (expected #{node[:type]})"
-    end
-  end
-
-  def run_test
-    if @test[:context]
-      ctx = @test[:context].join(':')
-      doc = Nokogiri::HTML5::Document.new
-      doc = Nokogiri::HTML5::DocumentFragment.new(doc, @test[:data], ctx, max_errors: @test[:errors].length + 10)
-    else
-      doc = Nokogiri::HTML5.parse(@test[:data], max_errors: @test[:errors].length + 10)
-    end
-    # Walk the tree.
-    exp_nodes = [@test[:document]]
-    act_nodes = [doc]
-    children = [0]
-    compare_nodes(exp_nodes[0], doc)
-    while children.any?
-      child_index = children[-1]
-      exp = exp_nodes[-1]
-      act = act_nodes[-1]
-      if child_index == exp[:children].length
-        exp_nodes.pop
-        act_nodes.pop
-        children.pop
-        next
-      end
-      exp_child = exp[:children][child_index]
-      act_child = act.children[child_index]
-      compare_nodes(exp_child, act_child)
-      children[-1] = child_index + 1
-      if exp_child.has_key?(:children)
-        exp_nodes << exp_child
-        act_nodes << act_child
-        children << 0
-      end
-    end
-
-    # Test the errors.
-    assert_equal @test[:errors].length, doc.errors.length
-
-    # The new, standardized tokenizer errors live in @test[:new_errors]. Let's
-    # match each one to exactly one error in doc.errors. Unfortunately, the
-    # tests specify the column the error is detected, _not_ the column of the
-    # start of the problematic HTML (e.g., the start of a character reference
-    # or <![CDATA[) the way gumbo does. So check that Gumbo's column is no
-    # later than the error's column.
-    errors = doc.errors.map { |err| { line: err.line, column: err.column, code: err.str1 } }
-    errors.reject! { |err| err[:code] == 'generic-parser' }
-    error_regex = /^\((?<line>\d+):(?<column>\d+)(?:-\d+:\d+)?\) (?<code>.*)$/
-    @test[:new_errors].each do |err|
-      assert_match(error_regex, err)
-      m = err.match(error_regex)
-      line = m[:line].to_i
-      column = m[:column].to_i
-      code = m[:code]
-      idx = errors.index do |e|
-        e[:line] == line &&
-          e[:code] == code &&
-          e[:column] <= column
-      end
-      # This error should be the first error in the list.
-      #refute_nil(idx, "Expected to find error #{code} at #{line}:#{column}")
-      assert_equal(0, idx, "Expected to find error #{code} at #{line}:#{column}")
-      errors.delete_at(idx)
-    end
-  end
-end
-
-tc_path = File.expand_path('../html5lib-tests/tree-construction', __FILE__)
-Dir[File.join(tc_path, '*.dat')].each do |path|
-  test_name = "TestTreeConstruction" + File.basename(path, '.dat')
-    .split(/[_-]/)
-    .map { |s| s.capitalize }
-    .join('')
-  tests = []
-  File.open(path, "r", encoding: 'UTF-8') do |f|
-    f.each("\n\n#data\n") do |test_data|
-      if test_data.start_with?("#data\n")
-        test_data = test_data[6..-1]
-      end
-      if test_data.end_with?("\n\n#data\n")
-        test_data = test_data[0..-9]
-      end
-      tests << parse_test(test_data)
-    end
-  end
-
-  klass = Class.new(TestTreeConstructionBase) do
-    tests.each_with_index do |test, index|
-      next if test[:script] == :on;
-      define_method "test_#{index}".to_sym do
-        @test = test
-        @index = index
-        run_test
-      end
-    end
-  end
-  Object.const_set test_name, klass
-end
-
-# vim: set sw=2 sts=2 ts=8 et:
diff --git a/rakelib/check-manifest.rake b/rakelib/check-manifest.rake
index d5eda72380..bc3a42792e 100644
--- a/rakelib/check-manifest.rake
+++ b/rakelib/check-manifest.rake
@@ -16,6 +16,7 @@ task :check_manifest do
     coverage
     doc
     gems
+    nokogumbo-import
     patches
     pkg
     ports
@@ -33,8 +34,6 @@ task :check_manifest do
     .editorconfig
     .gitignore
     .yardopts
-    appveyor.yml
-    nokogiri.gemspec
     CHANGELOG.md
     CODE_OF_CONDUCT.md
     CONTRIBUTING.md
@@ -44,8 +43,11 @@ task :check_manifest do
     SECURITY.md
     STANDARD_RESPONSES.md
     Vagrantfile
-    [0-9]+-.*
-    [a-z.]+.(log|out)
+    [a-z]*.{log,out}
+    appveyor.yml
+    gumbo-parser/test/*
+    lib/nokogiri/**/nokogiri.{jar,so}
+    nokogiri.gemspec
   ]
 
   intended_directories = Dir.children(".")
@@ -54,10 +56,11 @@ task :check_manifest do
 
   intended_files = Dir.children(".")
     .select { |filename| File.file?(filename) }
-    .reject { |filename| ignore_files.any? { |ig| File.fnmatch?(ig, filename) } }
+    .reject { |filename| ignore_files.any? { |ig| File.fnmatch?(ig, filename, File::FNM_EXTGLOB) } }
 
   intended_files += Dir.glob(intended_directories.map { |d| File.join(d, "/**/*") })
     .select { |filename| File.file?(filename) }
+    .reject { |filename| ignore_files.any? { |ig| File.fnmatch?(ig, filename, File::FNM_EXTGLOB) } }
     .sort
 
   spec_files = raw_gemspec.files.sort
diff --git a/rakelib/extensions.rake b/rakelib/extensions.rake
index bc5067e3f6..766861949a 100644
--- a/rakelib/extensions.rake
+++ b/rakelib/extensions.rake
@@ -307,7 +307,9 @@ if java?
     jruby_home = RbConfig::CONFIG['prefix']
     jars = ["#{jruby_home}/lib/jruby.jar"] + FileList['lib/*.jar']
 
+    # Keep the extension C files because they have docstrings (and Java files don't)
     ext.gem_spec.files.reject! { |path| File.fnmatch?("ext/nokogiri/*.h", path) }
+    ext.gem_spec.files.reject! { |path| File.fnmatch?("gumbo-parser/**/*", path) }
 
     ext.ext_dir = 'ext/java'
     ext.lib_dir = 'lib/nokogiri'
@@ -326,7 +328,7 @@ else
   dependencies = YAML.load_file("dependencies.yml")
 
   task gem_build_path do
-    NOKOGIRI_SPEC.files.reject! { |f| f =~ %r{\.(java|jar)$} }
+    NOKOGIRI_SPEC.files.reject! { |path| File.fnmatch?("**/*.{java,jar}", path, File::FNM_EXTGLOB) }
 
     ["libxml2", "libxslt"].each do |lib|
       version = dependencies[lib]["version"]
@@ -345,7 +347,7 @@ else
   end
 
   Rake::ExtensionTask.new("nokogiri", NOKOGIRI_SPEC) do |ext|
-    ext.gem_spec.files.reject! { |f| f =~ %r{\.(java|jar)$} }
+    ext.gem_spec.files.reject! { |path| File.fnmatch?("**/*.{java,jar}", path, File::FNM_EXTGLOB) }
 
     ext.lib_dir = File.join(*['lib', 'nokogiri', ENV['FAT_DIR']].compact)
     ext.config_options << ENV['EXTOPTS']
@@ -354,6 +356,7 @@ else
     ext.cross_config_options << "--enable-cross-build"
     ext.cross_compiling do |spec|
       spec.files.reject! { |path| File.fnmatch?('ports/*', path) }
+      spec.files.reject! { |path| File.fnmatch?("gumbo-parser/**/*", path) }
       spec.dependencies.reject! { |dep| dep.name=='mini_portile2' }
 
       # when pre-compiling a native gem, package all the C headers sitting in ext/nokogiri/include
diff --git a/scripts/test-gem-file-contents b/scripts/test-gem-file-contents
index 08bee5ff73..c567bf40d5 100755
--- a/scripts/test-gem-file-contents
+++ b/scripts/test-gem-file-contents
@@ -78,7 +78,7 @@ describe File.basename(gemfile) do
       assert_operator(actual, :>, 60, "expected gemfile to contain more than #{actual} files")
     end
 
-    it "gemspec is a Gem::Specfication" do
+    it "gemspec is a Gem::Specification" do
       assert_equal(Gem::Specification, gemspec.class)
     end
   end
@@ -93,6 +93,19 @@ describe File.basename(gemfile) do
   end
 
   describe "ruby platform" do
+    it "depends on mini_portile2" do
+      assert(gemspec.dependencies.find { |d| d.name == "mini_portile2" })
+    end
+
+    it "contains ext/nokogiri C and header files" do
+      assert_operator(gemfile_contents.grep(%r{^ext/nokogiri/.*\.c}).length, :>, 20)
+      assert_operator(gemfile_contents.grep(%r{^ext/nokogiri/.*\.h}).length, :>, 0)
+    end
+
+    it "includes C files in extra_rdoc_files" do
+      assert_operator(gemspec.extra_rdoc_files.grep(%r{ext/nokogiri/.*\.c$}).length, :>, 10)
+    end
+
     it "contains the port files" do
       actual_ports = gemfile_contents.grep(%r{^ports/})
       assert_equal(1, actual_ports.grep(/libxml2-\d+\.\d+\.\d+\.tar\.gz/).length,
@@ -106,31 +119,37 @@ describe File.basename(gemfile) do
       assert_operator(gemfile_contents.grep(%r{^patches/}).length, :>, 0)
     end
 
-    it "contains ext/nokogiri C and header files" do
-      assert_operator(gemfile_contents.grep(%r{^ext/nokogiri/.*\.c}).length, :>, 20)
-      assert_operator(gemfile_contents.grep(%r{^ext/nokogiri/.*\.h}).length, :>, 0)
-    end
-
     it "does not contain packaged libraries' header files" do
       # these files are present after installation if the packaged libraries are used
       assert_empty(gemfile_contents.grep(%r{^ext/nokogiri/include/}))
     end
 
+    it "contains the gumbo parser source code" do
+      assert_includes(gemfile_contents, "gumbo-parser/src/Makefile")
+      assert_operator(gemfile_contents.grep(%r{^gumbo-parser/src/.*\.c}).length, :>, 10)
+      assert_operator(gemfile_contents.grep(%r{^gumbo-parser/src/.*\.h}).length, :>, 10)
+    end
+
     it "does not contain java files" do
       assert_empty(gemfile_contents.grep(%r{^ext/java/}))
       assert_empty(gemfile_contents.grep(/.*\.jar$/))
     end
+  end if gemspec.platform == Gem::Platform::RUBY
 
-    it "depends on mini_portile2" do
-      assert(gemspec.dependencies.find { |d| d.name == "mini_portile2" })
+  describe "native platform" do
+    it "does not depend on mini_portile2" do
+      refute(gemspec.dependencies.find { |d| d.name == "mini_portile2" })
+    end
+
+    it "contains ext/nokogiri C and header files" do
+      assert_operator(gemfile_contents.grep(%r{^ext/nokogiri/.*\.c}).length, :>, 20)
+      assert_operator(gemfile_contents.grep(%r{^ext/nokogiri/.*\.h}).length, :>, 20)
     end
 
     it "includes C files in extra_rdoc_files" do
       assert_operator(gemspec.extra_rdoc_files.grep(%r{ext/nokogiri/.*\.c$}).length, :>, 10)
     end
-  end if gemspec.platform == Gem::Platform::RUBY
 
-  describe "native platform" do
     it "does not contain the port files" do
       assert_empty(gemfile_contents.grep(%r{^ports/}))
     end
@@ -139,30 +158,21 @@ describe File.basename(gemfile) do
       assert_empty(gemfile_contents.grep(%r{^patches/}))
     end
 
-    it "contains ext/nokogiri C and header files" do
-      assert_operator(gemfile_contents.grep(%r{^ext/nokogiri/.*\.c}).length, :>, 20)
-      assert_operator(gemfile_contents.grep(%r{^ext/nokogiri/.*\.h}).length, :>, 20)
-    end
-
     it "contains packaged libraries' header files" do
       assert_includes(gemfile_contents, "ext/nokogiri/include/libxml2/libxml/tree.h")
       assert_includes(gemfile_contents, "ext/nokogiri/include/libxslt/xslt.h")
       assert_includes(gemfile_contents, "ext/nokogiri/include/libexslt/exslt.h")
     end
 
+    it "does not contain the gumbo parser source code" do
+      assert_empty(gemfile_contents.grep(%r{^gumbo-parser/src/}))
+    end
+
     it "does not contain java files" do
       assert_empty(gemfile_contents.grep(%r{^ext/java/}))
       assert_empty(gemfile_contents.grep(/.*\.jar$/))
     end
 
-    it "does not depend on mini_portile2" do
-      refute(gemspec.dependencies.find { |d| d.name == "mini_portile2" })
-    end
-
-    it "includes C files in extra_rdoc_files" do
-      assert_operator(gemspec.extra_rdoc_files.grep(%r{ext/nokogiri/.*\.c$}).length, :>, 10)
-    end
-
     it "contains expected shared library files " do
       native_ruby_versions.each do |version|
         actual = gemfile_contents.find do |p|
@@ -184,15 +194,12 @@ describe File.basename(gemfile) do
   end if gemspec.platform.is_a?(Gem::Platform) && gemspec.platform.cpu
 
   describe "java platform" do
-    it "does not contain the port files" do
-      assert_empty(gemfile_contents.grep(%r{^ports/}))
-    end
-
-    it "does not contain the patch files" do
-      assert_empty(gemfile_contents.grep(%r{^patches/}))
+    it "does not depend on mini_portile2" do
+      refute(gemspec.dependencies.find { |d| d.name == "mini_portile2" })
     end
 
     it "contains ext/nokogiri C files" do
+      # Note: we keep the C files because they have docstrings and Java files don't
       assert_operator(gemfile_contents.grep(%r{^ext/nokogiri/.*\.c}).length, :>, 20)
     end
 
@@ -200,10 +207,30 @@ describe File.basename(gemfile) do
       assert_empty(gemfile_contents.grep(%r{^ext/nokogiri/.*\.h}))
     end
 
+    it "includes C files in extra_rdoc_files" do
+      assert_operator(gemspec.extra_rdoc_files.grep(%r{ext/nokogiri/.*\.c$}).length, :>, 10)
+    end
+
+    it "does not contain the port files" do
+      assert_empty(gemfile_contents.grep(%r{^ports/}))
+    end
+
+    it "does not contain the patch files" do
+      assert_empty(gemfile_contents.grep(%r{^patches/}))
+    end
+
     it "does not contain packaged libraries' header files" do
       assert_empty(gemfile_contents.grep(%r{^ext/nokogiri/include/}))
     end
 
+    it "does not contain the gumbo parser source code" do
+      assert_empty(gemfile_contents.grep(%r{^gumbo-parser/src/}))
+    end
+
+    it "contains java source files" do
+      assert_operator(gemfile_contents.grep(%r{^ext/java/.*\.java}).length, :>, 20)
+    end
+
     it "contains the java jar files" do
       actual_jars = gemfile_contents.grep(/.*\.jar$/)
       expected_jars = [
@@ -221,9 +248,5 @@ describe File.basename(gemfile) do
         assert_equal(1, actual_jars.grep(%r{/#{jar}\.jar$}).length, "expected to contain #{jar}.jar")
       end
     end
-
-    it "does not depend on mini_portile2" do
-      refute(gemspec.dependencies.find { |d| d.name == "mini_portile2" })
-    end
   end if gemspec.platform == Gem::Platform.new("java")
 end
diff --git a/nokogumbo-import/test/test_api.rb b/test/html5/test_api.rb
similarity index 55%
rename from nokogumbo-import/test/test_api.rb
rename to test/html5/test_api.rb
index 045807055f..c1c03652c4 100644
--- a/nokogumbo-import/test/test_api.rb
+++ b/test/html5/test_api.rb
@@ -1,39 +1,40 @@
-require 'nokogumbo'
-require 'minitest/autorun'
+# coding: utf-8
+# frozen_string_literal: true
+require "helper"
 
-class TestAPI < Minitest::Test
+class TestHtml5API < Nokogiri::TestCase
   def test_parse_convenience_methods
-    html = '<!DOCTYPE html><p>hi'.freeze
+    html = "<!DOCTYPE html><p>hi"
     base = Nokogiri::HTML5::Document.parse(html)
     html5_parse = Nokogiri::HTML5.parse(html)
     html5 = Nokogiri::HTML5(html)
     str = base.to_html
-    assert_equal str, html5_parse.to_html
-    assert_equal str, html5.to_html
+    assert_equal(str, html5_parse.to_html)
+    assert_equal(str, html5.to_html)
   end
 
   def test_fragment_convenience_methods
-    frag = '<div><p>hi</div>'.freeze
+    frag = "<div><p>hi</div>"
     base = Nokogiri::HTML5::DocumentFragment.parse(frag)
     html5_fragment = Nokogiri::HTML5.fragment(frag)
-    assert_equal base.to_html, html5_fragment.to_html
+    assert_equal(base.to_html, html5_fragment.to_html)
   end
 
   def test_url
-    html = '<p>hi'
-    url = 'http://example.com'
+    html = "<p>hi"
+    url = "http://example.com"
     doc = Nokogiri::HTML5::Document.parse(html, url, max_errors: 1)
-    assert_equal url, doc.errors[0].file
+    assert_equal(url, doc.errors[0].file)
 
     doc = Nokogiri::HTML5.parse(html, url, max_errors: 1)
-    assert_equal url, doc.errors[0].file
+    assert_equal(url, doc.errors[0].file)
 
     doc = Nokogiri::HTML5(html, url, max_errors: 1)
-    assert_equal url, doc.errors[0].file
+    assert_equal(url, doc.errors[0].file)
   end
 
   def test_parse_encoding
-    utf8 = '<!DOCTYPE html><body><p>おはようございます'
+    utf8 = "<!DOCTYPE html><body><p>おはようございます"
     shift_jis = utf8.encode(Encoding::SHIFT_JIS)
     raw = shift_jis.dup
     raw.force_encoding(Encoding::ASCII_8BIT)
@@ -48,7 +49,7 @@ def test_parse_encoding
   end
 
   def test_fragment_encoding
-    utf8 = '<div><p>おはようございます</div>'
+    utf8 = "<div><p>おはようございます</div>"
     shift_jis = utf8.encode(Encoding::SHIFT_JIS)
     raw = shift_jis.dup
     raw.force_encoding(Encoding::ASCII_8BIT)
@@ -62,33 +63,32 @@ def test_fragment_encoding
   end
 
   def test_fragment_serialization_encoding
-    frag = Nokogiri::HTML5.fragment('<span>아는 길도 물어가라</span>')
-    html = frag.serialize(encoding: 'US-ASCII')
-    assert_equal '<span>&#xc544;&#xb294; &#xae38;&#xb3c4; &#xbb3c;&#xc5b4;&#xac00;&#xb77c;</span>', html
+    frag = Nokogiri::HTML5.fragment("<span>아는 길도 물어가라</span>")
+    html = frag.serialize(encoding: "US-ASCII")
+    assert_equal("<span>&#xc544;&#xb294; &#xae38;&#xb3c4; &#xbb3c;&#xc5b4;&#xac00;&#xb77c;</span>", html)
     frag = Nokogiri::HTML5.fragment(html)
-    assert_equal '<span>아는 길도 물어가라</span>', frag.serialize
+    assert_equal("<span>아는 길도 물어가라</span>", frag.serialize)
   end
 
   def test_serialization_encoding
-    html = '<!DOCUMENT html><span>ฉันไม่พูดภาษาไทย</span>'
+    html = "<!DOCUMENT html><span>ฉันไม่พูดภาษาไทย</span>"
     doc = Nokogiri::HTML5(html)
-    span = doc.at('/html/body/span')
-    serialized = span.inner_html(encoding: 'US-ASCII')
+    span = doc.at("/html/body/span")
+    serialized = span.inner_html(encoding: "US-ASCII")
     assert_match(/^(?:&#(?:\d+|x\h+);)*$/, serialized)
-    assert_equal('ฉันไม่พูดภาษาไทย'.each_char.map(&:ord),
-                 serialized.scan(/&#(\d+|x\h+);/).map do |s|
+    assert_equal("ฉันไม่พูดภาษาไทย".each_char.map(&:ord),
+      serialized.scan(/&#(\d+|x\h+);/).map do |s|
         s = s.first
-        if s.start_with? 'x'
+        if s.start_with?("x")
           s[1..-1].to_i(16)
         else
           s.to_i
         end
-      end
-    )
+      end)
 
-    doc2 = Nokogiri::HTML5(doc.serialize(encoding: 'Big5'))
-    html2 = doc2.serialize(encoding: 'UTF-8')
-    assert_match 'ฉันไม่พูดภาษาไทย', html2
+    doc2 = Nokogiri::HTML5(doc.serialize(encoding: "Big5"))
+    html2 = doc2.serialize(encoding: "UTF-8")
+    assert_match("ฉันไม่พูดภาษาไทย", html2)
   end
 
   %w[pre listing textarea].each do |tag|
@@ -106,73 +106,73 @@ def test_serialization_encoding
   end
 
   def test_document_io
-    html = StringIO.new('<!DOCTYPE html><span>test</span>', 'r')
+    html = StringIO.new("<!DOCTYPE html><span>test</span>", "r")
     doc = Nokogiri::HTML5::Document.read_io(html)
-    refute_nil doc.at_xpath('/html/body/span')
+    refute_nil(doc.at_xpath("/html/body/span"))
   end
 
   def test_document_memory
-    html = '<!DOCTYPE html><span>test</span>'
+    html = "<!DOCTYPE html><span>test</span>"
     doc = Nokogiri::HTML5::Document.read_memory(html)
-    refute_nil doc
-    refute_nil doc.at_xpath('/html/body/span')
+    refute_nil(doc)
+    refute_nil(doc.at_xpath("/html/body/span"))
   end
 
   def test_document_io_failure
-    html = '<!DOCTYPE html><span>test</span>'
+    html = "<!DOCTYPE html><span>test</span>"
     assert_raises(ArgumentError) { Nokogiri::HTML5::Document.read_io(html) }
   end
 
   def test_document_memory_failure
-    html = StringIO.new('<!DOCTYPE html><span>test</span>', 'r')
+    html = StringIO.new("<!DOCTYPE html><span>test</span>", "r")
     assert_raises(ArgumentError) { Nokogiri::HTML5::Document.read_memory(html) }
   end
 
   def test_document_parse_failure
-    html = ['Neither a string, nor I/O']
+    html = ["Neither a string, nor I/O"]
     assert_raises(ArgumentError) { Nokogiri::HTML5::Document.parse(html) }
   end
 
   def test_ownership
     # Test that we don't change the passed in string, even if we need to
     # re-encode it.
-    html = '<!DOCTYPE html><html></html>'.freeze
-    refute_nil Nokogiri::HTML5.parse(html)
+    html = "<!DOCTYPE html><html></html>"
+    refute_nil(Nokogiri::HTML5.parse(html))
 
     iso8859_1 = html.encode(Encoding::ISO_8859_1).freeze
-    refute_nil Nokogiri::HTML5.parse(iso8859_1)
+    refute_nil(Nokogiri::HTML5.parse(iso8859_1))
 
     ascii_8bit = html.encode(Encoding::ASCII_8BIT).freeze
-    refute_nil Nokogiri::HTML5.parse(ascii_8bit)
+    refute_nil(Nokogiri::HTML5.parse(ascii_8bit))
   end
 
   def test_fragment_from_node
-    doc = Nokogiri.HTML5('<!DOCTYPE html><form><span></span></form>')
-    span = doc.at_xpath('/html/body/form/span')
-    refute_nil span
-    frag = span.fragment('<form>Nested forms should be ignored</form>')
-    assert frag.is_a?(Nokogiri::HTML5::DocumentFragment)
-    assert_equal 1, frag.children.length
-    nested_form = frag.at_xpath('form')
-    assert_nil nested_form
-    assert frag.children[0].text?
+    doc = Nokogiri.HTML5("<!DOCTYPE html><form><span></span></form>")
+    span = doc.at_xpath("/html/body/form/span")
+    refute_nil(span)
+    frag = span.fragment("<form>Nested forms should be ignored</form>")
+    assert(frag.is_a?(Nokogiri::HTML5::DocumentFragment))
+    assert_equal(1, frag.children.length)
+    nested_form = frag.at_xpath("form")
+    assert_nil(nested_form)
+    assert(frag.children[0].text?)
   end
 
   def test_fragment_from_node_no_form
-    doc = Nokogiri.HTML5('<!DOCTYPE html><span></span></form>')
-    span = doc.at_xpath('/html/body/span')
-    refute_nil span
-    frag = span.fragment('<form><span>Form should not be ignored</span></form>')
-    assert frag.is_a?(Nokogiri::HTML5::DocumentFragment)
-    assert_equal 1, frag.children.length
-    form = frag.at_xpath('form')
-    refute_nil form
+    doc = Nokogiri.HTML5("<!DOCTYPE html><span></span></form>")
+    span = doc.at_xpath("/html/body/span")
+    refute_nil(span)
+    frag = span.fragment("<form><span>Form should not be ignored</span></form>")
+    assert(frag.is_a?(Nokogiri::HTML5::DocumentFragment))
+    assert_equal(1, frag.children.length)
+    form = frag.at_xpath("form")
+    refute_nil(form)
   end
 
   def test_empty_fragment
-    doc = Nokogiri.HTML5('<!DOCTYPE html><body>')
+    doc = Nokogiri.HTML5("<!DOCTYPE html><body>")
     frag = doc.fragment
-    assert frag.is_a?(Nokogiri::HTML5::DocumentFragment)
-    assert frag.children.empty?
+    assert(frag.is_a?(Nokogiri::HTML5::DocumentFragment))
+    assert(frag.children.empty?)
   end
-end
+end if Nokogiri.uses_gumbo?
diff --git a/test/html5/test_encoding.rb b/test/html5/test_encoding.rb
new file mode 100644
index 0000000000..bbe24771ba
--- /dev/null
+++ b/test/html5/test_encoding.rb
@@ -0,0 +1,209 @@
+# coding: utf-8
+# frozen_string_literal: true
+require "helper"
+
+class TestHtml5Encoding < Nokogiri::TestCase
+  if "".respond_to?("encoding")
+    def test_macroman_encoding
+      mac = String.new("<span>\xCA</span>").force_encoding("macroman")
+      doc = Nokogiri::HTML5(mac)
+      assert_equal("<span> </span>", doc.at("span").to_xml)
+    end
+
+    def test_iso8859_encoding
+      iso8859 = String.new("<span>Se\xF1or</span>").force_encoding(Encoding::ASCII_8BIT)
+      doc = Nokogiri::HTML5(iso8859)
+      assert_equal("<span>Señor</span>", doc.at("span").to_xml)
+    end
+
+    def test_charset_encoding
+      utf8 = String.new("<meta charset='utf-8'><span>Se\xC3\xB1or</span>")
+        .force_encoding(Encoding::ASCII_8BIT)
+      doc = Nokogiri::HTML5(utf8)
+      assert_equal("<span>Señor</span>", doc.at("span").to_xml)
+    end
+
+    def test_bogus_encoding
+      bogus = String.new("<meta charset='bogus'><span>Se\xF1or</span>")
+        .force_encoding(Encoding::ASCII_8BIT)
+      doc = Nokogiri::HTML5(bogus)
+      assert_equal("<span>Señor</span>", doc.at("span").to_xml)
+    end
+
+    def test_utf8_bom
+      utf8 = "\uFEFF<!DOCTYPE html><html></html>".encode("UTF-8")
+      doc = Nokogiri::HTML5(utf8, max_errors: 10)
+      assert_equal([], doc.errors)
+    end
+
+    def test_utf16le_bom
+      utf16le = "\uFEFF<!DOCTYPE html><html></html>".encode("UTF-16LE")
+      doc = Nokogiri::HTML5(utf16le, max_errors: 10)
+      assert_equal([], doc.errors)
+    end
+
+    def test_utf16be_bom
+      utf16be = "\uFEFF<!DOCTYPE html><html></html>".encode("UTF-16BE")
+      doc = Nokogiri::HTML5(utf16be, max_errors: 10)
+      assert_equal([], doc.errors)
+    end
+
+    def test_utf8_bom_ascii
+      utf8 = "\uFEFF<!DOCTYPE html><html></html>".encode("UTF-8")
+      utf8.force_encoding(Encoding::ASCII_8BIT)
+      doc = Nokogiri::HTML5(utf8, max_errors: 10)
+      # Surface parse errors in the failure message rather than printing them to stdout.
+      assert_equal([], doc.errors, doc.errors.join("\n"))
+    end
+
+    def test_utf16le_bom_ascii
+      utf16le = "\uFEFF<!DOCTYPE html><html></html>".encode("UTF-16LE")
+      utf16le.force_encoding(Encoding::ASCII_8BIT)
+      doc = Nokogiri::HTML5(utf16le, max_errors: 10)
+      # Surface parse errors in the failure message; a `puts` after the assertion never runs.
+      assert_equal([], doc.errors, doc.errors.join("\n"))
+    end
+
+    def test_utf16be_bom_ascii
+      utf16be = "\uFEFF<!DOCTYPE html><html></html>".encode("UTF-16BE")
+      utf16be.force_encoding(Encoding::ASCII_8BIT)
+      doc = Nokogiri::HTML5(utf16be, max_errors: 10)
+      # Surface parse errors in the failure message; a `puts` after the assertion never runs.
+      assert_equal([], doc.errors, doc.errors.join("\n"))
+    end
+
+    def test_tag_after_utf8_bom
+      utf8 = "\uFEFF<b></b>".encode("UTF-8")
+      doc = Nokogiri::HTML5.fragment(utf8, max_errors: 10)
+      assert_equal([], doc.errors)
+    end
+  end
+
+  # https://github.com/rubys/nokogumbo/issues/68
+  def test_charset_sniff_to_html
+    html = <<-EOF.gsub(/^      /, "")
+      <!DOCTYPE html>
+      <html>
+        <head>
+          <meta http-equiv="Content-Type" content="text/html; charset=utf-8; width=device-width">
+        </head>
+        <body>
+          Hello!
+        </body>
+      </html>
+    EOF
+    doc = Nokogiri::HTML5(html, max_errors: 10)
+    assert_equal(0, doc.errors.length)
+    refute_equal("", doc.to_html)
+  end
+
+  # https://encoding.spec.whatwg.org/#names-and-labels
+  # For each encoding, I looked at its Wikipedia page, picked one of the
+  # languages it is meant to encode, and then Googled for a proverb in that
+  # language. Apologies if these are ill-chosen or nonsensical; I'm happy to
+  # change them. I just pasted them in here, so I'm pretty sure the
+  # right-to-left languages are backward. Corrections welcome.
+  ENCODINGS = [
+    ["UTF-8",          "Let's concatentate all of these for UTF-8"], # English
+    ["IBM866",         "А дело бывало -- и коза волка съедала"], # Russian
+    ["ISO-8859-2",     "Co můžeš udělat dnes, neodkládej na zítřek."], # Czech
+    ["ISO-8859-3",     "Yukarda mavi gök, asağıda yağız yer yaratıldıkta"], # Turkish
+    ["ISO-8859-4",     "Ceļš uz elli ir bruģēts ar labiem nodomiem."], # Latvian
+    ["ISO-8859-5",     "Каде има сила, нема правдина."], # Macedonian
+    ["ISO-8859-6",     "أباد الله خضراءهم ابذل لصديقك دمك ومالك"], # Arabic
+    ["ISO-8859-7",     "Η καλύτερη άμυνα είναι η επίθεση."], # Greek
+    ["ISO-8859-8",     "אין הנחתום מעיד על עיסתו"], # Hebrew
+    ["ISO-8859-8-I",   "אל תסתכל בקנקן, אלא במה שבתוכו"], # Hebrew
+    ["ISO-8859-10",    "Alla känner apan, men apan känner ingen."], # Swedish
+    ["ISO-8859-13",    "Lašas po lašo ir akmenį pratašo."], # Lithuanian
+    ["ISO-8859-14",    "ha bhòrd bòrd gun aran ach 's bòrd aran leis fhèin."], # Scottish Gaelic
+    ["ISO-8859-15",    "This is essentially ISO 8859-1 but with € Š š Ž ž Œ œ Ÿ"], # English
+    ["ISO-8859-16",    "Kiedy wszedłeś między wrony, musisz krakać jak i one."], # Polish
+    ["KOI8-R",         "А дело бывало -- и коза волка съедала"], # Russian
+    ["KOI8-U",         "Яблуко від яблуньки не далеко. Ґ, Є, І, Ї"], # Ukrainian
+    ["macroman",       "Some good old Mac Roman œ∑´®†¥¨ˆøπåßƒ©"], # English
+    ["windows-874",    "กระต่ายหมายจันทร์"], # Thai
+    ["windows-1250",   "Addig nyújtózkodj, amíg a takaród ér."], # Hungarian
+    ["windows-1251",   "Бързата работа - срам за майстора."], # Bulgarian
+    ["windows-1252",   "Basically ISO 8859-1 with ‘differences’™ •"], # English
+    ["windows-1253",   "Και οι τοίχοι έχουν αυτιά."], # Greek
+    ["windows-1254",   "Baban nasılsa oğlu da öyledir."], # Turkish
+    ["windows-1255",   "אל תקנה חתול בשק; ₪"], # Hebrew
+    ["windows-1256",   "أبطأ من سلحفاة"], # Arabic
+    ["windows-1257",   "Hommikune töö kuld, õhtune muld."], # Estonian
+    ["windows-1258",   "Ăn theo thuở, ở theo thời."], # Vietnamese
+    ["macCyrillic", "А дело бывало -- и коза волка съедала"], # Russian
+    ["GBK",            "不闻不若闻之，闻之不若见之，见之不若知之，知之不若行之；学至于行之而止矣"], # Simplified Chinese
+    ["gb18030",        "不聞不若聞之，聞之不若見之，見之不若知之，知之不若行之；學至於行之而止矣"], # Traditional Chinese
+    ["Big5",           "有其父必有其子"], # Traditional Chinese
+    ["EUC-JP",         "猿も木から落ちる"], # Japanese
+    ["ISO-2022-JP",    "井の中の蛙大海を知らず"], # Japanese
+    ["Shift_JIS",      "鳥なき里の蝙蝠"], # Japanese
+    ["EUC-KR",         "아는 길도 물어가라"], # Korean
+    ["replacement",    "콩 심은데 콩나고, 팥 심은데 팥난다"], # Korean
+    ["UTF-16BE",       "Everything had better be representable!"], # English
+    ["UTF-16LE",       "Same as with UTF-16BE"], # English
+    ["US-ASCII",       "Surprisingly not one of the required encodings"], # English
+  ].freeze
+
+  def encodings_html
+    @encodings_html ||=
+      "<!DOCTYPE html><html><head></head><body>" +
+      ENCODINGS.map { |enc| %(<span id="#{enc[0]}">#{enc[1]}</span>) }.join +
+      "</body></html>"
+  end
+
+  def encodings_doc
+    @encodings_doc ||= Nokogiri::HTML5(encodings_html)
+  end
+
+  def round_trip_through(str, enc) # enc is an encoding *name* (String); returns str encoded in it
+    begin
+      encoding = Encoding.find(enc)
+    rescue ArgumentError
+      skip("#{enc} not supported")
+    end
+    begin
+      encoded = str.encode(encoding)
+    rescue Encoding::ConverterNotFoundError
+      skip("Converting UTF-8 to #{enc} not supported")
+    end
+    begin
+      decoded = encoded.encode("UTF-8")
+    rescue Encoding::ConverterNotFoundError
+      skip("Converting #{enc} to UTF-8 not supported")
+    end
+    assert_equal(str, decoded, "'#{str}' did not round trip through #{enc}")
+    encoded
+  end
+
+  ENCODINGS.each do |enc_name, sample|
+    define_method("test_parse_encoded_#{enc_name}") do
+      html = "<!DOCTYPE html><span>#{sample}</span>"
+      encoded_html = round_trip_through(html, enc_name)
+      doc = Nokogiri::HTML5(encoded_html, encoding: enc_name)
+      span = doc.at("/html/body/span")
+      refute_nil(span)
+      assert_equal(sample, span.content)
+    end
+
+    define_method("test_inner_html_encoded_#{enc_name}") do
+      encoded = round_trip_through(sample, enc_name)
+      span = encodings_doc.at(%(/html/body/span[@id="#{enc_name}"]))
+      refute_nil(span)
+      assert_equal(encoded, span.inner_html(encoding: enc_name))
+    end
+
+    define_method("test_roundtrip_through_#{enc_name}") do
+      # https://bugs.ruby-lang.org/issues/15033
+      # Ruby has a bug with the `:fallback` parameter passed to `#encode` when
+      # multiple conversions have to happen. I'm not sure it's worth working
+      # around. It impacts this test though.
+      skip("https://bugs.ruby-lang.org/issues/15033") if enc_name == "ISO-2022-JP"
+      round_trip_through(sample, enc_name)
+      encoded = encodings_doc.serialize(encoding: enc_name)
+      doc = Nokogiri::HTML5(encoded, encoding: enc_name)
+      assert_equal(encodings_html, doc.serialize)
+    end
+  end
+end if Nokogiri.uses_gumbo?
diff --git a/nokogumbo-import/test/test_monkey_patch.rb b/test/html5/test_monkey_patch.rb
similarity index 75%
rename from nokogumbo-import/test/test_monkey_patch.rb
rename to test/html5/test_monkey_patch.rb
index 5fdd1e4e71..42ff0ac6ab 100644
--- a/nokogumbo-import/test/test_monkey_patch.rb
+++ b/test/html5/test_monkey_patch.rb
@@ -1,8 +1,7 @@
 # encoding: utf-8
-require 'nokogumbo'
-require 'minitest/autorun'
+require 'helper'
 
-class TestNokogumbo < Minitest::Test
+class TestHtml5SerializationMonkeyPatch < Nokogiri::TestCase
   def test_to_xml
     xml = Nokogiri.HTML5('<!DOCTYPE html><source>').to_xml
     assert_match(/\A<\?xml version/, xml)
@@ -13,4 +12,4 @@ def test_html4_fragment
     frag = Nokogiri::HTML.fragment('<span></span>')
     assert frag.is_a?(Nokogiri::HTML::DocumentFragment)
   end
-end
+end if Nokogiri.uses_gumbo?
diff --git a/nokogumbo-import/test/test_nokogumbo.rb b/test/html5/test_nokogumbo.rb
similarity index 61%
rename from nokogumbo-import/test/test_nokogumbo.rb
rename to test/html5/test_nokogumbo.rb
index 307f3b1846..a49b9e9848 100644
--- a/nokogumbo-import/test/test_nokogumbo.rb
+++ b/test/html5/test_nokogumbo.rb
@@ -1,58 +1,51 @@
 # encoding: utf-8
-require 'nokogumbo'
+# frozen_string_literal: true
+require "helper"
 
-# Make sure that Ruby objects constructed in C are treated as GC roots.
-# See: https://github.com/rubys/nokogumbo/pull/150
-if GC.respond_to?(:verify_compaction_references)
-  GC.verify_compaction_references(toward: :empty, double_heap: true)
-end
-
-require 'minitest/autorun'
-
-class TestNokogumbo < Minitest::Test
+class TestHtml5Nokogumbo < Nokogiri::TestCase
   def test_element_text
     doc = Nokogiri::HTML5(buffer)
-    assert_equal "content", doc.at('span').text
+    assert_equal("content", doc.at("span").text)
   end
 
   def test_element_cdata_textarea
     doc = Nokogiri::HTML5(buffer)
-    assert_equal "foo<x>bar", doc.at('textarea').text.strip
+    assert_equal("foo<x>bar", doc.at("textarea").text.strip)
   end
 
   def test_element_cdata_script
     doc = Nokogiri::HTML5.fragment(buffer)
-    assert_equal true, doc.document.html?
-    assert_equal "<script> if (a < b) alert(1) </script>", doc.at('script').to_s
+    assert_equal(true, doc.document.html?)
+    assert_equal("<script> if (a < b) alert(1) </script>", doc.at("script").to_s)
   end
 
   def test_attr_value
     doc = Nokogiri::HTML5(buffer)
-    assert_equal "utf-8", doc.at('meta')['charset']
+    assert_equal("utf-8", doc.at("meta")["charset"])
   end
 
   def test_comment
     doc = Nokogiri::HTML5(buffer)
-    assert_equal " test comment ", doc.xpath('//comment()').text
+    assert_equal(" test comment ", doc.xpath("//comment()").text)
   end
 
   def test_unknown_element
     doc = Nokogiri::HTML5(buffer)
-    assert_equal "main", doc.at('main').name
+    assert_equal("main", doc.at("main").name)
   end
 
   def test_IO
-    require 'stringio'
+    require "stringio"
     doc = Nokogiri::HTML5(StringIO.new(buffer))
-    assert_equal 'textarea', doc.at('form').element_children.first.name
+    assert_equal("textarea", doc.at("form").element_children.first.name)
   end
 
   def test_nil
     doc = Nokogiri::HTML5(nil)
-    assert_equal 1, doc.search('body').count
+    assert_equal(1, doc.search("body").count)
 
-    fragment = Nokogiri::HTML5::fragment(nil)
-    assert_equal 0, fragment.errors.length
+    fragment = Nokogiri::HTML5.fragment(nil)
+    assert_equal(0, fragment.errors.length)
   end
 
   def test_html5_doctype
@@ -62,56 +55,56 @@ def test_html5_doctype
 
   def test_fragment_no_errors
     doc = Nokogiri::HTML5.fragment("no missing DOCTYPE errors", max_errors: 10)
-    assert_equal 0, doc.errors.length
+    assert_equal(0, doc.errors.length)
   end
 
   # This should be deleted when `:max_parse_errors` is removed.
   def test_fragment_max_parse_errors
     doc = Nokogiri::HTML5.fragment("testing deprecated :max_parse_errors", max_parse_errors: 10)
-    assert_equal 0, doc.errors.length
+    assert_equal(0, doc.errors.length)
   end
 
   def test_fragment_head
-    doc = Nokogiri::HTML5.fragment(buffer[/<head>(.*?)<\/head>/m, 1])
-    assert_equal "hello world", doc.xpath('title').text
-    assert_equal "utf-8", doc.xpath('meta').first['charset']
+    doc = Nokogiri::HTML5.fragment(buffer[%r{<head>(.*?)</head>}m, 1])
+    assert_equal("hello world", doc.xpath("title").text)
+    assert_equal("utf-8", doc.xpath("meta").first["charset"])
   end
 
   def test_fragment_body
-    doc = Nokogiri::HTML5.fragment(buffer[/<body>(.*?)<\/body>/m, 1])
-    assert_equal '<span>content</span>', doc.xpath('main/span').to_xml
-    assert_equal " test comment ", doc.xpath('comment()').text
+    doc = Nokogiri::HTML5.fragment(buffer[%r{<body>(.*?)</body>}m, 1])
+    assert_equal("<span>content</span>", doc.xpath("main/span").to_xml)
+    assert_equal(" test comment ", doc.xpath("comment()").text)
   end
 
   def test_xlink_attribute
-    source = <<-EOF.gsub(/^ {6}/, '')
+    source = <<-EOF.gsub(/^ {6}/, "")
       <!DOCTYPE html>
       <svg xmlns="http://www.w3.org/2000/svg">
         <a xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#s1"/>
       </svg>
     EOF
     doc = Nokogiri::HTML5.parse(source)
-    a = doc.at_xpath('/html/body/svg:svg/svg:a')
-    refute_nil a
-    refute_nil a['xlink:href']
-    refute_nil a['xmlns:xlink']
+    a = doc.at_xpath("/html/body/svg:svg/svg:a")
+    refute_nil(a)
+    refute_nil(a["xlink:href"])
+    refute_nil(a["xmlns:xlink"])
   end
 
   def test_xlink_attribute_fragment
-    source = <<-EOF.gsub(/^ {6}/, '')
+    source = <<-EOF.gsub(/^ {6}/, "")
       <svg xmlns="http://www.w3.org/2000/svg">
         <a xmlns:xlink="http://www.w3.org/1999/xlink" xlink:href="#s1"/>
       </svg>
     EOF
     doc = Nokogiri::HTML5.fragment(source)
-    a = doc.at_xpath('svg:svg/svg:a')
-    refute_nil a
-    refute_nil a['xlink:href']
-    refute_nil a['xmlns:xlink']
+    a = doc.at_xpath("svg:svg/svg:a")
+    refute_nil(a)
+    refute_nil(a["xlink:href"])
+    refute_nil(a["xmlns:xlink"])
   end
 
   def test_template
-    source = <<-EOF.gsub(/^ {6}/, '')
+    source = <<-EOF.gsub(/^ {6}/, "")
       <template id="productrow">
         <tr>
           <td class="record"></td>
@@ -120,14 +113,14 @@ def test_template
       </template>
     EOF
     doc = Nokogiri::HTML5.fragment(source)
-    template = doc.at('template')
-    assert_equal "productrow", template['id']
-    assert_equal "record", template.at('td')['class']
+    template = doc.at("template")
+    assert_equal("productrow", template["id"])
+    assert_equal("record", template.at("td")["class"])
   end
 
   def test_root_comments
     doc = Nokogiri::HTML5("<!DOCTYPE html><!-- start --><html></html><!-- -->")
-    assert_equal ["html", "comment", "html", "comment"], doc.children.map(&:name)
+    assert_equal(["html", "comment", "html", "comment"], doc.children.map(&:name))
   end
 
   def test_max_attributes
@@ -139,15 +132,16 @@ def test_max_attributes
 
     # -1 disables limit
     doc = Nokogiri::HTML5(html, max_attributes: -1)
-    assert_equal({ 'id' => 'i', 'class' => 'c', 'title' => 't' }, attributes(doc.at_css('div')))
-    assert_equal({ 'src' => 's', 'alt' => 'a' }, attributes(doc.at_css('img')))
+    assert_equal({ "id" => "i", "class" => "c", "title" => "t" }, attributes(doc.at_css("div")))
+    assert_equal({ "src" => "s", "alt" => "a" }, attributes(doc.at_css("img")))
   end
 
   def test_max_attributes_boolean
     html = '<label><input checked type="checkbox" disabled name="cheese"> Cheese</label>'
 
     doc = Nokogiri::HTML5(html, max_attributes: 4)
-    assert_equal({ 'checked' => '', 'type' => 'checkbox', 'disabled' => '', 'name' => 'cheese' }, attributes(doc.at_css('input')))
+    assert_equal({ "checked" => "", "type" => "checkbox", "disabled" => "", "name" => "cheese" },
+      attributes(doc.at_css("input")))
 
     assert_raises(ArgumentError) { Nokogiri::HTML5(html, max_attributes: 3) }
     assert_raises(ArgumentError) { Nokogiri::HTML5(html, max_attributes: 2) }
@@ -156,13 +150,17 @@ def test_max_attributes_boolean
   end
 
   def test_default_max_attributes
-    a = 'a'
+    a = String.new("a")
     attrs = 50_000.times.map { x = a.dup; a.succ!; x }
 
     # <div> contains 50,000 attributes, but default limit is 400. Parsing this would take ages if
     # we were not enforcing any limit on attributes. All attributes are duplicated to make sure
     # this doesn’t alter performance or end result.
-    html = "<div #{attrs.map.with_index { |x, i| "data-#{x}=#{i} data-#{x}=#{i}" }.join(' ')}>hello</div>"
+    html = <<~EOF
+      <div #{attrs.map.with_index { |x, i| "data-#{x}=#{i} data-#{x}=#{i}" }.join(" ")}>
+        hello
+      </div>
+    EOF
 
     assert_raises(ArgumentError) { Nokogiri::HTML5(html) }
   end
@@ -176,81 +174,85 @@ def test_fragment_max_attributes
 
     # -1 disables limit
     doc = Nokogiri::HTML5.fragment(html, max_attributes: -1)
-    assert_equal({ 'id' => 'i', 'class' => 'c', 'title' => 't' }, attributes(doc.at_css('div')))
-    assert_equal({ 'src' => 's', 'alt' => 'a' }, attributes(doc.at_css('img')))
+    assert_equal({ "id" => "i", "class" => "c", "title" => "t" }, attributes(doc.at_css("div")))
+    assert_equal({ "src" => "s", "alt" => "a" }, attributes(doc.at_css("img")))
   end
 
   def test_fragment_default_max_attributes
-    a = 'a'
+    a = String.new("a")
     attrs = 50_000.times.map { x = a.dup; a.succ!; x }
 
     # <div> contains 50,000 attributes, but default limit is 400. Parsing this would take ages if
     # we were not enforcing any limit on attributes. All attributes are duplicated to make sure
     # this doesn’t alter performance or end result.
-    html = "<div #{attrs.map.with_index { |x, i| "data-#{x}=#{i} data-#{x}=#{i}" }.join(' ')}>hello</div>"
+    html = <<~EOF
+      <div #{attrs.map.with_index { |x, i| "data-#{x}=#{i} data-#{x}=#{i}" }.join(" ")}>
+        hello
+      </div>
+    EOF
 
     assert_raises(ArgumentError) { Nokogiri::HTML5.fragment(html) }
   end
 
   def test_parse_errors
     doc = Nokogiri::HTML5("<!DOCTYPE html><html><!-- <!-- --></a>", max_errors: 10)
-    assert_equal doc.errors.length, 2
+    assert_equal(2, doc.errors.length)
     doc = Nokogiri::HTML5("<!DOCTYPE html><html>", max_errors: 10)
-    assert_empty doc.errors
+    assert_empty(doc.errors)
   end
 
   def test_max_errors
     # This document contains 2 parse errors, but we force limit to 1.
     doc = Nokogiri::HTML5("<!DOCTYPE html><html><!-- -- --></a>", max_errors: 1)
-    assert_equal 1, doc.errors.length
+    assert_equal(1, doc.errors.length)
     doc = Nokogiri::HTML5("<!DOCTYPE html><html>", max_errors: 1)
-    assert_empty doc.errors
+    assert_empty(doc.errors)
   end
 
   def test_default_max_errors
     # This document contains 200 parse errors, but default limit is 0.
     doc = Nokogiri::HTML5("<!DOCTYPE html><html>" + "</p>" * 200)
-    assert_equal 0, doc.errors.length
+    assert_equal(0, doc.errors.length)
   end
 
   def test_parse_fragment_errors
     doc = Nokogiri::HTML5.fragment("<\r\n", max_errors: 10)
-    refute_empty doc.errors
+    refute_empty(doc.errors)
   end
 
   def test_fragment_max_errors
     # This fragment contains 2 parse errors, but we force limit to 1.
     doc = Nokogiri::HTML5.fragment("<!-- <!-- --></span>", max_errors: 1)
-    assert_equal 1, doc.errors.length
+    assert_equal(1, doc.errors.length)
     doc = Nokogiri::HTML5.fragment("<!-- <!-- --></span>", max_errors: 10)
-    assert_equal 2, doc.errors.length
+    assert_equal(2, doc.errors.length)
   end
 
   def test_fragment_default_max_errors
     # This fragment contains 200 parse errors, but default limit is 0.
     doc = Nokogiri::HTML5.fragment("</p>" * 200)
-    assert_equal 0, Nokogumbo::DEFAULT_MAX_ERRORS
-    assert_equal 0, doc.errors.length
+    assert_equal(0, Nokogiri::Gumbo::DEFAULT_MAX_ERRORS)
+    assert_equal(0, doc.errors.length)
   end
 
   def test_default_max_depth_parse
-    assert_raises ArgumentError do
-      depth = Nokogumbo::DEFAULT_MAX_TREE_DEPTH + 1
-      Nokogiri::HTML5('<!DOCTYPE html><html><body>' + '<div>' * (depth - 2))
+    assert_raises(ArgumentError) do
+      depth = Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH + 1
+      Nokogiri::HTML5("<!DOCTYPE html><html><body>" + "<div>" * (depth - 2))
     end
   end
 
   def test_default_max_depth_fragment
-    assert_raises ArgumentError do
-      depth = Nokogumbo::DEFAULT_MAX_TREE_DEPTH + 1
-      Nokogiri::HTML5.fragment('<div>' * depth)
+    assert_raises(ArgumentError) do
+      depth = Nokogiri::Gumbo::DEFAULT_MAX_TREE_DEPTH + 1
+      Nokogiri::HTML5.fragment("<div>" * depth)
     end
   end
 
   def test_max_depth_parse
     depth = 10
-    html = '<!DOCTYPE html><html><body>' + '<div>' * (depth - 2)
-    assert_raises ArgumentError do
+    html = "<!DOCTYPE html><html><body>" + "<div>" * (depth - 2)
+    assert_raises(ArgumentError) do
       Nokogiri::HTML5(html, max_tree_depth: depth - 1)
     end
 
@@ -258,14 +260,14 @@ def test_max_depth_parse
       Nokogiri::HTML5(html, max_tree_depth: depth)
       pass
     rescue ArgumentError
-      flunk "Expected document parse to succeed"
+      flunk("Expected document parse to succeed")
     end
   end
 
   def test_max_depth_fragment
     depth = 10
-    html = '<div>' * depth
-    assert_raises ArgumentError do
+    html = "<div>" * depth
+    assert_raises(ArgumentError) do
       Nokogiri::HTML5.fragment(html, max_tree_depth: depth - 1)
     end
 
@@ -273,11 +275,10 @@ def test_max_depth_fragment
       Nokogiri::HTML5.fragment(html, max_tree_depth: depth)
       pass
     rescue ArgumentError
-      flunk "Expected fragment parse to succeed"
+      flunk("Expected fragment parse to succeed")
     end
   end
 
-
   def test_document_encoding
     html = <<-TEXT
       <html>
@@ -290,47 +291,42 @@ def test_document_encoding
       </html>
     TEXT
     doc = Nokogiri::HTML5.parse(html)
-    assert_equal "UTF-8", doc.encoding
-    assert_equal "Кирилические символы", doc.at('body').text.gsub(/\n\s+/,'')
+    assert_equal("UTF-8", doc.encoding)
+    assert_equal("Кирилические символы", doc.at("body").text.gsub(/\n\s+/, ""))
   end
 
   def test_line_text
-    skip unless Nokogumbo.const_get(:LINE_SUPPORTED)
     doc = Nokogiri.HTML5("<!DOCTYPE html>\ntext node")
-    assert_equal 2, doc.at_xpath('/html/body/text()').line
+    assert_equal(2, doc.at_xpath("/html/body/text()").line)
   end
 
   def test_line_comment
-    skip unless Nokogumbo.const_get(:LINE_SUPPORTED)
     doc = Nokogiri.HTML5("<!DOCTYPE html>\n\n<!-- comment -->")
-    assert_equal 3, doc.at_xpath('/comment()').line
+    assert_equal(3, doc.at_xpath("/comment()").line)
   end
 
   def test_line_element
-    skip unless Nokogumbo.const_get(:LINE_SUPPORTED)
     doc = Nokogiri.HTML5("<!DOCTYPE html>\n<p>")
-    assert_equal 2, doc.at_xpath('/html/body/p').line
+    assert_equal(2, doc.at_xpath("/html/body/p").line)
   end
 
   def test_line_template
-    skip unless Nokogumbo.const_get(:LINE_SUPPORTED)
     doc = Nokogiri.HTML5("<!DOCTYPE html>\n\n<template></template>")
-    assert_equal 3, doc.at_xpath('/html/head/template').line
+    assert_equal(3, doc.at_xpath("/html/head/template").line)
   end
 
   def test_line_cdata
-    skip unless Nokogumbo.const_get(:LINE_SUPPORTED)
     html = "<!DOCTYPE html>\n<svg>\n<script><![CDATA[ ]]></script></svg>"
     doc = Nokogiri.HTML5(html)
-    node = doc.at_xpath('/html/body/svg:svg/svg:script/text()')
-    assert node.cdata?
-    assert_equal 3, node.line
+    node = doc.at_xpath("/html/body/svg:svg/svg:script/text()")
+    assert(node.cdata?)
+    assert_equal(3, node.line)
   end
 
-private
+  private
 
   def buffer
-    <<-EOF.gsub(/^      /, '')
+    <<-EOF.gsub(/^      /, "")
       <html>
         <head>
           <meta charset="utf-8"/>
@@ -354,4 +350,4 @@ def buffer
   def attributes(element)
     element.attributes.map { |name, attribute| [name, attribute.value] }.to_h
   end
-end
+end if Nokogiri.uses_gumbo?
diff --git a/nokogumbo-import/test/test_null.rb b/test/html5/test_null.rb
similarity index 81%
rename from nokogumbo-import/test/test_null.rb
rename to test/html5/test_null.rb
index 8d23c5b0b9..d0a326c291 100644
--- a/nokogumbo-import/test/test_null.rb
+++ b/test/html5/test_null.rb
@@ -1,22 +1,22 @@
 # encoding: utf-8
-require 'nokogumbo'
-require 'minitest/autorun'
+# frozen_string_literal: true
+require "helper"
 
-class TestNull < Minitest::Test
+class TestHtml5Null < Nokogiri::TestCase
   def fragment(s)
     Nokogiri::HTML5.fragment(s, max_errors: 10)
   end
 
   def test_null_char_ref
-    frag = fragment('&#0;')
-    assert_equal 1, frag.errors.length
+    frag = fragment("&#0;")
+    assert_equal(1, frag.errors.length)
   end
 
   def test_data_state
     frag = fragment("\u0000")
     # 12.2.5.1 Data state: unexpected-null-character parse error
     # 12.2.6.4.7 The "in body" insertion mode: Parse error
-    assert_equal 2, frag.errors.length
+    assert_equal(2, frag.errors.length)
   end
 
   def test_data_rcdata_state
@@ -24,7 +24,7 @@ def test_data_rcdata_state
     # state
     frag = fragment("<textarea>\u0000</textarea>")
     # 12.2.5.2 RCDATA state: unexpected-null-character parse error
-    assert_equal 1, frag.errors.length
+    assert_equal(1, frag.errors.length)
   end
 
   def test_data_scriptdata_state
@@ -34,20 +34,20 @@ def test_data_scriptdata_state
     # data state
     frag = fragment("<script>\u0000</script>")
     # 12.2.5.4 Script data state: unexpected-null-character parse error
-    assert_equal 1, frag.errors.length
+    assert_equal(1, frag.errors.length)
   end
 
   def test_data_plaintext_state
     frag = fragment("<plaintext>\u0000</plaintext>")
     # 12.2.5.5 PLAINTEXT state: unexpected-null-character parse error
     # EOF parse error because there's no way to switch out of plaintext!
-    assert_equal 2, frag.errors.length
+    assert_equal(2, frag.errors.length)
   end
 
   def test_data_tag_name_state
     frag = fragment("<x\u0000></x\ufffd>")
     # 12.2.5.8 Tag name state: unexpected-null-character parse error
-    assert_equal 1, frag.errors.length
+    assert_equal(1, frag.errors.length)
   end
 
   # XXX: There are 6 script states to test.
@@ -55,7 +55,7 @@ def test_data_tag_name_state
   def test_attribute_name_state
     frag = fragment("<p \u0000>")
     # 12.2.5.33 Attribute name state
-    assert_equal 1, frag.errors.length
+    assert_equal(1, frag.errors.length)
   end
 
   def test_attribute_value_states
@@ -63,7 +63,7 @@ def test_attribute_value_states
     # 12.2.5.36 Attribute value (double-quoted) state
     # 12.2.5.37 Attribute value (single-quoted) state
     # 12.2.5.38 Attribute value (unquoted) state
-    assert_equal 3, frag.errors.length
+    assert_equal(3, frag.errors.length)
   end
 
   def test_bogus_comment_state
@@ -71,19 +71,19 @@ def test_bogus_comment_state
     # 12.2.5.42 Markup declaration open state: incorrectly-opened-comment
     # parse error
     # 12.2.5.41 Bogus comment state: unexpected-null-character parse error
-    assert_equal 2, frag.errors.length
+    assert_equal(2, frag.errors.length)
   end
 
   def test_comment_state
     frag = fragment("<!-- \u0000 -->")
     # 12.2.5.45 Comment state: unexpected-null-character parse error
-    assert_equal 1, frag.errors.length
+    assert_equal(1, frag.errors.length)
   end
 
   def test_doctype_name_states
     # There are two missing here for double quoted PUBLIC and SYSTEM values.
     doc = Nokogiri::HTML5.parse("<!DOCTYPE \u0000\u0000 PUBLIC '\u0000' '\u0000' \u0000>",
-                                max_errors: 10)
+      max_errors: 10)
     # 12.2.5.54 Before DOCTYPE name state: unexpected-null-character parse
     # error
     # 12.2.5.55 DOCTYPE name state: unexpected-null-character parse error
@@ -95,18 +95,18 @@ def test_doctype_name_states
     # unexpected-character-after-doctype-system-identifier parse error
     # 12.2.5.68 Bogus DOCTYPE state: unexpected-null-character parse error
     # 12.2.6.4.1 The "initial" insertion mode: parse error
-    assert_equal 7, doc.errors.length
+    assert_equal(7, doc.errors.length)
   end
 
   def test_cdata_section_state
     frag = fragment("<script>//<![CDATA[\n\u0000\n//]]></script>")
     # 12.2.6.5 The rules for parsing tokens in foreign content: parse error
-    assert_equal 1, frag.errors.length
+    assert_equal(1, frag.errors.length)
   end
 
   def test_error_api_with_null
     frag = fragment("<p \u0000>")
-    assert frag.errors.any?
-    assert_includes frag.errors[0].to_s, "<p \u0000>"
+    assert(frag.errors.any?)
+    assert_includes(frag.errors[0].to_s, "<p \u0000>")
   end
-end
+end if Nokogiri.uses_gumbo?
diff --git a/nokogumbo-import/test/test_serialize.rb b/test/html5/test_serialize.rb
similarity index 61%
rename from nokogumbo-import/test/test_serialize.rb
rename to test/html5/test_serialize.rb
index f05d6c996d..32cd759061 100644
--- a/nokogumbo-import/test/test_serialize.rb
+++ b/test/html5/test_serialize.rb
@@ -1,29 +1,29 @@
 # encoding: utf-8
-require 'nokogumbo'
-require 'minitest/autorun'
+# frozen_string_literal: true
+require "helper"
 
-class TestAPI < Minitest::Test
+class TestHtml5Serialize < Nokogiri::TestCase
   # https://github.com/web-platform-tests/wpt/blob/master/html/syntax/serializing-html-fragments/initial-linefeed-pre.html
   def initial_linefeed_pre
     @initial_linefeed_pre ||= begin
-      html = <<-EOF.gsub(/^        /, '').freeze
+      html = <<~EOF.gsub(/^        /, "").freeze
         <!DOCTYPE html>
         <div id="outer">
         <div id="inner">
         <pre id="pre1">
         x</pre>
         <pre id="pre2">
-        
+
         x</pre>
         <textarea id="textarea1">
         x</textarea>
         <textarea id="textarea2">
-        
+
         x</textarea>
         <listing id="listing1">
         x</listing>
         <listing id="listing2">
-        
+
         x</listing>
         </div>
         </div>
@@ -36,22 +36,22 @@ def initial_linefeed_pre
   def test_initial_linefeed_pre_outer
     expected = %{\n<div id="inner">\n<pre id="pre1">x</pre>\n<pre id="pre2">\nx</pre>\n<textarea id="textarea1">x</textarea>\n<textarea id="textarea2">\nx</textarea>\n<listing id="listing1">x</listing>\n<listing id="listing2">\nx</listing>\n</div>\n}
     outer = initial_linefeed_pre.xpath('//div[@id="outer"]')[0]
-    refute_nil outer
-    assert_equal expected, outer.inner_html
+    refute_nil(outer)
+    assert_equal(expected, outer.inner_html)
   end
 
   def test_initial_linefeed_pre_inner
     expected = %{\n<pre id="pre1">x</pre>\n<pre id="pre2">\nx</pre>\n<textarea id="textarea1">x</textarea>\n<textarea id="textarea2">\nx</textarea>\n<listing id="listing1">x</listing>\n<listing id="listing2">\nx</listing>\n}
     inner = initial_linefeed_pre.at('//div[@id="inner"]')
-    refute_nil inner
-    assert_equal expected, inner.inner_html
+    refute_nil(inner)
+    assert_equal(expected, inner.inner_html)
   end
 
   %w[pre textarea listing].each do |tag|
     define_method("test_initial_linefeed_#{tag}1".to_sym) do
       elem = initial_linefeed_pre.at("//*[@id=\"#{tag}1\"]")
       refute_nil elem
-      assert_equal 'x', elem.inner_html
+      assert_equal "x", elem.inner_html
     end
 
     define_method("test_initial_linefeed_#{tag}2".to_sym) do
@@ -197,7 +197,7 @@ def test_initial_linefeed_pre_inner
   # https://github.com/web-platform-tests/wpt/blob/master/html/syntax/serializing-html-fragments/serializing.html
   def serializing_test_data
     @serializing_test_data ||= begin
-      html = <<-EOF.gsub(/        /, '')
+      html = <<~EOF.gsub(/        /, "")
         <!DOCTYPE html>
         <div id="test" style="display:none">
         <span></span>
@@ -230,7 +230,7 @@ def serializing_test_data
         <span b=c></span>
         </div>
         EOF
-      Nokogiri::HTML5(html).xpath('/html/body/div/*')
+      Nokogiri::HTML5(html).xpath("/html/body/div/*")
     end
     @serializing_test_data
   end
@@ -263,136 +263,136 @@ def serializing_test_data
     ["<noscript><&></noscript>", "<span><noscript><&></noscript></span>"],
     ["<!--data-->", "<span><!--data--></span>"],
     ["<a><b><c></c></b><d>e</d><f><g>h</g></f></a>", "<span><a><b><c></c></b><d>e</d><f><g>h</g></f></a></span>"],
-    ["", "<span b=\"c\"></span>"]
+    ["", "<span b=\"c\"></span>"],
   ].freeze
 
   DOM_TESTS = [
-    ['Attribute in the XML namespace',
-      lambda do
-        doc = Nokogiri::HTML5::Document.new
-        span = Nokogiri::XML::Element.new('span', doc)
-        svg = Nokogiri::XML::Element.new('svg', doc)
-        span.add_child(svg)
-        svg.add_namespace('xml', 'http://www.w3.org/XML/1998/namespace')
-        svg['xml:foo'] = 'test'
-        span
-      end,
-      '<svg xml:foo="test"></svg>',
-      '<span><svg xml:foo="test"></svg></span>'],
+    ["Attribute in the XML namespace",
+     lambda do
+       doc = Nokogiri::HTML5::Document.new
+       span = Nokogiri::XML::Element.new("span", doc)
+       svg = Nokogiri::XML::Element.new("svg", doc)
+       span.add_child(svg)
+       svg.add_namespace("xml", "http://www.w3.org/XML/1998/namespace")
+       svg["xml:foo"] = "test"
+       span
+     end,
+     '<svg xml:foo="test"></svg>',
+     '<span><svg xml:foo="test"></svg></span>'],
 
     ["Attribute in the XML namespace with the prefix not set to xml:",
-      lambda do
-        doc = Nokogiri::HTML5::Document.new
-        span = Nokogiri::XML::Element.new('span', doc)
-        svg = Nokogiri::XML::Element.new('svg', doc)
-        span.add_child(svg)
-        svg['abc:foo'] = 'test'
-        ns = svg.add_namespace('xml', 'http://www.w3.org/XML/1998/namespace')
-        svg.attribute('abc:foo').namespace = ns
-        span
-      end,
-      '<svg xml:foo="test"></svg>',
-      '<span><svg xml:foo="test"></svg></span>'],
+     lambda do
+       doc = Nokogiri::HTML5::Document.new
+       span = Nokogiri::XML::Element.new("span", doc)
+       svg = Nokogiri::XML::Element.new("svg", doc)
+       span.add_child(svg)
+       svg["abc:foo"] = "test"
+       ns = svg.add_namespace("xml", "http://www.w3.org/XML/1998/namespace")
+       svg.attribute("abc:foo").namespace = ns
+       span
+     end,
+     '<svg xml:foo="test"></svg>',
+     '<span><svg xml:foo="test"></svg></span>'],
 
     ["Non-'xmlns' attribute in the xmlns namespace",
-      lambda do
-        doc = Nokogiri::HTML5::Document.new
-        span = Nokogiri::XML::Element.new('span', doc)
-        svg = Nokogiri::XML::Element.new('svg', doc)
-        span.add_child(svg)
-        svg.add_namespace('xmlns', 'http://www.w3.org/2000/xmlns/')
-        svg['xmlns:foo'] = 'test'
-        span
-      end,
-      '<svg xmlns:foo="test"></svg>',
-      '<span><svg xmlns:foo="test"></svg></span>'],
+     lambda do
+       doc = Nokogiri::HTML5::Document.new
+       span = Nokogiri::XML::Element.new("span", doc)
+       svg = Nokogiri::XML::Element.new("svg", doc)
+       span.add_child(svg)
+       svg.add_namespace("xmlns", "http://www.w3.org/2000/xmlns/")
+       svg["xmlns:foo"] = "test"
+       span
+     end,
+     '<svg xmlns:foo="test"></svg>',
+     '<span><svg xmlns:foo="test"></svg></span>'],
 
     ["'xmlns' attribute in the xmlns namespace",
-      lambda do
-        doc = Nokogiri::HTML5::Document.new
-        span = Nokogiri::XML::Element.new('span', doc)
-        svg = Nokogiri::XML::Element.new('svg', doc)
-        span.add_child(svg)
-        svg.add_namespace('xmlns', 'http://www.w3.org/2000/xmlns/')
-        svg['xmlns'] = 'test'
-        span
-      end,
-      '<svg xmlns="test"></svg>',
-      '<span><svg xmlns="test"></svg></span>'],
+     lambda do
+       doc = Nokogiri::HTML5::Document.new
+       span = Nokogiri::XML::Element.new("span", doc)
+       svg = Nokogiri::XML::Element.new("svg", doc)
+       span.add_child(svg)
+       svg.add_namespace("xmlns", "http://www.w3.org/2000/xmlns/")
+       svg["xmlns"] = "test"
+       span
+     end,
+     '<svg xmlns="test"></svg>',
+     '<span><svg xmlns="test"></svg></span>'],
 
     ["Attribute in non-standard namespace",
-      lambda do
-        doc = Nokogiri::HTML5::Document.new
-        span = Nokogiri::XML::Element.new('span', doc)
-        svg = Nokogiri::XML::Element.new('svg', doc)
-        span.add_child(svg)
-        svg.add_namespace('abc', 'fake_ns')
-        svg['abc:def'] = 'test'
-        span
-      end,
-      '<svg abc:def="test"></svg>',
-      '<span><svg abc:def="test"></svg></span>'],
+     lambda do
+       doc = Nokogiri::HTML5::Document.new
+       span = Nokogiri::XML::Element.new("span", doc)
+       svg = Nokogiri::XML::Element.new("svg", doc)
+       span.add_child(svg)
+       svg.add_namespace("abc", "fake_ns")
+       svg["abc:def"] = "test"
+       span
+     end,
+     '<svg abc:def="test"></svg>',
+     '<span><svg abc:def="test"></svg></span>'],
 
     ["<span> starting with U+000A",
-      lambda do
-        doc = Nokogiri::HTML5::Document.new
-        span = Nokogiri::XML::Element.new('span', doc)
-        text = Nokogiri::XML::Text.new("\x0A", doc)
-        span.add_child(text)
-        span
-      end,
-      "\x0A",
-      "<span>\x0A</span>"],
-    #TODO: Processing instructions
+     lambda do
+       doc = Nokogiri::HTML5::Document.new
+       span = Nokogiri::XML::Element.new("span", doc)
+       text = Nokogiri::XML::Text.new("\x0A", doc)
+       span.add_child(text)
+       span
+     end,
+     "\x0A",
+     "<span>\x0A</span>"],
+    # TODO: Processing instructions
   ]
 
   TEXT_ELEMENTS = %w[pre textarea listing]
   TEXT_TESTS = [
     ["<%text> context starting with U+000A",
-      lambda do |tag|
-        doc = Nokogiri::HTML5::Document.new
-        elem = Nokogiri::XML::Element.new(tag, doc)
-        text = Nokogiri::XML::Text.new("\x0A", doc)
-        elem.add_child(text)
-        elem
-      end,
+     lambda do |tag|
+       doc = Nokogiri::HTML5::Document.new
+       elem = Nokogiri::XML::Element.new(tag, doc)
+       text = Nokogiri::XML::Text.new("\x0A", doc)
+       elem.add_child(text)
+       elem
+     end,
      "\x0A",
      "<%text>\x0A</%text>"],
 
     ["<%text> context not starting with U+000A",
-      lambda do |tag|
-        doc = Nokogiri::HTML5::Document.new
-        elem = Nokogiri::XML::Element.new(tag, doc)
-        text = Nokogiri::XML::Text.new("a\x0A", doc)
-        elem.add_child(text)
-        elem
-      end,
+     lambda do |tag|
+       doc = Nokogiri::HTML5::Document.new
+       elem = Nokogiri::XML::Element.new(tag, doc)
+       text = Nokogiri::XML::Text.new("a\x0A", doc)
+       elem.add_child(text)
+       elem
+     end,
      "a\x0A",
      "<%text>a\x0A</%text>"],
 
     ["<%text> non-context starting with U+000A",
-      lambda do |tag|
-        doc = Nokogiri::HTML5::Document.new
-        elem = Nokogiri::XML::Element.new(tag, doc)
-        span = Nokogiri::XML::Element.new('span', doc)
-        text = Nokogiri::XML::Text.new("\x0A", doc)
-        elem.add_child(text)
-        span.add_child(elem)
-        span
-      end,
+     lambda do |tag|
+       doc = Nokogiri::HTML5::Document.new
+       elem = Nokogiri::XML::Element.new(tag, doc)
+       span = Nokogiri::XML::Element.new("span", doc)
+       text = Nokogiri::XML::Text.new("\x0A", doc)
+       elem.add_child(text)
+       span.add_child(elem)
+       span
+     end,
      "<%text>\x0A</%text>",
      "<span><%text>\x0A</%text></span>"],
 
     ["<%text> non-context not starting with U+000A",
-      lambda do |tag|
-        doc = Nokogiri::HTML5::Document.new
-        elem = Nokogiri::XML::Element.new(tag, doc)
-        span = Nokogiri::XML::Element.new('span', doc)
-        text = Nokogiri::XML::Text.new("a\x0A", doc)
-        elem.add_child(text)
-        span.add_child(elem)
-        span
-      end,
+     lambda do |tag|
+       doc = Nokogiri::HTML5::Document.new
+       elem = Nokogiri::XML::Element.new(tag, doc)
+       span = Nokogiri::XML::Element.new("span", doc)
+       text = Nokogiri::XML::Text.new("a\x0A", doc)
+       elem.add_child(text)
+       span.add_child(elem)
+       span
+     end,
      "<%text>a\x0A</%text>",
      "<span><%text>a\x0A</%text></span>"],
   ]
@@ -404,56 +404,52 @@ def serializing_test_data
   ]
   VOID_TESTS = [
     ["Void context node",
-      lambda do |tag|
-        doc = Nokogiri::HTML5::Document.new
-        Nokogiri::XML::Element.new(tag, doc)
-      end,
-      "",
-      "<%void>"],
+     lambda do |tag|
+       doc = Nokogiri::HTML5::Document.new
+       Nokogiri::XML::Element.new(tag, doc)
+     end,
+     "",
+     "<%void>"],
 
     ["void as first child with following siblings",
-      lambda do |tag|
-        doc = Nokogiri::HTML5::Document.new
-        span = Nokogiri::XML::Element.new('span', doc)
-        span.add_child(Nokogiri::XML::Element.new(tag, doc))
-        span.add_child(Nokogiri::XML::Element.new('a', doc))
-          .add_child(Nokogiri::XML::Text.new('test', doc))
-        span.add_child(Nokogiri::XML::Element.new('b', doc))
-        span
-      end,
-      "<%void><a>test</a><b></b>",
-      "<span><%void><a>test</a><b></b></span>"
-     ],
+     lambda do |tag|
+       doc = Nokogiri::HTML5::Document.new
+       span = Nokogiri::XML::Element.new("span", doc)
+       span.add_child(Nokogiri::XML::Element.new(tag, doc))
+       span.add_child(Nokogiri::XML::Element.new("a", doc))
+         .add_child(Nokogiri::XML::Text.new("test", doc))
+       span.add_child(Nokogiri::XML::Element.new("b", doc))
+       span
+     end,
+     "<%void><a>test</a><b></b>",
+     "<span><%void><a>test</a><b></b></span>"],
 
     ["void as second child with following siblings",
-      lambda do |tag|
-        doc = Nokogiri::HTML5::Document.new
-        span = Nokogiri::XML::Element.new('span', doc)
-        span.add_child(Nokogiri::XML::Element.new('a', doc))
-          .add_child(Nokogiri::XML::Text.new('test', doc))
-        span.add_child(Nokogiri::XML::Element.new(tag, doc))
-        span.add_child(Nokogiri::XML::Element.new('b', doc))
-        span
-      end,
-      "<a>test</a><%void><b></b>",
-      "<span><a>test</a><%void><b></b></span>"
-     ],
+     lambda do |tag|
+       doc = Nokogiri::HTML5::Document.new
+       span = Nokogiri::XML::Element.new("span", doc)
+       span.add_child(Nokogiri::XML::Element.new("a", doc))
+         .add_child(Nokogiri::XML::Text.new("test", doc))
+       span.add_child(Nokogiri::XML::Element.new(tag, doc))
+       span.add_child(Nokogiri::XML::Element.new("b", doc))
+       span
+     end,
+     "<a>test</a><%void><b></b>",
+     "<span><a>test</a><%void><b></b></span>"],
     ["void as last child with preceding siblings",
-      lambda do |tag|
-        doc = Nokogiri::HTML5::Document.new
-        span = Nokogiri::XML::Element.new('span', doc)
-        span.add_child(Nokogiri::XML::Element.new('a', doc))
-          .add_child(Nokogiri::XML::Text.new('test', doc))
-        span.add_child(Nokogiri::XML::Element.new('b', doc))
-        span.add_child(Nokogiri::XML::Element.new(tag, doc))
-        span
-      end,
-      "<a>test</a><b></b><%void>",
-      "<span><a>test</a><b></b><%void></span>"
-    ],
+     lambda do |tag|
+       doc = Nokogiri::HTML5::Document.new
+       span = Nokogiri::XML::Element.new("span", doc)
+       span.add_child(Nokogiri::XML::Element.new("a", doc))
+         .add_child(Nokogiri::XML::Text.new("test", doc))
+       span.add_child(Nokogiri::XML::Element.new("b", doc))
+       span.add_child(Nokogiri::XML::Element.new(tag, doc))
+       span
+     end,
+     "<a>test</a><b></b><%void>",
+     "<span><a>test</a><b></b><%void></span>"],
   ]
 
-
   # Generate tests
   def self.cross_map(a1, a2)
     rv = []
@@ -489,22 +485,22 @@ def self.cross_map(a1, a2)
   end
 
   cross_map(TEXT_TESTS, TEXT_ELEMENTS) do |test_data, tag|
-    define_method("test_serializing_text_innerHTML_#{test_data[0].gsub('%text', tag)}".to_sym) do
-      assert_equal test_data[2].gsub('%text', tag), test_data[1].call(tag).inner_html
+    define_method("test_serializing_text_innerHTML_#{test_data[0].gsub("%text", tag)}".to_sym) do
+      assert_equal test_data[2].gsub("%text", tag), test_data[1].call(tag).inner_html
     end
 
-    define_method("test_serialization_text_outerHTML_#{test_data[0].gsub('%text', tag)}".to_sym) do
-      assert_equal test_data[3].gsub('%text', tag), test_data[1].call(tag).serialize
+    define_method("test_serialization_text_outerHTML_#{test_data[0].gsub("%text", tag)}".to_sym) do
+      assert_equal test_data[3].gsub("%text", tag), test_data[1].call(tag).serialize
     end
   end
 
   cross_map(VOID_TESTS, VOID_ELEMENTS) do |test_data, tag|
     define_method("test_serializing_void_innerHTML_#{test_data[0]}_#{tag}".to_sym) do
-      assert_equal test_data[2].gsub('%void', tag), test_data[1].call(tag).inner_html
+      assert_equal test_data[2].gsub("%void", tag), test_data[1].call(tag).inner_html
     end
 
     define_method("test_serialization_void_outerHTML_#{test_data[0]}_#{tag}".to_sym) do
-      assert_equal test_data[3].gsub('%void', tag), test_data[1].call(tag).serialize
+      assert_equal test_data[3].gsub("%void", tag), test_data[1].call(tag).serialize
     end
   end
-end
+end if Nokogiri.uses_gumbo?
diff --git a/test/html5/test_tree-construction.rb b/test/html5/test_tree-construction.rb
new file mode 100644
index 0000000000..191142c2db
--- /dev/null
+++ b/test/html5/test_tree-construction.rb
@@ -0,0 +1,276 @@
+# encoding: utf-8
+# frozen_string_literal: true
+require "helper"
+
+if Nokogiri.uses_gumbo?
+  # Parses one html5lib tree-construction test case (the text between two
+  # "#data" markers) into a Hash with :data, :errors, :new_errors, :script,
+  # :context (fragment tests only) and :document, the expected tree built from
+  # the "| "-prefixed lines. Aborts the process when the input is malformed.
+  def parse_test(test_data)
+    test = { script: :both }
+    index = /(?:^#errors\n|\n#errors\n)/ =~ test_data
+    abort("Expected #errors in\n#{test_data}") if index.nil?
+    skip_amount = $~[0].length
+    # Omit the final new line
+    test[:data] = test_data[0...index]
+
+    # Process the rest line by line
+    lines = test_data[index + skip_amount..-1].split("\n")
+    section_headers = %w[#document-fragment #document #script-off #script-on]
+    index = lines.find_index { |line| line == "#new-errors" || section_headers.include?(line) }
+    abort("Expected #document") if index.nil?
+    test[:errors] = lines[0...index]
+    test[:new_errors] = []
+    if lines[index] == "#new-errors"
+      index += 1
+      # Also stop at the end of the input so that a missing "#document" header
+      # is reported by the check below instead of looping forever.
+      until index >= lines.length || section_headers.include?(lines[index])
+        test[:new_errors] << lines[index]
+        index += 1
+      end
+    end
+
+    if lines[index] == "#document-fragment"
+      test[:context] = lines[index + 1].chomp.split(" ", 2)
+      index += 2
+    end
+    abort("failed to find fragment: #{index}: #{lines[index]}") if test_data.include?("#document-fragment") && test[:context].nil?
+
+    if lines[index] =~ /#script-(on|off)/
+      test[:script] = $~[1].to_sym
+      index += 1
+    end
+
+    abort("Expected #document, got #{lines[index]}") unless lines[index] == "#document"
+    index += 1
+
+    document = {
+      type: test[:context] ? :fragment : :document,
+      children: [],
+    }
+    open_nodes = [document]
+    while index < lines.length
+      abort("Expected '| ' but got #{lines[index]}") unless /^\| ( *)([^ ].*$)/ =~ lines[index]
+      depth = $~[1].length
+      abort("Invalid nesting depth") if depth.odd?
+      depth /= 2
+      abort("Too deep") if depth >= open_nodes.length
+
+      node = {}
+      node_text = $~[2]
+      # A text node may span several lines; keep appending until the closing quote.
+      if node_text[0] == '"'
+        if node_text == '"' || node_text[-1] != '"'
+          loop do
+            index += 1
+            abort("Unterminated text node: #{node_text}") if index >= lines.length
+            node_text << "\n" + lines[index]
+            break if node_text[-1] == '"'
+          end
+        end
+        node[:type] = :text
+        node[:contents] = node_text[1..-2]
+      elsif /^<!DOCTYPE ([^ >]*)(?: "([^"]*)" "(.*)")?>$/ =~ node_text
+        node[:type] = :doctype
+        node[:name] = $~[1]
+        node[:public_id] = $~[2].nil? || $~[2].empty? ? nil : $~[2]
+        node[:system_id] = $~[3].nil? || $~[3].empty? ? nil : $~[3]
+      elsif /^<!-- (.*) -->$/ =~ node_text
+        node[:type] = :comment
+        node[:contents] = $~[1]
+      elsif /^<(svg |math )?(.+)>$/ =~ node_text
+        node[:type] = :element
+        node[:ns] = $~[1].nil? ? nil : $~[1].rstrip
+        node[:tag] = $~[2]
+        node[:attributes] = []
+        node[:children] = []
+      elsif /^([^ ]+ )?([^=]+)="(.*)"$/ =~ node_text
+        node[:type] = :attribute
+        node[:ns] = $~[1].nil? ? nil : $~[1].rstrip
+        node[:name] = $~[2]
+        node[:value] = $~[3]
+      elsif node_text == "content"
+        node[:type] = :template
+      else
+        abort("Unexpected node_text: #{node_text}")
+      end
+
+      # Attributes and template "content" markers attach to the innermost open
+      # element; any other node becomes a child of the node open at this depth.
+      if node[:type] == :attribute
+        abort("depth #{depth} != #{open_nodes.length}") unless depth == open_nodes.length - 1
+        abort("type :#{open_nodes[-1][:type]} != :element") unless open_nodes[-1][:type] == :element
+        abort("element has children") unless open_nodes[-1][:children].empty?
+        open_nodes[-1][:attributes] << node
+      elsif node[:type] == :template
+        abort("depth #{depth} != #{open_nodes.length}") unless depth == open_nodes.length - 1
+        abort("type :#{open_nodes[-1][:type]} != :element") unless open_nodes[-1][:type] == :element
+        abort("tag :#{open_nodes[-1][:tag]} != template") unless open_nodes[-1][:tag] == "template"
+        abort("template has children before the 'content'") unless open_nodes[-1][:children].empty?
+        # Hack. We want the children of this template node to be reparented as
+        # children of the template element.
+        # XXX: Template contents are _not_ supposed to be children of the
+        # template, but we currently mishandle this.
+        open_nodes << open_nodes[-1]
+      else
+        open_nodes[depth][:children] << node
+        open_nodes[depth + 1..-1] = []
+        open_nodes << node if node[:type] == :element
+      end
+      index += 1
+    end
+    test[:document] = document
+    test
+  end
+
+  # Shared behaviour for the generated html5lib tree-construction test classes:
+  # parses @test[:data] and compares the result with the expected tree and errors.
+  class TestHtml5TreeConstructionBase < Nokogiri::TestCase
+    # Like assert_equal, but uses assert_nil when the expected value is nil.
+    def assert_equal_or_nil(exp, act)
+      return assert_nil(act) if exp.nil?
+      assert_equal(exp, act)
+    end
+
+    # Asserts that the parsed node ng_node matches the expected node Hash built
+    # by parse_test. For children only the count is compared, not their content.
+    def compare_nodes(node, ng_node)
+      case ng_node.type
+      when Nokogiri::XML::Node::ELEMENT_NODE
+        assert_equal(node[:type], :element)
+        if node[:ns]
+          refute_nil(ng_node.namespace)
+          assert_equal(node[:ns], ng_node.namespace.prefix)
+        end
+        assert_equal(node[:tag], ng_node.name)
+        attributes = ng_node.attributes
+        assert_equal(node[:attributes].length, attributes.length)
+        node[:attributes].each do |attr|
+          # &. reports a missing attribute as an assertion failure instead of a NoMethodError.
+          value = attr[:ns] ? ng_node["#{attr[:ns]}:#{attr[:name]}"] : attributes[attr[:name]]&.value
+          assert_equal(attr[:value], value)
+        end
+        assert_equal(node[:children].length, ng_node.children.length,
+          "Element <#{node[:tag]}> has wrong number of children: #{ng_node.children.map(&:name)}")
+      when Nokogiri::XML::Node::TEXT_NODE, Nokogiri::XML::Node::CDATA_SECTION_NODE
+        # We preserve the CDATA in the tree, but the tests represent it as text.
+        assert_equal(node[:type], :text)
+        assert_equal(node[:contents], ng_node.content)
+      when Nokogiri::XML::Node::COMMENT_NODE
+        assert_equal(node[:type], :comment)
+        assert_equal(node[:contents], ng_node.content)
+      when Nokogiri::XML::Node::HTML_DOCUMENT_NODE
+        assert_equal(node[:type], :document)
+        assert_equal(node[:children].length, ng_node.children.length)
+      when Nokogiri::XML::Node::DOCUMENT_FRAG_NODE
+        assert_equal(node[:type], :fragment)
+        assert_equal(node[:children].length, ng_node.children.length)
+      when Nokogiri::XML::Node::DTD_NODE
+        assert_equal(node[:type], :doctype)
+        assert_equal(node[:name], ng_node.name)
+        assert_equal_or_nil(node[:public_id], ng_node.external_id)
+        assert_equal_or_nil(node[:system_id], ng_node.system_id)
+      else
+        flunk("Unknown node type #{ng_node.type} (expected #{node[:type]})")
+      end
+    end
+
+    # Parses @test[:data], then checks the resulting tree and the reported errors.
+    def run_test
+      if @test[:context]
+        ctx = @test[:context].join(":")
+        doc = Nokogiri::HTML5::Document.new
+        doc = Nokogiri::HTML5::DocumentFragment.new(doc, @test[:data], ctx, max_errors: @test[:errors].length + 10)
+      else
+        doc = Nokogiri::HTML5.parse(@test[:data], max_errors: @test[:errors].length + 10)
+      end
+      # Walk the tree.
+      exp_nodes = [@test[:document]]
+      act_nodes = [doc]
+      children = [0]
+      compare_nodes(exp_nodes[0], doc)
+      while children.any?
+        child_index = children[-1]
+        exp = exp_nodes[-1]
+        act = act_nodes[-1]
+        if child_index == exp[:children].length
+          exp_nodes.pop
+          act_nodes.pop
+          children.pop
+          next
+        end
+        exp_child = exp[:children][child_index]
+        act_child = act.children[child_index]
+        compare_nodes(exp_child, act_child)
+        children[-1] = child_index + 1
+        next unless exp_child.has_key?(:children)
+        exp_nodes << exp_child
+        act_nodes << act_child
+        children << 0
+      end
+
+      # Test the errors.
+      assert_equal(@test[:errors].length, doc.errors.length)
+
+      # The new, standardized tokenizer errors live in @test[:new_errors]. Let's
+      # match each one to exactly one error in doc.errors. Unfortunately, the
+      # tests specify the column where the error is detected, _not_ the column of
+      # the start of the problematic HTML (e.g., the start of a character reference
+      # or <![CDATA[) the way gumbo does. So check that Gumbo's column is no
+      # later than the error's column.
+      errors = doc.errors.map { |err| { line: err.line, column: err.column, code: err.str1 } }
+      errors.reject! { |err| err[:code] == "generic-parser" }
+      error_regex = /^\((?<line>\d+):(?<column>\d+)(?:-\d+:\d+)?\) (?<code>.*)$/
+      @test[:new_errors].each do |err|
+        assert_match(error_regex, err)
+        m = err.match(error_regex)
+        line = m[:line].to_i
+        column = m[:column].to_i
+        code = m[:code]
+        idx = errors.index do |e|
+          e[:line] == line &&
+            e[:code] == code &&
+            e[:column] <= column
+        end
+        # This error should be the first error in the list.
+        # refute_nil(idx, "Expected to find error #{code} at #{line}:#{column}")
+        assert_equal(0, idx, "Expected to find error #{code} at #{line}:#{column}")
+        errors.delete_at(idx)
+      end
+    end
+  end
+
+  # Build one test class per html5lib-tests ".dat" file and one test method per
+  # test case in it (cases that require scripting to be on are not generated).
+  suite_dir = File.expand_path("../../html5lib-tests/tree-construction", __FILE__)
+  Dir[File.join(suite_dir, "*.dat")].each do |dat_file|
+    class_name = "TestHtml5TreeConstruction" +
+      File.basename(dat_file, ".dat").split(/[_-]/).map(&:capitalize).join("")
+    cases = []
+    File.open(dat_file, "r", encoding: "UTF-8") do |io|
+      # Records are read using "\n\n#data\n" as the separator, so the marker may
+      # still be attached to either end of a record: 6 is the length of the
+      # leading "#data\n" and 8 the length of the trailing "\n\n#data\n".
+      io.each("\n\n#data\n") do |record|
+        record = record[6..-1] if record.start_with?("#data\n")
+        record = record[0..-9] if record.end_with?("\n\n#data\n")
+        cases << parse_test(record)
+      end
+    end
+
+    # Each case becomes a test_<n> method, n being its position in the file.
+    generated = Class.new(TestHtml5TreeConstructionBase) do
+      cases.each_with_index do |test_case, number|
+        next if test_case[:script] == :on
+        define_method(:"test_#{number}") do
+          @test = test_case
+          @index = number
+          run_test
+        end
+      end
+    end
+    Object.const_set(class_name, generated)
+  end
+end
diff --git a/test/test_nokogumbo_contract.rb b/test/test_nokogumbo_contract.rb
new file mode 100644
index 0000000000..83df5837ea
--- /dev/null
+++ b/test/test_nokogumbo_contract.rb
@@ -0,0 +1,26 @@
+require "helper"
+
+describe "Nokogumbo contract expectations" do
+  # per https://github.com/rubys/nokogumbo/pull/171
+  it "includes the HTML5 public interface" do
+    skip("Gumbo is not supported on this platform") unless Nokogiri.uses_gumbo?
+
+    assert_includes(::Nokogiri.singleton_methods, :HTML5)
+    # Expected value goes first so a missing constant fails as: expected "constant", got nil.
+    assert_equal("constant", defined?(::Nokogiri::HTML5))
+    assert_includes(::Nokogiri::HTML5.singleton_methods, :parse)
+    assert_includes(::Nokogiri::HTML5.singleton_methods, :fragment)
+
+    assert_equal("constant", defined?(::Nokogiri::HTML5::Node))
+    assert_equal("constant", defined?(::Nokogiri::HTML5::Document))
+    assert_equal("constant", defined?(::Nokogiri::HTML5::DocumentFragment))
+  end
+
+  it "includes a replacement for the Nokogumbo private interface" do
+    skip("Gumbo is not supported on this platform") unless Nokogiri.uses_gumbo?
+
+    assert_equal("constant", defined?(::Nokogiri::Gumbo))
+    assert_includes(::Nokogiri::Gumbo.singleton_methods, :parse)
+    assert_includes(::Nokogiri::Gumbo.singleton_methods, :fragment)
+  end
+end