Skip to content

Commit

Permalink
feat: provide packaged libraries' headers in ext/nokogiri/include
Browse files Browse the repository at this point in the history
This commit introduces complete test coverage for:

- the contents of every flavor of gem file
- the filesystem after every flavor of gem installation

The key outcome of this change is to allow nokogumbo to compile
against the same headers used by Nokogiri, #1788

A secondary outcome is much improved confidence and understanding of
how we're packaging the gems and what we're putting on disk during
installation.

Closes #1788
Closes #2076
Closes #2078
  • Loading branch information
flavorjones committed Dec 23, 2020
1 parent 99a8f35 commit f4519f5
Show file tree
Hide file tree
Showing 7 changed files with 372 additions and 2 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Expand Up @@ -23,9 +23,10 @@ coverage
ext/java/Canna
ext/java/nokogiri/**/*.class
ext/nokogiri/*.dll
ext/nokogiri/include
gems
lib/nokogiri/**/nokogiri.so
lib/nokogiri/**/nokogiri.bundle
lib/nokogiri/**/nokogiri.so
lib/nokogiri/nokogiri.jar
pkg
ports
Expand Down
2 changes: 2 additions & 0 deletions Rakefile
Expand Up @@ -43,6 +43,8 @@ HOE = Hoe.spec "nokogiri" do |hoe|
"lib/nokogiri/[0-9].[0-9]",
"coverage",
"concourse/images/*.generated",
"ext/nokogiri/include",
"tmp",
]
hoe.clean_globs += Dir.glob("ports/*").reject { |d| d =~ %r{/archives$} }

Expand Down
22 changes: 22 additions & 0 deletions ext/nokogiri/extconf.rb
Expand Up @@ -445,6 +445,14 @@ def process_recipe(name, version, static_p, cross_p)
end
end

def copy_packaged_libraries_headers(to_path:, from_recipes:)
FileUtils.rm_rf(to_path, secure: true)
FileUtils.mkdir(to_path)
from_recipes.each do |recipe|
FileUtils.cp_r(Dir[File.join(recipe.path, 'include/*')], to_path)
end
end

def do_help
print NOKOGIRI_HELP_MESSAGE
exit! 0
Expand Down Expand Up @@ -762,6 +770,20 @@ def configure

have_func('vasprintf')

unless using_system_libraries?
if cross_build_p
# When precompiling native gems, copy packaged libraries' headers to ext/nokogiri/include
# These are packaged up by the cross-compiling callback in the ExtensionTask
copy_packaged_libraries_headers(to_path: File.join(PACKAGE_ROOT_DIR, "ext/nokogiri/include"),
from_recipes: [libxml2_recipe, libxslt_recipe])
else
# When compiling during installation, install packaged libraries' header files into ext/nokogiri/include
copy_packaged_libraries_headers(to_path: "include",
from_recipes: [libxml2_recipe, libxslt_recipe])
$INSTALLFILES << ["include/**/*.h", "$(rubylibdir)"]
end
end

create_makefile('nokogiri/nokogiri')

if enable_config('clean', true)
Expand Down
3 changes: 3 additions & 0 deletions lib/nokogiri/version/info.rb
Expand Up @@ -90,6 +90,9 @@ def to_hash
libxml["source"] = "packaged"
libxml["precompiled"] = libxml2_precompiled?
libxml["patches"] = Nokogiri::LIBXML2_PATCHES

# this is for nokogumbo and shouldn't be forever
libxml["libxml2_path"] = File.expand_path(File.join(File.dirname(__FILE__), "../../../ext/nokogiri"))
else
libxml["source"] = "system"
end
Expand Down
20 changes: 19 additions & 1 deletion rakelib/cross-ruby.rake
Expand Up @@ -297,11 +297,14 @@ if java?
require "rake/javaextensiontask"
Rake::JavaExtensionTask.new("nokogiri", HOE.spec) do |ext|
jruby_home = RbConfig::CONFIG['prefix']
jars = ["#{jruby_home}/lib/jruby.jar"] + FileList['lib/*.jar']

ext.gem_spec.files.reject! { |path| File.fnmatch?("ext/nokogiri/*.h", path) }

ext.ext_dir = 'ext/java'
ext.lib_dir = 'lib/nokogiri'
ext.source_version = '1.6'
ext.target_version = '1.6'
jars = ["#{jruby_home}/lib/jruby.jar"] + FileList['lib/*.jar']
ext.classpath = jars.map { |x| File.expand_path x }.join ':'
ext.debug = true if ENV['JAVA_DEBUG']
end
Expand Down Expand Up @@ -348,6 +351,21 @@ else

spec.files.reject! { |path| File.fnmatch?('ports/*', path) }
spec.dependencies.reject! { |dep| dep.name=='mini_portile2' }

# when pre-compiling a native gem, package all the C headers sitting in ext/nokogiri/include
# which were copied there in the $INSTALLFILES section of extconf.rb.
# (see scripts/test-gem-file-contents and scripts/test-gem-installation for tests)
headers_dir = "ext/nokogiri/include"

["libxml2", "libxslt"].each do |lib|
unless File.directory?(File.join(headers_dir, lib))
raise "#{lib} headers are not present in #{headers_dir}"
end
end

Dir.glob(File.join(headers_dir, "**", "*.h")).each do |header|
spec.files << header
end
end
end
end
217 changes: 217 additions & 0 deletions scripts/test-gem-file-contents
@@ -0,0 +1,217 @@
#! /usr/bin/env ruby
# frozen_string_literal: true
#
# this script is intended to run as part of the CI test suite.
#
# it inspects the contents of a nokogiri gem file -- both the files and the gemspec -- to ensure
# we're packaging what we expect, and that we're not packaging anything we don't expect.
#
# this file isn't in the `test/` subdirectory because it's intended to be run standalone against a
# built gem file (and not against the source code or behavior of the gem itself).
#

require "bundler/inline"

gemfile do
source "https://rubygems.org"
gem "minitest"
gem "minitest-reporters"
end

require "yaml"

def usage_and_exit(message = nil)
puts "ERROR: #{message}" if message
puts "USAGE: #{File.basename(__FILE__)} <gemfile> [options]"
exit(1)
end

usage_and_exit if ARGV.include?("-h")
usage_and_exit unless gemfile = ARGV[0]
usage_and_exit("#{gemfile} does not exist") unless File.file?(gemfile)
usage_and_exit("#{gemfile} is not a gem") unless /\.gem$/.match(gemfile)
gemfile = File.expand_path(gemfile)

gemfile_contents = Dir.mktmpdir do |dir|
Dir.chdir(dir) do
unless system("tar -xf #{gemfile} data.tar.gz")
raise "could not unpack gem #{gemfile}"
end
%x(tar -ztf data.tar.gz).split("\n")
end
end

gemspec = Dir.mktmpdir do |dir|
Dir.chdir(dir) do
unless system("tar -xf #{gemfile} metadata.gz")
raise "could not unpack gem #{gemfile}"
end
YAML.load(%x(gunzip -c metadata.gz))
end
end

if ARGV.include?("-v")
puts "---------- gemfile contents ----------"
puts gemfile_contents
puts
puts "---------- gemspec ----------"
puts gemspec.to_ruby
puts
end

require "minitest/autorun"
require "minitest/reporters"
Minitest::Reporters.use!([Minitest::Reporters::SpecReporter.new])

puts "Testing '#{gemfile}' (#{gemspec.platform})"
describe File.basename(gemfile) do
let(:cross_rubies_path) { File.join(File.dirname(__FILE__), "..", ".cross_rubies") }
let(:native_ruby_versions) do
File.read(cross_rubies_path).split("\n").map do |line|
line.split(":").first.split(".").take(2).join(".") # ugh
end.uniq.sort
end

describe "setup" do
it "gemfile contains some files" do
actual = gemfile_contents.length
assert_operator(actual, :>, 60, "expected gemfile to contain more than #{actual} files")
end

it "gemspec is a Gem::Specfication" do
assert_equal(Gem::Specification, gemspec.class)
end
end

describe "all platforms" do
it "contains every ruby file in lib/" do
expected = %x(git ls-files lib).split("\n").grep(/\.rb$/).sort
actual = gemfile_contents.grep(%r{^lib/}).grep(/\.rb$/).sort
assert_equal(expected, actual)
end
end

describe "ruby platform" do
it "contains the port files" do
actual_ports = gemfile_contents.grep(%r{^ports/})
assert_equal(1, actual_ports.grep(/libxml2-\d+\.\d+\.\d+\.tar\.gz/).length,
"expected #{actual_ports} to include libxml2")
assert_equal(1, actual_ports.grep(/libxslt-\d+\.\d+\.\d+\.tar\.gz/).length,
"expected #{actual_ports} to include libxslt")
assert_equal(2, actual_ports.length)
end

it "contains the patch files" do
assert_operator(gemfile_contents.grep(%r{^patches/}).length, :>, 0)
end

it "contains ext/nokogiri C and header files" do
assert_operator(gemfile_contents.grep(%r{^ext/nokogiri/.*\.c}).length, :>, 20)
assert_operator(gemfile_contents.grep(%r{^ext/nokogiri/.*\.h}).length, :>, 20)
end

it "does not contain packaged libraries' header files" do
# these files are present after installation if the packaged libraries are used
assert_empty(gemfile_contents.grep(%r{^ext/nokogiri/include/}))
end

it "does not contain java files" do
assert_empty(gemfile_contents.grep(%r{^ext/java/}))
assert_empty(gemfile_contents.grep(/.*\.jar$/))
end

it "depends on mini_portile2" do
assert(gemspec.dependencies.find { |d| d.name == "mini_portile2" })
end

it "includes C files in extra_rdoc_files" do
assert_operator(gemspec.extra_rdoc_files.grep(%r{ext/nokogiri/.*\.c$}).length, :>, 10)
end
end if gemspec.platform == Gem::Platform::RUBY

describe "native platform" do
it "does not contain the port files" do
assert_empty(gemfile_contents.grep(%r{^ports/}))
end

it "does not contain the patch files" do
assert_empty(gemfile_contents.grep(%r{^patches/}))
end

it "contains ext/nokogiri C and header files" do
assert_operator(gemfile_contents.grep(%r{^ext/nokogiri/.*\.c}).length, :>, 20)
assert_operator(gemfile_contents.grep(%r{^ext/nokogiri/.*\.h}).length, :>, 20)
end

it "contains packaged libraries' header files" do
assert_includes(gemfile_contents, "ext/nokogiri/include/libxml2/libxml/tree.h")
assert_includes(gemfile_contents, "ext/nokogiri/include/libxslt/xslt.h")
assert_includes(gemfile_contents, "ext/nokogiri/include/libexslt/exslt.h")
end

it "does not contain java files" do
assert_empty(gemfile_contents.grep(%r{^ext/java/}))
assert_empty(gemfile_contents.grep(/.*\.jar$/))
end

it "does not depend on mini_portile2" do
refute(gemspec.dependencies.find { |d| d.name == "mini_portile2" })
end

it "includes C files in extra_rdoc_files" do
assert_operator(gemspec.extra_rdoc_files.grep(%r{ext/nokogiri/.*\.c$}).length, :>, 10)
end

it "contains expected .so files " do
native_ruby_versions.each do |version|
assert_includes(gemfile_contents, "lib/nokogiri/#{version}/nokogiri.so")
end
refute_includes(gemfile_contents, "lib/nokogiri/nokogiri.so")
assert_equal(native_ruby_versions.length, Dir.glob("lib/nokogiri/**/*.so").length)
end
end if gemspec.platform.is_a?(Gem::Platform) && gemspec.platform.cpu

describe "java platform" do
it "does not contain the port files" do
assert_empty(gemfile_contents.grep(%r{^ports/}))
end

it "does not contain the patch files" do
assert_empty(gemfile_contents.grep(%r{^patches/}))
end

it "contains ext/nokogiri C files" do
assert_operator(gemfile_contents.grep(%r{^ext/nokogiri/.*\.c}).length, :>, 20)
end

it "does not contain ext/nokogiri header files" do
assert_empty(gemfile_contents.grep(%r{^ext/nokogiri/.*\.h}))
end

it "does not contain packaged libraries' header files" do
assert_empty(gemfile_contents.grep(%r{^ext/nokogiri/include/}))
end

it "contains the java jar files" do
actual_jars = gemfile_contents.grep(/.*\.jar$/)
expected_jars = [
"isorelax",
"jing",
"nekodtd",
"nekohtml",
"nokogiri",
"serializer",
"xalan",
"xercesImpl",
"xml-apis",
]
expected_jars.each do |jar|
assert_equal(1, actual_jars.grep(%r{/#{jar}\.jar$}).length, "expected to contain #{jar}.jar")
end
end

it "does not depend on mini_portile2" do
refute(gemspec.dependencies.find { |d| d.name == "mini_portile2" })
end
end if gemspec.platform == Gem::Platform.new("java")
end

0 comments on commit f4519f5

Please sign in to comment.