From 3d8a57075aebdc647728181f0fd279ed3d063857 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Wed, 19 May 2021 13:51:26 -0400 Subject: [PATCH 1/4] ci: windows config for github actions file copied from main@7df5d3c --- .github/workflows/windows.yml | 50 +++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 .github/workflows/windows.yml diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml new file mode 100644 index 0000000000..36e8cfb78c --- /dev/null +++ b/.github/workflows/windows.yml @@ -0,0 +1,50 @@ +# this is a work in progress! +name: windows +on: + push: + branches: + - main + pull_request: + types: [opened, synchronize] + branches: + - '*' + +jobs: + windows: + name: "windows, sys: ${{ matrix.sys }}, ${{ matrix.ruby }}" + + env: + MAKEFLAGS: -j2 + + runs-on: windows-latest + + strategy: + fail-fast: false + matrix: + sys: [ enable, disable ] + ruby: [ "2.5", "2.6", "2.7", "3.0", "mingw" ] + + steps: + - name: configure git crlf on windows + run: | + git config --system core.autocrlf false + git config --system core.eol lf + - name: checkout + uses: actions/checkout@v2 + - name: load Ruby and bundle install + uses: MSP-Greg/setup-ruby-pkgs@v1 + with: + ruby-version: ${{ matrix.ruby }} + mingw: libxml2 libxslt + bundler-cache: true + - uses: actions/cache@v2 + if: matrix.sys == 'disable' + with: + path: ports/archives + key: ${{ matrix.os }}-${{ matrix.ruby }}-tarballs-${{ hashFiles('**/dependencies.yml') }} + restore-keys: ${{ matrix.os }}-${{ matrix.ruby }}-tarballs- + - name: bundle exec rake compile + run: | + bundle exec rake compile -- --${{ matrix.sys }}-system-libraries + - name: bundle exec rake test + run: bundle exec rake test From 8f54c0fc5a85e4d384f5dacefdfd5f595cf858a4 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Tue, 18 May 2021 09:58:31 -0400 Subject: [PATCH 2/4] test: adjust tests to pass on system libxml2 >= 2.9.11 because the comment parsing improvement was merged upstream. 
--- test/html/test_comments.rb | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/test/html/test_comments.rb b/test/html/test_comments.rb index 476fc4b473..7d207815f5 100644 --- a/test/html/test_comments.rb +++ b/test/html/test_comments.rb @@ -113,8 +113,7 @@ class TestComment < Nokogiri::TestCase let(:subject) { doc.at_css("div#under-test") } let(:inner_div) { doc.at_css("div#do-i-exist") } - if Nokogiri.uses_libxml? && Nokogiri::VersionInfo.instance.libxml2_using_packaged? - # see patches/libxml2/0006-htmlParseComment-treat-as-if-it-closed-the-comment.patch + if Nokogiri::VersionInfo.instance.libxml2_using_packaged? || (Nokogiri::VersionInfo.instance.libxml2_using_system? && Nokogiri.uses_libxml?(">=2.9.11")) it "behaves as if the comment is normally closed" do # COMPLIANT assert_equal 3, subject.children.length assert subject.children[0].comment? @@ -128,9 +127,7 @@ class TestComment < Nokogiri::TestCase end end - if Nokogiri.jruby? || Nokogiri::VersionInfo.instance.libxml2_using_system? - # this behavior may change to the above in libxml v2.9.11 depending on whether - # https://gitlab.gnome.org/GNOME/libxml2/-/merge_requests/82 is merged + if Nokogiri.jruby? || (Nokogiri::VersionInfo.instance.libxml2_using_system? && Nokogiri.uses_libxml?("<2.9.11")) it "behaves as if the comment encompasses the inner div" do # NON-COMPLIANT assert_equal 1, subject.children.length assert subject.children.first.comment? 
From e6709aaa14b8f8cd9e842bd07bb7f2edf94a4d7c Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Wed, 19 May 2021 10:28:24 -0400 Subject: [PATCH 3/4] windows: work around libxml2 xmlCleanupParser Closes #2241 --- ext/nokogiri/nokogiri.c | 19 +++++++++++++++++++ lib/nokogiri/version/info.rb | 1 + 2 files changed, 20 insertions(+) diff --git a/ext/nokogiri/nokogiri.c b/ext/nokogiri/nokogiri.c index fb255f03e3..6258f1d591 100644 --- a/ext/nokogiri/nokogiri.c +++ b/ext/nokogiri/nokogiri.c @@ -191,7 +191,26 @@ Init_nokogiri() rb_const_set(mNokogiri, rb_intern("OTHER_LIBRARY_VERSIONS"), NOKOGIRI_STR_NEW2(NOKOGIRI_OTHER_LIBRARY_VERSIONS)); #endif +#if defined(_WIN32) && !defined(NOKOGIRI_PACKAGED_LIBRARIES) + /* + * We choose *not* to use Ruby's memory management functions with windows DLLs because of this + * issue in libxml 2.9.12: + * + * https://github.com/sparklemotion/nokogiri/issues/2241 + * + * If the atexit() issue gets fixed in a future version of libxml2, then we may be able to skip + * this config only for the specific libxml2 version 2.9.12. + * + * Alternatively, now that Ruby has a generational GC, it might be OK to let libxml2 use its + * default memory management functions (recall that this config was introduced to reduce memory + * bloat and allow Ruby to GC more often); but we should *really* test with production workloads + * before making that kind of a potentially-invasive change. 
+ */ + rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("default")); +#else + rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("ruby")); xmlMemSetup((xmlFreeFunc)ruby_xfree, (xmlMallocFunc)ruby_xmalloc, (xmlReallocFunc)ruby_xrealloc, ruby_strdup); +#endif xmlInitParser(); diff --git a/lib/nokogiri/version/info.rb b/lib/nokogiri/version/info.rb index 1bfea42908..b1ac82275f 100644 --- a/lib/nokogiri/version/info.rb +++ b/lib/nokogiri/version/info.rb @@ -137,6 +137,7 @@ def to_hash else libxml["source"] = "system" end + libxml["memory_management"] = Nokogiri::LIBXML_MEMORY_MANAGEMENT libxml["iconv_enabled"] = libxml2_has_iconv? libxml["compiled"] = compiled_libxml_version.to_s libxml["loaded"] = loaded_libxml_version.to_s From 05f30eb4693cbfc900faced0b0ea172a8fc13df7 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Wed, 19 May 2021 14:06:31 -0400 Subject: [PATCH 4/4] update CHANGELOG --- CHANGELOG.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 57a28c8516..79fb158dd1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,22 @@ Nokogiri follows [Semantic Versioning](https://semver.org/), please see the [REA --- +## 1.11.5 / 2021-05-19 + +### Fixed + +[Windows CRuby] Work around segfault at process exit on Windows when using libxml2 system DLLs. + +libxml 2.9.12 introduced new behavior to avoid memory leaks when unloading libxml2 shared libraries (see [libxml/!66](https://gitlab.gnome.org/GNOME/libxml2/-/merge_requests/66)). Early testing caught this segfault on non-Windows platforms (see [#2059](https://github.com/sparklemotion/nokogiri/issues/2059) and [libxml@956534e](https://gitlab.gnome.org/GNOME/libxml2/-/commit/956534e02ef280795a187c16f6ac04e107f23c5d)) but it was incompletely fixed and is still an issue on Windows platforms that are using system DLLs. 
+ +We work around this by configuring libxml2 in this situation to use its default memory management functions. Note that if Nokogiri is not on Windows, or is not using shared system libraries, it will continue to configure libxml2 to use Ruby's memory management functions. `Nokogiri::VERSION_INFO["libxml"]["memory_management"]` will allow you to verify when the default memory management functions are being used. [[#2241](https://github.com/sparklemotion/nokogiri/issues/2241)] + + +### Changed + +`Nokogiri::VERSION_INFO["libxml"]` now contains the key `"memory_management"` to declare whether libxml2 is using its `default` memory management functions, or whether it uses the memory management functions from `ruby`. See above for more details. + + ## 1.11.4 / 2021-05-14 ### Security