From b682ac5afeabfc636edf282700f05fd5923fa396 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Tue, 5 Jan 2021 15:39:43 -0500 Subject: [PATCH 1/5] ci: ensure all tests are running `setup` --- test/namespaces/test_namespaces_preservation.rb | 1 + test/test_xslt_transforms.rb | 1 + test/xml/sax/test_parser_context.rb | 1 + test/xml/test_relax_ng.rb | 1 + test/xml/test_schema.rb | 1 + test/xml/test_unparented_node.rb | 1 + 6 files changed, 6 insertions(+) diff --git a/test/namespaces/test_namespaces_preservation.rb b/test/namespaces/test_namespaces_preservation.rb index f3e576aa47..be12bd3d35 100755 --- a/test/namespaces/test_namespaces_preservation.rb +++ b/test/namespaces/test_namespaces_preservation.rb @@ -5,6 +5,7 @@ module XML class TestNamespacePreservation < Nokogiri::TestCase def setup + super @xml = Nokogiri.XML <<-eoxml diff --git a/test/test_xslt_transforms.rb b/test/test_xslt_transforms.rb index fea43c9aff..73d0967692 100644 --- a/test/test_xslt_transforms.rb +++ b/test/test_xslt_transforms.rb @@ -2,6 +2,7 @@ class TestXsltTransforms < Nokogiri::TestCase def setup + super @doc = Nokogiri::XML(File.open(XML_FILE)) end diff --git a/test/xml/sax/test_parser_context.rb b/test/xml/sax/test_parser_context.rb index c460c6203a..d50b0287af 100644 --- a/test/xml/sax/test_parser_context.rb +++ b/test/xml/sax/test_parser_context.rb @@ -7,6 +7,7 @@ module XML module SAX class TestParserContext < Nokogiri::SAX::TestCase def setup + super @xml = ' world diff --git a/test/xml/test_relax_ng.rb b/test/xml/test_relax_ng.rb index 733e9b6547..c6c8dc7776 100644 --- a/test/xml/test_relax_ng.rb +++ b/test/xml/test_relax_ng.rb @@ -4,6 +4,7 @@ module Nokogiri module XML class TestRelaxNG < Nokogiri::TestCase def setup + super assert @schema = Nokogiri::XML::RelaxNG(File.read(ADDRESS_SCHEMA_FILE)) end diff --git a/test/xml/test_schema.rb b/test/xml/test_schema.rb index 9d52ebbdaf..e5d581dc9a 100644 --- a/test/xml/test_schema.rb +++ b/test/xml/test_schema.rb @@ -4,6 +4,7 @@ module Nokogiri module XML class TestSchema < Nokogiri::TestCase def setup + super assert @xsd = Nokogiri::XML::Schema(File.read(PO_SCHEMA_FILE)) end diff --git a/test/xml/test_unparented_node.rb b/test/xml/test_unparented_node.rb index 2d7327a5db..5836c1c8f4 100644 --- a/test/xml/test_unparented_node.rb +++ b/test/xml/test_unparented_node.rb @@ -6,6 +6,7 @@ module Nokogiri module XML class TestUnparentedNode < Nokogiri::TestCase def setup + super begin xml = Nokogiri::XML.parse(File.read(XML_FILE), XML_FILE) @node = xml.at('staff') From 07459fd0e0db1d488748726d1f4accd9bac18646 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Tue, 5 Jan 2021 17:02:34 -0500 Subject: [PATCH 2/5] fix(test): clobber libxml2's global error handler before every test to check that we're setting error handlers everywhere we need to. Related to #2168 --- ext/nokogiri/nokogiri.c | 3 +++ ext/nokogiri/test_global_handlers.c | 41 +++++++++++++++++++++++++++++ test/helper.rb | 14 ++++++++++ 3 files changed, 58 insertions(+) create mode 100644 ext/nokogiri/test_global_handlers.c diff --git a/ext/nokogiri/nokogiri.c b/ext/nokogiri/nokogiri.c index 963253ad96..70ad541684 100644 --- a/ext/nokogiri/nokogiri.c +++ b/ext/nokogiri/nokogiri.c @@ -1,5 +1,7 @@ #include +void init_test_global_handlers(); /* in lieu of test_global_handlers.h */ + VALUE mNokogiri ; VALUE mNokogiriXml ; VALUE mNokogiriHtml ; @@ -132,4 +134,5 @@ void Init_nokogiri() init_xml_relax_ng(); init_nokogiri_io(); init_xml_encoding_handler(); + init_test_global_handlers(); } diff --git a/ext/nokogiri/test_global_handlers.c b/ext/nokogiri/test_global_handlers.c new file mode 100644 index 0000000000..09d3b7e400 --- /dev/null +++ b/ext/nokogiri/test_global_handlers.c @@ -0,0 +1,41 @@ +#include +#include "libxml/xmlerror.h" + +static VALUE foreign_error_handler_block = Qnil; + +static void foreign_error_handler(void* user_data, xmlErrorPtr c_error) +{ + rb_funcall(foreign_error_handler_block, rb_intern("call"), 0); +} + +/* + * call-seq: + * __foreign_error_handler { ... } -> nil + * + * Override libxml2's global error handlers to call the block. This method thus has very little + * value except to test that Nokogiri is properly setting error handlers elsewhere in the code. See + * test/helper.rb for how this is being used. + */ +static VALUE +rb_foreign_error_handler(VALUE klass) +{ + rb_need_block(); + foreign_error_handler_block = rb_block_proc(); + xmlSetStructuredErrorFunc(NULL, foreign_error_handler); + return Qnil; +} + +/* + * Document-module: Nokogiri::Test + * + * The Nokogiri::Test module should only be used for testing Nokogiri. + * Do NOT use this outside of the Nokogiri test suite. + */ +void +init_test_global_handlers() +{ + VALUE mNokogiri = rb_define_module("Nokogiri"); + VALUE mNokogiriTest = rb_define_module_under(mNokogiri, "Test"); + + rb_define_singleton_method(mNokogiriTest, "__foreign_error_handler", rb_foreign_error_handler, 0); +} diff --git a/test/helper.rb b/test/helper.rb index fc380878f2..bd08bccc4c 100644 --- a/test/helper.rb +++ b/test/helper.rb @@ -58,7 +58,16 @@ class TestCase < MiniTest::Spec XSLT_FILE = File.join(ASSETS_DIR, 'staff.xslt') XPATH_FILE = File.join(ASSETS_DIR, 'slow-xpath.xml') + def setup + @fake_error_handler_called = false + Nokogiri::Test.__foreign_error_handler do + @fake_error_handler_called = true + end if Nokogiri.uses_libxml? + end + def teardown + refute(@fake_error_handler_called, "the fake error handler should never get called") if Nokogiri.uses_libxml? + if ENV['NOKOGIRI_GC'] STDOUT.putc '!' if RUBY_PLATFORM =~ /java/ @@ -135,6 +144,11 @@ class Doc < XML::SAX::Document attr_reader :xmldecls attr_reader :processing_instructions + def initialize + @errors = [] + super + end + def xmldecl version, encoding, standalone @xmldecls = [version, encoding, standalone].compact super From 35aa88b75e7d8436217471ef6ff58f814466b26e Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Tue, 5 Jan 2021 17:25:00 -0500 Subject: [PATCH 3/5] fix(cruby): reset libxml2's error handler in sax and push parsers Note that this change regresses the behavior changed in 771164d which we'll have to fix in an upcoming commit. Related to #2168, #87 --- ext/nokogiri/html_sax_parser_context.c | 2 ++ ext/nokogiri/html_sax_push_parser.c | 2 ++ ext/nokogiri/xml_sax_parser_context.c | 2 ++ ext/nokogiri/xml_sax_push_parser.c | 2 ++ 4 files changed, 8 insertions(+) diff --git a/ext/nokogiri/html_sax_parser_context.c b/ext/nokogiri/html_sax_parser_context.c index 9f41b51adf..7f1379abf2 100644 --- a/ext/nokogiri/html_sax_parser_context.c +++ b/ext/nokogiri/html_sax_parser_context.c @@ -92,6 +92,8 @@ parse_with(VALUE self, VALUE sax_handler) ctxt->sax = sax; ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler); + xmlSetStructuredErrorFunc(NULL, NULL); + rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt); return self; diff --git a/ext/nokogiri/html_sax_push_parser.c b/ext/nokogiri/html_sax_push_parser.c index 2df4532f10..3a739ae1d2 100644 --- a/ext/nokogiri/html_sax_push_parser.c +++ b/ext/nokogiri/html_sax_push_parser.c @@ -20,6 +20,8 @@ static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk) size = (int)RSTRING_LEN(_chunk); } + xmlSetStructuredErrorFunc(NULL, NULL); + if(htmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) { if (!(ctx->options & XML_PARSE_RECOVER)) { xmlErrorPtr e = xmlCtxtGetLastError(ctx); diff --git a/ext/nokogiri/xml_sax_parser_context.c b/ext/nokogiri/xml_sax_parser_context.c index 19c96c69c1..370d7b9fc3 100644 --- a/ext/nokogiri/xml_sax_parser_context.c +++ b/ext/nokogiri/xml_sax_parser_context.c @@ -120,6 +120,8 @@ parse_with(VALUE self, VALUE sax_handler) ctxt->sax = sax; ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler); + xmlSetStructuredErrorFunc(NULL, NULL); + rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt); return Qnil; diff --git a/ext/nokogiri/xml_sax_push_parser.c b/ext/nokogiri/xml_sax_push_parser.c index dac0a24db5..9daa22767d 100644 --- a/ext/nokogiri/xml_sax_push_parser.c +++ b/ext/nokogiri/xml_sax_push_parser.c @@ -35,6 +35,8 @@ static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk) size = (int)RSTRING_LEN(_chunk); } + xmlSetStructuredErrorFunc(NULL, NULL); + if (xmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) { if (!(ctx->options & XML_PARSE_RECOVER)) { xmlErrorPtr e = xmlCtxtGetLastError(ctx); From f9a2c4e050f337e30f08ac32f19e1e10f229723a Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Tue, 5 Jan 2021 19:11:27 -0500 Subject: [PATCH 4/5] fix: restore proper error handling in the SAX push parser originally introduced in 771164d but broken in the recent commits. This is an incomplete fix. We should adopt this same strategy of save-and-restore everywhere we set the error handlers. --- ext/nokogiri/html_sax_push_parser.c | 22 +++++++++++++--------- ext/nokogiri/xml_syntax_error.c | 23 +++++++++++++++++++++++ ext/nokogiri/xml_syntax_error.h | 18 +++++++++++++++--- 3 files changed, 51 insertions(+), 12 deletions(-) diff --git a/ext/nokogiri/html_sax_push_parser.c b/ext/nokogiri/html_sax_push_parser.c index 3a739ae1d2..af1dfd2b4f 100644 --- a/ext/nokogiri/html_sax_push_parser.c +++ b/ext/nokogiri/html_sax_push_parser.c @@ -9,9 +9,10 @@ static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk) { xmlParserCtxtPtr ctx; - const char * chunk = NULL; - int size = 0; - + const char * chunk = NULL; + int size = 0; + int status = 0; + libxmlStructuredErrorHandlerState handler_state; Data_Get_Struct(self, xmlParserCtxt, ctx); @@ -20,13 +21,16 @@ static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk) size = (int)RSTRING_LEN(_chunk); } - xmlSetStructuredErrorFunc(NULL, NULL); + Nokogiri_structured_error_func_save_and_set(&handler_state, NULL, NULL); + + status = htmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0); + + Nokogiri_structured_error_func_restore(&handler_state); - if(htmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) { - if (!(ctx->options & XML_PARSE_RECOVER)) { - xmlErrorPtr e = xmlCtxtGetLastError(ctx); - Nokogiri_error_raise(NULL, e); - } + if ((status != 0) && !(ctx->options & XML_PARSE_RECOVER)) { + // TODO: there appear to be no tests for this block + xmlErrorPtr e = xmlCtxtGetLastError(ctx); + Nokogiri_error_raise(NULL, e); } return self; diff --git a/ext/nokogiri/xml_syntax_error.c b/ext/nokogiri/xml_syntax_error.c index 0b240f05a5..13da073da9 100644 --- a/ext/nokogiri/xml_syntax_error.c +++ b/ext/nokogiri/xml_syntax_error.c @@ -1,5 +1,28 @@ #include +void +Nokogiri_structured_error_func_save(libxmlStructuredErrorHandlerState *handler_state) +{ + /* this method is tightly coupled to the implementation of xmlSetStructuredErrorFunc */ + handler_state->user_data = xmlStructuredErrorContext; + handler_state->handler = xmlStructuredError; +} + +void +Nokogiri_structured_error_func_save_and_set(libxmlStructuredErrorHandlerState *handler_state, + void *user_data, + xmlStructuredErrorFunc handler) +{ + Nokogiri_structured_error_func_save(handler_state); + xmlSetStructuredErrorFunc(user_data, handler); +} + +void +Nokogiri_structured_error_func_restore(libxmlStructuredErrorHandlerState *handler_state) +{ + xmlSetStructuredErrorFunc(handler_state->user_data, handler_state->handler); +} + void Nokogiri_error_array_pusher(void * ctx, xmlErrorPtr error) { VALUE list = (VALUE)ctx; diff --git a/ext/nokogiri/xml_syntax_error.h b/ext/nokogiri/xml_syntax_error.h index 58475cb852..6994cb1ff5 100644 --- a/ext/nokogiri/xml_syntax_error.h +++ b/ext/nokogiri/xml_syntax_error.h @@ -3,11 +3,23 @@ #include +typedef struct _libxmlStructuredErrorHandlerState { + void *user_data; + xmlStructuredErrorFunc handler; +} libxmlStructuredErrorHandlerState ; + void init_xml_syntax_error(); + +void Nokogiri_structured_error_func_save(libxmlStructuredErrorHandlerState *handler_state); +void Nokogiri_structured_error_func_save_and_set(libxmlStructuredErrorHandlerState *handler_state, + void *user_data, + xmlStructuredErrorFunc handler); +void Nokogiri_structured_error_func_restore(libxmlStructuredErrorHandlerState *handler_state); + VALUE Nokogiri_wrap_xml_syntax_error(xmlErrorPtr error); -void Nokogiri_error_array_pusher(void * ctx, xmlErrorPtr error); -NORETURN(void Nokogiri_error_raise(void * ctx, xmlErrorPtr error)); +void Nokogiri_error_array_pusher(void *ctx, xmlErrorPtr error); +NORETURN(void Nokogiri_error_raise(void *ctx, xmlErrorPtr error)); extern VALUE cNokogiriXmlSyntaxError; -#endif +#endif /* NOKOGIRI_XML_SYNTAX_ERROR */ From bbf850c6297347461df92b1f208f4e8fc744e1c8 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Tue, 5 Jan 2021 22:46:04 -0500 Subject: [PATCH 5/5] changelog: update for #2168 --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8159dec6cb..a08d62bb52 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,13 @@ Nokogiri follows [Semantic Versioning](https://semver.org/), please see the [REA --- +## v1.11.1 / unreleased + +### Fixed + +* [CRuby] If `libxml-ruby` is loaded before `nokogiri`, the SAX and Push parsers no longer call `libxml-ruby`'s handlers. Instead, they defensively override the libxml2 global handler before parsing. [[#2168](https://github.com/sparklemotion/nokogiri/issues/2168)] + + ## v1.11.0 / 2021-01-03 ### Notes