diff --git a/CHANGELOG.md b/CHANGELOG.md index 8159dec6cb..a08d62bb52 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,13 @@ Nokogiri follows [Semantic Versioning](https://semver.org/), please see the [REA --- +## v1.11.1 / unreleased + +### Fixed + +* [CRuby] If `libxml-ruby` is loaded before `nokogiri`, the SAX and Push parsers no longer call `libxml-ruby`'s handlers. Instead, they defensively override the libxml2 global handler before parsing. [[#2168](https://github.com/sparklemotion/nokogiri/issues/2168)] + + ## v1.11.0 / 2021-01-03 ### Notes diff --git a/ext/nokogiri/html_sax_parser_context.c b/ext/nokogiri/html_sax_parser_context.c index 9f41b51adf..7f1379abf2 100644 --- a/ext/nokogiri/html_sax_parser_context.c +++ b/ext/nokogiri/html_sax_parser_context.c @@ -92,6 +92,8 @@ parse_with(VALUE self, VALUE sax_handler) ctxt->sax = sax; ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler); + xmlSetStructuredErrorFunc(NULL, NULL); + rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt); return self; diff --git a/ext/nokogiri/html_sax_push_parser.c b/ext/nokogiri/html_sax_push_parser.c index 2df4532f10..af1dfd2b4f 100644 --- a/ext/nokogiri/html_sax_push_parser.c +++ b/ext/nokogiri/html_sax_push_parser.c @@ -9,9 +9,10 @@ static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk) { xmlParserCtxtPtr ctx; - const char * chunk = NULL; - int size = 0; - + const char * chunk = NULL; + int size = 0; + int status = 0; + libxmlStructuredErrorHandlerState handler_state; Data_Get_Struct(self, xmlParserCtxt, ctx); @@ -20,11 +21,16 @@ static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk) size = (int)RSTRING_LEN(_chunk); } - if(htmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) { - if (!(ctx->options & XML_PARSE_RECOVER)) { - xmlErrorPtr e = xmlCtxtGetLastError(ctx); - Nokogiri_error_raise(NULL, e); - } + Nokogiri_structured_error_func_save_and_set(&handler_state, NULL, NULL); + + status = htmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0); + + Nokogiri_structured_error_func_restore(&handler_state); + + if ((status != 0) && !(ctx->options & XML_PARSE_RECOVER)) { + // TODO: there appear to be no tests for this block + xmlErrorPtr e = xmlCtxtGetLastError(ctx); + Nokogiri_error_raise(NULL, e); } return self; diff --git a/ext/nokogiri/nokogiri.c b/ext/nokogiri/nokogiri.c index 963253ad96..70ad541684 100644 --- a/ext/nokogiri/nokogiri.c +++ b/ext/nokogiri/nokogiri.c @@ -1,5 +1,7 @@ #include +void init_test_global_handlers(); /* in lieu of test_global_handlers.h */ + VALUE mNokogiri ; VALUE mNokogiriXml ; VALUE mNokogiriHtml ; @@ -132,4 +134,5 @@ void Init_nokogiri() init_xml_relax_ng(); init_nokogiri_io(); init_xml_encoding_handler(); + init_test_global_handlers(); } diff --git a/ext/nokogiri/test_global_handlers.c b/ext/nokogiri/test_global_handlers.c new file mode 100644 index 0000000000..09d3b7e400 --- /dev/null +++ b/ext/nokogiri/test_global_handlers.c @@ -0,0 +1,41 @@ +#include +#include "libxml/xmlerror.h" + +static VALUE foreign_error_handler_block = Qnil; + +static void foreign_error_handler(void* user_data, xmlErrorPtr c_error) +{ + rb_funcall(foreign_error_handler_block, rb_intern("call"), 0); +} + +/* + * call-seq: + * __foreign_error_handler { ... } -> nil + * + * Override libxml2's global error handlers to call the block. This method thus has very little + * value except to test that Nokogiri is properly setting error handlers elsewhere in the code. See + * test/helper.rb for how this is being used. + */ +static VALUE +rb_foreign_error_handler(VALUE klass) +{ + rb_need_block(); + foreign_error_handler_block = rb_block_proc(); + xmlSetStructuredErrorFunc(NULL, foreign_error_handler); + return Qnil; +} + +/* + * Document-module: Nokogiri::Test + * + * The Nokogiri::Test module should only be used for testing Nokogiri. + * Do NOT use this outside of the Nokogiri test suite. + */ +void +init_test_global_handlers() +{ + VALUE mNokogiri = rb_define_module("Nokogiri"); + VALUE mNokogiriTest = rb_define_module_under(mNokogiri, "Test"); + + rb_define_singleton_method(mNokogiriTest, "__foreign_error_handler", rb_foreign_error_handler, 0); +} diff --git a/ext/nokogiri/xml_sax_parser_context.c b/ext/nokogiri/xml_sax_parser_context.c index 19c96c69c1..370d7b9fc3 100644 --- a/ext/nokogiri/xml_sax_parser_context.c +++ b/ext/nokogiri/xml_sax_parser_context.c @@ -120,6 +120,8 @@ parse_with(VALUE self, VALUE sax_handler) ctxt->sax = sax; ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler); + xmlSetStructuredErrorFunc(NULL, NULL); + rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt); return Qnil; diff --git a/ext/nokogiri/xml_sax_push_parser.c b/ext/nokogiri/xml_sax_push_parser.c index dac0a24db5..9daa22767d 100644 --- a/ext/nokogiri/xml_sax_push_parser.c +++ b/ext/nokogiri/xml_sax_push_parser.c @@ -35,6 +35,8 @@ static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk) size = (int)RSTRING_LEN(_chunk); } + xmlSetStructuredErrorFunc(NULL, NULL); + if (xmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) { if (!(ctx->options & XML_PARSE_RECOVER)) { xmlErrorPtr e = xmlCtxtGetLastError(ctx); diff --git a/ext/nokogiri/xml_syntax_error.c b/ext/nokogiri/xml_syntax_error.c index 0b240f05a5..13da073da9 100644 --- a/ext/nokogiri/xml_syntax_error.c +++ b/ext/nokogiri/xml_syntax_error.c @@ -1,5 +1,28 @@ #include +void +Nokogiri_structured_error_func_save(libxmlStructuredErrorHandlerState *handler_state) +{ + /* this method is tightly coupled to the implementation of xmlSetStructuredErrorFunc */ + handler_state->user_data = xmlStructuredErrorContext; + handler_state->handler = xmlStructuredError; +} + +void +Nokogiri_structured_error_func_save_and_set(libxmlStructuredErrorHandlerState *handler_state, + void *user_data, + xmlStructuredErrorFunc handler) +{ + Nokogiri_structured_error_func_save(handler_state); + xmlSetStructuredErrorFunc(user_data, handler); +} + +void +Nokogiri_structured_error_func_restore(libxmlStructuredErrorHandlerState *handler_state) +{ + xmlSetStructuredErrorFunc(handler_state->user_data, handler_state->handler); +} + void Nokogiri_error_array_pusher(void * ctx, xmlErrorPtr error) { VALUE list = (VALUE)ctx; diff --git a/ext/nokogiri/xml_syntax_error.h b/ext/nokogiri/xml_syntax_error.h index 58475cb852..6994cb1ff5 100644 --- a/ext/nokogiri/xml_syntax_error.h +++ b/ext/nokogiri/xml_syntax_error.h @@ -3,11 +3,23 @@ #include +typedef struct _libxmlStructuredErrorHandlerState { + void *user_data; + xmlStructuredErrorFunc handler; +} libxmlStructuredErrorHandlerState ; + void init_xml_syntax_error(); + +void Nokogiri_structured_error_func_save(libxmlStructuredErrorHandlerState *handler_state); +void Nokogiri_structured_error_func_save_and_set(libxmlStructuredErrorHandlerState *handler_state, + void *user_data, + xmlStructuredErrorFunc handler); +void Nokogiri_structured_error_func_restore(libxmlStructuredErrorHandlerState *handler_state); + VALUE Nokogiri_wrap_xml_syntax_error(xmlErrorPtr error); -void Nokogiri_error_array_pusher(void * ctx, xmlErrorPtr error); -NORETURN(void Nokogiri_error_raise(void * ctx, xmlErrorPtr error)); +void Nokogiri_error_array_pusher(void *ctx, xmlErrorPtr error); +NORETURN(void Nokogiri_error_raise(void *ctx, xmlErrorPtr error)); extern VALUE cNokogiriXmlSyntaxError; -#endif +#endif /* NOKOGIRI_XML_SYNTAX_ERROR */ diff --git a/test/helper.rb b/test/helper.rb index fc380878f2..bd08bccc4c 100644 --- a/test/helper.rb +++ b/test/helper.rb @@ -58,7 +58,16 @@ class TestCase < MiniTest::Spec XSLT_FILE = File.join(ASSETS_DIR, 'staff.xslt') XPATH_FILE = File.join(ASSETS_DIR, 'slow-xpath.xml') + def setup + @fake_error_handler_called = false + Nokogiri::Test.__foreign_error_handler do + @fake_error_handler_called = true + end if Nokogiri.uses_libxml? + end + def teardown + refute(@fake_error_handler_called, "the fake error handler should never get called") if Nokogiri.uses_libxml? + if ENV['NOKOGIRI_GC'] STDOUT.putc '!' if RUBY_PLATFORM =~ /java/ @@ -135,6 +144,11 @@ class Doc < XML::SAX::Document attr_reader :xmldecls attr_reader :processing_instructions + def initialize + @errors = [] + super + end + def xmldecl version, encoding, standalone @xmldecls = [version, encoding, standalone].compact super diff --git a/test/namespaces/test_namespaces_preservation.rb b/test/namespaces/test_namespaces_preservation.rb index f3e576aa47..be12bd3d35 100755 --- a/test/namespaces/test_namespaces_preservation.rb +++ b/test/namespaces/test_namespaces_preservation.rb @@ -5,6 +5,7 @@ module XML class TestNamespacePreservation < Nokogiri::TestCase def setup + super @xml = Nokogiri.XML <<-eoxml diff --git a/test/test_xslt_transforms.rb b/test/test_xslt_transforms.rb index fea43c9aff..73d0967692 100644 --- a/test/test_xslt_transforms.rb +++ b/test/test_xslt_transforms.rb @@ -2,6 +2,7 @@ class TestXsltTransforms < Nokogiri::TestCase def setup + super @doc = Nokogiri::XML(File.open(XML_FILE)) end diff --git a/test/xml/sax/test_parser_context.rb b/test/xml/sax/test_parser_context.rb index c460c6203a..d50b0287af 100644 --- a/test/xml/sax/test_parser_context.rb +++ b/test/xml/sax/test_parser_context.rb @@ -7,6 +7,7 @@ module XML module SAX class TestParserContext < Nokogiri::SAX::TestCase def setup + super @xml = ' world diff --git a/test/xml/test_relax_ng.rb b/test/xml/test_relax_ng.rb index 733e9b6547..c6c8dc7776 100644 --- a/test/xml/test_relax_ng.rb +++ b/test/xml/test_relax_ng.rb @@ -4,6 +4,7 @@ module Nokogiri module XML class TestRelaxNG < Nokogiri::TestCase def setup + super assert @schema = Nokogiri::XML::RelaxNG(File.read(ADDRESS_SCHEMA_FILE)) end diff --git a/test/xml/test_schema.rb b/test/xml/test_schema.rb index 9d52ebbdaf..e5d581dc9a 100644 --- a/test/xml/test_schema.rb +++ b/test/xml/test_schema.rb @@ -4,6 +4,7 @@ module Nokogiri module XML class TestSchema < Nokogiri::TestCase def setup + super assert @xsd = Nokogiri::XML::Schema(File.read(PO_SCHEMA_FILE)) end diff --git a/test/xml/test_unparented_node.rb b/test/xml/test_unparented_node.rb index 2d7327a5db..5836c1c8f4 100644 --- a/test/xml/test_unparented_node.rb +++ b/test/xml/test_unparented_node.rb @@ -6,6 +6,7 @@ module Nokogiri module XML class TestUnparentedNode < Nokogiri::TestCase def setup + super begin xml = Nokogiri::XML.parse(File.read(XML_FILE), XML_FILE) @node = xml.at('staff')