Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(cruby): SAX and Push parser error handling in the presence of foreign handlers #2169

Merged
merged 5 commits into from Jan 6, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Expand Up @@ -4,6 +4,13 @@ Nokogiri follows [Semantic Versioning](https://semver.org/), please see the [REA

---

## v1.11.1 / unreleased

### Fixed

* [CRuby] If `libxml-ruby` is loaded before `nokogiri`, the SAX and Push parsers no longer call `libxml-ruby`'s handlers. Instead, they defensively override the libxml2 global handler before parsing. [[#2168](https://github.com/sparklemotion/nokogiri/issues/2168)]


## v1.11.0 / 2021-01-03

### Notes
Expand Down
2 changes: 2 additions & 0 deletions ext/nokogiri/html_sax_parser_context.c
Expand Up @@ -92,6 +92,8 @@ parse_with(VALUE self, VALUE sax_handler)
ctxt->sax = sax;
ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);

xmlSetStructuredErrorFunc(NULL, NULL);

rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);

return self;
Expand Down
22 changes: 14 additions & 8 deletions ext/nokogiri/html_sax_push_parser.c
Expand Up @@ -9,9 +9,10 @@
static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
{
xmlParserCtxtPtr ctx;
const char * chunk = NULL;
int size = 0;

const char * chunk = NULL;
int size = 0;
int status = 0;
libxmlStructuredErrorHandlerState handler_state;

Data_Get_Struct(self, xmlParserCtxt, ctx);

Expand All @@ -20,11 +21,16 @@ static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
size = (int)RSTRING_LEN(_chunk);
}

if(htmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) {
if (!(ctx->options & XML_PARSE_RECOVER)) {
xmlErrorPtr e = xmlCtxtGetLastError(ctx);
Nokogiri_error_raise(NULL, e);
}
Nokogiri_structured_error_func_save_and_set(&handler_state, NULL, NULL);

status = htmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0);

Nokogiri_structured_error_func_restore(&handler_state);

if ((status != 0) && !(ctx->options & XML_PARSE_RECOVER)) {
// TODO: there appear to be no tests for this block
xmlErrorPtr e = xmlCtxtGetLastError(ctx);
Nokogiri_error_raise(NULL, e);
}

return self;
Expand Down
3 changes: 3 additions & 0 deletions ext/nokogiri/nokogiri.c
@@ -1,5 +1,7 @@
#include <nokogiri.h>

void init_test_global_handlers(); /* in lieu of test_global_handlers.h */

VALUE mNokogiri ;
VALUE mNokogiriXml ;
VALUE mNokogiriHtml ;
Expand Down Expand Up @@ -132,4 +134,5 @@ void Init_nokogiri()
init_xml_relax_ng();
init_nokogiri_io();
init_xml_encoding_handler();
init_test_global_handlers();
}
41 changes: 41 additions & 0 deletions ext/nokogiri/test_global_handlers.c
@@ -0,0 +1,41 @@
#include <nokogiri.h>
#include "libxml/xmlerror.h"

static VALUE foreign_error_handler_block = Qnil;

static void foreign_error_handler(void* user_data, xmlErrorPtr c_error)
{
rb_funcall(foreign_error_handler_block, rb_intern("call"), 0);
}

/*
* call-seq:
* __foreign_error_handler { ... } -> nil
*
* Override libxml2's global error handlers to call the block. This method thus has very little
* value except to test that Nokogiri is properly setting error handlers elsewhere in the code. See
* test/helper.rb for how this is being used.
*/
static VALUE
rb_foreign_error_handler(VALUE klass)
{
rb_need_block();
foreign_error_handler_block = rb_block_proc();
xmlSetStructuredErrorFunc(NULL, foreign_error_handler);
return Qnil;
}

/*
* Document-module: Nokogiri::Test
*
* The Nokogiri::Test module should only be used for testing Nokogiri.
* Do NOT use this outside of the Nokogiri test suite.
*/
void
init_test_global_handlers()
{
VALUE mNokogiri = rb_define_module("Nokogiri");
VALUE mNokogiriTest = rb_define_module_under(mNokogiri, "Test");

rb_define_singleton_method(mNokogiriTest, "__foreign_error_handler", rb_foreign_error_handler, 0);
}
2 changes: 2 additions & 0 deletions ext/nokogiri/xml_sax_parser_context.c
Expand Up @@ -120,6 +120,8 @@ parse_with(VALUE self, VALUE sax_handler)
ctxt->sax = sax;
ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);

xmlSetStructuredErrorFunc(NULL, NULL);

rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);

return Qnil;
Expand Down
2 changes: 2 additions & 0 deletions ext/nokogiri/xml_sax_push_parser.c
Expand Up @@ -35,6 +35,8 @@ static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
size = (int)RSTRING_LEN(_chunk);
}

xmlSetStructuredErrorFunc(NULL, NULL);

if (xmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) {
if (!(ctx->options & XML_PARSE_RECOVER)) {
xmlErrorPtr e = xmlCtxtGetLastError(ctx);
Expand Down
23 changes: 23 additions & 0 deletions ext/nokogiri/xml_syntax_error.c
@@ -1,5 +1,28 @@
#include <xml_syntax_error.h>

void
Nokogiri_structured_error_func_save(libxmlStructuredErrorHandlerState *handler_state)
{
/* this method is tightly coupled to the implementation of xmlSetStructuredErrorFunc */
handler_state->user_data = xmlStructuredErrorContext;
handler_state->handler = xmlStructuredError;
}

void
Nokogiri_structured_error_func_save_and_set(libxmlStructuredErrorHandlerState *handler_state,
void *user_data,
xmlStructuredErrorFunc handler)
{
Nokogiri_structured_error_func_save(handler_state);
xmlSetStructuredErrorFunc(user_data, handler);
}

void
Nokogiri_structured_error_func_restore(libxmlStructuredErrorHandlerState *handler_state)
{
xmlSetStructuredErrorFunc(handler_state->user_data, handler_state->handler);
}

void Nokogiri_error_array_pusher(void * ctx, xmlErrorPtr error)
{
VALUE list = (VALUE)ctx;
Expand Down
18 changes: 15 additions & 3 deletions ext/nokogiri/xml_syntax_error.h
Expand Up @@ -3,11 +3,23 @@

#include <nokogiri.h>

typedef struct _libxmlStructuredErrorHandlerState {
void *user_data;
xmlStructuredErrorFunc handler;
} libxmlStructuredErrorHandlerState ;

void init_xml_syntax_error();

void Nokogiri_structured_error_func_save(libxmlStructuredErrorHandlerState *handler_state);
void Nokogiri_structured_error_func_save_and_set(libxmlStructuredErrorHandlerState *handler_state,
void *user_data,
xmlStructuredErrorFunc handler);
void Nokogiri_structured_error_func_restore(libxmlStructuredErrorHandlerState *handler_state);

VALUE Nokogiri_wrap_xml_syntax_error(xmlErrorPtr error);
void Nokogiri_error_array_pusher(void * ctx, xmlErrorPtr error);
NORETURN(void Nokogiri_error_raise(void * ctx, xmlErrorPtr error));
void Nokogiri_error_array_pusher(void *ctx, xmlErrorPtr error);
NORETURN(void Nokogiri_error_raise(void *ctx, xmlErrorPtr error));

extern VALUE cNokogiriXmlSyntaxError;
#endif

#endif /* NOKOGIRI_XML_SYNTAX_ERROR */
14 changes: 14 additions & 0 deletions test/helper.rb
Expand Up @@ -58,7 +58,16 @@ class TestCase < MiniTest::Spec
XSLT_FILE = File.join(ASSETS_DIR, 'staff.xslt')
XPATH_FILE = File.join(ASSETS_DIR, 'slow-xpath.xml')

def setup
@fake_error_handler_called = false
Nokogiri::Test.__foreign_error_handler do
@fake_error_handler_called = true
end if Nokogiri.uses_libxml?
end

def teardown
refute(@fake_error_handler_called, "the fake error handler should never get called") if Nokogiri.uses_libxml?

if ENV['NOKOGIRI_GC']
STDOUT.putc '!'
if RUBY_PLATFORM =~ /java/
Expand Down Expand Up @@ -135,6 +144,11 @@ class Doc < XML::SAX::Document
attr_reader :xmldecls
attr_reader :processing_instructions

def initialize
@errors = []
super
end

def xmldecl version, encoding, standalone
@xmldecls = [version, encoding, standalone].compact
super
Expand Down
1 change: 1 addition & 0 deletions test/namespaces/test_namespaces_preservation.rb
Expand Up @@ -5,6 +5,7 @@ module XML
class TestNamespacePreservation < Nokogiri::TestCase

def setup
super
@xml = Nokogiri.XML <<-eoxml
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:element xmlns:quer="http://api.geotrust.com/webtrust/query"/>
Expand Down
1 change: 1 addition & 0 deletions test/test_xslt_transforms.rb
Expand Up @@ -2,6 +2,7 @@

class TestXsltTransforms < Nokogiri::TestCase
def setup
super
@doc = Nokogiri::XML(File.open(XML_FILE))
end

Expand Down
1 change: 1 addition & 0 deletions test/xml/sax/test_parser_context.rb
Expand Up @@ -7,6 +7,7 @@ module XML
module SAX
class TestParserContext < Nokogiri::SAX::TestCase
def setup
super
@xml = '<hello>

world
Expand Down
1 change: 1 addition & 0 deletions test/xml/test_relax_ng.rb
Expand Up @@ -4,6 +4,7 @@ module Nokogiri
module XML
class TestRelaxNG < Nokogiri::TestCase
def setup
super
assert @schema = Nokogiri::XML::RelaxNG(File.read(ADDRESS_SCHEMA_FILE))
end

Expand Down
1 change: 1 addition & 0 deletions test/xml/test_schema.rb
Expand Up @@ -4,6 +4,7 @@ module Nokogiri
module XML
class TestSchema < Nokogiri::TestCase
def setup
super
assert @xsd = Nokogiri::XML::Schema(File.read(PO_SCHEMA_FILE))
end

Expand Down
1 change: 1 addition & 0 deletions test/xml/test_unparented_node.rb
Expand Up @@ -6,6 +6,7 @@ module Nokogiri
module XML
class TestUnparentedNode < Nokogiri::TestCase
def setup
super
begin
xml = Nokogiri::XML.parse(File.read(XML_FILE), XML_FILE)
@node = xml.at('staff')
Expand Down