Skip to content

Commit

Permalink
Merge pull request #2169 from sparklemotion/2168-active-support-test-…
Browse files Browse the repository at this point in the history
…failure

fix(cruby): SAX and Push parser error handling in the presence of foreign handlers
  • Loading branch information
flavorjones committed Jan 6, 2021
2 parents ee69772 + bbf850c commit 3d90c6d
Show file tree
Hide file tree
Showing 16 changed files with 129 additions and 11 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Expand Up @@ -4,6 +4,13 @@ Nokogiri follows [Semantic Versioning](https://semver.org/), please see the [REA

---

## v1.11.1 / unreleased

### Fixed

* [CRuby] If `libxml-ruby` is loaded before `nokogiri`, the SAX and Push parsers no longer call `libxml-ruby`'s handlers. Instead, they defensively override the libxml2 global handler before parsing. [[#2168](https://github.com/sparklemotion/nokogiri/issues/2168)]


## v1.11.0 / 2021-01-03

### Notes
Expand Down
2 changes: 2 additions & 0 deletions ext/nokogiri/html_sax_parser_context.c
Expand Up @@ -92,6 +92,8 @@ parse_with(VALUE self, VALUE sax_handler)
ctxt->sax = sax;
ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);

xmlSetStructuredErrorFunc(NULL, NULL);

rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);

return self;
Expand Down
22 changes: 14 additions & 8 deletions ext/nokogiri/html_sax_push_parser.c
Expand Up @@ -9,9 +9,10 @@
static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
{
xmlParserCtxtPtr ctx;
const char * chunk = NULL;
int size = 0;

const char * chunk = NULL;
int size = 0;
int status = 0;
libxmlStructuredErrorHandlerState handler_state;

Data_Get_Struct(self, xmlParserCtxt, ctx);

Expand All @@ -20,11 +21,16 @@ static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
size = (int)RSTRING_LEN(_chunk);
}

if(htmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) {
if (!(ctx->options & XML_PARSE_RECOVER)) {
xmlErrorPtr e = xmlCtxtGetLastError(ctx);
Nokogiri_error_raise(NULL, e);
}
Nokogiri_structured_error_func_save_and_set(&handler_state, NULL, NULL);

status = htmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0);

Nokogiri_structured_error_func_restore(&handler_state);

if ((status != 0) && !(ctx->options & XML_PARSE_RECOVER)) {
// TODO: there appear to be no tests for this block
xmlErrorPtr e = xmlCtxtGetLastError(ctx);
Nokogiri_error_raise(NULL, e);
}

return self;
Expand Down
3 changes: 3 additions & 0 deletions ext/nokogiri/nokogiri.c
@@ -1,5 +1,7 @@
#include <nokogiri.h>

void init_test_global_handlers(); /* in lieu of test_global_handlers.h */

VALUE mNokogiri ;
VALUE mNokogiriXml ;
VALUE mNokogiriHtml ;
Expand Down Expand Up @@ -132,4 +134,5 @@ void Init_nokogiri()
init_xml_relax_ng();
init_nokogiri_io();
init_xml_encoding_handler();
init_test_global_handlers();
}
41 changes: 41 additions & 0 deletions ext/nokogiri/test_global_handlers.c
@@ -0,0 +1,41 @@
#include <nokogiri.h>
#include "libxml/xmlerror.h"

static VALUE foreign_error_handler_block = Qnil;

static void foreign_error_handler(void* user_data, xmlErrorPtr c_error)
{
rb_funcall(foreign_error_handler_block, rb_intern("call"), 0);
}

/*
* call-seq:
* __foreign_error_handler { ... } -> nil
*
* Override libxml2's global error handlers to call the block. This method thus has very little
* value except to test that Nokogiri is properly setting error handlers elsewhere in the code. See
* test/helper.rb for how this is being used.
*/
static VALUE
rb_foreign_error_handler(VALUE klass)
{
rb_need_block();
foreign_error_handler_block = rb_block_proc();
xmlSetStructuredErrorFunc(NULL, foreign_error_handler);
return Qnil;
}

/*
* Document-module: Nokogiri::Test
*
* The Nokogiri::Test module should only be used for testing Nokogiri.
* Do NOT use this outside of the Nokogiri test suite.
*/
void
init_test_global_handlers()
{
VALUE mNokogiri = rb_define_module("Nokogiri");
VALUE mNokogiriTest = rb_define_module_under(mNokogiri, "Test");

rb_define_singleton_method(mNokogiriTest, "__foreign_error_handler", rb_foreign_error_handler, 0);
}
2 changes: 2 additions & 0 deletions ext/nokogiri/xml_sax_parser_context.c
Expand Up @@ -120,6 +120,8 @@ parse_with(VALUE self, VALUE sax_handler)
ctxt->sax = sax;
ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);

xmlSetStructuredErrorFunc(NULL, NULL);

rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);

return Qnil;
Expand Down
2 changes: 2 additions & 0 deletions ext/nokogiri/xml_sax_push_parser.c
Expand Up @@ -35,6 +35,8 @@ static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk)
size = (int)RSTRING_LEN(_chunk);
}

xmlSetStructuredErrorFunc(NULL, NULL);

if (xmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) {
if (!(ctx->options & XML_PARSE_RECOVER)) {
xmlErrorPtr e = xmlCtxtGetLastError(ctx);
Expand Down
23 changes: 23 additions & 0 deletions ext/nokogiri/xml_syntax_error.c
@@ -1,5 +1,28 @@
#include <xml_syntax_error.h>

void
Nokogiri_structured_error_func_save(libxmlStructuredErrorHandlerState *handler_state)
{
/* this method is tightly coupled to the implementation of xmlSetStructuredErrorFunc */
handler_state->user_data = xmlStructuredErrorContext;
handler_state->handler = xmlStructuredError;
}

void
Nokogiri_structured_error_func_save_and_set(libxmlStructuredErrorHandlerState *handler_state,
void *user_data,
xmlStructuredErrorFunc handler)
{
Nokogiri_structured_error_func_save(handler_state);
xmlSetStructuredErrorFunc(user_data, handler);
}

void
Nokogiri_structured_error_func_restore(libxmlStructuredErrorHandlerState *handler_state)
{
xmlSetStructuredErrorFunc(handler_state->user_data, handler_state->handler);
}

void Nokogiri_error_array_pusher(void * ctx, xmlErrorPtr error)
{
VALUE list = (VALUE)ctx;
Expand Down
18 changes: 15 additions & 3 deletions ext/nokogiri/xml_syntax_error.h
Expand Up @@ -3,11 +3,23 @@

#include <nokogiri.h>

typedef struct _libxmlStructuredErrorHandlerState {
void *user_data;
xmlStructuredErrorFunc handler;
} libxmlStructuredErrorHandlerState ;

void init_xml_syntax_error();

void Nokogiri_structured_error_func_save(libxmlStructuredErrorHandlerState *handler_state);
void Nokogiri_structured_error_func_save_and_set(libxmlStructuredErrorHandlerState *handler_state,
void *user_data,
xmlStructuredErrorFunc handler);
void Nokogiri_structured_error_func_restore(libxmlStructuredErrorHandlerState *handler_state);

VALUE Nokogiri_wrap_xml_syntax_error(xmlErrorPtr error);
void Nokogiri_error_array_pusher(void * ctx, xmlErrorPtr error);
NORETURN(void Nokogiri_error_raise(void * ctx, xmlErrorPtr error));
void Nokogiri_error_array_pusher(void *ctx, xmlErrorPtr error);
NORETURN(void Nokogiri_error_raise(void *ctx, xmlErrorPtr error));

extern VALUE cNokogiriXmlSyntaxError;
#endif

#endif /* NOKOGIRI_XML_SYNTAX_ERROR */
14 changes: 14 additions & 0 deletions test/helper.rb
Expand Up @@ -58,7 +58,16 @@ class TestCase < MiniTest::Spec
XSLT_FILE = File.join(ASSETS_DIR, 'staff.xslt')
XPATH_FILE = File.join(ASSETS_DIR, 'slow-xpath.xml')

def setup
@fake_error_handler_called = false
Nokogiri::Test.__foreign_error_handler do
@fake_error_handler_called = true
end if Nokogiri.uses_libxml?
end

def teardown
refute(@fake_error_handler_called, "the fake error handler should never get called") if Nokogiri.uses_libxml?

if ENV['NOKOGIRI_GC']
STDOUT.putc '!'
if RUBY_PLATFORM =~ /java/
Expand Down Expand Up @@ -135,6 +144,11 @@ class Doc < XML::SAX::Document
attr_reader :xmldecls
attr_reader :processing_instructions

def initialize
@errors = []
super
end

def xmldecl version, encoding, standalone
@xmldecls = [version, encoding, standalone].compact
super
Expand Down
1 change: 1 addition & 0 deletions test/namespaces/test_namespaces_preservation.rb
Expand Up @@ -5,6 +5,7 @@ module XML
class TestNamespacePreservation < Nokogiri::TestCase

def setup
super
@xml = Nokogiri.XML <<-eoxml
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:element xmlns:quer="http://api.geotrust.com/webtrust/query"/>
Expand Down
1 change: 1 addition & 0 deletions test/test_xslt_transforms.rb
Expand Up @@ -2,6 +2,7 @@

class TestXsltTransforms < Nokogiri::TestCase
def setup
super
@doc = Nokogiri::XML(File.open(XML_FILE))
end

Expand Down
1 change: 1 addition & 0 deletions test/xml/sax/test_parser_context.rb
Expand Up @@ -7,6 +7,7 @@ module XML
module SAX
class TestParserContext < Nokogiri::SAX::TestCase
def setup
super
@xml = '<hello>
world
Expand Down
1 change: 1 addition & 0 deletions test/xml/test_relax_ng.rb
Expand Up @@ -4,6 +4,7 @@ module Nokogiri
module XML
class TestRelaxNG < Nokogiri::TestCase
def setup
super
assert @schema = Nokogiri::XML::RelaxNG(File.read(ADDRESS_SCHEMA_FILE))
end

Expand Down
1 change: 1 addition & 0 deletions test/xml/test_schema.rb
Expand Up @@ -4,6 +4,7 @@ module Nokogiri
module XML
class TestSchema < Nokogiri::TestCase
def setup
super
assert @xsd = Nokogiri::XML::Schema(File.read(PO_SCHEMA_FILE))
end

Expand Down
1 change: 1 addition & 0 deletions test/xml/test_unparented_node.rb
Expand Up @@ -6,6 +6,7 @@ module Nokogiri
module XML
class TestUnparentedNode < Nokogiri::TestCase
def setup
super
begin
xml = Nokogiri::XML.parse(File.read(XML_FILE), XML_FILE)
@node = xml.at('staff')
Expand Down

0 comments on commit 3d90c6d

Please sign in to comment.