From f9a2c4e050f337e30f08ac32f19e1e10f229723a Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Tue, 5 Jan 2021 19:11:27 -0500 Subject: [PATCH] fix: restore proper error handling in the SAX push parser originally introduced in 771164d but broken in the recent commits. This is an incomplete fix. We should adopt this same strategy of save-and-restore everywhere we set the error handlers. --- ext/nokogiri/html_sax_push_parser.c | 22 +++++++++++++--------- ext/nokogiri/xml_syntax_error.c | 23 +++++++++++++++++++++++ ext/nokogiri/xml_syntax_error.h | 18 +++++++++++++++--- 3 files changed, 51 insertions(+), 12 deletions(-) diff --git a/ext/nokogiri/html_sax_push_parser.c b/ext/nokogiri/html_sax_push_parser.c index 3a739ae1d2..af1dfd2b4f 100644 --- a/ext/nokogiri/html_sax_push_parser.c +++ b/ext/nokogiri/html_sax_push_parser.c @@ -9,9 +9,10 @@ static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk) { xmlParserCtxtPtr ctx; - const char * chunk = NULL; - int size = 0; - + const char * chunk = NULL; + int size = 0; + int status = 0; + libxmlStructuredErrorHandlerState handler_state; Data_Get_Struct(self, xmlParserCtxt, ctx); @@ -20,13 +21,16 @@ static VALUE native_write(VALUE self, VALUE _chunk, VALUE _last_chunk) size = (int)RSTRING_LEN(_chunk); } - xmlSetStructuredErrorFunc(NULL, NULL); + Nokogiri_structured_error_func_save_and_set(&handler_state, NULL, NULL); + + status = htmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0); + + Nokogiri_structured_error_func_restore(&handler_state); - if(htmlParseChunk(ctx, chunk, size, Qtrue == _last_chunk ? 1 : 0)) { - if (!(ctx->options & XML_PARSE_RECOVER)) { - xmlErrorPtr e = xmlCtxtGetLastError(ctx); - Nokogiri_error_raise(NULL, e); - } + if ((status != 0) && !(ctx->options & XML_PARSE_RECOVER)) { + // TODO: there appear to be no tests for this block + xmlErrorPtr e = xmlCtxtGetLastError(ctx); + Nokogiri_error_raise(NULL, e); } return self; diff --git a/ext/nokogiri/xml_syntax_error.c b/ext/nokogiri/xml_syntax_error.c index 0b240f05a5..13da073da9 100644 --- a/ext/nokogiri/xml_syntax_error.c +++ b/ext/nokogiri/xml_syntax_error.c @@ -1,5 +1,28 @@ #include +void +Nokogiri_structured_error_func_save(libxmlStructuredErrorHandlerState *handler_state) +{ + /* this method is tightly coupled to the implementation of xmlSetStructuredErrorFunc */ + handler_state->user_data = xmlStructuredErrorContext; + handler_state->handler = xmlStructuredError; +} + +void +Nokogiri_structured_error_func_save_and_set(libxmlStructuredErrorHandlerState *handler_state, + void *user_data, + xmlStructuredErrorFunc handler) +{ + Nokogiri_structured_error_func_save(handler_state); + xmlSetStructuredErrorFunc(user_data, handler); +} + +void +Nokogiri_structured_error_func_restore(libxmlStructuredErrorHandlerState *handler_state) +{ + xmlSetStructuredErrorFunc(handler_state->user_data, handler_state->handler); +} + void Nokogiri_error_array_pusher(void * ctx, xmlErrorPtr error) { VALUE list = (VALUE)ctx; diff --git a/ext/nokogiri/xml_syntax_error.h b/ext/nokogiri/xml_syntax_error.h index 58475cb852..6994cb1ff5 100644 --- a/ext/nokogiri/xml_syntax_error.h +++ b/ext/nokogiri/xml_syntax_error.h @@ -3,11 +3,23 @@ #include +typedef struct _libxmlStructuredErrorHandlerState { + void *user_data; + xmlStructuredErrorFunc handler; +} libxmlStructuredErrorHandlerState ; + void init_xml_syntax_error(); + +void Nokogiri_structured_error_func_save(libxmlStructuredErrorHandlerState *handler_state); +void Nokogiri_structured_error_func_save_and_set(libxmlStructuredErrorHandlerState *handler_state, + void *user_data, + xmlStructuredErrorFunc handler); +void Nokogiri_structured_error_func_restore(libxmlStructuredErrorHandlerState *handler_state); + VALUE Nokogiri_wrap_xml_syntax_error(xmlErrorPtr error); -void Nokogiri_error_array_pusher(void * ctx, xmlErrorPtr error); -NORETURN(void Nokogiri_error_raise(void * ctx, xmlErrorPtr error)); +void Nokogiri_error_array_pusher(void *ctx, xmlErrorPtr error); +NORETURN(void Nokogiri_error_raise(void *ctx, xmlErrorPtr error)); extern VALUE cNokogiriXmlSyntaxError; -#endif +#endif /* NOKOGIRI_XML_SYNTAX_ERROR */