diff --git a/CHANGELOG.md b/CHANGELOG.md index 57dbbe8b82..8efd77c1ea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,18 +4,25 @@ Nokogiri follows [Semantic Versioning](https://semver.org/), please see the [REA --- +## 1.12.3 / unreleased + +### Fixed + +* [CRuby] Fix compilation of libgumbo on older systems with versions of GCC that give errors on C99-isms. Affected systems include RHEL6, RHEL7, and SLES12. [[#2302](https://github.com/sparklemotion/nokogiri/issues/2302)] + + ## 1.12.2 / 2021-08-04 ### Fixed -* Ensure that C extension files in non-native gem installations are loaded using `require` and rely on `$LOAD_PATH` instead of using `require_relative`. This issue only exists when deleting shared libraries that exist outside the extensions directory, something users occasionally do to conserve disk space. [[#2300](https://github.com/sparklemotion/nokogiri/issues/2300)] +* [CRuby] Ensure that C extension files in non-native gem installations are loaded using `require` and rely on `$LOAD_PATH` instead of using `require_relative`. This issue only exists when deleting shared libraries that exist outside the extensions directory, something users occasionally do to conserve disk space. [[#2300](https://github.com/sparklemotion/nokogiri/issues/2300)] ## 1.12.1 / 2021-08-03 ### Fixed -* Fix compilation of libgumbo on BSD systems by avoiding GNU-isms. [[#2298](https://github.com/sparklemotion/nokogiri/issues/2298)] +* [CRuby] Fix compilation of libgumbo on BSD systems by avoiding GNU-isms. [[#2298](https://github.com/sparklemotion/nokogiri/issues/2298)] ## 1.12.0 / 2021-08-02 diff --git a/ext/nokogiri/gumbo.c b/ext/nokogiri/gumbo.c index 29e0dfcdfb..1ec979b30c 100644 --- a/ext/nokogiri/gumbo.c +++ b/ext/nokogiri/gumbo.c @@ -75,7 +75,7 @@ new_html_doc(const char *dtd_name, const char *system, const char *public) htmlDocPtr doc = htmlNewDocNoDtD(/* URI */ NULL, /* ExternalID */NULL); assert(doc); if (dtd_name) { - xmlCreateIntSubset(doc, BAD_CAST dtd_name, BAD_CAST public, BAD_CAST system); + xmlCreateIntSubset(doc, (const xmlChar *)dtd_name, (const xmlChar *)public, (const xmlChar *)system); } return doc; } @@ -89,15 +89,19 @@ get_parent(xmlNodePtr node) static GumboOutput * perform_parse(const GumboOptions *options, VALUE input) { + GumboOutput *output; + const char *status_string; + assert(RTEST(input)); Check_Type(input, T_STRING); - GumboOutput *output = gumbo_parse_with_options( - options, - RSTRING_PTR(input), - RSTRING_LEN(input) - ); - const char *status_string = gumbo_status_to_string(output->status); + output = gumbo_parse_with_options( + options, + RSTRING_PTR(input), + RSTRING_LEN(input) + ); + + status_string = gumbo_status_to_string(output->status); switch (output->status) { case GUMBO_STATUS_OK: break; @@ -120,11 +124,11 @@ lookup_or_add_ns( const char *prefix ) { - xmlNsPtr ns = xmlSearchNs(doc, root, BAD_CAST prefix); + xmlNsPtr ns = xmlSearchNs(doc, root, (const xmlChar *)prefix); if (ns) { return ns; } - return xmlNewNs(root, BAD_CAST href, BAD_CAST prefix); + return xmlNewNs(root, (const xmlChar *)href, (const xmlChar *)prefix); } static void @@ -151,9 +155,13 @@ build_tree( size_t child_index = 0; while (true) { + const GumboVector *children; + const GumboNode *gumbo_child; + xmlNodePtr xml_child; + assert(gumbo_node != NULL); - const GumboVector *children = gumbo_node->type == GUMBO_NODE_DOCUMENT ? - &gumbo_node->v.document.children : &gumbo_node->v.element.children; + children = gumbo_node->type == GUMBO_NODE_DOCUMENT ? + &gumbo_node->v.document.children : &gumbo_node->v.element.children; if (child_index >= children->length) { // Move up the tree and to the next child. if (xml_node == xml_output_node) { @@ -172,29 +180,28 @@ build_tree( } continue; } - const GumboNode *gumbo_child = children->data[child_index++]; - xmlNodePtr xml_child; + gumbo_child = children->data[child_index++]; switch (gumbo_child->type) { case GUMBO_NODE_DOCUMENT: abort(); // Bug in Gumbo. case GUMBO_NODE_TEXT: case GUMBO_NODE_WHITESPACE: - xml_child = xmlNewDocText(doc, BAD_CAST gumbo_child->v.text.text); + xml_child = xmlNewDocText(doc, (const xmlChar *)gumbo_child->v.text.text); set_line(xml_child, gumbo_child->v.text.start_pos.line); xmlAddChild(xml_node, xml_child); break; case GUMBO_NODE_CDATA: - xml_child = xmlNewCDataBlock(doc, BAD_CAST gumbo_child->v.text.text, + xml_child = xmlNewCDataBlock(doc, (const xmlChar *)gumbo_child->v.text.text, (int) strlen(gumbo_child->v.text.text)); set_line(xml_child, gumbo_child->v.text.start_pos.line); xmlAddChild(xml_node, xml_child); break; case GUMBO_NODE_COMMENT: - xml_child = xmlNewDocComment(doc, BAD_CAST gumbo_child->v.text.text); + xml_child = xmlNewDocComment(doc, (const xmlChar *)gumbo_child->v.text.text); set_line(xml_child, gumbo_child->v.text.start_pos.line); xmlAddChild(xml_node, xml_child); break; @@ -202,12 +209,15 @@ build_tree( case GUMBO_NODE_TEMPLATE: // XXX: Should create a template element and a new DocumentFragment case GUMBO_NODE_ELEMENT: { - xml_child = xmlNewDocNode(doc, NULL, BAD_CAST gumbo_child->v.element.name, NULL); + xmlNsPtr ns = NULL; + const GumboVector *attrs; + size_t i; + + xml_child = xmlNewDocNode(doc, NULL, (const xmlChar *)gumbo_child->v.element.name, NULL); set_line(xml_child, gumbo_child->v.element.start_pos.line); if (xml_root == NULL) { xml_root = xml_child; } - xmlNsPtr ns = NULL; switch (gumbo_child->v.element.tag_namespace) { case GUMBO_NAMESPACE_HTML: break; @@ -224,8 +234,8 @@ build_tree( xmlAddChild(xml_node, xml_child); // Add the attributes. - const GumboVector *attrs = &gumbo_child->v.element.attributes; - for (size_t i = 0; i < attrs->length; i++) { + attrs = &gumbo_child->v.element.attributes; + for (i = 0; i < attrs->length; i++) { const GumboAttribute *attr = attrs->data[i]; switch (attr->attr_namespace) { @@ -244,7 +254,7 @@ build_tree( default: ns = NULL; } - xmlNewNsProp(xml_child, ns, BAD_CAST attr->name, BAD_CAST attr->value); + xmlNewNsProp(xml_child, ns, (const xmlChar *)attr->name, (const xmlChar *)attr->value); } // Add children for this element. @@ -264,19 +274,29 @@ add_errors(const GumboOutput *output, VALUE rdoc, VALUE input, VALUE url) // Add parse errors to rdoc. if (output->errors.length) { + size_t i; const GumboVector *errors = &output->errors; VALUE rerrors = rb_ary_new2(errors->length); - for (size_t i = 0; i < errors->length; i++) { - GumboError *err = errors->data[i]; - GumboSourcePosition position = gumbo_error_position(err); + for (i = 0; i < errors->length; i++) { + GumboError *err; + GumboSourcePosition position; char *msg; - size_t size = gumbo_caret_diagnostic_to_string(err, input_str, input_len, &msg); - VALUE err_str = rb_utf8_str_new(msg, size); + size_t size; + VALUE err_str; + VALUE syntax_error; + const char *error_code; + VALUE str1; + + err = errors->data[i]; + position = gumbo_error_position(err); + size = gumbo_caret_diagnostic_to_string(err, input_str, input_len, &msg); + err_str = rb_utf8_str_new(msg, size); free(msg); - VALUE syntax_error = rb_class_new_instance(1, &err_str, cNokogiriXmlSyntaxError); - const char *error_code = gumbo_error_code(err); - VALUE str1 = error_code ? rb_utf8_str_new_static(error_code, strlen(error_code)) : Qnil; + syntax_error = rb_class_new_instance(1, &err_str, cNokogiriXmlSyntaxError); + error_code = gumbo_error_code(err); + str1 = error_code ? rb_utf8_str_new_static(error_code, strlen(error_code)) : Qnil; + rb_iv_set(syntax_error, "@domain", INT2NUM(1)); // XML_FROM_PARSER rb_iv_set(syntax_error, "@code", INT2NUM(1)); // XML_ERR_INTERNAL_ERROR rb_iv_set(syntax_error, "@level", INT2NUM(2)); // XML_ERR_ERROR @@ -303,7 +323,7 @@ typedef struct { static VALUE parse_cleanup(VALUE parse_args) { - ParseArgs *args = (ParseArgs*)parse_args; + ParseArgs *args = (ParseArgs *)parse_args; gumbo_destroy_output(args->output); // Make sure garbage collection doesn't mark the objects as being live based // on references from the ParseArgs. This may be unnecessary. @@ -323,18 +343,20 @@ static VALUE parse_continue(VALUE parse_args); static VALUE parse(VALUE self, VALUE input, VALUE url, VALUE max_attributes, VALUE max_errors, VALUE max_depth) { + GumboOutput *output; GumboOptions options = kGumboDefaultOptions; + ParseArgs args; + options.max_attributes = NUM2INT(max_attributes); options.max_errors = NUM2INT(max_errors); options.max_tree_depth = NUM2INT(max_depth); - GumboOutput *output = perform_parse(&options, input); - ParseArgs args = { - .output = output, - .input = input, - .url_or_frag = url, - .doc = NULL, - }; + output = perform_parse(&options, input); + + args.output = output; + args.input = input; + args.url_or_frag = url; + args.doc = NULL; return rb_ensure(parse_continue, (VALUE)(&args), parse_cleanup, (VALUE)(&args)); } @@ -342,9 +364,11 @@ parse(VALUE self, VALUE input, VALUE url, VALUE max_attributes, VALUE max_errors static VALUE parse_continue(VALUE parse_args) { - ParseArgs *args = (ParseArgs*)parse_args; + ParseArgs *args = (ParseArgs *)parse_args; GumboOutput *output = args->output; xmlDocPtr doc; + VALUE rdoc; + if (output->document->v.document.has_doctype) { const char *name = output->document->v.document.name; const char *public = output->document->v.document.public_identifier; @@ -357,7 +381,7 @@ parse_continue(VALUE parse_args) } args->doc = doc; // Make sure doc gets cleaned up if an error is thrown. build_tree(doc, (xmlNodePtr)doc, output->document); - VALUE rdoc = Nokogiri_wrap_xml_document(cNokogiriHtml5Document, doc); + rdoc = Nokogiri_wrap_xml_document(cNokogiriHtml5Document, doc); args->doc = NULL; // The Ruby runtime now owns doc so don't delete it. add_errors(output, rdoc, args->input, args->url_or_frag); return rdoc; @@ -366,10 +390,15 @@ parse_continue(VALUE parse_args) static int lookup_namespace(VALUE node, bool require_known_ns) { + VALUE ns; ID namespace, href; + const char *href_ptr; + size_t href_len; + CONST_ID(namespace, "namespace"); CONST_ID(href, "href"); - VALUE ns = rb_funcall(node, namespace, 0); + + ns = rb_funcall(node, namespace, 0); if (NIL_P(ns)) { return GUMBO_NAMESPACE_HTML; @@ -378,8 +407,8 @@ lookup_namespace(VALUE node, bool require_known_ns) assert(RTEST(ns)); Check_Type(ns, T_STRING); - const char *href_ptr = RSTRING_PTR(ns); - size_t href_len = RSTRING_LEN(ns); + href_ptr = RSTRING_PTR(ns); + href_len = RSTRING_LEN(ns); #define NAMESPACE_P(uri) (href_len == sizeof uri - 1 && !memcmp(href_ptr, uri, href_len)) if (NAMESPACE_P("http://www.w3.org/1999/xhtml")) { return GUMBO_NAMESPACE_HTML; @@ -427,15 +456,23 @@ fragment( GumboQuirksModeEnum quirks_mode; bool form = false; const char *encoding = NULL; + VALUE doc, dtd; + int depth; + GumboOptions options = kGumboDefaultOptions; + GumboOutput *output; + ParseArgs args; if (NIL_P(ctx)) { ctx_tag = "body"; ctx_ns = GUMBO_NAMESPACE_HTML; } else if (TYPE(ctx) == T_STRING) { + size_t len; + const char *colon; + ctx_tag = StringValueCStr(ctx); ctx_ns = GUMBO_NAMESPACE_HTML; - size_t len = RSTRING_LEN(ctx); - const char *colon = memchr(ctx_tag, ':', len); + len = RSTRING_LEN(ctx); + colon = memchr(ctx_tag, ':', len); if (colon) { switch (colon - ctx_tag) { case 3: @@ -470,6 +507,7 @@ fragment( // Check if it's a form. form = ctx_ns == GUMBO_NAMESPACE_HTML && st_strcasecmp(ctx_tag, "form") == 0; } else { + VALUE node, element_name; ID element_ = rb_intern_const("element?"); // Context fragment name. @@ -482,13 +520,13 @@ fragment( ctx_ns = lookup_namespace(ctx, true); // Check for a form ancestor, including self. - for (VALUE node = ctx; + for (node = ctx; !NIL_P(node); node = rb_respond_to(node, parent) ? rb_funcall(node, parent, 0) : Qnil) { if (!RTEST(rb_funcall(node, element_, 0))) { continue; } - VALUE element_name = rb_funcall(node, name, 0); + element_name = rb_funcall(node, name, 0); if (RSTRING_LEN(element_name) == 4 && !st_strcasecmp(RSTRING_PTR(element_name), "form") && lookup_namespace(node, false) == GUMBO_NAMESPACE_HTML) { @@ -510,8 +548,8 @@ fragment( } // Quirks mode. - VALUE doc = rb_funcall(doc_fragment, rb_intern_const("document"), 0); - VALUE dtd = rb_funcall(doc, internal_subset, 0); + doc = rb_funcall(doc_fragment, rb_intern_const("document"), 0); + dtd = rb_funcall(doc, internal_subset, 0); if (NIL_P(dtd)) { quirks_mode = GUMBO_DOCTYPE_NO_QUIRKS; } else { @@ -526,8 +564,8 @@ fragment( } // Perform a fragment parse. - int depth = NUM2INT(max_depth); - GumboOptions options = kGumboDefaultOptions; + depth = NUM2INT(max_depth); + options = kGumboDefaultOptions; options.max_attributes = NUM2INT(max_attributes); options.max_errors = NUM2INT(max_errors); // Add one to account for the HTML element. @@ -538,13 +576,13 @@ fragment( options.quirks_mode = quirks_mode; options.fragment_context_has_form_ancestor = form; - GumboOutput *output = perform_parse(&options, tags); - ParseArgs args = { - .output = output, - .input = tags, - .url_or_frag = doc_fragment, - .doc = (xmlDocPtr)extract_xml_node(doc), - }; + output = perform_parse(&options, tags); + + args.output = output; + args.input = tags; + args.url_or_frag = doc_fragment; + args.doc = (xmlDocPtr)extract_xml_node(doc); + rb_ensure(fragment_continue, (VALUE)(&args), parse_cleanup, (VALUE)(&args)); return Qnil; } @@ -552,13 +590,14 @@ fragment( static VALUE fragment_continue(VALUE parse_args) { - ParseArgs *args = (ParseArgs*)parse_args; + ParseArgs *args = (ParseArgs *)parse_args; GumboOutput *output = args->output; VALUE doc_fragment = args->url_or_frag; xmlDocPtr xml_doc = args->doc; + xmlNodePtr xml_frag; args->doc = NULL; // The Ruby runtime owns doc so make sure we don't delete it. - xmlNodePtr xml_frag = extract_xml_node(doc_fragment); + xml_frag = extract_xml_node(doc_fragment); build_tree(xml_doc, xml_frag, output->root); add_errors(output, doc_fragment, args->input, rb_utf8_str_new_static("#fragment", 9)); return Qnil; diff --git a/ext/nokogiri/html4_element_description.c b/ext/nokogiri/html4_element_description.c index 3344772f3f..a9ba9f784a 100644 --- a/ext/nokogiri/html4_element_description.c +++ b/ext/nokogiri/html4_element_description.c @@ -266,7 +266,7 @@ get_description(VALUE klass, VALUE tag_name) ); if (NULL == description) { return Qnil; } - return Data_Wrap_Struct(klass, 0, 0, (void *)(uintptr_t)description); + return Data_Wrap_Struct(klass, 0, 0, DISCARD_CONST_QUAL(void *, description)); } void diff --git a/ext/nokogiri/html4_sax_parser_context.c b/ext/nokogiri/html4_sax_parser_context.c index 6462442eec..dec0d88fe5 100644 --- a/ext/nokogiri/html4_sax_parser_context.c +++ b/ext/nokogiri/html4_sax_parser_context.c @@ -110,7 +110,8 @@ void noko_init_html_sax_parser_context() { assert(cNokogiriXmlSaxParserContext); - cNokogiriHtml4SaxParserContext = rb_define_class_under(mNokogiriHtml4Sax, "ParserContext", cNokogiriXmlSaxParserContext); + cNokogiriHtml4SaxParserContext = rb_define_class_under(mNokogiriHtml4Sax, "ParserContext", + cNokogiriXmlSaxParserContext); rb_define_singleton_method(cNokogiriHtml4SaxParserContext, "memory", parse_memory, 2); rb_define_singleton_method(cNokogiriHtml4SaxParserContext, "file", parse_file, 2); diff --git a/ext/nokogiri/nokogiri.c b/ext/nokogiri/nokogiri.c index 661c337ea9..b82329cccf 100644 --- a/ext/nokogiri/nokogiri.c +++ b/ext/nokogiri/nokogiri.c @@ -220,7 +220,7 @@ Init_nokogiri() xmlInitParser(); exsltRegisterAll(); - if (xsltExtModuleFunctionLookup((xmlChar*)"date-time", EXSLT_DATE_NAMESPACE)) { + if (xsltExtModuleFunctionLookup((const xmlChar *)"date-time", EXSLT_DATE_NAMESPACE)) { rb_const_set(mNokogiri, rb_intern("LIBXSLT_DATETIME_ENABLED"), Qtrue); } else { rb_const_set(mNokogiri, rb_intern("LIBXSLT_DATETIME_ENABLED"), Qfalse); diff --git a/ext/nokogiri/nokogiri.h b/ext/nokogiri/nokogiri.h index ccae81460d..ba3d6d9ebb 100644 --- a/ext/nokogiri/nokogiri.h +++ b/ext/nokogiri/nokogiri.h @@ -197,6 +197,9 @@ NOKOPUBFUN VALUE Nokogiri_wrap_xml_document(VALUE klass, #define NOKOGIRI_SAX_TUPLE_NEW(_ctxt, _self) nokogiri_sax_tuple_new(_ctxt, _self) #define NOKOGIRI_SAX_TUPLE_DESTROY(_tuple) free(_tuple) +#define DISCARD_CONST_QUAL(t, v) ((t)(uintptr_t)(v)) +#define DISCARD_CONST_QUAL_XMLCHAR(v) DISCARD_CONST_QUAL(xmlChar *, v) + void Nokogiri_structured_error_func_save(libxmlStructuredErrorHandlerState *handler_state); void Nokogiri_structured_error_func_save_and_set(libxmlStructuredErrorHandlerState *handler_state, void *user_data, xmlStructuredErrorFunc handler); diff --git a/ext/nokogiri/xml_document.c b/ext/nokogiri/xml_document.c index f90aa64a5f..b59ea4bd84 100644 --- a/ext/nokogiri/xml_document.c +++ b/ext/nokogiri/xml_document.c @@ -213,7 +213,7 @@ set_encoding(VALUE self, VALUE encoding) Data_Get_Struct(self, xmlDoc, doc); if (doc->encoding) { - free((char *)(uintptr_t) doc->encoding); /* avoid gcc cast warning */ + xmlFree(DISCARD_CONST_QUAL_XMLCHAR(doc->encoding)); } doc->encoding = xmlStrdup((xmlChar *)StringValueCStr(encoding)); diff --git a/ext/nokogiri/xml_namespace.c b/ext/nokogiri/xml_namespace.c index 1517be5ca8..0b41acf1af 100644 --- a/ext/nokogiri/xml_namespace.c +++ b/ext/nokogiri/xml_namespace.c @@ -33,10 +33,10 @@ dealloc_namespace(xmlNsPtr ns) */ NOKOGIRI_DEBUG_START(ns) ; if (ns->href) { - xmlFree((xmlChar *)(uintptr_t)ns->href); + xmlFree(DISCARD_CONST_QUAL_XMLCHAR(ns->href)); } if (ns->prefix) { - xmlFree((xmlChar *)(uintptr_t)ns->prefix); + xmlFree(DISCARD_CONST_QUAL_XMLCHAR(ns->prefix)); } xmlFree(ns); NOKOGIRI_DEBUG_END(ns) ; diff --git a/ext/nokogiri/xml_node.c b/ext/nokogiri/xml_node.c index 288a1f3ebc..c28fb751a9 100644 --- a/ext/nokogiri/xml_node.c +++ b/ext/nokogiri/xml_node.c @@ -304,7 +304,7 @@ reparent_node_with(VALUE pivot_obj, VALUE reparentee_obj, pivot_reparentee_func * issue #391, where new node's prefix may become the string "default" * see libxml2 tree.c xmlNewReconciliedNs which implements this behavior. */ - xmlFree((xmlChar *)reparentee->ns->prefix); + xmlFree(DISCARD_CONST_QUAL_XMLCHAR(reparentee->ns->prefix)); reparentee->ns->prefix = NULL; } } @@ -934,7 +934,7 @@ get(VALUE self, VALUE rattribute) Data_Get_Struct(self, xmlNode, node); attribute = xmlCharStrdup(StringValueCStr(rattribute)); - colon = (xmlChar *)(uintptr_t)xmlStrchr(attribute, (const xmlChar)':'); + colon = DISCARD_CONST_QUAL_XMLCHAR(xmlStrchr(attribute, (const xmlChar)':')); if (colon) { /* split the attribute string into separate prefix and name by * null-terminating the prefix at the colon */