Skip to content

Commit

Permalink
Merge pull request #2304 from sparklemotion/2303-better-c99-fix
Browse files Browse the repository at this point in the history
better c99 fix
  • Loading branch information
flavorjones committed Aug 9, 2021
2 parents efe8c0a + a81343b commit e3435aa
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 88 deletions.
1 change: 0 additions & 1 deletion .github/workflows/upstream.yml
Expand Up @@ -89,7 +89,6 @@ jobs:
apt-get: "libxml2-dev libxslt1-dev pkg-config"
mingw: "_upgrade_ libxml2 libxslt pkgconf"
bundler-cache: true
setup-ruby-ref: MSP-Greg/ruby-setup-ruby/00-win-ucrt
- uses: actions/cache@v2
if: matrix.sys == 'disable'
with:
Expand Down
4 changes: 4 additions & 0 deletions ext/nokogiri/extconf.rb
Expand Up @@ -594,6 +594,10 @@ def do_clean
append_ldflags(ENV["LDFLAGS"].split) unless ENV["LDFLAGS"].nil?
$LIBS = concat_flags($LIBS, ENV["LIBS"])

# nokogumbo code uses C90/C99 features, let's make sure older compilers won't give
# errors/warnings. see #2302
append_cflags(["-std=c99", "-Wno-declaration-after-statement"])

# always include debugging information
append_cflags("-g")

Expand Down
135 changes: 48 additions & 87 deletions ext/nokogiri/gumbo.c
Expand Up @@ -89,19 +89,15 @@ get_parent(xmlNodePtr node)
static GumboOutput *
perform_parse(const GumboOptions *options, VALUE input)
{
GumboOutput *output;
const char *status_string;

assert(RTEST(input));
Check_Type(input, T_STRING);
GumboOutput *output = gumbo_parse_with_options(
options,
RSTRING_PTR(input),
RSTRING_LEN(input)
);

output = gumbo_parse_with_options(
options,
RSTRING_PTR(input),
RSTRING_LEN(input)
);

status_string = gumbo_status_to_string(output->status);
const char *status_string = gumbo_status_to_string(output->status);
switch (output->status) {
case GUMBO_STATUS_OK:
break;
Expand Down Expand Up @@ -155,13 +151,9 @@ build_tree(
size_t child_index = 0;

while (true) {
const GumboVector *children;
const GumboNode *gumbo_child;
xmlNodePtr xml_child;

assert(gumbo_node != NULL);
children = gumbo_node->type == GUMBO_NODE_DOCUMENT ?
&gumbo_node->v.document.children : &gumbo_node->v.element.children;
const GumboVector *children = gumbo_node->type == GUMBO_NODE_DOCUMENT ?
&gumbo_node->v.document.children : &gumbo_node->v.element.children;
if (child_index >= children->length) {
// Move up the tree and to the next child.
if (xml_node == xml_output_node) {
Expand All @@ -180,8 +172,9 @@ build_tree(
}
continue;
}
const GumboNode *gumbo_child = children->data[child_index++];
xmlNodePtr xml_child;

gumbo_child = children->data[child_index++];
switch (gumbo_child->type) {
case GUMBO_NODE_DOCUMENT:
abort(); // Bug in Gumbo.
Expand Down Expand Up @@ -209,15 +202,12 @@ build_tree(
case GUMBO_NODE_TEMPLATE:
// XXX: Should create a template element and a new DocumentFragment
case GUMBO_NODE_ELEMENT: {
xmlNsPtr ns = NULL;
const GumboVector *attrs;
size_t i;

xml_child = xmlNewDocNode(doc, NULL, (const xmlChar *)gumbo_child->v.element.name, NULL);
set_line(xml_child, gumbo_child->v.element.start_pos.line);
if (xml_root == NULL) {
xml_root = xml_child;
}
xmlNsPtr ns = NULL;
switch (gumbo_child->v.element.tag_namespace) {
case GUMBO_NAMESPACE_HTML:
break;
Expand All @@ -234,8 +224,8 @@ build_tree(
xmlAddChild(xml_node, xml_child);

// Add the attributes.
attrs = &gumbo_child->v.element.attributes;
for (i = 0; i < attrs->length; i++) {
const GumboVector *attrs = &gumbo_child->v.element.attributes;
for (size_t i = 0; i < attrs->length; i++) {
const GumboAttribute *attr = attrs->data[i];

switch (attr->attr_namespace) {
Expand Down Expand Up @@ -274,29 +264,19 @@ add_errors(const GumboOutput *output, VALUE rdoc, VALUE input, VALUE url)

// Add parse errors to rdoc.
if (output->errors.length) {
size_t i;
const GumboVector *errors = &output->errors;
VALUE rerrors = rb_ary_new2(errors->length);

for (i = 0; i < errors->length; i++) {
GumboError *err;
GumboSourcePosition position;
for (size_t i = 0; i < errors->length; i++) {
GumboError *err = errors->data[i];
GumboSourcePosition position = gumbo_error_position(err);
char *msg;
size_t size;
VALUE err_str;
VALUE syntax_error;
const char *error_code;
VALUE str1;

err = errors->data[i];
position = gumbo_error_position(err);
size = gumbo_caret_diagnostic_to_string(err, input_str, input_len, &msg);
err_str = rb_utf8_str_new(msg, size);
size_t size = gumbo_caret_diagnostic_to_string(err, input_str, input_len, &msg);
VALUE err_str = rb_utf8_str_new(msg, size);
free(msg);
syntax_error = rb_class_new_instance(1, &err_str, cNokogiriXmlSyntaxError);
error_code = gumbo_error_code(err);
str1 = error_code ? rb_utf8_str_new_static(error_code, strlen(error_code)) : Qnil;

VALUE syntax_error = rb_class_new_instance(1, &err_str, cNokogiriXmlSyntaxError);
const char *error_code = gumbo_error_code(err);
VALUE str1 = error_code ? rb_utf8_str_new_static(error_code, strlen(error_code)) : Qnil;
rb_iv_set(syntax_error, "@domain", INT2NUM(1)); // XML_FROM_PARSER
rb_iv_set(syntax_error, "@code", INT2NUM(1)); // XML_ERR_INTERNAL_ERROR
rb_iv_set(syntax_error, "@level", INT2NUM(2)); // XML_ERR_ERROR
Expand Down Expand Up @@ -343,20 +323,18 @@ static VALUE parse_continue(VALUE parse_args);
static VALUE
parse(VALUE self, VALUE input, VALUE url, VALUE max_attributes, VALUE max_errors, VALUE max_depth)
{
GumboOutput *output;
GumboOptions options = kGumboDefaultOptions;
ParseArgs args;

options.max_attributes = NUM2INT(max_attributes);
options.max_errors = NUM2INT(max_errors);
options.max_tree_depth = NUM2INT(max_depth);

output = perform_parse(&options, input);

args.output = output;
args.input = input;
args.url_or_frag = url;
args.doc = NULL;
GumboOutput *output = perform_parse(&options, input);
ParseArgs args = {
.output = output,
.input = input,
.url_or_frag = url,
.doc = NULL,
};

return rb_ensure(parse_continue, (VALUE)(&args), parse_cleanup, (VALUE)(&args));
}
Expand All @@ -367,8 +345,6 @@ parse_continue(VALUE parse_args)
ParseArgs *args = (ParseArgs *)parse_args;
GumboOutput *output = args->output;
xmlDocPtr doc;
VALUE rdoc;

if (output->document->v.document.has_doctype) {
const char *name = output->document->v.document.name;
const char *public = output->document->v.document.public_identifier;
Expand All @@ -381,7 +357,7 @@ parse_continue(VALUE parse_args)
}
args->doc = doc; // Make sure doc gets cleaned up if an error is thrown.
build_tree(doc, (xmlNodePtr)doc, output->document);
rdoc = Nokogiri_wrap_xml_document(cNokogiriHtml5Document, doc);
VALUE rdoc = Nokogiri_wrap_xml_document(cNokogiriHtml5Document, doc);
args->doc = NULL; // The Ruby runtime now owns doc so don't delete it.
add_errors(output, rdoc, args->input, args->url_or_frag);
return rdoc;
Expand All @@ -390,15 +366,10 @@ parse_continue(VALUE parse_args)
static int
lookup_namespace(VALUE node, bool require_known_ns)
{
VALUE ns;
ID namespace, href;
const char *href_ptr;
size_t href_len;

CONST_ID(namespace, "namespace");
CONST_ID(href, "href");

ns = rb_funcall(node, namespace, 0);
VALUE ns = rb_funcall(node, namespace, 0);

if (NIL_P(ns)) {
return GUMBO_NAMESPACE_HTML;
Expand All @@ -407,8 +378,8 @@ lookup_namespace(VALUE node, bool require_known_ns)
assert(RTEST(ns));
Check_Type(ns, T_STRING);

href_ptr = RSTRING_PTR(ns);
href_len = RSTRING_LEN(ns);
const char *href_ptr = RSTRING_PTR(ns);
size_t href_len = RSTRING_LEN(ns);
#define NAMESPACE_P(uri) (href_len == sizeof uri - 1 && !memcmp(href_ptr, uri, href_len))
if (NAMESPACE_P("http://www.w3.org/1999/xhtml")) {
return GUMBO_NAMESPACE_HTML;
Expand Down Expand Up @@ -456,23 +427,15 @@ fragment(
GumboQuirksModeEnum quirks_mode;
bool form = false;
const char *encoding = NULL;
VALUE doc, dtd;
int depth;
GumboOptions options = kGumboDefaultOptions;
GumboOutput *output;
ParseArgs args;

if (NIL_P(ctx)) {
ctx_tag = "body";
ctx_ns = GUMBO_NAMESPACE_HTML;
} else if (TYPE(ctx) == T_STRING) {
size_t len;
const char *colon;

ctx_tag = StringValueCStr(ctx);
ctx_ns = GUMBO_NAMESPACE_HTML;
len = RSTRING_LEN(ctx);
colon = memchr(ctx_tag, ':', len);
size_t len = RSTRING_LEN(ctx);
const char *colon = memchr(ctx_tag, ':', len);
if (colon) {
switch (colon - ctx_tag) {
case 3:
Expand Down Expand Up @@ -507,7 +470,6 @@ fragment(
// Check if it's a form.
form = ctx_ns == GUMBO_NAMESPACE_HTML && st_strcasecmp(ctx_tag, "form") == 0;
} else {
VALUE node, element_name;
ID element_ = rb_intern_const("element?");

// Context fragment name.
Expand All @@ -520,13 +482,13 @@ fragment(
ctx_ns = lookup_namespace(ctx, true);

// Check for a form ancestor, including self.
for (node = ctx;
for (VALUE node = ctx;
!NIL_P(node);
node = rb_respond_to(node, parent) ? rb_funcall(node, parent, 0) : Qnil) {
if (!RTEST(rb_funcall(node, element_, 0))) {
continue;
}
element_name = rb_funcall(node, name, 0);
VALUE element_name = rb_funcall(node, name, 0);
if (RSTRING_LEN(element_name) == 4
&& !st_strcasecmp(RSTRING_PTR(element_name), "form")
&& lookup_namespace(node, false) == GUMBO_NAMESPACE_HTML) {
Expand All @@ -548,8 +510,8 @@ fragment(
}

// Quirks mode.
doc = rb_funcall(doc_fragment, rb_intern_const("document"), 0);
dtd = rb_funcall(doc, internal_subset, 0);
VALUE doc = rb_funcall(doc_fragment, rb_intern_const("document"), 0);
VALUE dtd = rb_funcall(doc, internal_subset, 0);
if (NIL_P(dtd)) {
quirks_mode = GUMBO_DOCTYPE_NO_QUIRKS;
} else {
Expand All @@ -564,8 +526,8 @@ fragment(
}

// Perform a fragment parse.
depth = NUM2INT(max_depth);
options = kGumboDefaultOptions;
int depth = NUM2INT(max_depth);
GumboOptions options = kGumboDefaultOptions;
options.max_attributes = NUM2INT(max_attributes);
options.max_errors = NUM2INT(max_errors);
// Add one to account for the HTML element.
Expand All @@ -576,13 +538,13 @@ fragment(
options.quirks_mode = quirks_mode;
options.fragment_context_has_form_ancestor = form;

output = perform_parse(&options, tags);

args.output = output;
args.input = tags;
args.url_or_frag = doc_fragment;
args.doc = (xmlDocPtr)extract_xml_node(doc);

GumboOutput *output = perform_parse(&options, tags);
ParseArgs args = {
.output = output,
.input = tags,
.url_or_frag = doc_fragment,
.doc = (xmlDocPtr)extract_xml_node(doc),
};
rb_ensure(fragment_continue, (VALUE)(&args), parse_cleanup, (VALUE)(&args));
return Qnil;
}
Expand All @@ -594,10 +556,9 @@ fragment_continue(VALUE parse_args)
GumboOutput *output = args->output;
VALUE doc_fragment = args->url_or_frag;
xmlDocPtr xml_doc = args->doc;
xmlNodePtr xml_frag;

args->doc = NULL; // The Ruby runtime owns doc so make sure we don't delete it.
xml_frag = extract_xml_node(doc_fragment);
xmlNodePtr xml_frag = extract_xml_node(doc_fragment);
build_tree(xml_doc, xml_frag, output->root);
add_errors(output, doc_fragment, args->input, rb_utf8_str_new_static("#fragment", 9));
return Qnil;
Expand Down

0 comments on commit e3435aa

Please sign in to comment.