Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SAX::Parser constructors check types (v1.13.x branch) #2530

Merged
merged 1 commit into from May 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
11 changes: 11 additions & 0 deletions ext/java/nokogiri/Html4SaxParserContext.java
Expand Up @@ -231,6 +231,13 @@ static EncodingType get(final int ordinal)
IRubyObject data,
IRubyObject encoding)
{
if (!(data instanceof RubyString)) {
throw context.getRuntime().newTypeError("data must be kind_of String");
}
if (!(encoding instanceof RubyString)) {
throw context.getRuntime().newTypeError("data must be kind_of String");
}

Html4SaxParserContext ctx = Html4SaxParserContext.newInstance(context.runtime, (RubyClass) klass);
ctx.setInputSourceFile(context, data);
String javaEncoding = findEncodingName(context, encoding);
Expand All @@ -247,6 +254,10 @@ static EncodingType get(final int ordinal)
IRubyObject data,
IRubyObject encoding)
{
if (!(encoding instanceof RubyFixnum)) {
throw context.getRuntime().newTypeError("encoding must be kind_of String");
}

Html4SaxParserContext ctx = Html4SaxParserContext.newInstance(context.runtime, (RubyClass) klass);
ctx.setIOInputSource(context, data, context.nil);
String javaEncoding = findEncodingName(context, encoding);
Expand Down
7 changes: 5 additions & 2 deletions ext/java/nokogiri/XmlSaxParserContext.java
Expand Up @@ -130,9 +130,12 @@ public class XmlSaxParserContext extends ParserContext
parse_io(ThreadContext context,
IRubyObject klazz,
IRubyObject data,
IRubyObject enc)
IRubyObject encoding)
{
//int encoding = (int)enc.convertToInteger().getLongValue();
// check the type of the unused encoding to match behavior of CRuby
if (!(encoding instanceof RubyFixnum)) {
throw context.getRuntime().newTypeError("encoding must be kind_of String");
}
final Ruby runtime = context.runtime;
XmlSaxParserContext ctx = newInstance(runtime, (RubyClass) klazz);
ctx.initialize(runtime);
Expand Down
8 changes: 7 additions & 1 deletion ext/java/nokogiri/internals/ParserContext.java
Expand Up @@ -58,6 +58,12 @@ public abstract class ParserContext extends RubyObject
source = new InputSource();
ParserContext.setUrl(context, source, url);

Ruby ruby = context.getRuntime();

if (!(data.respondsTo("read"))) {
throw ruby.newTypeError("must respond to :read");
}

source.setByteStream(new IOInputStream(data));
if (java_encoding != null) {
source.setEncoding(java_encoding);
Expand All @@ -73,7 +79,7 @@ public abstract class ParserContext extends RubyObject
Ruby ruby = context.getRuntime();

if (!(data instanceof RubyString)) {
throw ruby.newArgumentError("must be kind_of String");
throw ruby.newTypeError("must be kind_of String");
}

RubyString stringData = (RubyString) data;
Expand Down
5 changes: 2 additions & 3 deletions ext/nokogiri/html4_sax_parser_context.c
Expand Up @@ -19,9 +19,8 @@ parse_memory(VALUE klass, VALUE data, VALUE encoding)
{
htmlParserCtxtPtr ctxt;

if (NIL_P(data)) {
rb_raise(rb_eArgError, "data cannot be nil");
}
Check_Type(data, T_STRING);

if (!(int)RSTRING_LEN(data)) {
rb_raise(rb_eRuntimeError, "data cannot be empty");
}
Expand Down
13 changes: 10 additions & 3 deletions ext/nokogiri/xml_sax_parser_context.c
Expand Up @@ -2,6 +2,8 @@

VALUE cNokogiriXmlSaxParserContext ;

static ID id_read;

static void
deallocate(xmlParserCtxtPtr ctxt)
{
Expand All @@ -26,6 +28,10 @@ parse_io(VALUE klass, VALUE io, VALUE encoding)
xmlParserCtxtPtr ctxt;
xmlCharEncoding enc = (xmlCharEncoding)NUM2INT(encoding);

if (!rb_respond_to(io, id_read)) {
rb_raise(rb_eTypeError, "argument expected to respond to :read");
}

ctxt = xmlCreateIOParserCtxt(NULL, NULL,
(xmlInputReadCallback)noko_io_read,
(xmlInputCloseCallback)noko_io_close,
Expand Down Expand Up @@ -62,9 +68,8 @@ parse_memory(VALUE klass, VALUE data)
{
xmlParserCtxtPtr ctxt;

if (NIL_P(data)) {
rb_raise(rb_eArgError, "data cannot be nil");
}
Check_Type(data, T_STRING);

if (!(int)RSTRING_LEN(data)) {
rb_raise(rb_eRuntimeError, "data cannot be empty");
}
Expand Down Expand Up @@ -278,4 +283,6 @@ noko_init_xml_sax_parser_context()
rb_define_method(cNokogiriXmlSaxParserContext, "recovery", get_recovery, 0);
rb_define_method(cNokogiriXmlSaxParserContext, "line", line, 0);
rb_define_method(cNokogiriXmlSaxParserContext, "column", column, 0);

id_read = rb_intern("read");
}
2 changes: 1 addition & 1 deletion lib/nokogiri/html4/sax/parser.rb
Expand Up @@ -28,7 +28,7 @@ class Parser < Nokogiri::XML::SAX::Parser
###
# Parse html stored in +data+ using +encoding+
def parse_memory(data, encoding = "UTF-8")
raise ArgumentError unless data
raise TypeError unless String === data
return if data.empty?

ctx = ParserContext.memory(data, encoding)
Expand Down
8 changes: 7 additions & 1 deletion test/html4/sax/test_parser.rb
Expand Up @@ -54,7 +54,7 @@ def test_parse_file_with_dir
end

def test_parse_memory_nil
assert_raises(ArgumentError) do
assert_raises(TypeError) do
@parser.parse_memory(nil)
end
end
Expand Down Expand Up @@ -161,6 +161,12 @@ def test_parsing_dom_error_from_io
def test_empty_processing_instruction
@parser.parse_memory("<strong>this will segfault<?strong>")
end

it "handles invalid types gracefully" do
assert_raises(TypeError) { Nokogiri::HTML::SAX::Parser.new.parse(0xcafecafe) }
assert_raises(TypeError) { Nokogiri::HTML::SAX::Parser.new.parse_memory(0xcafecafe) }
assert_raises(TypeError) { Nokogiri::HTML::SAX::Parser.new.parse_io(0xcafecafe) }
end
end
end
end
Expand Down
9 changes: 9 additions & 0 deletions test/html4/sax/test_parser_context.rb
Expand Up @@ -40,6 +40,15 @@ def test_from_file
ctx.parse_with(parser)
# end
end

def test_graceful_handling_of_invalid_types
assert_raises(TypeError) { ParserContext.new(0xcafecafe) }
assert_raises(TypeError) { ParserContext.memory(0xcafecafe, "UTF-8") }
assert_raises(TypeError) { ParserContext.io(0xcafecafe, 1) }
assert_raises(TypeError) { ParserContext.io(StringIO.new("asdf"), "should be an index into ENCODINGS") }
assert_raises(TypeError) { ParserContext.file(0xcafecafe, "UTF-8") }
assert_raises(TypeError) { ParserContext.file("path/to/file", 0xcafecafe) }
end
end
end
end
Expand Down
8 changes: 7 additions & 1 deletion test/xml/sax/test_parser.rb
Expand Up @@ -71,6 +71,12 @@ class Nokogiri::SAX::TestCase
end
end

it "handles invalid types gracefully" do
assert_raises(TypeError) { Nokogiri::XML::SAX::Parser.new.parse(0xcafecafe) }
assert_raises(TypeError) { Nokogiri::XML::SAX::Parser.new.parse_memory(0xcafecafe) }
assert_raises(TypeError) { Nokogiri::XML::SAX::Parser.new.parse_io(0xcafecafe) }
end

it :test_namespace_declaration_order_is_saved do
parser.parse(<<~EOF)
<root xmlns:foo='http://foo.example.com/' xmlns='http://example.com/'>
Expand Down Expand Up @@ -261,7 +267,7 @@ def call_parse_io_with_encoding(encoding)
end

it :test_render_parse_nil_param do
assert_raises(ArgumentError) { parser.parse_memory(nil) }
assert_raises(TypeError) { parser.parse_memory(nil) }
end

it :test_bad_encoding_args do
Expand Down
7 changes: 7 additions & 0 deletions test/xml/sax/test_parser_context.rb
Expand Up @@ -80,6 +80,13 @@ def test_recovery
assert(pc.recovery)
end

def test_graceful_handling_of_invalid_types
assert_raises(TypeError) { ParserContext.new(0xcafecafe) }
assert_raises(TypeError) { ParserContext.memory(0xcafecafe) }
assert_raises(TypeError) { ParserContext.io(0xcafecafe, 1) }
assert_raises(TypeError) { ParserContext.io(StringIO.new("asdf"), "should be an index into ENCODINGS") }
end

def test_from_io
ctx = ParserContext.new(StringIO.new("fo"), "UTF-8")
assert(ctx)
Expand Down