Skip to content

Commit

Permalink
Merge pull request #2530 from sparklemotion/flavorjones-check-parse-m…
Browse files Browse the repository at this point in the history
…emory-types_v1.13.x

SAX::Parser constructors check types (v1.13.x branch)

---

**What problem is this PR intended to solve?**

HTML4::SAX::Parser, HTML4::SAX::ParserContext, XML::SAX::Parser, and XML::SAX::ParserContext now properly check the types of the arguments to their various constructor methods.

Previously, passing arguments of unexpected types might cause a segfault or other less-obvious exceptions.

This is a backport of #2529 


**Have you included adequate test coverage?**

Yes! Added test coverage for these cases.


**Does this change affect the behavior of either the C or the Java implementations?**

Both the C and Java implementations have been updated to behave identically in this circumstance.
  • Loading branch information
flavorjones authored May 7, 2022
2 parents 22c9e5b + 83cc451 commit 61b1a39
Show file tree
Hide file tree
Showing 10 changed files with 66 additions and 12 deletions.
11 changes: 11 additions & 0 deletions ext/java/nokogiri/Html4SaxParserContext.java
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,13 @@ static EncodingType get(final int ordinal)
IRubyObject data,
IRubyObject encoding)
{
if (!(data instanceof RubyString)) {
throw context.getRuntime().newTypeError("data must be kind_of String");
}
if (!(encoding instanceof RubyString)) {
throw context.getRuntime().newTypeError("data must be kind_of String");
}

Html4SaxParserContext ctx = Html4SaxParserContext.newInstance(context.runtime, (RubyClass) klass);
ctx.setInputSourceFile(context, data);
String javaEncoding = findEncodingName(context, encoding);
Expand All @@ -247,6 +254,10 @@ static EncodingType get(final int ordinal)
IRubyObject data,
IRubyObject encoding)
{
if (!(encoding instanceof RubyFixnum)) {
throw context.getRuntime().newTypeError("encoding must be kind_of String");
}

Html4SaxParserContext ctx = Html4SaxParserContext.newInstance(context.runtime, (RubyClass) klass);
ctx.setIOInputSource(context, data, context.nil);
String javaEncoding = findEncodingName(context, encoding);
Expand Down
7 changes: 5 additions & 2 deletions ext/java/nokogiri/XmlSaxParserContext.java
Original file line number Diff line number Diff line change
Expand Up @@ -130,9 +130,12 @@ public class XmlSaxParserContext extends ParserContext
parse_io(ThreadContext context,
IRubyObject klazz,
IRubyObject data,
IRubyObject enc)
IRubyObject encoding)
{
//int encoding = (int)enc.convertToInteger().getLongValue();
// check the type of the unused encoding to match behavior of CRuby
if (!(encoding instanceof RubyFixnum)) {
throw context.getRuntime().newTypeError("encoding must be kind_of String");
}
final Ruby runtime = context.runtime;
XmlSaxParserContext ctx = newInstance(runtime, (RubyClass) klazz);
ctx.initialize(runtime);
Expand Down
8 changes: 7 additions & 1 deletion ext/java/nokogiri/internals/ParserContext.java
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,12 @@ public abstract class ParserContext extends RubyObject
source = new InputSource();
ParserContext.setUrl(context, source, url);

Ruby ruby = context.getRuntime();

if (!(data.respondsTo("read"))) {
throw ruby.newTypeError("must respond to :read");
}

source.setByteStream(new IOInputStream(data));
if (java_encoding != null) {
source.setEncoding(java_encoding);
Expand All @@ -73,7 +79,7 @@ public abstract class ParserContext extends RubyObject
Ruby ruby = context.getRuntime();

if (!(data instanceof RubyString)) {
throw ruby.newArgumentError("must be kind_of String");
throw ruby.newTypeError("must be kind_of String");
}

RubyString stringData = (RubyString) data;
Expand Down
5 changes: 2 additions & 3 deletions ext/nokogiri/html4_sax_parser_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,8 @@ parse_memory(VALUE klass, VALUE data, VALUE encoding)
{
htmlParserCtxtPtr ctxt;

if (NIL_P(data)) {
rb_raise(rb_eArgError, "data cannot be nil");
}
Check_Type(data, T_STRING);

if (!(int)RSTRING_LEN(data)) {
rb_raise(rb_eRuntimeError, "data cannot be empty");
}
Expand Down
13 changes: 10 additions & 3 deletions ext/nokogiri/xml_sax_parser_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

VALUE cNokogiriXmlSaxParserContext ;

static ID id_read;

static void
deallocate(xmlParserCtxtPtr ctxt)
{
Expand All @@ -26,6 +28,10 @@ parse_io(VALUE klass, VALUE io, VALUE encoding)
xmlParserCtxtPtr ctxt;
xmlCharEncoding enc = (xmlCharEncoding)NUM2INT(encoding);

if (!rb_respond_to(io, id_read)) {
rb_raise(rb_eTypeError, "argument expected to respond to :read");
}

ctxt = xmlCreateIOParserCtxt(NULL, NULL,
(xmlInputReadCallback)noko_io_read,
(xmlInputCloseCallback)noko_io_close,
Expand Down Expand Up @@ -62,9 +68,8 @@ parse_memory(VALUE klass, VALUE data)
{
xmlParserCtxtPtr ctxt;

if (NIL_P(data)) {
rb_raise(rb_eArgError, "data cannot be nil");
}
Check_Type(data, T_STRING);

if (!(int)RSTRING_LEN(data)) {
rb_raise(rb_eRuntimeError, "data cannot be empty");
}
Expand Down Expand Up @@ -278,4 +283,6 @@ noko_init_xml_sax_parser_context()
rb_define_method(cNokogiriXmlSaxParserContext, "recovery", get_recovery, 0);
rb_define_method(cNokogiriXmlSaxParserContext, "line", line, 0);
rb_define_method(cNokogiriXmlSaxParserContext, "column", column, 0);

id_read = rb_intern("read");
}
2 changes: 1 addition & 1 deletion lib/nokogiri/html4/sax/parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class Parser < Nokogiri::XML::SAX::Parser
###
# Parse html stored in +data+ using +encoding+
def parse_memory(data, encoding = "UTF-8")
raise ArgumentError unless data
raise TypeError unless String === data
return if data.empty?

ctx = ParserContext.memory(data, encoding)
Expand Down
8 changes: 7 additions & 1 deletion test/html4/sax/test_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def test_parse_file_with_dir
end

def test_parse_memory_nil
assert_raises(ArgumentError) do
assert_raises(TypeError) do
@parser.parse_memory(nil)
end
end
Expand Down Expand Up @@ -161,6 +161,12 @@ def test_parsing_dom_error_from_io
def test_empty_processing_instruction
@parser.parse_memory("<strong>this will segfault<?strong>")
end

it "handles invalid types gracefully" do
assert_raises(TypeError) { Nokogiri::HTML::SAX::Parser.new.parse(0xcafecafe) }
assert_raises(TypeError) { Nokogiri::HTML::SAX::Parser.new.parse_memory(0xcafecafe) }
assert_raises(TypeError) { Nokogiri::HTML::SAX::Parser.new.parse_io(0xcafecafe) }
end
end
end
end
Expand Down
9 changes: 9 additions & 0 deletions test/html4/sax/test_parser_context.rb
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,15 @@ def test_from_file
ctx.parse_with(parser)
# end
end

def test_graceful_handling_of_invalid_types
assert_raises(TypeError) { ParserContext.new(0xcafecafe) }
assert_raises(TypeError) { ParserContext.memory(0xcafecafe, "UTF-8") }
assert_raises(TypeError) { ParserContext.io(0xcafecafe, 1) }
assert_raises(TypeError) { ParserContext.io(StringIO.new("asdf"), "should be an index into ENCODINGS") }
assert_raises(TypeError) { ParserContext.file(0xcafecafe, "UTF-8") }
assert_raises(TypeError) { ParserContext.file("path/to/file", 0xcafecafe) }
end
end
end
end
Expand Down
8 changes: 7 additions & 1 deletion test/xml/sax/test_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,12 @@ class Nokogiri::SAX::TestCase
end
end

it "handles invalid types gracefully" do
assert_raises(TypeError) { Nokogiri::XML::SAX::Parser.new.parse(0xcafecafe) }
assert_raises(TypeError) { Nokogiri::XML::SAX::Parser.new.parse_memory(0xcafecafe) }
assert_raises(TypeError) { Nokogiri::XML::SAX::Parser.new.parse_io(0xcafecafe) }
end

it :test_namespace_declaration_order_is_saved do
parser.parse(<<~EOF)
<root xmlns:foo='http://foo.example.com/' xmlns='http://example.com/'>
Expand Down Expand Up @@ -261,7 +267,7 @@ def call_parse_io_with_encoding(encoding)
end

it :test_render_parse_nil_param do
assert_raises(ArgumentError) { parser.parse_memory(nil) }
assert_raises(TypeError) { parser.parse_memory(nil) }
end

it :test_bad_encoding_args do
Expand Down
7 changes: 7 additions & 0 deletions test/xml/sax/test_parser_context.rb
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,13 @@ def test_recovery
assert(pc.recovery)
end

def test_graceful_handling_of_invalid_types
assert_raises(TypeError) { ParserContext.new(0xcafecafe) }
assert_raises(TypeError) { ParserContext.memory(0xcafecafe) }
assert_raises(TypeError) { ParserContext.io(0xcafecafe, 1) }
assert_raises(TypeError) { ParserContext.io(StringIO.new("asdf"), "should be an index into ENCODINGS") }
end

def test_from_io
ctx = ParserContext.new(StringIO.new("fo"), "UTF-8")
assert(ctx)
Expand Down

0 comments on commit 61b1a39

Please sign in to comment.