Skip to content

Commit

Permalink
Merge pull request #2668 from sparklemotion/flavorjones-namespace-sco…
Browse files Browse the repository at this point in the history
…pes-compaction_v1.13.x

fix: namespace nodes behave with compaction (backport to v1.13.x)
  • Loading branch information
flavorjones authored Oct 16, 2022
2 parents 7b369e5 + 73d73d6 commit 4db3b4d
Show file tree
Hide file tree
Showing 6 changed files with 143 additions and 17 deletions.
1 change: 1 addition & 0 deletions ext/nokogiri/nokogiri.h
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ int noko_io_write(void *ctx, char *buffer, int len);
int noko_io_close(void *ctx);

#define Noko_Node_Get_Struct(obj,type,sval) ((sval) = (type*)DATA_PTR(obj))
#define Noko_Namespace_Get_Struct(obj,type,sval) ((sval) = (type*)DATA_PTR(obj))

VALUE noko_xml_node_wrap(VALUE klass, xmlNodePtr node) ;
VALUE noko_xml_node_wrap_node_set_result(xmlNodePtr node, VALUE node_set) ;
Expand Down
6 changes: 5 additions & 1 deletion ext/nokogiri/xml_document.c
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,11 @@ recursively_remove_namespaces_from_node(xmlNodePtr node)
(node->type == XML_XINCLUDE_START) ||
(node->type == XML_XINCLUDE_END)) &&
node->nsDef) {
xmlFreeNsList(node->nsDef);
xmlNsPtr curr = node->nsDef;
while (curr) {
noko_xml_document_pin_namespace(curr, node->doc);
curr = curr->next;
}
node->nsDef = NULL;
}

Expand Down
46 changes: 41 additions & 5 deletions ext/nokogiri/xml_namespace.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,15 @@
VALUE cNokogiriXmlNamespace ;

static void
dealloc_namespace(xmlNsPtr ns)
_xml_namespace_dealloc(void *ptr)
{
/*
* this deallocator is only used for namespace nodes that are part of an xpath
* node set. see noko_xml_namespace_wrap().
*/
xmlNsPtr ns = ptr;
NOKOGIRI_DEBUG_START(ns) ;

if (ns->href) {
xmlFree(DISCARD_CONST_QUAL_XMLCHAR(ns->href));
}
Expand All @@ -42,6 +44,36 @@ dealloc_namespace(xmlNsPtr ns)
NOKOGIRI_DEBUG_END(ns) ;
}

#ifdef HAVE_RB_GC_LOCATION
static void
_xml_namespace_update_references(void *ptr)
{
xmlNsPtr ns = ptr;
if (ns->_private) {
ns->_private = (void *)rb_gc_location((VALUE)ns->_private);
}
}
#else
# define _xml_namespace_update_references 0
#endif

static const rb_data_type_t nokogiri_xml_namespace_type_with_dealloc = {
"Nokogiri/XMLNamespace/WithDealloc",
{0, _xml_namespace_dealloc, 0, _xml_namespace_update_references},
0, 0,
#ifdef RUBY_TYPED_FREE_IMMEDIATELY
RUBY_TYPED_FREE_IMMEDIATELY,
#endif
};

static const rb_data_type_t nokogiri_xml_namespace_type_without_dealloc = {
"Nokogiri/XMLNamespace/WithoutDealloc",
{0, 0, 0, _xml_namespace_update_references},
0, 0,
#ifdef RUBY_TYPED_FREE_IMMEDIATELY
RUBY_TYPED_FREE_IMMEDIATELY,
#endif
};

/*
* call-seq:
Expand All @@ -54,7 +86,7 @@ prefix(VALUE self)
{
xmlNsPtr ns;

Data_Get_Struct(self, xmlNs, ns);
Noko_Namespace_Get_Struct(self, xmlNs, ns);
if (!ns->prefix) { return Qnil; }

return NOKOGIRI_STR_NEW2(ns->prefix);
Expand All @@ -71,7 +103,7 @@ href(VALUE self)
{
xmlNsPtr ns;

Data_Get_Struct(self, xmlNs, ns);
Noko_Namespace_Get_Struct(self, xmlNs, ns);
if (!ns->href) { return Qnil; }

return NOKOGIRI_STR_NEW2(ns->href);
Expand All @@ -87,14 +119,18 @@ noko_xml_namespace_wrap(xmlNsPtr c_namespace, xmlDocPtr c_document)
}

if (c_document) {
rb_namespace = Data_Wrap_Struct(cNokogiriXmlNamespace, 0, 0, c_namespace);
rb_namespace = TypedData_Wrap_Struct(cNokogiriXmlNamespace,
&nokogiri_xml_namespace_type_without_dealloc,
c_namespace);

if (DOC_RUBY_OBJECT_TEST(c_document)) {
rb_iv_set(rb_namespace, "@document", DOC_RUBY_OBJECT(c_document));
rb_ary_push(DOC_NODE_CACHE(c_document), rb_namespace);
}
} else {
rb_namespace = Data_Wrap_Struct(cNokogiriXmlNamespace, 0, dealloc_namespace, c_namespace);
rb_namespace = TypedData_Wrap_Struct(cNokogiriXmlNamespace,
&nokogiri_xml_namespace_type_with_dealloc,
c_namespace);
}

c_namespace->_private = (void *)rb_namespace;
Expand Down
2 changes: 1 addition & 1 deletion ext/nokogiri/xml_node.c
Original file line number Diff line number Diff line change
Expand Up @@ -1353,7 +1353,7 @@ set_namespace(VALUE self, VALUE namespace)
Noko_Node_Get_Struct(self, xmlNode, node);

if (!NIL_P(namespace)) {
Data_Get_Struct(namespace, xmlNs, ns);
Noko_Namespace_Get_Struct(namespace, xmlNs, ns);
}

xmlSetNs(node, ns);
Expand Down
64 changes: 54 additions & 10 deletions test/test_compaction.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,63 @@
require "helper"

describe "compaction" do
it "https://github.com/sparklemotion/nokogiri/pull/2579" do
skip unless GC.respond_to?(:verify_compaction_references)
describe Nokogiri::XML::Node do
it "compacts safely" do # https://github.com/sparklemotion/nokogiri/pull/2579
skip unless GC.respond_to?(:verify_compaction_references)

big_doc = "<root>" + ("a".."zz").map { |x| "<#{x}>#{x}</#{x}>" }.join + "</root>"
doc = Nokogiri.XML(big_doc)
big_doc = "<root>" + ("a".."zz").map { |x| "<#{x}>#{x}</#{x}>" }.join + "</root>"
doc = Nokogiri.XML(big_doc)

# ensure a bunch of node objects have been wrapped
doc.root.children.each(&:inspect)
# ensure a bunch of node objects have been wrapped
doc.root.children.each(&:inspect)

# compact the heap and try to get the node wrappers to move
GC.verify_compaction_references(double_heap: true, toward: :empty)
# compact the heap and try to get the node wrappers to move
GC.verify_compaction_references(double_heap: true, toward: :empty)

# access the node wrappers and make sure they didn't move
doc.root.children.each(&:inspect)
# access the node wrappers and make sure they didn't move
doc.root.children.each(&:inspect)
end
end

describe Nokogiri::XML::Namespace do
it "namespace_scopes" do
skip unless GC.respond_to?(:verify_compaction_references)

doc = Nokogiri::XML(<<~EOF)
<root xmlns="http://example.com/root" xmlns:bar="http://example.com/bar">
<first/>
<second xmlns="http://example.com/child"/>
<third xmlns:foo="http://example.com/foo"/>
</root>
EOF

doc.at_xpath("//root:first", "root" => "http://example.com/root").namespace_scopes.inspect

GC.verify_compaction_references(double_heap: true, toward: :empty)

doc.at_xpath("//root:first", "root" => "http://example.com/root").namespace_scopes.inspect
end

it "remove_namespaces!" do
skip unless GC.respond_to?(:verify_compaction_references)

doc = Nokogiri::XML(<<~XML)
<root xmlns:a="http://a.flavorjon.es/" xmlns:b="http://b.flavorjon.es/">
<a:foo>hello from a</a:foo>
<b:foo>hello from b</b:foo>
<container xmlns:c="http://c.flavorjon.es/">
<c:foo c:attr='attr-value'>hello from c</c:foo>
</container>
</root>
XML

namespaces = doc.root.namespaces
namespaces.each(&:inspect)
doc.remove_namespaces!

GC.verify_compaction_references(double_heap: true, toward: :empty)

namespaces.each(&:inspect)
end
end
end
41 changes: 41 additions & 0 deletions test/test_memory_leak.rb
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,47 @@ def test_leaking_namespace_node_strings_with_prefix
end
end

def test_document_remove_namespaces_with_ruby_objects
xml = <<~XML
<root xmlns:a="http://a.flavorjon.es/" xmlns:b="http://b.flavorjon.es/">
<a:foo>hello from a</a:foo>
<b:foo>hello from b</b:foo>
<container xmlns:c="http://c.flavorjon.es/">
<c:foo c:attr='attr-value'>hello from c</c:foo>
</container>
</root>
XML

20.times do
10_000.times do
doc = Nokogiri::XML::Document.parse(xml)
doc.namespaces.each(&:inspect)
doc.remove_namespaces!
end
puts MemInfo.rss
end
end

def test_document_remove_namespaces_without_ruby_objects
xml = <<~XML
<root xmlns:a="http://a.flavorjon.es/" xmlns:b="http://b.flavorjon.es/">
<a:foo>hello from a</a:foo>
<b:foo>hello from b</b:foo>
<container xmlns:c="http://c.flavorjon.es/">
<c:foo c:attr='attr-value'>hello from c</c:foo>
</container>
</root>
XML

20.times do
20_000.times do
doc = Nokogiri::XML::Document.parse(xml)
doc.remove_namespaces!
end
puts MemInfo.rss
end
end

def test_leaking_dtd_nodes_after_internal_subset_removal
# see https://github.com/sparklemotion/nokogiri/issues/1784
100_000.times do |i|
Expand Down

0 comments on commit 4db3b4d

Please sign in to comment.