Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: namespace nodes behave with compaction (backport to v1.13.x) #2668

Merged
merged 3 commits into from
Oct 16, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ext/nokogiri/nokogiri.h
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ int noko_io_write(void *ctx, char *buffer, int len);
int noko_io_close(void *ctx);

#define Noko_Node_Get_Struct(obj,type,sval) ((sval) = (type*)DATA_PTR(obj))
#define Noko_Namespace_Get_Struct(obj,type,sval) ((sval) = (type*)DATA_PTR(obj))

VALUE noko_xml_node_wrap(VALUE klass, xmlNodePtr node) ;
VALUE noko_xml_node_wrap_node_set_result(xmlNodePtr node, VALUE node_set) ;
Expand Down
6 changes: 5 additions & 1 deletion ext/nokogiri/xml_document.c
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,11 @@ recursively_remove_namespaces_from_node(xmlNodePtr node)
(node->type == XML_XINCLUDE_START) ||
(node->type == XML_XINCLUDE_END)) &&
node->nsDef) {
xmlFreeNsList(node->nsDef);
xmlNsPtr curr = node->nsDef;
while (curr) {
noko_xml_document_pin_namespace(curr, node->doc);
curr = curr->next;
}
node->nsDef = NULL;
}

Expand Down
46 changes: 41 additions & 5 deletions ext/nokogiri/xml_namespace.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,15 @@
VALUE cNokogiriXmlNamespace ;

static void
dealloc_namespace(xmlNsPtr ns)
_xml_namespace_dealloc(void *ptr)
{
/*
* this deallocator is only used for namespace nodes that are part of an xpath
* node set. see noko_xml_namespace_wrap().
*/
xmlNsPtr ns = ptr;
NOKOGIRI_DEBUG_START(ns) ;

if (ns->href) {
xmlFree(DISCARD_CONST_QUAL_XMLCHAR(ns->href));
}
Expand All @@ -42,6 +44,36 @@ dealloc_namespace(xmlNsPtr ns)
NOKOGIRI_DEBUG_END(ns) ;
}

#ifdef HAVE_RB_GC_LOCATION
static void
_xml_namespace_update_references(void *ptr)
{
xmlNsPtr ns = ptr;
if (ns->_private) {
ns->_private = (void *)rb_gc_location((VALUE)ns->_private);
}
}
#else
# define _xml_namespace_update_references 0
#endif

static const rb_data_type_t nokogiri_xml_namespace_type_with_dealloc = {
"Nokogiri/XMLNamespace/WithDealloc",
{0, _xml_namespace_dealloc, 0, _xml_namespace_update_references},
0, 0,
#ifdef RUBY_TYPED_FREE_IMMEDIATELY
RUBY_TYPED_FREE_IMMEDIATELY,
#endif
};

static const rb_data_type_t nokogiri_xml_namespace_type_without_dealloc = {
"Nokogiri/XMLNamespace/WithoutDealloc",
{0, 0, 0, _xml_namespace_update_references},
0, 0,
#ifdef RUBY_TYPED_FREE_IMMEDIATELY
RUBY_TYPED_FREE_IMMEDIATELY,
#endif
};

/*
* call-seq:
Expand All @@ -54,7 +86,7 @@ prefix(VALUE self)
{
xmlNsPtr ns;

Data_Get_Struct(self, xmlNs, ns);
Noko_Namespace_Get_Struct(self, xmlNs, ns);
if (!ns->prefix) { return Qnil; }

return NOKOGIRI_STR_NEW2(ns->prefix);
Expand All @@ -71,7 +103,7 @@ href(VALUE self)
{
xmlNsPtr ns;

Data_Get_Struct(self, xmlNs, ns);
Noko_Namespace_Get_Struct(self, xmlNs, ns);
if (!ns->href) { return Qnil; }

return NOKOGIRI_STR_NEW2(ns->href);
Expand All @@ -87,14 +119,18 @@ noko_xml_namespace_wrap(xmlNsPtr c_namespace, xmlDocPtr c_document)
}

if (c_document) {
rb_namespace = Data_Wrap_Struct(cNokogiriXmlNamespace, 0, 0, c_namespace);
rb_namespace = TypedData_Wrap_Struct(cNokogiriXmlNamespace,
&nokogiri_xml_namespace_type_without_dealloc,
c_namespace);

if (DOC_RUBY_OBJECT_TEST(c_document)) {
rb_iv_set(rb_namespace, "@document", DOC_RUBY_OBJECT(c_document));
rb_ary_push(DOC_NODE_CACHE(c_document), rb_namespace);
}
} else {
rb_namespace = Data_Wrap_Struct(cNokogiriXmlNamespace, 0, dealloc_namespace, c_namespace);
rb_namespace = TypedData_Wrap_Struct(cNokogiriXmlNamespace,
&nokogiri_xml_namespace_type_with_dealloc,
c_namespace);
}

c_namespace->_private = (void *)rb_namespace;
Expand Down
2 changes: 1 addition & 1 deletion ext/nokogiri/xml_node.c
Original file line number Diff line number Diff line change
Expand Up @@ -1353,7 +1353,7 @@ set_namespace(VALUE self, VALUE namespace)
Noko_Node_Get_Struct(self, xmlNode, node);

if (!NIL_P(namespace)) {
Data_Get_Struct(namespace, xmlNs, ns);
Noko_Namespace_Get_Struct(namespace, xmlNs, ns);
}

xmlSetNs(node, ns);
Expand Down
64 changes: 54 additions & 10 deletions test/test_compaction.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,63 @@
require "helper"

describe "compaction" do
it "https://github.com/sparklemotion/nokogiri/pull/2579" do
skip unless GC.respond_to?(:verify_compaction_references)
describe Nokogiri::XML::Node do
it "compacts safely" do # https://github.com/sparklemotion/nokogiri/pull/2579
skip unless GC.respond_to?(:verify_compaction_references)

big_doc = "<root>" + ("a".."zz").map { |x| "<#{x}>#{x}</#{x}>" }.join + "</root>"
doc = Nokogiri.XML(big_doc)
big_doc = "<root>" + ("a".."zz").map { |x| "<#{x}>#{x}</#{x}>" }.join + "</root>"
doc = Nokogiri.XML(big_doc)

# ensure a bunch of node objects have been wrapped
doc.root.children.each(&:inspect)
# ensure a bunch of node objects have been wrapped
doc.root.children.each(&:inspect)

# compact the heap and try to get the node wrappers to move
GC.verify_compaction_references(double_heap: true, toward: :empty)
# compact the heap and try to get the node wrappers to move
GC.verify_compaction_references(double_heap: true, toward: :empty)

# access the node wrappers and make sure they didn't move
doc.root.children.each(&:inspect)
# access the node wrappers and make sure they didn't move
doc.root.children.each(&:inspect)
end
end

describe Nokogiri::XML::Namespace do
it "namespace_scopes" do
skip unless GC.respond_to?(:verify_compaction_references)

doc = Nokogiri::XML(<<~EOF)
<root xmlns="http://example.com/root" xmlns:bar="http://example.com/bar">
<first/>
<second xmlns="http://example.com/child"/>
<third xmlns:foo="http://example.com/foo"/>
</root>
EOF

doc.at_xpath("//root:first", "root" => "http://example.com/root").namespace_scopes.inspect

GC.verify_compaction_references(double_heap: true, toward: :empty)

doc.at_xpath("//root:first", "root" => "http://example.com/root").namespace_scopes.inspect
end

it "remove_namespaces!" do
skip unless GC.respond_to?(:verify_compaction_references)

doc = Nokogiri::XML(<<~XML)
<root xmlns:a="http://a.flavorjon.es/" xmlns:b="http://b.flavorjon.es/">
<a:foo>hello from a</a:foo>
<b:foo>hello from b</b:foo>
<container xmlns:c="http://c.flavorjon.es/">
<c:foo c:attr='attr-value'>hello from c</c:foo>
</container>
</root>
XML

namespaces = doc.root.namespaces
namespaces.each(&:inspect)
doc.remove_namespaces!

GC.verify_compaction_references(double_heap: true, toward: :empty)

namespaces.each(&:inspect)
end
end
end
41 changes: 41 additions & 0 deletions test/test_memory_leak.rb
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,47 @@ def test_leaking_namespace_node_strings_with_prefix
end
end

def test_document_remove_namespaces_with_ruby_objects
xml = <<~XML
<root xmlns:a="http://a.flavorjon.es/" xmlns:b="http://b.flavorjon.es/">
<a:foo>hello from a</a:foo>
<b:foo>hello from b</b:foo>
<container xmlns:c="http://c.flavorjon.es/">
<c:foo c:attr='attr-value'>hello from c</c:foo>
</container>
</root>
XML

20.times do
10_000.times do
doc = Nokogiri::XML::Document.parse(xml)
doc.namespaces.each(&:inspect)
doc.remove_namespaces!
end
puts MemInfo.rss
end
end

def test_document_remove_namespaces_without_ruby_objects
xml = <<~XML
<root xmlns:a="http://a.flavorjon.es/" xmlns:b="http://b.flavorjon.es/">
<a:foo>hello from a</a:foo>
<b:foo>hello from b</b:foo>
<container xmlns:c="http://c.flavorjon.es/">
<c:foo c:attr='attr-value'>hello from c</c:foo>
</container>
</root>
XML

20.times do
20_000.times do
doc = Nokogiri::XML::Document.parse(xml)
doc.remove_namespaces!
end
puts MemInfo.rss
end
end

def test_leaking_dtd_nodes_after_internal_subset_removal
# see https://github.com/sparklemotion/nokogiri/issues/1784
100_000.times do |i|
Expand Down