Skip to content

Commit

Permalink
Performance and memory optimizations
Browse files Browse the repository at this point in the history
Co-authored-by: Sutou Kouhei <[email protected]>
  • Loading branch information
fatkodima and kou committed Mar 20, 2023
1 parent cbb9c1f commit 10184ad
Show file tree
Hide file tree
Showing 7 changed files with 52 additions and 35 deletions.
11 changes: 7 additions & 4 deletions lib/rexml/attribute.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# frozen_string_literal: false
# frozen_string_literal: true
require_relative "namespace"
require_relative 'text'

Expand Down Expand Up @@ -119,10 +119,13 @@ def hash
# b = Attribute.new( "ns:x", "y" )
# b.to_string # -> "ns:x='y'"
def to_string
value = to_s
if @element and @element.context and @element.context[:attribute_quote] == :quote
%Q^#@expanded_name="#{to_s().gsub(/"/, '&quot;')}"^
value = value.gsub('"', '&quot;') if value.include?('"')
%Q^#@expanded_name="#{value}"^
else
"#@expanded_name='#{to_s().gsub(/'/, '&apos;')}'"
value = value.gsub("'", '&apos;') if value.include?("'")
"#@expanded_name='#{value}'"
end
end

Expand Down Expand Up @@ -192,7 +195,7 @@ def node_type
end

def inspect
rv = ""
rv = +""
write( rv )
rv
end
Expand Down
40 changes: 25 additions & 15 deletions lib/rexml/entity.rb
Original file line number Diff line number Diff line change
Expand Up @@ -132,24 +132,34 @@ def to_s
# then:
# doctype.entity('yada').value #-> "nanoo bar nanoo"
def value
if @value
matches = @value.scan(PEREFERENCE_RE)
rv = @value.clone
if @parent
sum = 0
matches.each do |entity_reference|
entity_value = @parent.entity( entity_reference[0] )
if sum + entity_value.bytesize > Security.entity_expansion_text_limit
raise "entity expansion has grown too large"
else
sum += entity_value.bytesize
end
rv.gsub!( /%#{entity_reference.join};/um, entity_value )
@resolved_value ||= resolve_value
end

def parent=(other)
@resolved_value = nil
super
end

private
def resolve_value
return nil if @value.nil?
return @value unless @value.match?(PEREFERENCE_RE)

matches = @value.scan(PEREFERENCE_RE)
rv = @value.clone
if @parent
sum = 0
matches.each do |entity_reference|
entity_value = @parent.entity( entity_reference[0] )
if sum + entity_value.bytesize > Security.entity_expansion_text_limit
raise "entity expansion has grown too large"
else
sum += entity_value.bytesize
end
rv.gsub!( /%#{entity_reference.join};/um, entity_value )
end
return rv
end
nil
rv
end
end

Expand Down
4 changes: 2 additions & 2 deletions lib/rexml/formatters/pretty.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# frozen_string_literal: false
# frozen_string_literal: true
require_relative 'default'

module REXML
Expand Down Expand Up @@ -58,7 +58,7 @@ def write_element(node, output)
skip = false
if compact
if node.children.inject(true) {|s,c| s & c.kind_of?(Text)}
string = ""
string = +""
old_level = @level
@level = 0
node.children.each { |child| write( child, string ) }
Expand Down
12 changes: 8 additions & 4 deletions lib/rexml/namespace.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# frozen_string_literal: false
# frozen_string_literal: true

require_relative 'xmltokens'

Expand All @@ -10,21 +10,25 @@ module Namespace
# The expanded name of the object, valid if name is set
attr_accessor :prefix
include XMLTokens
NAME_WITHOUT_NAMESPACE = /\A#{NCNAME_STR}\z/
NAMESPLIT = /^(?:(#{NCNAME_STR}):)?(#{NCNAME_STR})/u

# Sets the name and the expanded name
def name=( name )
@expanded_name = name
case name
when NAMESPLIT
if name.match?(NAME_WITHOUT_NAMESPACE)
@prefix = ""
@namespace = ""
@name = name
elsif name =~ NAMESPLIT
if $1
@prefix = $1
else
@prefix = ""
@namespace = ""
end
@name = $2
when ""
elsif name == ""
@prefix = nil
@namespace = nil
@name = nil
Expand Down
10 changes: 6 additions & 4 deletions lib/rexml/text.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# frozen_string_literal: false
# frozen_string_literal: true
require_relative 'security'
require_relative 'entity'
require_relative 'doctype'
Expand Down Expand Up @@ -131,7 +131,7 @@ def parent= parent
def Text.check string, pattern, doctype

# illegal anywhere
if string !~ VALID_XML_CHARS
if !string.match?(VALID_XML_CHARS)
if String.method_defined? :encode
string.chars.each do |c|
case c.ord
Expand Down Expand Up @@ -371,7 +371,7 @@ def Text::normalize( input, doctype=nil, entity_filter=nil )
copy = input.to_s
# Doing it like this rather than in a loop improves the speed
#copy = copy.gsub( EREFERENCE, '&amp;' )
copy = copy.gsub( "&", "&amp;" )
copy = copy.gsub( "&", "&amp;" ) if copy.include?("&")
if doctype
# Replace all ampersands that aren't part of an entity
doctype.entities.each_value do |entity|
Expand All @@ -382,7 +382,9 @@ def Text::normalize( input, doctype=nil, entity_filter=nil )
else
# Replace all ampersands that aren't part of an entity
DocType::DEFAULT_ENTITIES.each_value do |entity|
copy = copy.gsub(entity.value, "&#{entity.name};" )
if copy.include?(entity.value)
copy = copy.gsub(entity.value, "&#{entity.name};" )
end
end
end
copy
Expand Down
2 changes: 1 addition & 1 deletion test/test_core.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1423,7 +1423,7 @@ def test_ticket_91
d.root.add_element( "bah" )
p=REXML::Formatters::Pretty.new(2)
p.compact = true # Don't add whitespace to text nodes unless necessary
p.write(d,out="")
p.write(d,out=+"")
assert_equal( expected, out )
end

Expand Down
8 changes: 3 additions & 5 deletions test/test_document.rb
Original file line number Diff line number Diff line change
Expand Up @@ -166,11 +166,9 @@ def test_empty_value
<cd></cd>
EOF

assert_raise(REXML::ParseException) do
REXML::Document.new(xml)
end
REXML::Security.entity_expansion_limit = 100
assert_equal(100, REXML::Security.entity_expansion_limit)
REXML::Document.new(xml)
REXML::Security.entity_expansion_limit = 90
assert_equal(90, REXML::Security.entity_expansion_limit)
assert_raise(REXML::ParseException) do
REXML::Document.new(xml)
end
Expand Down

0 comments on commit 10184ad

Please sign in to comment.