From b8a5f4cd5c8fe29c65d7a00e67170223d9d2b50e Mon Sep 17 00:00:00 2001 From: Watson Date: Tue, 16 Jul 2024 10:48:53 +0900 Subject: [PATCH] Fix performance issue caused by using repeated `>` characters inside `/um + INSTRUCTION_TERM = "?>" TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um @@ -639,7 +640,7 @@ def parse_id_invalid_details(accept_external_id:, end def process_instruction(start_position) - match_data = @source.match(Private::INSTRUCTION_END, true) + match_data = @source.match(Private::INSTRUCTION_END, true, term: Private::INSTRUCTION_TERM) unless match_data message = "Invalid processing instruction node" @source.position = start_position diff --git a/lib/rexml/source.rb b/lib/rexml/source.rb index 5715c352..4c30532a 100644 --- a/lib/rexml/source.rb +++ b/lib/rexml/source.rb @@ -117,7 +117,7 @@ def read_until(term) def ensure_buffer end - def match(pattern, cons=false) + def match(pattern, cons=false, term: nil) if cons @scanner.scan(pattern).nil? ? nil : @scanner else @@ -240,7 +240,7 @@ def ensure_buffer # Note: When specifying a string for 'pattern', it must not include '>' except in the following formats: # - ">" # - "XXX>" (X is any string excluding '>') - def match( pattern, cons=false ) + def match( pattern, cons=false, term: nil ) while true if cons md = @scanner.scan(pattern) @@ -250,7 +250,7 @@ def match( pattern, cons=false ) break if md return nil if pattern.is_a?(String) return nil if @source.nil? - return nil unless read + return nil unless read(term) end md.nil? ? nil : @scanner diff --git a/test/parse/test_processing_instruction.rb b/test/parse/test_processing_instruction.rb index 13384935..ac4c2ff0 100644 --- a/test/parse/test_processing_instruction.rb +++ b/test/parse/test_processing_instruction.rb @@ -1,8 +1,12 @@ require "test/unit" +require "core_assertions" + require "rexml/document" module REXMLTests class TestParseProcessinInstruction < Test::Unit::TestCase + include Test::Unit::CoreAssertions + def parse(xml) REXML::Document.new(xml) end @@ -69,5 +73,12 @@ def test_after_root assert_equal("abc", events[:processing_instruction]) end + + def test_gt_linear_performance + seq = [10000, 50000, 100000, 150000, 200000] + assert_linear_performance(seq, rehearsal: 10) do |n| + REXML::Document.new('" * n + ' ?>') + end + end end end