From 9aae492da495e85e875b75471d100b09512e917c Mon Sep 17 00:00:00 2001 From: Dan Allen Date: Mon, 19 Feb 2024 16:25:00 -0700 Subject: [PATCH] resolves #4468 treat bare URL enclosed in angle brackets as unconstrained syntax --- CHANGELOG.adoc | 2 + lib/asciidoctor/rx.rb | 2 +- lib/asciidoctor/substitutors.rb | 152 ++++++++++++++++---------------- test/links_test.rb | 32 +++++-- 4 files changed, 107 insertions(+), 81 deletions(-) diff --git a/CHANGELOG.adoc b/CHANGELOG.adoc index 25ee16216d..756f7edd16 100644 --- a/CHANGELOG.adoc +++ b/CHANGELOG.adoc @@ -79,6 +79,8 @@ Bug Fixes:: * Move abstract inside info tag in DocBook output (#3602) * Honor secondary and tertiary terms on `indexterm` macro when primary term is quoted and contains an equals sign (#3652) * Remove extra border below doctitle when sidebar toc is collapsed into main content area (#4523) + * Treat bare URL enclosed in angle brackets as unconstrained syntax; only match until closing angled bracket (#4468) + * Allow URL enclosed in angled brackets syntax to be escaped using backslash (#4468) == 2.0.20 (2023-05-18) - @mojavelinux diff --git a/lib/asciidoctor/rx.rb b/lib/asciidoctor/rx.rb index 00ccf87c5b..b6f6b11398 100644 --- a/lib/asciidoctor/rx.rb +++ b/lib/asciidoctor/rx.rb @@ -519,7 +519,7 @@ module Rx; end # "https://github.com[]" # (https://github.com) <= parenthesis not included in autolink # - InlineLinkRx = %r((^|link:|#{CG_BLANK}|<|[>\(\)\[\];"'])(\\?(?:https?|file|ftp|irc)://)(?:([^\s\[\]]+)\[(|#{CC_ALL}*?[^\\])\]|([^\s\[\]<]*([^\s,.?!\[\]<\)]))))m + InlineLinkRx = %r((^|link:|#{CG_BLANK}|\\?<()|[>\(\)\[\];"'])(\\?(?:https?|file|ftp|irc)://)(?:([^\s\[\]]+)\[(|#{CC_ALL}*?[^\\])\]|\2([^\s]*?)>|([^\s\[\]<]*([^\s,.?!\[\]<\)]))))m # Match a link or e-mail inline macro. # diff --git a/lib/asciidoctor/substitutors.rb b/lib/asciidoctor/substitutors.rb index 3973d75a72..1789ad0477 100644 --- a/lib/asciidoctor/substitutors.rb +++ b/lib/asciidoctor/substitutors.rb @@ -532,97 +532,101 @@ def sub_macros text end if found_colon && (text.include? '://') - # inline urls, target[text] (optionally prefixed with link: and optionally surrounded by <>) + # inline urls, target[text] (optionally prefixed with link: or enclosed in <>) text = text.gsub InlineLinkRx do - if (target = $2 + ($3 || $5)).start_with? RS - # honor the escape - next ($&.slice 0, (rs_idx = $1.length)) + ($&.slice rs_idx + 1, $&.length) - end - - prefix, suffix = $1, '' - # NOTE if $4 is set, we're looking at a formal macro (e.g., https://example.org[]) - if $4 - prefix = '' if prefix == 'link:' - link_text = nil if (link_text = $4).empty? + if $2 + # honor the escapes + next $&.slice 1, $&.length if $1.start_with? RS + next %(#{$1}#{$&.slice $1.length + 1, $&.length}) if $3.start_with? RS + target = $3 + $6 + next $& if target == $3 + doc.register :links, target + link_text = (doc_attrs.key? 'hide-uri-scheme') ? (target.sub UriSniffRx, '') : target + (Inline.new self, :anchor, link_text, type: :link, target: target, attributes: { 'role' => 'bare' }).convert else - # invalid macro syntax (link: prefix w/o trailing square brackets or enclosed in double quotes) - # FIXME we probably shouldn't even get here when the link: prefix is present; the regex is doing too much - case prefix - when 'link:', ?", ?' - next $& - end - case $6 - when ';' - if prefix == '<' && (target.end_with? '>') - # move surrounding <> out of URL - prefix = '' - target = target.slice 0, target.length - 4 - elsif (target = target.chop).end_with? ')' - # move trailing ); out of URL - target = target.chop - suffix = ');' - else - # move trailing ; out of URL - suffix = ';' + # honor the escape + next %(#{$1}#{$&.slice $1.length + 1, $&.length}) if $3.start_with? RS + prefix, target, suffix = $1, $3 + ($4 || $7), '' + # NOTE if $5 is set (the attrlist), we're looking at a formal macro (e.g., https://example.org[]) + if $5 + prefix = '' if prefix == 'link:' + link_text = nil if (link_text = $5).empty? + else + case prefix + # invalid macro syntax (link: prefix w/o trailing square brackets or URL enclosed in quotes) + # FIXME we probably shouldn't even get here when the link: prefix is present; the regex is doing too much + when 'link:', ?", ?' + next $& end - # NOTE handle case when modified target is a URI scheme (e.g., http://) - next $& if target.end_with? '://' - when ':' - if (target = target.chop).end_with? ')' - # move trailing ): out of URL - target = target.chop - suffix = '):' - else - # move trailing : out of URL - suffix = ':' + case $8 + when ';' + if (target = target.chop).end_with? ')' + # move trailing ); out of URL + target = target.chop + suffix = ');' + else + # move trailing ; out of URL + suffix = ';' + end + # NOTE handle case when modified target is a URI scheme (e.g., http://) + next $& if target == $3 + when ':' + if (target = target.chop).end_with? ')' + # move trailing ): out of URL + target = target.chop + suffix = '):' + else + # move trailing : out of URL + suffix = ':' + end + # NOTE handle case when modified target is a URI scheme (e.g., http://) + next $& if target == $3 end - # NOTE handle case when modified target is a URI scheme (e.g., http://) - next $& if target.end_with? '://' end - end - attrs, link_opts = nil, { type: :link } + link_opts = { type: :link } - if link_text - new_link_text = link_text = link_text.gsub ESC_R_SB, R_SB if link_text.include? R_SB - if !doc.compat_mode && (link_text.include? '=') - # NOTE if an equals sign (=) is present, extract attributes from link text - link_text, attrs = extract_attributes_from_text link_text, '' - new_link_text = link_text - link_opts[:id] = attrs['id'] - end + if link_text + new_link_text = link_text = link_text.gsub ESC_R_SB, R_SB if link_text.include? R_SB + if !doc.compat_mode && (link_text.include? '=') + # NOTE if an equals sign (=) is present, extract attributes from link text + link_text, attrs = extract_attributes_from_text link_text, '' + new_link_text = link_text + link_opts[:id] = attrs['id'] + end - if link_text.end_with? '^' - new_link_text = link_text = link_text.chop - if attrs - attrs['window'] ||= '_blank' - else - attrs = { 'window' => '_blank' } + if link_text.end_with? '^' + new_link_text = link_text = link_text.chop + if attrs + attrs['window'] ||= '_blank' + else + attrs = { 'window' => '_blank' } + end end - end - if new_link_text && new_link_text.empty? + if new_link_text && new_link_text.empty? + # NOTE it's not possible for the URI scheme to be bare in this case + link_text = (doc_attrs.key? 'hide-uri-scheme') ? (target.sub UriSniffRx, '') : target + bare = true + end + else # NOTE it's not possible for the URI scheme to be bare in this case link_text = (doc_attrs.key? 'hide-uri-scheme') ? (target.sub UriSniffRx, '') : target bare = true end - else - # NOTE it's not possible for the URI scheme to be bare in this case - link_text = (doc_attrs.key? 'hide-uri-scheme') ? (target.sub UriSniffRx, '') : target - bare = true - end - if bare - if attrs - attrs['role'] = (attrs.key? 'role') ? %(bare #{attrs['role']}) : 'bare' - else - attrs = { 'role' => 'bare' } + if bare + if attrs + attrs['role'] = (attrs.key? 'role') ? %(bare #{attrs['role']}) : 'bare' + else + attrs = { 'role' => 'bare' } + end end - end - doc.register :links, (link_opts[:target] = target) - link_opts[:attributes] = attrs if attrs - %(#{prefix}#{(Inline.new self, :anchor, link_text, link_opts).convert}#{suffix}) + doc.register :links, (link_opts[:target] = target) + link_opts[:attributes] = attrs if attrs + %(#{prefix}#{(Inline.new self, :anchor, link_text, link_opts).convert}#{suffix}) + end end end diff --git a/test/links_test.rb b/test/links_test.rb index 4ef23cbcb4..063f6674c9 100644 --- a/test/links_test.rb +++ b/test/links_test.rb @@ -78,7 +78,27 @@ end test 'qualified url surrounded by angled brackets' do - assert_xpath '//a[@href="http://asciidoc.org"][text()="http://asciidoc.org"]', convert_string(' is the project page for AsciiDoc.'), 1 + assert_xpath '//a[@href="http://asciidoc.org"][@class="bare"][text()="http://asciidoc.org"]', convert_string(' is the project page for AsciiDoc.'), 1 + end + + test 'qualified url surrounded by double angled brackets should preserve outer angled brackets' do + assert_includes convert_string_to_embedded('<>'), '<https://asciidoc.org>' + end + + test 'qualified url surrounded by angled brackets in unconstrained context' do + assert_xpath '//a[@href="http://asciidoc.org"][@class="bare"][text()="http://asciidoc.org"]', convert_string('URLは。fin'), 1 + end + + test 'multiple qualified urls surrounded by angled brackets in unconstrained context' do + assert_xpath '//a[@href="http://asciidoc.org"][@class="bare"][text()="http://asciidoc.org"]', convert_string('URLは。URLは。'), 2 + end + + test 'qualified url surrounded by escaped angled brackets should escape form' do + assert_xpath '//p[text()=""]', convert_string('\\'), 1 + end + + test 'escaped qualified url surrounded by angled brackets should escape autolink' do + assert_xpath '//p[text()=""]', convert_string('<\\http://asciidoc.org>'), 1 end test 'qualified url surrounded by round brackets' do @@ -209,16 +229,16 @@ assert_include '"https://asciidoctor.org"', output end - test 'should convert qualified url as macro with trailing period' do - result = convert_string_to_embedded 'Information about the https://symbols.example.org/.[.] character.' - assert_xpath '//a[@href="https://symbols.example.org/."][text()="."]', result, 1 - end - test 'should convert qualified url as macro enclosed in single quotes' do output = convert_string_to_embedded '\'https://asciidoctor.org[]\'' assert_include '\'https://asciidoctor.org\'', output end + test 'should convert qualified url as macro with trailing period' do + result = convert_string_to_embedded 'Information about the https://symbols.example.org/.[.] character.' + assert_xpath '//a[@href="https://symbols.example.org/."][text()="."]', result, 1 + end + test 'qualified url using invalid link macro should not create link' do assert_xpath '//a', convert_string('link:http://asciidoc.org is the project page for AsciiDoc.'), 0 end