Skip to content

Commit

Permalink
resolves asciidoctor#4468 treat bare URL enclosed in angle brackets as unconstrained syntax
Browse files Browse the repository at this point in the history
  • Loading branch information
mojavelinux committed Feb 20, 2024
1 parent 31af659 commit 9aae492
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 81 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ Bug Fixes::
* Move abstract inside info tag in DocBook output (#3602)
* Honor secondary and tertiary terms on `indexterm` macro when primary term is quoted and contains an equals sign (#3652)
* Remove extra border below doctitle when sidebar toc is collapsed into main content area (#4523)
* Treat bare URL enclosed in angle brackets as unconstrained syntax; only match until the closing angle bracket (#4468)
* Allow the URL enclosed in angle brackets syntax to be escaped using a backslash (#4468)

== 2.0.20 (2023-05-18) - @mojavelinux

Expand Down
2 changes: 1 addition & 1 deletion lib/asciidoctor/rx.rb
Original file line number Diff line number Diff line change
Expand Up @@ -519,7 +519,7 @@ module Rx; end
# "https://github.com[]"
# (https://github.com) <= parenthesis not included in autolink
#
# Definition before this change: the prefix group accepts a plain &lt; and the pattern has no
# dedicated branch for a URL terminated by &gt;.
InlineLinkRx = %r((^|link:|#{CG_BLANK}|&lt;|[>\(\)\[\];"'])(\\?(?:https?|file|ftp|irc)://)(?:([^\s\[\]]+)\[(|#{CC_ALL}*?[^\\])\]|([^\s\[\]<]*([^\s,.?!\[\]<\)]))))m
# Definition after this change:
# * the prefix alternative \\?&lt;() allows an optional backslash before &lt; and sets an empty
#   capture group (group 2) when that alternative is taken
# * the added branch \2([^\s]*?)&gt; begins with a backreference to that empty group and lazily
#   captures non-space characters up to the first &gt;
#   NOTE(review): this presumably restricts the branch to matches that began with &lt;, relying on
#   a backreference to an unset group failing to match -- confirm against the Regexp documentation
# * capture groups are now: 1 prefix, 2 angle-bracket marker, 3 scheme, 4 macro target,
#   5 macro attrlist, 6 angle-bracket target, 7 bare target, 8 last character of bare target
InlineLinkRx = %r((^|link:|#{CG_BLANK}|\\?&lt;()|[>\(\)\[\];"'])(\\?(?:https?|file|ftp|irc)://)(?:([^\s\[\]]+)\[(|#{CC_ALL}*?[^\\])\]|\2([^\s]*?)&gt;|([^\s\[\]<]*([^\s,.?!\[\]<\)]))))m

# Match a link or e-mail inline macro.
#
Expand Down
152 changes: 78 additions & 74 deletions lib/asciidoctor/substitutors.rb
Original file line number Diff line number Diff line change
Expand Up @@ -532,97 +532,101 @@ def sub_macros text
end

if found_colon && (text.include? '://')
# inline urls, target[text] (optionally prefixed with link: and optionally surrounded by <>)
# inline urls, target[text] (optionally prefixed with link: or enclosed in <>)
text = text.gsub InlineLinkRx do
if (target = $2 + ($3 || $5)).start_with? RS
# honor the escape
next ($&.slice 0, (rs_idx = $1.length)) + ($&.slice rs_idx + 1, $&.length)
end

prefix, suffix = $1, ''
# NOTE if $4 is set, we're looking at a formal macro (e.g., https://example.org[])
if $4
prefix = '' if prefix == 'link:'
link_text = nil if (link_text = $4).empty?
if $2
# honor the escapes
next $&.slice 1, $&.length if $1.start_with? RS
next %(#{$1}#{$&.slice $1.length + 1, $&.length}) if $3.start_with? RS
target = $3 + $6
next $& if target == $3
doc.register :links, target
link_text = (doc_attrs.key? 'hide-uri-scheme') ? (target.sub UriSniffRx, '') : target
(Inline.new self, :anchor, link_text, type: :link, target: target, attributes: { 'role' => 'bare' }).convert
else
# invalid macro syntax (link: prefix w/o trailing square brackets or enclosed in double quotes)
# FIXME we probably shouldn't even get here when the link: prefix is present; the regex is doing too much
case prefix
when 'link:', ?", ?'
next $&
end
case $6
when ';'
if prefix == '&lt;' && (target.end_with? '&gt;')
# move surrounding <> out of URL
prefix = ''
target = target.slice 0, target.length - 4
elsif (target = target.chop).end_with? ')'
# move trailing ); out of URL
target = target.chop
suffix = ');'
else
# move trailing ; out of URL
suffix = ';'
# honor the escape
next %(#{$1}#{$&.slice $1.length + 1, $&.length}) if $3.start_with? RS
prefix, target, suffix = $1, $3 + ($4 || $7), ''
# NOTE if $5 is set (the attrlist), we're looking at a formal macro (e.g., https://example.org[])
if $5
prefix = '' if prefix == 'link:'
link_text = nil if (link_text = $5).empty?
else
case prefix
# invalid macro syntax (link: prefix w/o trailing square brackets or URL enclosed in quotes)
# FIXME we probably shouldn't even get here when the link: prefix is present; the regex is doing too much
when 'link:', ?", ?'
next $&
end
# NOTE handle case when modified target is a URI scheme (e.g., http://)
next $& if target.end_with? '://'
when ':'
if (target = target.chop).end_with? ')'
# move trailing ): out of URL
target = target.chop
suffix = '):'
else
# move trailing : out of URL
suffix = ':'
case $8
when ';'
if (target = target.chop).end_with? ')'
# move trailing ); out of URL
target = target.chop
suffix = ');'
else
# move trailing ; out of URL
suffix = ';'
end
# NOTE handle case when modified target is a URI scheme (e.g., http://)
next $& if target == $3
when ':'
if (target = target.chop).end_with? ')'
# move trailing ): out of URL
target = target.chop
suffix = '):'
else
# move trailing : out of URL
suffix = ':'
end
# NOTE handle case when modified target is a URI scheme (e.g., http://)
next $& if target == $3
end
# NOTE handle case when modified target is a URI scheme (e.g., http://)
next $& if target.end_with? '://'
end
end

attrs, link_opts = nil, { type: :link }
link_opts = { type: :link }

if link_text
new_link_text = link_text = link_text.gsub ESC_R_SB, R_SB if link_text.include? R_SB
if !doc.compat_mode && (link_text.include? '=')
# NOTE if an equals sign (=) is present, extract attributes from link text
link_text, attrs = extract_attributes_from_text link_text, ''
new_link_text = link_text
link_opts[:id] = attrs['id']
end
if link_text
new_link_text = link_text = link_text.gsub ESC_R_SB, R_SB if link_text.include? R_SB
if !doc.compat_mode && (link_text.include? '=')
# NOTE if an equals sign (=) is present, extract attributes from link text
link_text, attrs = extract_attributes_from_text link_text, ''
new_link_text = link_text
link_opts[:id] = attrs['id']
end

if link_text.end_with? '^'
new_link_text = link_text = link_text.chop
if attrs
attrs['window'] ||= '_blank'
else
attrs = { 'window' => '_blank' }
if link_text.end_with? '^'
new_link_text = link_text = link_text.chop
if attrs
attrs['window'] ||= '_blank'
else
attrs = { 'window' => '_blank' }
end
end
end

if new_link_text && new_link_text.empty?
if new_link_text && new_link_text.empty?
# NOTE it's not possible for the URI scheme to be bare in this case
link_text = (doc_attrs.key? 'hide-uri-scheme') ? (target.sub UriSniffRx, '') : target
bare = true
end
else
# NOTE it's not possible for the URI scheme to be bare in this case
link_text = (doc_attrs.key? 'hide-uri-scheme') ? (target.sub UriSniffRx, '') : target
bare = true
end
else
# NOTE it's not possible for the URI scheme to be bare in this case
link_text = (doc_attrs.key? 'hide-uri-scheme') ? (target.sub UriSniffRx, '') : target
bare = true
end

if bare
if attrs
attrs['role'] = (attrs.key? 'role') ? %(bare #{attrs['role']}) : 'bare'
else
attrs = { 'role' => 'bare' }
if bare
if attrs
attrs['role'] = (attrs.key? 'role') ? %(bare #{attrs['role']}) : 'bare'
else
attrs = { 'role' => 'bare' }
end
end
end

doc.register :links, (link_opts[:target] = target)
link_opts[:attributes] = attrs if attrs
%(#{prefix}#{(Inline.new self, :anchor, link_text, link_opts).convert}#{suffix})
doc.register :links, (link_opts[:target] = target)
link_opts[:attributes] = attrs if attrs
%(#{prefix}#{(Inline.new self, :anchor, link_text, link_opts).convert}#{suffix})
end
end
end

Expand Down
32 changes: 26 additions & 6 deletions test/links_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,27 @@
end

# Verifies that a URL written as <http://...> at the start of a sentence becomes exactly one link
# whose href and text are the URL itself (the angle brackets are not part of either).
test 'qualified url surrounded by angled brackets' do
# assertion as it read before this change: checks href and link text only
assert_xpath '//a[@href="http://asciidoc.org"][text()="http://asciidoc.org"]', convert_string('<http://asciidoc.org> is the project page for AsciiDoc.'), 1
# assertion as it reads after this change: additionally requires class="bare" on the link
assert_xpath '//a[@href="http://asciidoc.org"][@class="bare"][text()="http://asciidoc.org"]', convert_string('<http://asciidoc.org> is the project page for AsciiDoc.'), 1
end

test 'qualified url surrounded by double angled brackets should preserve outer angled brackets' do
  # only the inner pair of angle brackets is consumed by the link; the outer pair must remain as escaped text
  output = convert_string_to_embedded '<<https://asciidoc.org>>'
  assert_includes output, '&lt;<a href="https://asciidoc.org" class="bare">https://asciidoc.org</a>&gt;'
end

test 'qualified url surrounded by angled brackets in unconstrained context' do
  # the enclosed URL is directly adjacent to word characters on both sides (no surrounding whitespace)
  output = convert_string 'URLは<http://asciidoc.org>。fin'
  assert_xpath '//a[@href="http://asciidoc.org"][@class="bare"][text()="http://asciidoc.org"]', output, 1
end

test 'multiple qualified urls surrounded by angled brackets in unconstrained context' do
  # both enclosed URLs on the same line must be converted, so two matching links are expected
  output = convert_string 'URLは<http://asciidoc.org>。URLは<http://asciidoc.org>。'
  assert_xpath '//a[@href="http://asciidoc.org"][@class="bare"][text()="http://asciidoc.org"]', output, 2
end

test 'qualified url surrounded by escaped angled brackets should escape form' do
  # a backslash placed before the opening angle bracket; the paragraph text is expected to be the literal <URL>
  output = convert_string '\\<http://asciidoc.org>'
  assert_xpath '//p[text()="<http://asciidoc.org>"]', output, 1
end

test 'escaped qualified url surrounded by angled brackets should escape autolink' do
  # a backslash placed before the URL scheme, inside the angle brackets; the paragraph text is expected to be the literal <URL>
  output = convert_string '<\\http://asciidoc.org>'
  assert_xpath '//p[text()="<http://asciidoc.org>"]', output, 1
end

test 'qualified url surrounded by round brackets' do
Expand Down Expand Up @@ -209,16 +229,16 @@
assert_include '"<a href="https://asciidoctor.org" class="bare">https://asciidoctor.org</a>"', output
end

test 'should convert qualified url as macro with trailing period' do
  # the period before the square brackets is part of the macro target, and the link text is a single period
  assert_xpath '//a[@href="https://symbols.example.org/."][text()="."]', convert_string_to_embedded('Information about the https://symbols.example.org/.[.] character.'), 1
end

test 'should convert qualified url as macro enclosed in single quotes' do
  # the surrounding single quotes must be kept outside of the generated link
  assert_include '\'<a href="https://asciidoctor.org" class="bare">https://asciidoctor.org</a>\'', convert_string_to_embedded('\'https://asciidoctor.org[]\'')
end

test 'should convert qualified url as macro with trailing period' do
  # the period before the square brackets is part of the macro target, and the link text is a single period
  assert_xpath '//a[@href="https://symbols.example.org/."][text()="."]', convert_string_to_embedded('Information about the https://symbols.example.org/.[.] character.'), 1
end

test 'qualified url using invalid link macro should not create link' do
  # a link: prefix without trailing square brackets is not a valid macro, so no anchor is expected
  output = convert_string 'link:http://asciidoc.org is the project page for AsciiDoc.'
  assert_xpath '//a', output, 0
end
Expand Down

0 comments on commit 9aae492

Please sign in to comment.