Skip to content

Commit

Permalink
allow escaping newlines with \ inside strings
Browse files Browse the repository at this point in the history
This allows the use of `\` in front of a newline inside
non-raw/non-custom string or command literals as a line continuation
character, so that the newline following it is ignored. This way, long
strings without any newlines in them don't have to be written on a single
line or be broken up into separately concatenated pieces. I think we might
also want to use this to improve the printing of long strings in the REPL by
printing them as multiline strings, making use of `\` for long lines if
necessary, but that can be discussed separately.

The command literal part is technically breaking, but the current
behavior is probably unintuitive enough that this can be considered a
minor change. For string literals, this should be entirely non-breaking
since a single `\` before a newline currently throws a parsing error.

closes #37728
  • Loading branch information
simeonschaub committed May 9, 2021
1 parent ccf7824 commit 8600586
Show file tree
Hide file tree
Showing 3 changed files with 131 additions and 9 deletions.
5 changes: 4 additions & 1 deletion base/shell.jl
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,10 @@ function shell_parse(str::AbstractString, interpolate::Bool=true;
in_double_quotes = !in_double_quotes
i = consume_upto!(arg, s, i, j)
elseif c == '\\'
if in_double_quotes
if !isempty(st) && peek(st)[2] == '\n'
i = consume_upto!(arg, s, i, j) + 1
_ = popfirst!(st)
elseif in_double_quotes
isempty(st) && error("unterminated double quote")
k, c′ = peek(st)
if c′ == '"' || c′ == '$' || c′ == '\\'
Expand Down
38 changes: 30 additions & 8 deletions src/julia-parser.scm
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,9 @@
;; Return `n` unchanged when it is a true value; otherwise raise an
;; "invalid numeric constant" error that quotes the offending text `s`.
(define (numchk n s)
  (if n
      n
      (error (string "invalid numeric constant \"" s "\""))))

;; Return the character of `s` found one `string.dec` step back from
;; `(length s)`, i.e. the last character of the string.
(define (string-lastchar s)
  (let ((last-index (string.dec s (length s))))
    (string.char s last-index)))

(define (read-number port leadingdot neg)
(let ((str (open-output-string))
(pred char-numeric?)
Expand Down Expand Up @@ -408,7 +411,7 @@
(string.sub s 1)
s)
r is-float32-literal)))
(if (and (eqv? #\. (string.char s (string.dec s (length s))))
(if (and (eqv? #\. (string-lastchar s))
(let ((nxt (peek-char port)))
(and (not (eof-object? nxt))
(or (identifier-start-char? nxt)
Expand Down Expand Up @@ -2114,13 +2117,32 @@
;; Parse a string or command literal read from the port of token stream `s`.
;; `delim` is the delimiter character. When `raw` is true the parsed pieces go
;; through `identity`; otherwise through `unescape-parsed-string-literal`.
;; NOTE(review): this span is a rendered diff hunk without +/- markers. The
;; seven lines right after `((if raw ...` appear to be the removed body and the
;; `(map (lambda ...` form after them its replacement -- confirm against the
;; actual file before relying on either.
(define (parse-string-literal s delim raw)
(let ((p (ts:port s)))
((if raw identity unescape-parsed-string-literal)
;; next char is `delim`: either an empty literal or a triple-delimited one
(if (eqv? (peek-char p) delim)
(if (eqv? (peek-char (take-char p)) delim)
(map-first strip-leading-newline
(dedent-triplequoted-string
(parse-string-literal- 2 (take-char p) s delim raw)))
(list ""))
(parse-string-literal- 0 p s delim raw)))))
;; post-process every element of the parsed list; only string elements of
;; non-raw literals are rewritten, everything else is returned unchanged
(map (lambda (s)
(if (and (not raw) (string? s))
;; remove `\` followed by a newline
(let ((spl (string-split s "\\\n")))
;; rejoin the split pieces starting from ""; inside the lambda `s` shadows
;; the outer `s` (presumably the text accumulated so far -- TODO confirm
;; the argument order of `foldl`)
(foldl (lambda (line s)
;; if there is an odd number of backslashes before the backslash
;; preceding the newline, keep the backslash and newline since
;; the backslash is actually escaped
;; `i` is an optional argument defaulting to `(length s)`; each recursive
;; step moves one character back and flips the parity
(define (odd-backslashes? (i (length s)))
(and (> i 0)
(let ((i (string.dec s i)))
(and (eqv? (string.char s i) #\\)
(not (odd-backslashes? i))))))
(if (odd-backslashes?)
(string s "\\\n" line)
(string s line)))
""
spl))
s))
;; same delimiter dispatch as above; its result is the list being mapped over
(if (eqv? (peek-char p) delim)
(if (eqv? (peek-char (take-char p)) delim)
(map-first strip-leading-newline
(dedent-triplequoted-string
(parse-string-literal- 2 (take-char p) s delim raw)))
(list ""))
(parse-string-literal- 0 p s delim raw))))))

(define (strip-leading-newline s)
(let ((n (sizeof s)))
Expand Down
97 changes: 97 additions & 0 deletions test/syntax.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2787,3 +2787,100 @@ macro m_nospecialize_unnamed_hygiene()
end

@test @m_nospecialize_unnamed_hygiene()(1) === Any

# Checks that a `\` directly before a newline acts as a line continuation inside
# string and command literals, and that raw strings and escaped backslashes do not.
# NOTE(review): several cases have identical left-hand sides here but different
# expected values (e.g. "ab" vs "a b"); leading whitespace inside the literals
# looks lost in this rendering -- confirm against the original test file.
@testset "escaping newlines inside strings" begin
# interpolated in the `$c` cases below
c = "c"

# ordinary double-quoted strings
@test "a\
b" == "ab"
@test "a\
b" == "a b"
# raw strings keep both the backslash and the newline
@test raw"a\
b" == "a\\\nb"
# continuation immediately after an interpolation
@test "a$c\
b" == "acb"
# an escaped backslash followed by a newline is not a continuation
@test "\\
" == "\\\n"


# triple-quoted strings
@test """
a\
b""" == "ab"
@test """
a\
b""" == "a b"
@test """
a\
b""" == " ab"
@test raw"""
a\
b""" == "a\\\nb"
@test """
a$c\
b""" == "acb"

# backslash parity before the newline: an odd run ends in a continuation,
# an even run is only escaped backslashes and the newline is kept
@test """
\
""" == ""
@test """
\\
""" == "\\\n"
@test """
\\\
""" == "\\"
@test """
\\\\
""" == "\\\\\n"
@test """
\\\\\
""" == "\\\\"
# continuations on consecutive lines
@test """
\
\
""" == ""
@test """
\\
\
""" == "\\\n"
@test """
\\\
\
""" == "\\"


# single-backtick command literals, including quoted arguments
@test `a\
b` == `ab`
@test `a\
b` == `a b`
@test `a$c\
b` == `acb`
@test `"a\
b"` == `ab`
@test `'a\
b'` == `ab`
@test `\\
` == `'\'`


# triple-backtick command literals
@test ```
a\
b``` == `ab`
@test ```
a\
b``` == `a b`
@test ```
a\
b``` == ` ab`
@test ```
a$c\
b``` == `acb`
@test ```
"a\
b"``` == `ab`
@test ```
'a\
b'``` == `ab`
@test ```
\\
``` == `'\'`
end

0 comments on commit 8600586

Please sign in to comment.