diff --git a/compiler/noirc_frontend/src/ast/expression.rs b/compiler/noirc_frontend/src/ast/expression.rs index d29e1670944..41807d7eca7 100644 --- a/compiler/noirc_frontend/src/ast/expression.rs +++ b/compiler/noirc_frontend/src/ast/expression.rs @@ -76,6 +76,10 @@ impl ExpressionKind { ExpressionKind::Literal(Literal::Str(contents)) } + pub fn raw_string(contents: String, hashes: u8) -> ExpressionKind { + ExpressionKind::Literal(Literal::RawStr(contents, hashes)) + } + pub fn format_string(contents: String) -> ExpressionKind { ExpressionKind::Literal(Literal::FmtStr(contents)) } @@ -312,6 +316,7 @@ pub enum Literal { Bool(bool), Integer(FieldElement), Str(String), + RawStr(String, u8), FmtStr(String), Unit, } @@ -507,6 +512,11 @@ impl Display for Literal { Literal::Bool(boolean) => write!(f, "{}", if *boolean { "true" } else { "false" }), Literal::Integer(integer) => write!(f, "{}", integer.to_u128()), Literal::Str(string) => write!(f, "\"{string}\""), + Literal::RawStr(string, num_hashes) => { + let hashes: String = + std::iter::once('#').cycle().take(*num_hashes as usize).collect(); + write!(f, "r{hashes}\"{string}\"{hashes}") + } Literal::FmtStr(string) => write!(f, "f\"{string}\""), Literal::Unit => write!(f, "()"), } diff --git a/compiler/noirc_frontend/src/hir/resolution/resolver.rs b/compiler/noirc_frontend/src/hir/resolution/resolver.rs index 4b829932b76..52d592404c8 100644 --- a/compiler/noirc_frontend/src/hir/resolution/resolver.rs +++ b/compiler/noirc_frontend/src/hir/resolution/resolver.rs @@ -1203,6 +1203,7 @@ impl<'a> Resolver<'a> { } Literal::Integer(integer) => HirLiteral::Integer(integer), Literal::Str(str) => HirLiteral::Str(str), + Literal::RawStr(str, _) => HirLiteral::Str(str), Literal::FmtStr(str) => self.resolve_fmt_str_literal(str, expr.span), Literal::Unit => HirLiteral::Unit, }), diff --git a/compiler/noirc_frontend/src/lexer/lexer.rs b/compiler/noirc_frontend/src/lexer/lexer.rs index be24c1249c6..7a2197ebb93 100644 --- 
a/compiler/noirc_frontend/src/lexer/lexer.rs +++ b/compiler/noirc_frontend/src/lexer/lexer.rs @@ -126,6 +126,7 @@ impl<'a> Lexer<'a> { Some(']') => self.single_char_token(Token::RightBracket), Some('"') => self.eat_string_literal(), Some('f') => self.eat_format_string_or_alpha_numeric(), + Some('r') => self.eat_raw_string_or_alpha_numeric(), Some('#') => self.eat_attribute(), Some(ch) if ch.is_ascii_alphanumeric() || ch == '_' => self.eat_alpha_numeric(ch), Some(ch) => { @@ -400,6 +401,78 @@ impl<'a> Lexer<'a> { } } + fn eat_raw_string(&mut self) -> SpannedTokenResult { + let start = self.position; + + let beginning_hashes = self.eat_while(None, |ch| ch == '#'); + let beginning_hashes_count = beginning_hashes.chars().count(); + if beginning_hashes_count > 255 { + // too many hashes (unlikely in practice) + // also, Rust disallows 256+ hashes as well + return Err(LexerErrorKind::UnexpectedCharacter { + span: Span::single_char(start + 255), + found: Some('#'), + expected: "\"".to_owned(), + }); + } + + if !self.peek_char_is('"') { + return Err(LexerErrorKind::UnexpectedCharacter { + span: Span::single_char(self.position), + found: self.next_char(), + expected: "\"".to_owned(), + }); + } + self.next_char(); + + let mut str_literal = String::new(); + loop { + let chars = self.eat_while(None, |ch| ch != '"'); + str_literal.push_str(&chars[..]); + if !self.peek_char_is('"') { + return Err(LexerErrorKind::UnexpectedCharacter { + span: Span::single_char(self.position), + found: self.next_char(), + expected: "\"".to_owned(), + }); + } + self.next_char(); + let mut ending_hashes_count = 0; + while let Some('#') = self.peek_char() { + if ending_hashes_count == beginning_hashes_count { + break; + } + self.next_char(); + ending_hashes_count += 1; + } + if ending_hashes_count == beginning_hashes_count { + break; + } else { + str_literal.push('"'); + for _ in 0..ending_hashes_count { + str_literal.push('#'); + } + } + } + + let str_literal_token = Token::RawStr(str_literal, 
beginning_hashes_count as u8); + + let end = self.position; + Ok(str_literal_token.into_span(start, end)) + } + + fn eat_raw_string_or_alpha_numeric(&mut self) -> SpannedTokenResult { + // Problem: we commit to eating raw strings once we see one or two characters. + // This is unclean, but likely ok in all practical cases, and works with existing + // `Lexer` methods. + let peek1 = self.peek_char().unwrap_or('X'); + let peek2 = self.peek2_char().unwrap_or('X'); + match (peek1, peek2) { + ('#', '#') | ('#', '"') | ('"', _) => self.eat_raw_string(), + _ => self.eat_alpha_numeric('r'), + } + } + fn parse_comment(&mut self, start: u32) -> SpannedTokenResult { let doc_style = match self.peek_char() { Some('!') => { diff --git a/compiler/noirc_frontend/src/lexer/token.rs b/compiler/noirc_frontend/src/lexer/token.rs index 72be71865cc..b16de42c0ba 100644 --- a/compiler/noirc_frontend/src/lexer/token.rs +++ b/compiler/noirc_frontend/src/lexer/token.rs @@ -15,6 +15,7 @@ pub enum Token { Int(FieldElement), Bool(bool), Str(String), + RawStr(String, u8), FmtStr(String), Keyword(Keyword), IntType(IntType), @@ -157,6 +158,10 @@ impl fmt::Display for Token { Token::Bool(b) => write!(f, "{b}"), Token::Str(ref b) => write!(f, "{b}"), Token::FmtStr(ref b) => write!(f, "f{b}"), + Token::RawStr(ref b, hashes) => { + let h: String = std::iter::once('#').cycle().take(hashes as usize).collect(); + write!(f, "r{h}\"{b}\"{h}") + } Token::Keyword(k) => write!(f, "{k}"), Token::Attribute(ref a) => write!(f, "{a}"), Token::LineComment(ref s, _style) => write!(f, "//{s}"), @@ -227,7 +232,11 @@ impl Token { pub fn kind(&self) -> TokenKind { match *self { Token::Ident(_) => TokenKind::Ident, - Token::Int(_) | Token::Bool(_) | Token::Str(_) | Token::FmtStr(_) => TokenKind::Literal, + Token::Int(_) + | Token::Bool(_) + | Token::Str(_) + | Token::RawStr(..) 
+ | Token::FmtStr(_) => TokenKind::Literal, Token::Keyword(_) => TokenKind::Keyword, Token::Attribute(_) => TokenKind::Attribute, ref tok => TokenKind::Token(tok.clone()), diff --git a/compiler/noirc_frontend/src/parser/parser.rs b/compiler/noirc_frontend/src/parser/parser.rs index 6b8589cc6e5..7f0bf6376c6 100644 --- a/compiler/noirc_frontend/src/parser/parser.rs +++ b/compiler/noirc_frontend/src/parser/parser.rs @@ -1657,6 +1657,7 @@ fn literal() -> impl NoirParser { Token::Int(x) => ExpressionKind::integer(x), Token::Bool(b) => ExpressionKind::boolean(b), Token::Str(s) => ExpressionKind::string(s), + Token::RawStr(s, hashes) => ExpressionKind::raw_string(s, hashes), Token::FmtStr(s) => ExpressionKind::format_string(s), unexpected => unreachable!("Non-literal {} parsed as a literal", unexpected), }) @@ -2549,4 +2550,79 @@ mod test { check_cases_with_errors(&cases[..], block(fresh_statement())); } + + #[test] + fn parse_raw_string_expr() { + let cases = vec![ + Case { source: r##" r"foo" "##, expect: r##"r"foo""##, errors: 0 }, + Case { source: r##" r#"foo"# "##, expect: r##"r#"foo"#"##, errors: 0 }, + // backslash + Case { source: r##" r"\\" "##, expect: r##"r"\\""##, errors: 0 }, + Case { source: r##" r#"\"# "##, expect: r##"r#"\"#"##, errors: 0 }, + Case { source: r##" r#"\\"# "##, expect: r##"r#"\\"#"##, errors: 0 }, + Case { source: r##" r#"\\\"# "##, expect: r##"r#"\\\"#"##, errors: 0 }, + // escape sequence + Case { + source: r##" r#"\t\n\\t\\n\\\t\\\n\\\\"# "##, + expect: r##"r#"\t\n\\t\\n\\\t\\\n\\\\"#"##, + errors: 0, + }, + Case { source: r##" r#"\\\\\\\\"# "##, expect: r##"r#"\\\\\\\\"#"##, errors: 0 }, + // mismatch - errors: + Case { source: r###" r#"foo"## "###, expect: r###"r#"foo"#"###, errors: 1 }, + Case { source: r###" r##"foo"# "###, expect: "(none)", errors: 2 }, + // mismatch: short: + Case { source: r###" r"foo"# "###, expect: r###"r"foo""###, errors: 1 }, + Case { source: r###" r#"foo" "###, expect: "(none)", errors: 2 }, + // empty string 
+ Case { source: r####"r"""####, expect: r####"r"""####, errors: 0 }, + Case { source: r####"r###""###"####, expect: r####"r###""###"####, errors: 0 }, + // miscellaneous + Case { source: r###" r#\"foo\"# "###, expect: "plain::r", errors: 2 }, + Case { source: r###" r\"foo\" "###, expect: "plain::r", errors: 1 }, + Case { source: r###" r##"foo"# "###, expect: "(none)", errors: 2 }, + // missing 'r' letter + Case { source: r###" ##"foo"# "###, expect: r#""foo""#, errors: 2 }, + Case { source: r###" #"foo" "###, expect: "plain::foo", errors: 2 }, + // whitespace + Case { source: r###" r #"foo"# "###, expect: "plain::r", errors: 2 }, + Case { source: r###" r# "foo"# "###, expect: "plain::r", errors: 3 }, + Case { source: r###" r#"foo" # "###, expect: "(none)", errors: 2 }, + // after identifier + Case { source: r###" bar#"foo"# "###, expect: "plain::bar", errors: 2 }, + // nested + Case { + source: r###"r##"foo r#"bar"# r"baz" ### bye"##"###, + expect: r###"r##"foo r#"bar"# r"baz" ### bye"##"###, + errors: 0, + }, + ]; + + check_cases_with_errors(&cases[..], expression()); + } + + #[test] + fn parse_raw_string_lit() { + let lit_cases = vec![ + Case { source: r##" r"foo" "##, expect: r##"r"foo""##, errors: 0 }, + Case { source: r##" r#"foo"# "##, expect: r##"r#"foo"#"##, errors: 0 }, + // backslash + Case { source: r##" r"\\" "##, expect: r##"r"\\""##, errors: 0 }, + Case { source: r##" r#"\"# "##, expect: r##"r#"\"#"##, errors: 0 }, + Case { source: r##" r#"\\"# "##, expect: r##"r#"\\"#"##, errors: 0 }, + Case { source: r##" r#"\\\"# "##, expect: r##"r#"\\\"#"##, errors: 0 }, + // escape sequence + Case { + source: r##" r#"\t\n\\t\\n\\\t\\\n\\\\"# "##, + expect: r##"r#"\t\n\\t\\n\\\t\\\n\\\\"#"##, + errors: 0, + }, + Case { source: r##" r#"\\\\\\\\"# "##, expect: r##"r#"\\\\\\\\"#"##, errors: 0 }, + // mismatch - errors: + Case { source: r###" r#"foo"## "###, expect: r###"r#"foo"#"###, errors: 1 }, + Case { source: r###" r##"foo"# "###, expect: "(none)", errors: 2 }, 
+ ]; + + check_cases_with_errors(&lit_cases[..], literal()); + } } diff --git a/docs/docs/language_concepts/data_types/03_strings.md b/docs/docs/language_concepts/data_types/03_strings.md index c42f34ec3ad..e647a58472f 100644 --- a/docs/docs/language_concepts/data_types/03_strings.md +++ b/docs/docs/language_concepts/data_types/03_strings.md @@ -61,3 +61,19 @@ Example: let s = "Hello \"world" // prints "Hello "world" let s = "hey \tyou"; // prints "hey you" ``` + +## Raw strings + +A raw string begins with the letter `r` and is optionally delimited by a number of hashes `#`. + +Escape characters are *not* processed within raw strings. All contents are interpreted literally. + +Example: + +```rust +let s = r"Hello world"; +let s = r#"Simon says "hello world""#; + +// Any number of hashes (0 to 255) may be used, as long as the string terminates with the same number of hashes +let s = r#####"One "#, Two "##, Three "###, Four "####, Five will end the string."#####; +``` diff --git a/tooling/nargo_cli/tests/compile_failure/raw_string_huge/Nargo.toml b/tooling/nargo_cli/tests/compile_failure/raw_string_huge/Nargo.toml new file mode 100644 index 00000000000..ecef0e2a07c --- /dev/null +++ b/tooling/nargo_cli/tests/compile_failure/raw_string_huge/Nargo.toml @@ -0,0 +1,5 @@ +[package] +name = "raw_string_huge" +type = "bin" +authors = [""] +[dependencies] \ No newline at end of file diff --git a/tooling/nargo_cli/tests/compile_failure/raw_string_huge/src/main.nr b/tooling/nargo_cli/tests/compile_failure/raw_string_huge/src/main.nr new file mode 100644 index 00000000000..7bca9942e7a --- /dev/null +++ b/tooling/nargo_cli/tests/compile_failure/raw_string_huge/src/main.nr @@ -0,0 +1,4 @@ +fn main() { + // Fails because of too many hashes for raw string (256+ hashes) + let _a =
r##############################################################################################################################################################################################################################################################################"hello"##############################################################################################################################################################################################################################################################################; +} diff --git a/tooling/nargo_cli/tests/compile_success_empty/raw_string/Nargo.toml b/tooling/nargo_cli/tests/compile_success_empty/raw_string/Nargo.toml new file mode 100644 index 00000000000..81147e65f34 --- /dev/null +++ b/tooling/nargo_cli/tests/compile_success_empty/raw_string/Nargo.toml @@ -0,0 +1,6 @@ +[package] +name = "raw_string" +type = "bin" +authors = [""] + +[dependencies] diff --git a/tooling/nargo_cli/tests/compile_success_empty/raw_string/src/main.nr b/tooling/nargo_cli/tests/compile_success_empty/raw_string/src/main.nr new file mode 100644 index 00000000000..ad8dfe82ae5 --- /dev/null +++ b/tooling/nargo_cli/tests/compile_success_empty/raw_string/src/main.nr @@ -0,0 +1,13 @@ +global D = r#####"Hello "world""#####; + +fn main() { + let a = "Hello \"world\""; + let b = r#"Hello "world""#; + let c = r##"Hello "world""##; + assert(a == b); + assert(b == c); + assert(c == D); + let x = r#"Hello World"#; + let y = r"Hello World"; + assert(x == y); +} diff --git a/tooling/nargo_fmt/src/rewrite/expr.rs b/tooling/nargo_fmt/src/rewrite/expr.rs index 4d7279815df..48273073553 100644 --- a/tooling/nargo_fmt/src/rewrite/expr.rs +++ b/tooling/nargo_fmt/src/rewrite/expr.rs @@ -101,9 +101,11 @@ pub(crate) fn rewrite( format_parens(None, visitor.fork(), shape, exprs.len() == 1, exprs, span, false) } ExpressionKind::Literal(literal) => match literal { - Literal::Integer(_) | Literal::Bool(_) | Literal::Str(_) | 
Literal::FmtStr(_) => { - visitor.slice(span).to_string() - } + Literal::Integer(_) + | Literal::Bool(_) + | Literal::Str(_) + | Literal::RawStr(..) + | Literal::FmtStr(_) => visitor.slice(span).to_string(), Literal::Array(ArrayLiteral::Repeated { repeated_element, length }) => { let repeated = rewrite_sub_expr(visitor, shape, *repeated_element); let length = rewrite_sub_expr(visitor, shape, *length);