Skip to content

Commit

Permalink
added support for special chars (#309)
Browse files Browse the repository at this point in the history
  • Loading branch information
99NIMI committed Oct 5, 2021
1 parent b72f869 commit 8a513a5
Show file tree
Hide file tree
Showing 4 changed files with 128 additions and 1 deletion.
8 changes: 8 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ encoding_rs_io = "0.1"
codespan-reporting = "0.11.1"
mun_lld = "110.0.0"
generational-arena = "0.2.8"
regex = "1"
hex = "0.4.3"

[lib]
name = "rusty"
Expand Down
44 changes: 44 additions & 0 deletions src/codegen/tests/code_gen_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -847,6 +847,50 @@ entry:
assert_eq!(result, expected);
}

/// Verifies how `$`-escape sequences inside STRING and WSTRING literals show up
/// in the generated IR.
///
/// The `should_replace_*` variables use escapes written for their own string
/// kind; the `should_not_replace_*` variables use the quote escape and the hex
/// width of the other kind.
#[test]
fn program_with_special_chars_in_string() {
    // Expected module text, compared verbatim against the generated output.
    let expected_ir = r#"; ModuleID = 'main'
source_filename = "main"
%prg_interface = type { [81 x i8], [81 x i8], [162 x i8], [162 x i8] }
@prg_instance = global %prg_interface zeroinitializer
define void @prg(%prg_interface* %0) {
entry:
%should_replace_s = getelementptr inbounds %prg_interface, %prg_interface* %0, i32 0, i32 0
%should_not_replace_s = getelementptr inbounds %prg_interface, %prg_interface* %0, i32 0, i32 1
%should_replace_ws = getelementptr inbounds %prg_interface, %prg_interface* %0, i32 0, i32 2
%should_not_replace_ws = getelementptr inbounds %prg_interface, %prg_interface* %0, i32 0, i32 3
store [33 x i8] c"a\0A\0A b\0A\0A c\0C\0C d\0D\0D e\09\09 $ 'single' W\00", [81 x i8]* %should_replace_s, align 1
store [19 x i8] c"\0043 $\22no replace$\22\00", [81 x i8]* %should_not_replace_s, align 1
store [68 x i8] c"a\00\0A\00\0A\00 \00b\00\0A\00\0A\00 \00c\00\0C\00\0C\00 \00d\00\0D\00\0D\00 \00e\00\09\00\09\00 \00$\00 \00\22\00d\00o\00u\00b\00l\00e\00\22\00 \00\00\00W\00\00\00", [162 x i8]* %should_replace_ws, align 1
store [38 x i8] c"$\004\003\00 \00$\00'\00n\00o\00 \00r\00e\00p\00l\00a\00c\00e\00$\00'\00\00\00", [162 x i8]* %should_not_replace_ws, align 1
ret void
}
"#;

    // Program under test: one assignment per string variable.
    let generated_ir = codegen!(
        r#"PROGRAM prg
VAR
should_replace_s : STRING;
should_not_replace_s : STRING;
should_replace_ws : WSTRING;
should_not_replace_ws : WSTRING;
END_VAR
should_replace_s := 'a$l$L b$n$N c$p$P d$r$R e$t$T $$ $'single$' $57';
should_not_replace_s := '$0043 $"no replace$"';
should_replace_ws := "a$l$L b$n$N c$p$P d$r$R e$t$T $$ $"double$" $0057";
should_not_replace_ws := "$43 $'no replace$'";
END_PROGRAM
"#
    );

    assert_eq!(generated_ir, expected_ir);
}

#[test]
fn different_case_references() {
let result = codegen!(
Expand Down
75 changes: 74 additions & 1 deletion src/parser/expressions_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use crate::{
parser::parse_any_in_region,
Diagnostic,
};
use regex::{Captures, Regex};
use std::str::FromStr;

macro_rules! parse_left_associative_expression {
Expand Down Expand Up @@ -779,14 +780,55 @@ fn trim_quotes(quoted_string: &str) -> String {
quoted_string[1..quoted_string.len() - 1].to_string()
}

/// Maps the character following a `$` to the character the escape stands for.
///
/// `quote` is the quote character that may be escaped in the current string
/// kind. Returns `None` when `c` does not form a single-character escape.
fn simple_escape(c: char, quote: char) -> Option<char> {
    match c {
        'l' | 'L' | 'n' | 'N' => Some('\n'),
        'p' | 'P' => Some('\x0C'),
        'r' | 'R' => Some('\r'),
        't' | 'T' => Some('\t'),
        '$' => Some('$'),
        _ if c == quote => Some(quote),
        _ => None,
    }
}

/// Decodes the first `digits` characters of `tail` as upper-case hex pairs and
/// reads the resulting bytes as UTF-8.
///
/// Returns `None` when `tail` is too short, contains a non-hex character within
/// the first `digits` bytes, or the decoded bytes are not valid UTF-8.
fn decode_hex_escape(tail: &str, digits: usize) -> Option<String> {
    // `get` also rejects a range that would end inside a multi-byte character.
    let code = tail.get(..digits)?;
    if !code.bytes().all(|b| matches!(b, b'0'..=b'9' | b'A'..=b'F')) {
        return None;
    }
    // All bytes are ASCII at this point, so slicing in steps of two is safe.
    let bytes = (0..digits)
        .step_by(2)
        .map(|i| u8::from_str_radix(&code[i..i + 2], 16))
        .collect::<Result<Vec<u8>, _>>()
        .ok()?;
    String::from_utf8(bytes).ok()
}

/// Replaces the `$`-escape sequences of a string literal with the characters
/// they denote.
///
/// Recognised for both string kinds: `$l`/`$L` and `$n`/`$N` (line feed),
/// `$p`/`$P` (form feed), `$r`/`$R` (carriage return), `$t`/`$T` (tab) and
/// `$$` (a literal dollar sign). Depending on `is_wide`:
///
/// * `is_wide == false`: `$'` becomes `'`, and `$` followed by two upper-case
///   hex digits is decoded (`$57` becomes `W`).
/// * `is_wide == true`: `$"` becomes `"`, and `$` followed by four upper-case
///   hex digits is decoded.
///
/// Anything else following a `$` is left untouched. This includes a hex escape
/// whose bytes are not valid UTF-8 (e.g. `$FF`); it is kept verbatim instead of
/// aborting the compiler with a panic.
///
/// The input is processed in a single left-to-right pass, so the output of one
/// escape is never re-read as the start of another: `$$57` yields the literal
/// text `$57`, not `W`.
fn handle_special_chars(string: &str, is_wide: bool) -> String {
    let (quote, hex_digits) = if is_wide { ('"', 4) } else { ('\'', 2) };

    let mut result = String::with_capacity(string.len());
    let mut rest = string;
    while let Some(pos) = rest.find('$') {
        // Copy everything up to the `$` unchanged.
        result.push_str(&rest[..pos]);
        let tail = &rest[pos + 1..];

        let escaped = tail.chars().next().and_then(|c| simple_escape(c, quote));
        if let Some(replacement) = escaped {
            result.push(replacement);
            // Every single-character escape is ASCII, i.e. exactly one byte long.
            rest = &tail[1..];
        } else if let Some(decoded) = decode_hex_escape(tail, hex_digits) {
            // NOTE(review): the hex bytes are interpreted as UTF-8, so a wide
            // `$0057` yields the two characters "\0W" rather than U+0057. This
            // mirrors the previous behaviour; confirm that it is intended.
            result.push_str(&decoded);
            rest = &tail[hex_digits..];
        } else {
            // Not an escape for this string kind: keep the `$` as it is.
            result.push('$');
            rest = tail;
        }
    }
    result.push_str(rest);
    result
}

fn parse_literal_string(
lexer: &mut ParseSession,
is_wide: bool,
) -> Result<AstStatement, Diagnostic> {
let result = lexer.slice();
let location = lexer.location();
let string_literal = Ok(AstStatement::LiteralString {
value: trim_quotes(result),
value: handle_special_chars(&trim_quotes(result), is_wide),
is_wide,
location,
id: lexer.next_id(),
Expand Down Expand Up @@ -825,3 +867,34 @@ fn parse_literal_real(
))
}
}

#[cfg(test)]
mod tests {
    use crate::parser::expressions_parser::handle_special_chars;

    /// Escapes written for the matching string kind are all substituted.
    #[test]
    fn replace_all_test() {
        // (input, is_wide, expected output); the wide case is checked first
        let cases = [
            (
                r#"a $l$L b $n$N test $p$P c $r$R d $t$T$$ $"double$" $0077"#,
                true,
                "a \n\n b \n\n test \x0C\x0C c \r\r d \t\t$ \"double\" \u{0}w",
            ),
            (
                "a $l$L b $n$N test $p$P c $r$R d $t$T$$ $'quote$' $57",
                false,
                "a \n\n b \n\n test \x0C\x0C c \r\r d \t\t$ 'quote' W",
            ),
        ];

        for &(input, is_wide, expected) in &cases {
            assert_eq!(handle_special_chars(input, is_wide), expected);
        }
    }

    /// Quote escapes and hex widths of the other string kind stay as written.
    #[test]
    fn should_not_replace_test() {
        // (input, is_wide, expected output); the wide case is checked first
        let cases = [
            (r#"$57 $'no replace$'"#, true, "$57 $'no replace$'"),
            (
                r#"$0043 $"no replace$""#,
                false,
                "\u{0}43 $\"no replace$\"",
            ),
        ];

        for &(input, is_wide, expected) in &cases {
            assert_eq!(handle_special_chars(input, is_wide), expected);
        }
    }
}

0 comments on commit 8a513a5

Please sign in to comment.