-
Notifications
You must be signed in to change notification settings - Fork 0
/
forgiving-messageformat-parser.pegjs
157 lines (132 loc) · 4.33 KB
/
forgiving-messageformat-parser.pegjs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
//based on the grammar of https://github.com/SlexAxton/messageformat.js
// Entry rule of the grammar: the input is parsed as a top-level pattern.
start
  = topLevelMessageFormatPattern
// Pattern used at the outermost level of a message. At every position the
// alternatives are tried in order: a well-formed element, plain text, a
// malformed `{...}` element and finally a single invalid character or escape.
// The result node lists the parsed pieces under `statements`.
topLevelMessageFormatPattern
  = parts:(
        messageFormatElement
      / string
      / invalidMessageFormatElement
      / invalidTopLevelLiteral
    )* {
      var pattern = { type: 'messageFormatPattern' };
      pattern.statements = parts;
      return pattern;
    }
// Pattern used inside the braces of a plural/select form. It mirrors the
// top-level pattern, except that its last-resort alternative (invalidLiteral)
// never consumes a `}`, so the closing brace is left for the enclosing form.
messageFormatPattern
  = parts:(
        messageFormatElement
      / string
      / invalidMessageFormatElement
      / invalidLiteral
    )* {
      var pattern = { type: 'messageFormatPattern' };
      pattern.statements = parts;
      return pattern;
    }
// A well-formed argument reference: `{id}` or `{id, <elementFormat>}`.
// The identifier is stored as `argumentIndex`. When a format part is present
// it is attached as `elementFormat`; otherwise the node is flagged with
// `output: true`.
messageFormatElement
  = '{' _ name:id _ formatPart:(',' elementFormat)? _ '}' {
      var node = {
        type: "messageFormatElement",
        argumentIndex: name
      };
      // An unmatched optional yields a falsy value, so check before indexing.
      var hasFormat = formatPart && formatPart.length;
      if (!hasFormat) {
        node.output = true;
        return node;
      }
      // formatPart is the pair [',', elementFormat]; keep only the format node.
      node.elementFormat = formatPart[1];
      return node;
    }
// The format part of an element, i.e. everything after the first comma in
// `{id, ...}`. Produces a node `{ type: "elementFormat", key, val, start, end }`:
//   - key: the literal "plural", "selectordinal" or "select", or any other id
//   - val: the parsed plural/select pattern, or the list of style ids that
//          follow a generic format key
//   - start/end: parser positions taken right before the key and right after
//          the format body
// The keyword alternatives come before the generic one because `id` would
// also match the keywords.
// No keyword alternative consumes trailing whitespace itself: that is left to
// the `_` following `p2:pos`, so `end` is recorded the same way for "plural",
// "selectordinal" and "select".
elementFormat
  = _ p1:pos format:(
        t:"plural" _ ',' _ s:pluralFormatPattern {
          return { type: "elementFormat", key: t, val: s };
        }
      / t:"selectordinal" _ ',' _ s:pluralFormatPattern {
          return { type: "elementFormat", key: t, val: s };
        }
      / t:"select" _ ',' _ s:selectFormatPattern {
          return { type: "elementFormat", key: t, val: s };
        }
      / t:id p:argStylePattern* {
          return { type: "elementFormat", key: t, val: p };
        }
    ) p2:pos _ {
      format.start = p1;
      format.end = p2;
      return format;
    }
// Body of a plural/selectordinal format: an optional `offset:<n>` followed by
// one or more plural forms. `offset` falls back to 0 when none was given.
pluralFormatPattern
  = shift:offsetPattern? forms:pluralForm+ {
      var pattern = { type: "pluralFormatPattern" };
      pattern.pluralForms = forms;
      pattern.offset = shift || 0;
      return pattern;
    }
// `offset:<digits>` with optional whitespace around every token; yields the
// numeric value produced by `digits`.
offsetPattern
  = _ "offset" _ ":" _ amount:digits _ {
      return amount;
    }
// One plural case, `<key> { <pattern> }`, returned as a key/val pair where
// `val` is the nested message pattern.
pluralForm
  = _ selector:pluralKey _ "{" _ body:messageFormatPattern _ "}" {
      var form = {};
      form.key = selector;
      form.val = body;
      return form;
    }
// Key of a plural form: either an identifier (kept as text) or an explicit
// value written as `=<digits>` (kept as the number returned by `digits`).
// The parser positions before and after the key are recorded alongside it.
pluralKey
  = p1:pos key:(
        id
      / "=" num:digits { return num; }
    ) p2:pos {
      var node = {};
      node.val = key;
      node.start = p1;
      node.end = p2;
      return node;
    }
// Body of a select format: one or more select forms.
selectFormatPattern
  = forms:selectForm+ {
      var pattern = { type: "selectFormatPattern" };
      pattern.selectForms = forms;
      return pattern;
    }
// One select case, `<id> { <pattern> }`. The key keeps the identifier text
// together with the parser positions right before and right after it.
selectForm
  = _ p1:pos name:id p2:pos _ "{" _ body:messageFormatPattern _ "}" {
      var key = {};
      key.val = name;
      key.start = p1;
      key.end = p2;
      return { key: key, val: body };
    }
// A `, <id>` suffix after a generic format key; yields just the identifier.
// Whitespace around the comma and after the identifier is consumed.
argStylePattern
  = _ "," _ style:id _ {
      return style;
    }
// A run of literal text: character runs and whitespace, concatenated into a
// single "string" node. Escapes have already been resolved by `char`.
string
  = pieces:(chars / whitespace)+ {
      var text = pieces.join('');
      return { type: "string", val: text };
    }
// Identifier, deliberately restricted to keep the generated code small:
// roughly a single word without punctuation. It starts with a digit, an ASCII
// letter, `$` or `_` and continues up to the next whitespace character or one
// of `, . + = { }`. The matched text itself is the result.
id "identifier"
  = $([0-9a-zA-Z$_][^ \t\n\r,.+={}]*)
// One or more literal characters, joined into a single string.
chars
  = cs:char+ {
      return cs.join('');
    }
// A single literal character of message text. Alternatives, in order:
//   - any character other than a brace, a backslash, a control character
//     (U+0000-U+001F, U+007F) or whitespace
//   - `\#`, which is passed through unchanged, backslash included
//   - `\{` and `\}`, which yield the bare brace (written as \u007B / \u007D
//     in the action code)
//   - `\uXXXX` with exactly four hex digits, which yields that UTF-16 code unit
char
  = x:[^{}\\\0-\x1F\x7f \t\n\r] { return x; }
  / "\\#" { return "\\#"; }
  / "\\{" { return "\u007B"; }
  / "\\}" { return "\u007D"; }
  / "\\u" h1:hexDigit h2:hexDigit h3:hexDigit h4:hexDigit {
      // Parse with an explicit radix instead of relying on a "0x" prefix.
      return String.fromCharCode(parseInt(h1 + h2 + h3 + h4, 16));
    }
// A non-empty run of decimal digits, returned as a number.
digits
  = text:$([0-9]+) {
      // The text may start with 0 but must not be read as an octal number,
      // hence the base is passed to parseInt explicitly.
      return parseInt(text, 10);
    }
// A single hexadecimal digit, upper or lower case.
hexDigit
  = [0-9A-Fa-f]
// Optional whitespace: zero or more whitespace characters, returned as the
// matched text (the empty string when there is none).
_ "whitespace"
  = $(whitespace*)
// A single whitespace character: space, tab, line feed or carriage return.
// The original JSON grammar leaves whitespace undefined, so a simple
// conventional definition in line with ECMA-262, 5th ed. is assumed here.
whitespace
  = [ \t\n\r]
// Fallback for a `{...}` element that is not a well-formed
// messageFormatElement. Consumes the opening brace, then any non-brace
// characters and balanced nested blocks, then the closing brace if there is
// one. `unterminated` is true when no closing brace was matched; `val` holds
// the raw match results of the body and `start`/`end` the parser positions
// around the whole element.
invalidMessageFormatElement
  = p1:pos '{' body:([^{}] / invalidBlock)* closer:'}'? p2:pos {
      var node = { type: "invalidMessageFormatElement" };
      node.val = body;
      node.unterminated = (closer === null);
      node.start = p1;
      node.end = p2;
      return node;
    }
// A balanced brace block: an opening brace, any mix of non-brace character
// runs and nested balanced blocks, and the matching closing brace.
invalidBlock
  = '{' ( [^{}]+ / invalidBlock )* '}'
// Common escaping mistakes, reported with their source text and position:
//   - "invalidUnicode": `\u` followed by up to four hex digits
//   - "invalidEscapeChar": a backslash followed by any non-whitespace character
invalidEscape
  = p1:pos raw:$("\\u" hexDigit? hexDigit? hexDigit? hexDigit?) p2:pos {
      var node = { type: "invalidUnicode" };
      node.text = raw;
      node.start = p1;
      node.end = p2;
      return node;
    }
  / p1:pos raw:$("\\" [^ \t\n\r]) p2:pos {
      var node = { type: "invalidEscapeChar" };
      node.text = raw;
      node.start = p1;
      node.end = p2;
      return node;
    }
// Last-resort alternative at the top level: an invalid escape sequence, or
// else any single character, which is reported as an "invalidLiteral".
invalidTopLevelLiteral
  = invalidEscape
  / p1:pos ch:. p2:pos {
      var node = { type: "invalidLiteral" };
      node.text = ch;
      node.start = p1;
      node.end = p2;
      return node;
    }
// Last-resort alternative inside a nested pattern: an invalid escape
// sequence, or else any single character other than a closing brace, which is
// reported as an "invalidLiteral".
invalidLiteral
  = invalidEscape
  / p1:pos ch:[^}] p2:pos {
      var node = { type: "invalidLiteral" };
      node.text = ch;
      node.start = p1;
      node.end = p2;
      return node;
    }
// Consumes no input; only captures the current parser position as reported by
// the line(), column() and offset() helpers.
pos
  = {
      var here = {};
      here.line = line();
      here.column = column();
      here.offset = offset();
      return here;
    }