-
-
Notifications
You must be signed in to change notification settings - Fork 21
/
escaping.go
171 lines (151 loc) · 3.57 KB
/
escaping.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
/*
Copyright © 2014–2020 Thomas Michael Edwards. All rights reserved.
Use of this source code is governed by a Simplified BSD License which
can be found in the LICENSE file.
*/
package main
import (
"strings"
)
/*
HTML escaping/unescaping utilities.
*/
// attrEscaper escapes the minimum set of characters required for
// double-quoted HTML attribute values: `&` becomes `&amp;` and `"` becomes
// `&quot;`.
//
// The ampersand pair is listed first for readability only; a Replacer makes a
// single pass over its input, so the ampersands it emits are never re-escaped.
var attrEscaper = strings.NewReplacer(
	`&`, `&amp;`,
	`"`, `&quot;`,
	// QUESTION: Keep the following? All markup we generate double quotes attribute
	// values, so escaping single quotes/apostrophes isn't actually necessary.
	//
	// The decimal reference is used for the apostrophe because `&apos;` is not
	// a named entity in HTML 4.
	`'`, `&#39;`,
)

// attrEscapeString returns s with `&`, `"`, and `'` replaced by `&amp;`,
// `&quot;`, and `&#39;` respectively, making it safe to embed in a quoted
// HTML attribute value. An empty string is returned unchanged.
func attrEscapeString(s string) string {
	if len(s) == 0 {
		return s
	}
	return attrEscaper.Replace(s)
}
// htmlEscaper escapes the minimum characters required for general HTML
// escaping—i.e., only the special characters (`&`, `<`, `>`, `"`, `'`).
//
// NOTE: The following exists because `html.EscapeString()` converts double
// quotes (`"`) to their decimal numeric character reference (`&#34;`) rather
// than to their entity (`&quot;`). While the behavior is entirely legal, and
// browsers will happily accept the NCRs, a not insignificant amount of code in
// the wild only checks for `&quot;` and will fail to properly unescape the NCR.
//
// The primary special characters (`&`, `<`, `>`, `"`) should always be
// converted to their entity forms and never to an NCR form. Saving one byte
// (5 vs. 6) is not worth the issues it causes.
//
// The apostrophe is the one exception: it is emitted as `&#39;` because
// `&apos;` is not a named entity in HTML 4.
var htmlEscaper = strings.NewReplacer(
	`&`, `&amp;`,
	`<`, `&lt;`,
	`>`, `&gt;`,
	`"`, `&quot;`,
	`'`, `&#39;`,
)

// htmlEscapeString returns s with the HTML special characters `&`, `<`, `>`,
// `"`, and `'` replaced by `&amp;`, `&lt;`, `&gt;`, `&quot;`, and `&#39;`.
// The replacement is done in a single pass, so already-escaped input is
// escaped again (`&amp;` becomes `&amp;amp;`). An empty string is returned
// unchanged.
func htmlEscapeString(s string) string {
	if len(s) == 0 {
		return s
	}
	return htmlEscaper.Replace(s)
}
// tiddlerEscaper performs the escaping applied by tiddlerEscapeString: the
// HTML special characters `&`, `<`, `>`, and `"` are converted to their
// entities, and backslash, tab, and newline are converted to the two-character
// sequences `\s`, `\t`, and `\n` respectively.
//
// A literal backslash maps to `\s` (not `\\`) so that the `\t` and `\n`
// sequences emitted for real tabs/newlines stay distinguishable from a
// backslash that merely happened to precede a `t` or `n` in the input.
var tiddlerEscaper = strings.NewReplacer(
	`&`, `&amp;`,
	`<`, `&lt;`,
	`>`, `&gt;`,
	`"`, `&quot;`,
	`\`, `\s`,
	"\t", `\t`,
	"\n", `\n`,
)

// tiddlerEscapeString returns s with HTML special characters replaced by
// entities and with backslashes, tabs, and newlines replaced by `\s`, `\t`,
// and `\n`. An empty string is returned unchanged.
func tiddlerEscapeString(s string) string {
	if len(s) == 0 {
		return s
	}
	return tiddlerEscaper.Replace(s)
}
// tiddlerUnescaper reverses the backslash sequences `\n`, `\t`, and `\s`,
// turning them back into a newline, a tab, and a backslash respectively.
//
// NOTE: Entities and character references are deliberately not handled here.
// `tiddlerUnescapeString()` is only used when loading Twine 1 HTML, and the
// `x/net/html` package already unescapes those for us.
var tiddlerUnescaper = strings.NewReplacer(
	`\n`, "\n",
	`\t`, "\t",
	`\s`, `\`,
)

// tiddlerUnescapeString returns s with the two-character sequences `\n`,
// `\t`, and `\s` replaced by a newline, a tab, and a single backslash.
// All other text, including any other backslash sequence, is left as-is.
func tiddlerUnescapeString(s string) string {
	if s == "" {
		return s
	}
	unescaped := tiddlerUnescaper.Replace(s)
	return unescaped
}
/*
Twee escaping/unescaping utilities.
*/
// tweeEscapeBytes returns a newly allocated copy of s in which every byte of
// the Twee encode set ('\\', '[', ']', '{', '}') is prefixed with a backslash.
// An empty or nil input yields a nil slice.
func tweeEscapeBytes(s []byte) []byte {
	if len(s) == 0 {
		return nil
	}

	// Reports whether c belongs to the encode set.
	needsEscape := func(c byte) bool {
		return c == '\\' || c == '[' || c == ']' || c == '{' || c == '}'
	}

	// NOTE: The input is walked twice—once to size the output exactly and once
	// to fill it. The slices passed here are expected to be short, so the
	// extra pass is cheaper than growing the output; guessing a small fixed
	// number of escapes up front would also be a reasonable strategy.
	extra := 0
	for i := range s {
		if needsEscape(s[i]) {
			extra++
		}
	}

	out := make([]byte, 0, len(s)+extra)
	for i := range s {
		if c := s[i]; needsEscape(c) {
			out = append(out, '\\', c)
		} else {
			out = append(out, c)
		}
	}
	return out
}
// tweeEscaper prefixes each character of the Twee encode set (`\`, `[`, `]`,
// `{`, `}`) with a backslash.
var tweeEscaper = strings.NewReplacer(
	`\`, `\\`,
	`[`, `\[`,
	`]`, `\]`,
	`{`, `\{`,
	`}`, `\}`,
)

// tweeEscapeString is the string counterpart of the byte-slice escaper: it
// returns s with every `\`, `[`, `]`, `{`, and `}` preceded by a backslash.
// An empty string is returned unchanged.
func tweeEscapeString(s string) string {
	if s == "" {
		return s
	}
	escaped := tweeEscaper.Replace(s)
	return escaped
}
// tweeUnescapeBytes returns a newly allocated copy of s with backslash escapes
// removed: each backslash is dropped and the byte that follows it is copied
// through verbatim, whatever that byte is. A lone backslash at the very end of
// the input has nothing to escape and is discarded. An empty or nil input
// yields a nil slice.
func tweeUnescapeBytes(s []byte) []byte {
	n := len(s)
	if n == 0 {
		return nil
	}

	// The output can never be longer than the input.
	out := make([]byte, 0, n)
	pos := 0
	for pos < n {
		c := s[pos]
		pos++
		if c == '\\' {
			if pos == n {
				// Trailing backslash with no escaped byte after it.
				break
			}
			c = s[pos]
			pos++
		}
		out = append(out, c)
	}
	return out
}
// tweeUnescaper strips the escaping backslash from the sequences `\\`, `\[`,
// `\]`, `\{`, and `\}`. A backslash followed by any other character is not in
// the table and therefore passes through untouched.
var tweeUnescaper = strings.NewReplacer(
	`\\`, `\`,
	`\[`, `[`,
	`\]`, `]`,
	`\{`, `{`,
	`\}`, `}`,
)

// tweeUnescapeString returns s with the Twee escape sequences `\\`, `\[`,
// `\]`, `\{`, and `\}` replaced by the character they escape. An empty string
// is returned unchanged.
func tweeUnescapeString(s string) string {
	if s == "" {
		return s
	}
	unescaped := tweeUnescaper.Replace(s)
	return unescaped
}