-
Notifications
You must be signed in to change notification settings - Fork 83
/
scrubbers.rb
203 lines (177 loc) · 5.99 KB
/
scrubbers.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
# frozen_string_literal: true
module Rails
module HTML
# === Rails::HTML::PermitScrubber
#
# +Rails::HTML::PermitScrubber+ allows you to permit only your own tags and/or attributes.
#
# +Rails::HTML::PermitScrubber+ can be subclassed to determine:
# - When a node should be skipped via +skip_node?+.
# - When a node is allowed via +allowed_node?+.
# - When an attribute should be scrubbed via +scrub_attribute?+.
#
# Subclasses don't need to worry if tags or attributes are set or not.
# If tags or attributes are not set, Loofah's behavior will be used.
# If you override +allowed_node?+ and no tags are set, it will not be called.
# Instead, Loofah's behavior will be used.
# Likewise for +scrub_attribute?+ and attributes respectively.
#
# Text and CDATA nodes are skipped by default.
# Unallowed elements will be stripped, i.e. element is removed but its subtree kept.
# Supplied tags and attributes should be Enumerables.
#
# +tags=+
# If set, elements excluded will be stripped.
# If not, elements are stripped based on Loofah's +HTML5::Scrub.allowed_element?+.
#
# +attributes=+
# If set, attributes excluded will be removed.
# If not, attributes are removed based on Loofah's +HTML5::Scrub.scrub_attributes+.
#
#   class CommentScrubber < Rails::HTML::PermitScrubber
#     def initialize
#       super
#       self.tags = %w(form script comment blockquote)
#     end
#
#     def skip_node?(node)
#       node.text?
#     end
#
#     def scrub_attribute?(name)
#       name == "style"
#     end
#   end
#
# See the documentation for +Nokogiri::XML::Node+ to understand what's possible
# with nodes: https://nokogiri.org/rdoc/Nokogiri/XML/Node.html
class PermitScrubber < Loofah::Scrubber
  attr_reader :tags, :attributes, :prune

  # prune:: when true, a disallowed node is removed together with its
  #         children; when false (the default) the children are moved in
  #         front of the node before it is removed, so the subtree is kept.
  def initialize(prune: false)
    @prune = prune
    # NOTE(review): @direction is presumably read by Loofah::Scrubber to
    # choose the traversal order -- confirm against Loofah.
    @direction = @prune ? :top_down : :bottom_up
    @tags, @attributes = nil, nil
  end

  # Sets the permitted tags. Accepts nil or an Enumerable; raises
  # ArgumentError for anything else.
  def tags=(tags)
    @tags = validate!(tags, :tags)
  end

  # Sets the permitted attributes. Accepts nil or an Enumerable; raises
  # ArgumentError for anything else.
  def attributes=(attributes)
    @attributes = validate!(attributes, :attributes)
  end

  # Scrubs a single node and returns CONTINUE or STOP.
  #
  # Order matters: CDATA that Loofah says needs escaping is replaced first,
  # then skippable nodes are passed over, then disallowed nodes are handed
  # to +scrub_node+, and only nodes that survive get their attributes scrubbed.
  def scrub(node)
    if Loofah::HTML5::Scrub.cdata_needs_escaping?(node)
      replacement = Loofah::HTML5::Scrub.cdata_escape(node)
      node.replace(replacement)
      return CONTINUE
    end

    return CONTINUE if skip_node?(node)

    unless (node.element? || node.comment?) && keep_node?(node)
      # STOP is returned unless +scrub_node+ (possibly overridden by a
      # subclass) explicitly answers CONTINUE.
      return STOP unless scrub_node(node) == CONTINUE
    end

    scrub_attributes(node)
    CONTINUE
  end

  protected

  # True when the node's name is among the configured tags.
  # Only called when tags have been set (see +keep_node?+).
  def allowed_node?(node)
    @tags.include?(node.name)
  end

  # True for nodes that should be left completely untouched.
  def skip_node?(node)
    node.text?
  end

  # True when the attribute should be removed, i.e. its name is NOT among
  # the configured attributes. Only called when attributes have been set
  # (see +scrub_attributes+).
  def scrub_attribute?(name)
    !@attributes.include?(name)
  end

  # Decides whether a node is kept: uses +allowed_node?+ when tags are
  # set, otherwise defers to Loofah's allowed-element check.
  def keep_node?(node)
    if @tags
      allowed_node?(node)
    else
      Loofah::HTML5::Scrub.allowed_element?(node.name)
    end
  end

  # Removes the node. Unless pruning, its children are first moved in
  # front of it so that only the element itself is stripped.
  def scrub_node(node)
    node.before(node.children) unless prune # strip
    node.remove
  end

  # Scrubs the attributes of a kept node. With attributes configured, each
  # attribute is either removed (+scrub_attribute?+) or sanitized
  # (+scrub_attribute+), followed by a CSS pass; otherwise Loofah's own
  # attribute scrubbing is used.
  def scrub_attributes(node)
    if @attributes
      node.attribute_nodes.each do |attr|
        if scrub_attribute?(attr.name)
          attr.remove
        else
          scrub_attribute(node, attr)
        end
      end

      scrub_css_attribute(node)
    else
      Loofah::HTML5::Scrub.scrub_attributes(node)
    end
  end

  # Sanitizes the node's +style+ attribute. Delegates to
  # Loofah::HTML5::Scrub.scrub_css_attribute when Loofah provides it,
  # otherwise rewrites the style value through +scrub_css+ directly.
  def scrub_css_attribute(node)
    if Loofah::HTML5::Scrub.respond_to?(:scrub_css_attribute)
      Loofah::HTML5::Scrub.scrub_css_attribute(node)
    else
      style = node.attributes["style"]
      style.value = Loofah::HTML5::Scrub.scrub_css(style.value) if style
    end
  end

  # Returns +var+ unchanged when it is nil/false or an Enumerable;
  # raises ArgumentError otherwise. +name+ is only used in the message.
  def validate!(var, name)
    if var && !var.is_a?(Enumerable)
      raise ArgumentError, "You should pass :#{name} as an Enumerable"
    end
    var
  end

  # Sanitizes the value of a permitted attribute.
  def scrub_attribute(node, attr_node)
    # Safelists are keyed by the qualified name, so include the namespace prefix.
    attr_name = if attr_node.namespace
      "#{attr_node.namespace.prefix}:#{attr_node.node_name}"
    else
      attr_node.node_name
    end

    # NOTE(review): a truthy result from scrub_uri_attribute presumably means
    # the attribute was removed, so nothing is left to process -- confirm.
    return if Loofah::HTML5::SafeList::ATTR_VAL_IS_URI.include?(attr_name) && Loofah::HTML5::Scrub.scrub_uri_attribute(attr_node)

    if Loofah::HTML5::SafeList::SVG_ATTR_VAL_ALLOWS_REF.include?(attr_name)
      Loofah::HTML5::Scrub.scrub_attribute_that_allows_local_ref(attr_node)
    end

    # On these SVG elements, drop xlink:href when the regexp finds a line whose
    # first non-whitespace character is not "#".
    if Loofah::HTML5::SafeList::SVG_ALLOW_LOCAL_HREF.include?(node.name) && attr_name == "xlink:href" && attr_node.value =~ /^\s*[^#\s].*/m
      attr_node.remove
    end

    # Drop a +src+ attribute whose value is empty or whitespace only.
    node.remove_attribute(attr_node.name) if attr_name == "src" && attr_node.value !~ /[^[:space:]]/

    Loofah::HTML5::Scrub.force_correct_attribute_escaping! node
  end
end
# === Rails::HTML::TargetScrubber
#
# Where +Rails::HTML::PermitScrubber+ picks out tags and attributes to permit in
# sanitization, +Rails::HTML::TargetScrubber+ targets them for removal.
#
# +tags=+
# If set, elements included will be stripped.
#
# +attributes=+
# If set, attributes included will be removed.
class TargetScrubber < PermitScrubber
  # Inverts the parent's answer for the same node.
  def allowed_node?(node)
    permitted = super(node)
    !permitted
  end

  # Inverts the parent's answer for the same attribute name.
  def scrub_attribute?(name)
    scrubbed_by_parent = super(name)
    !scrubbed_by_parent
  end
end
# === Rails::HTML::TextOnlyScrubber
#
# +Rails::HTML::TextOnlyScrubber+ allows you to permit text nodes.
#
# Unallowed elements will be stripped, i.e. element is removed but its subtree kept.
class TextOnlyScrubber < Loofah::Scrubber
  def initialize
    @direction = :bottom_up
  end

  # Text nodes pass through untouched. Any other node is replaced by its
  # own children: they are moved in front of it, then the node is removed.
  def scrub(node)
    return CONTINUE if node.text?

    node.before(node.children)
    node.remove
  end
end
end
end