diff --git a/isso/tests/test_html.py b/isso/tests/test_html.py index 5b59b39a..5d5f87a3 100644 --- a/isso/tests/test_html.py +++ b/isso/tests/test_html.py @@ -70,6 +70,8 @@ def test_sanitizer(self): ('ld.so', 'ld.so'), ('/usr/lib/x86_64-linux-gnu/libc/memcpy-preload.so', '/usr/lib/x86_64-linux-gnu/libc/memcpy-preload.so'), ('
', 'Test
'), + ('Test
', 'Test
'),
+ ('Test
', 'Test
'),
('', 'alert("Onoe")')]
for (input, expected) in examples:
@@ -122,7 +124,7 @@ def test_code_blocks(self):
convert = html.Markdown(extensions=('fenced-code',))
examples = [
("```\nThis is a code-fence. This is a code-fence. <hello>\n
"),
- ("```c++\nThis is a code-fence. This is a code-fence. <hello>\n
"),
+ ("```cpp\nThis is a code-fence. This is a code-fence. <hello>\n
"),
(" This is a four-character indent. This is a four-character indent. <hello>\n
")]
for (input, expected) in examples:
diff --git a/isso/utils/html.py b/isso/utils/html.py
index b7b895d2..c1aafad1 100644
--- a/isso/utils/html.py
+++ b/isso/utils/html.py
@@ -1,6 +1,7 @@
# -*- encoding: utf-8 -*-
import html
+import re
import bleach
import misaka
@@ -8,6 +9,13 @@
class Sanitizer(object):
+ # pattern to match a valid class attribute for code tags
+ code_language_pattern = re.compile(r"^language-[a-zA-Z0-9]{1,20}$")
+
+    @staticmethod
+    def allow_attribute_class(tag, name, value):
+        """Return whether an attribute is an acceptable code-language class.
+
+        :param tag: name of the element carrying the attribute (not inspected)
+        :param name: attribute name; only ``"class"`` can be accepted
+        :param value: attribute value, checked against
+            ``Sanitizer.code_language_pattern``
+        :return: True only when ``name`` is ``"class"`` and the entire
+            ``value`` matches ``code_language_pattern``; False otherwise
+        """
+        # fullmatch() rather than match(): the pattern's "$" also matches
+        # just before a trailing newline, so match() would let a value such
+        # as "language-c" followed by a newline slip through.
+        return name == "class" and bool(Sanitizer.code_language_pattern.fullmatch(value))
+
def __init__(self, elements, attributes):
# attributes found in Sundown's HTML serializer [1]
# - except for <img> tag, because images are not generated anyways.
@@ -20,8 +28,13 @@ def __init__(self, elements, attributes):
"h1", "h2", "h3", "h4", "h5", "h6", "sub", "sup",
"table", "thead", "tbody", "th", "td"] + elements
- # href for <a> and align for <table>