From db2bcd375a88cbcca7cc256e52f61a8a99badab3 Mon Sep 17 00:00:00 2001 From: Jerrie Pelser Date: Tue, 19 Dec 2023 15:44:46 +0700 Subject: [PATCH] Allows the user to override literal text element content encoding --- src/HtmlSanitizer/HtmlSanitizer.cs | 20 +++++++++++++++----- test/HtmlSanitizer.Tests/Tests.cs | 15 +++++++++++++++ 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/src/HtmlSanitizer/HtmlSanitizer.cs b/src/HtmlSanitizer/HtmlSanitizer.cs index 732a032..b7ac78d 100644 --- a/src/HtmlSanitizer/HtmlSanitizer.cs +++ b/src/HtmlSanitizer/HtmlSanitizer.cs @@ -99,6 +99,11 @@ public HtmlSanitizer(HtmlSanitizerOptions options) AllowedAtRules = new HashSet(options.AllowedAtRules); } + /// <summary> + /// Gets or sets the default method that encodes literal text content. + /// </summary> + public Action<IElement> EncodeLiteralTextElementContent { get; set; } = DefaultEncodeLiteralTextElementContent; + /// /// Gets or sets the default value indicating whether to keep child nodes of elements that are removed. Default is false. 
/// @@ -465,6 +470,15 @@ private void RemoveComments(INode context) } } + private static void DefaultEncodeLiteralTextElementContent(IElement tag) + { + var escapedHtml = tag.InnerHtml.Replace("<", "&lt;").Replace(">", "&gt;"); + if (escapedHtml != tag.InnerHtml) + tag.InnerHtml = escapedHtml; + if (tag.InnerHtml != escapedHtml) // setting InnerHtml does not work for noscript + tag.SetInnerText(escapedHtml); + } + private void DoSanitize(IHtmlDocument dom, IParentNode context, string baseUrl = "") { // remove disallowed tags @@ -479,11 +493,7 @@ private void DoSanitize(IHtmlDocument dom, IParentNode context, string baseUrl = && t.Flags.HasFlag(NodeFlags.LiteralText) && !string.IsNullOrWhiteSpace(t.InnerHtml))) { - var escapedHtml = tag.InnerHtml.Replace("<", "&lt;").Replace(">", "&gt;"); - if (escapedHtml != tag.InnerHtml) - tag.InnerHtml = escapedHtml; - if (tag.InnerHtml != escapedHtml) // setting InnerHtml does not work for noscript - tag.SetInnerText(escapedHtml); + EncodeLiteralTextElementContent(tag); } SanitizeStyleSheets(dom, baseUrl); diff --git a/test/HtmlSanitizer.Tests/Tests.cs b/test/HtmlSanitizer.Tests/Tests.cs index f01b2db..a8ad6d0 100644 --- a/test/HtmlSanitizer.Tests/Tests.cs +++ b/test/HtmlSanitizer.Tests/Tests.cs @@ -3553,6 +3553,21 @@ public void Bypass4Test() Assert.Equal(expected, sanitized); } + [Fact] + public void OverrideLiteralTextElementContentEncoderTest() + { + var sanitizer = new HtmlSanitizer(); + sanitizer.AllowedTags.Add("script"); + sanitizer.EncodeLiteralTextElementContent = (e) => + { + // Do nothing - we do not want to encode the custom element inside the "; + var sanitized = sanitizer.Sanitize(bypass); + var expected = @""; + Assert.Equal(expected, sanitized); + } + [Fact] public void InlineCssTest() {