Skip to content

Commit

Permalink
Merge pull request #516 from jerriep/allow-overriding-literal-text-el…
Browse files Browse the repository at this point in the history
…ement-encoding

Allows the user to override literal text element content encoding
  • Loading branch information
mganss authored Dec 20, 2023
2 parents 3443acc + db2bcd3 commit ca23c69
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 5 deletions.
20 changes: 15 additions & 5 deletions src/HtmlSanitizer/HtmlSanitizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,11 @@ public HtmlSanitizer(HtmlSanitizerOptions options)
AllowedAtRules = new HashSet<CssRuleType>(options.AllowedAtRules);
}

/// <summary>
/// Gets or sets the <see cref="Action{IElement}"/> that encodes the content of literal text elements.
/// The default implementation replaces <c>&lt;</c> and <c>&gt;</c> in the element's inner HTML with
/// <c>&amp;lt;</c> and <c>&amp;gt;</c>. Assign a custom action to change or suppress this encoding.
/// </summary>
/// <remarks>
/// Invoked by the sanitizer on elements that have the literal-text node flag and non-blank inner HTML.
/// The delegate is called without a null check, so it should not be set to <c>null</c>.
/// </remarks>
public Action<IElement> EncodeLiteralTextElementContent { get; set; } = DefaultEncodeLiteralTextElementContent;

/// <summary>
/// Gets or sets the default value indicating whether to keep child nodes of elements that are removed. Default is false.
/// </summary>
Expand Down Expand Up @@ -465,6 +470,15 @@ private void RemoveComments(INode context)
}
}

/// <summary>
/// Default encoder for literal text elements: escapes angle brackets in the
/// element's inner HTML and writes the escaped markup back to the element.
/// </summary>
/// <param name="tag">The element whose content is escaped in place.</param>
private static void DefaultEncodeLiteralTextElementContent(IElement tag)
{
    var currentHtml = tag.InnerHtml;
    var encodedHtml = currentHtml.Replace("<", "&lt;").Replace(">", "&gt;");

    // Only touch the DOM when escaping actually changed something.
    if (encodedHtml != currentHtml)
    {
        tag.InnerHtml = encodedHtml;
    }

    // Re-read the property: setting InnerHtml does not work for noscript,
    // in which case the escaped markup is written as inner text instead.
    if (tag.InnerHtml != encodedHtml)
    {
        tag.SetInnerText(encodedHtml);
    }
}

private void DoSanitize(IHtmlDocument dom, IParentNode context, string baseUrl = "")
{
// remove disallowed tags
Expand All @@ -479,11 +493,7 @@ private void DoSanitize(IHtmlDocument dom, IParentNode context, string baseUrl =
&& t.Flags.HasFlag(NodeFlags.LiteralText)
&& !string.IsNullOrWhiteSpace(t.InnerHtml)))
{
var escapedHtml = tag.InnerHtml.Replace("<", "&lt;").Replace(">", "&gt;");
if (escapedHtml != tag.InnerHtml)
tag.InnerHtml = escapedHtml;
if (tag.InnerHtml != escapedHtml) // setting InnerHtml does not work for noscript
tag.SetInnerText(escapedHtml);
EncodeLiteralTextElementContent(tag);
}

SanitizeStyleSheets(dom, baseUrl);
Expand Down
15 changes: 15 additions & 0 deletions test/HtmlSanitizer.Tests/Tests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3553,6 +3553,21 @@ public void Bypass4Test()
Assert.Equal(expected, sanitized);
}

/// <summary>
/// Verifies that replacing <c>EncodeLiteralTextElementContent</c> with a no-op
/// leaves the markup inside an allowed &lt;script&gt; element unencoded.
/// </summary>
[Fact]
public void OverrideLiteralTextElementContentEncoderTest()
{
    // Arrange: allow <script> and install an encoder that does nothing, because
    // we do not want to encode the custom element inside the <script> element.
    var sanitizer = new HtmlSanitizer();
    sanitizer.AllowedTags.Add("script");
    sanitizer.EncodeLiteralTextElementContent = _ => { };

    const string input = "<script><custom-element>abc</custom-element></script>";
    const string expected = "<script><custom-element>abc</custom-element></script>";

    // Act
    var actual = sanitizer.Sanitize(input);

    // Assert
    Assert.Equal(expected, actual);
}

[Fact]
public void InlineCssTest()
{
Expand Down

0 comments on commit ca23c69

Please sign in to comment.