From cc00406b1d0c115e5c66dd4bdfb40db32496f55f Mon Sep 17 00:00:00 2001 From: Matthew Whitaker Date: Tue, 16 May 2023 08:51:23 -0600 Subject: [PATCH] fix: Cleaned up whitespace processing and added whitespace tests (#1267) --- lib/src/builtins/styled_element_builtin.dart | 13 +- lib/src/builtins/text_builtin.dart | 11 +- lib/src/processing/whitespace.dart | 116 +++++++++++++--- lib/src/tree/replaced_element.dart | 7 + test/test_utils.dart | 87 +++++++++++- test/whitespace_test.dart | 138 +++++++++++++++++++ 6 files changed, 347 insertions(+), 25 deletions(-) create mode 100644 test/whitespace_test.dart diff --git a/lib/src/builtins/styled_element_builtin.dart b/lib/src/builtins/styled_element_builtin.dart index edb4590edc..5076a31822 100644 --- a/lib/src/builtins/styled_element_builtin.dart +++ b/lib/src/builtins/styled_element_builtin.dart @@ -429,11 +429,12 @@ class StyledElementBuiltIn extends HtmlExtension { .entries .expandIndexed((i, child) => [ child.value, - if (i != context.styledElement!.children.length - 1 && + if (context.parser.shrinkWrap && + i != context.styledElement!.children.length - 1 && child.key.style.display == Display.block && child.key.element?.localName != "html" && child.key.element?.localName != "body") - const TextSpan(text: "\n"), + const TextSpan(text: "\n", style: TextStyle(fontSize: 0)), ]) .toList(), ), @@ -444,14 +445,16 @@ class StyledElementBuiltIn extends HtmlExtension { style: context.styledElement!.style.generateTextStyle(), children: buildChildren() .entries - .expand((child) => [ + .expandIndexed((index, child) => [ child.value, - if (child.key.style.display == Display.block && + if (context.parser.shrinkWrap && + child.key.style.display == Display.block && + index != context.styledElement!.children.length - 1 && child.key.element?.parent?.localName != "th" && child.key.element?.parent?.localName != "td" && child.key.element?.localName != "html" && child.key.element?.localName != "body") - const TextSpan(text: "\n"), + const TextSpan(text: "\n", style: TextStyle(fontSize: 0)), ]) .toList(), ); diff --git a/lib/src/builtins/text_builtin.dart b/lib/src/builtins/text_builtin.dart index 2e170d30d5..2221c3a7bd 100644 --- a/lib/src/builtins/text_builtin.dart +++ b/lib/src/builtins/text_builtin.dart @@ -22,10 +22,8 @@ class TextBuiltIn extends HtmlExtension { StyledElement prepare( ExtensionContext context, List children) { if (context.elementName == "br") { - return TextContentElement( - text: '\n', + return LinebreakContentElement( style: Style(whiteSpace: WhiteSpace.pre), - element: context.node as dom.Element?, node: context.node, ); } @@ -45,6 +43,13 @@ class TextBuiltIn extends HtmlExtension { @override InlineSpan build(ExtensionContext context, Map Function() buildChildren) { + if (context.styledElement is LinebreakContentElement) { + return TextSpan( + text: '\n', + style: context.styledElement!.style.generateTextStyle(), + ); + } + final element = context.styledElement! as TextContentElement; return TextSpan( style: element.style.generateTextStyle(), diff --git a/lib/src/processing/whitespace.dart b/lib/src/processing/whitespace.dart index c9bb5a6066..6dc2e74a3f 100644 --- a/lib/src/processing/whitespace.dart +++ b/lib/src/processing/whitespace.dart @@ -6,26 +6,30 @@ import 'package:html/dom.dart' as html; class WhitespaceProcessing { /// [processWhitespace] handles the removal of unnecessary whitespace from /// a StyledElement tree. + /// + /// The criteria for determining which whitespace is replaceable is outlined + /// at https://www.w3.org/TR/css-text-3/ + /// and summarized at https://medium.com/@patrickbrosset/when-does-white-space-matter-in-html-b90e8a7cdd33 static StyledElement processWhitespace(StyledElement tree) { tree = _processInternalWhitespace(tree); tree = _processInlineWhitespace(tree); + tree = _processBlockWhitespace(tree); tree = _removeEmptyElements(tree); return tree; } /// [_processInternalWhitespace] removes unnecessary whitespace from the StyledElement tree. - /// - /// The criteria for determining which whitespace is replaceable is outlined - /// at https://www.w3.org/TR/css-text-3/ - /// and summarized at https://medium.com/@patrickbrosset/when-does-white-space-matter-in-html-b90e8a7cdd33 static StyledElement _processInternalWhitespace(StyledElement tree) { - if ((tree.style.whiteSpace ?? WhiteSpace.normal) == WhiteSpace.pre) { - // Preserve this whitespace - } else if (tree is TextContentElement) { + if (tree.style.whiteSpace == WhiteSpace.pre) { + return tree; + } + + if (tree is TextContentElement) { tree.text = _removeUnnecessaryWhitespace(tree.text!); } else { tree.children.forEach(_processInternalWhitespace); } + return tree; } @@ -36,6 +40,84 @@ class WhitespaceProcessing { return _processInlineWhitespaceRecursive(tree, Context(false)); } + /// [_processBlockWhitespace] removes unnecessary whitespace from block + /// rendering contexts. Specifically, a space at the beginning and end of + /// each inline rendering context should be removed. + static StyledElement _processBlockWhitespace(StyledElement tree) { + if (tree.style.whiteSpace == WhiteSpace.pre) { + return tree; + } + + bool isBlockContext = false; + for (final child in tree.children) { + if (child.style.display == Display.block || child.name == "br") { + isBlockContext = true; + } + + _processBlockWhitespace(child); + } + + if (isBlockContext) { + for (int i = 0; i < tree.children.length; i++) { + final lastChild = i != 0 ? tree.children[i - 1] : null; + final child = tree.children[i]; + final nextChild = + (i + 1) != tree.children.length ? tree.children[i + 1] : null; + + if (child.style.whiteSpace == WhiteSpace.pre) { + continue; + } + + if (child.style.display == Display.block) { + _removeLeadingSpace(child); + _removeTrailingSpace(child); + } + + if (lastChild?.style.display == Display.block || + lastChild?.name == "br") { + _removeLeadingSpace(child); + } + + if (nextChild?.style.display == Display.block || + nextChild?.name == "br") { + _removeTrailingSpace(child); + } + } + } + + return tree; + } + + /// [_removeLeadingSpace] removes any leading space + /// from the text of the tree at this level, no matter how deep in the tree + /// it may be. + static void _removeLeadingSpace(StyledElement element) { + if (element.style.whiteSpace == WhiteSpace.pre) { + return; + } + + if (element is TextContentElement) { + element.text = element.text?.trimLeft(); + } else if (element.children.isNotEmpty) { + _removeLeadingSpace(element.children.first); + } + } + + /// [_removeTrailingSpace] removes any leading space + /// from the text of the tree at this level, no matter how deep in the tree + /// it may be. + static void _removeTrailingSpace(StyledElement element) { + if (element.style.whiteSpace == WhiteSpace.pre) { + return; + } + + if (element is TextContentElement) { + element.text = element.text?.trimRight(); + } else if (element.children.isNotEmpty) { + _removeTrailingSpace(element.children.last); + } + } + /// [_processInlineWhitespaceRecursive] analyzes the whitespace between and among different /// inline elements, and replaces any instance of two or more spaces with a single space, according /// to the w3's HTML whitespace processing specification linked to above. @@ -43,6 +125,10 @@ class WhitespaceProcessing { StyledElement tree, Context keepLeadingSpace, ) { + if (tree.style.whiteSpace == WhiteSpace.pre) { + return tree; + } + if (tree is TextContentElement) { /// initialize indices to negative numbers to make conditionals a little easier int textIndex = -1; @@ -62,9 +148,9 @@ class WhitespaceProcessing { final parentNodes = tree.element?.parent?.nodes; /// find the index of the tree itself in the parent nodes - if ((parentNodes?.length ?? 0) >= 1) { + if (parentNodes?.isNotEmpty ?? false) { elementIndex = - parentNodes?.indexWhere((element) => element == tree.element) ?? -1; + parentNodes!.indexWhere((element) => element == tree.element); } /// if the tree is any node except the last node in the node list and the @@ -117,9 +203,7 @@ class WhitespaceProcessing { /// update the [Context] to signify to that next text node whether it should /// keep its whitespace. This is based on whether the current text ends with a /// whitespace. - if (textIndex == - ((tree.element?.nodes.length ?? 0) - - 1) && //TODO is this the proper ?? + if (textIndex == (tree.node.nodes.length - 1) && tree.element?.localName != "br" && parentAfterText.startsWith(' ')) { keepLeadingSpace.data = !tree.text!.endsWith(' '); @@ -142,11 +226,11 @@ class WhitespaceProcessing { /// (4) Replace any instances of two or more spaces with a single space. static String _removeUnnecessaryWhitespace(String text) { return text - .replaceAll(RegExp("\\ *(?=\n)"), "\n") - .replaceAll(RegExp("(?:\n)\\ *"), "\n") + .replaceAll(RegExp(r" *(?=\n)"), "") + .replaceAll(RegExp(r"(?<=\n) *"), "") .replaceAll("\n", " ") .replaceAll("\t", " ") - .replaceAll(RegExp(" {2,}"), " "); + .replaceAll(RegExp(r" {2,}"), " "); } /// [_removeEmptyElements] recursively removes empty elements. @@ -155,7 +239,7 @@ class WhitespaceProcessing { /// or any block-level [TextContentElement] that contains only whitespace and doesn't follow /// a block element or a line break. static StyledElement _removeEmptyElements(StyledElement tree) { - List toRemove = []; + Set toRemove = {}; bool lastChildBlock = true; tree.children.forEachIndexed((index, child) { if (child is EmptyContentElement) { diff --git a/lib/src/tree/replaced_element.dart b/lib/src/tree/replaced_element.dart index 59e459fbba..f2ae859ca0 100644 --- a/lib/src/tree/replaced_element.dart +++ b/lib/src/tree/replaced_element.dart @@ -46,6 +46,13 @@ class TextContentElement extends ReplacedElement { } } +class LinebreakContentElement extends ReplacedElement { + LinebreakContentElement({ + required super.style, + required super.node, + }) : super(name: 'br', elementId: "[[No ID]]"); +} + class EmptyContentElement extends ReplacedElement { EmptyContentElement({required super.node, String name = "empty"}) : super(name: name, style: Style(), elementId: "[[No ID]]"); diff --git a/test/test_utils.dart b/test/test_utils.dart index 9a1a00625d..53a6afad8a 100644 --- a/test/test_utils.dart +++ b/test/test_utils.dart @@ -1,5 +1,7 @@ +import 'dart:async'; + import 'package:flutter/material.dart'; -import 'package:flutter_html/src/css_box_widget.dart'; +import 'package:flutter_html/flutter_html.dart'; import 'package:flutter_test/flutter_test.dart'; class TestApp extends StatelessWidget { @@ -141,3 +143,86 @@ CssBoxWidget? findCssBox(Finder finder) { return found.first.widget as CssBoxWidget; } } + +Future generateStyledElementTreeFromHtml( + WidgetTester tester, { + required String html, + bool applyStyleSteps = true, + bool applyProcessingSteps = true, + bool shrinkWrap = false, + List extensions = const [], + Map styles = const {}, +}) async { + final completer = Completer(); + + await tester.pumpWidget(TestApp( + child: Html( + data: html, + shrinkWrap: shrinkWrap, + extensions: [ + ...extensions, + TestExtension( + beforeStyleCallback: (tree) { + if (!applyStyleSteps) { + completer.complete(tree); + } + }, + beforeProcessingCallback: (tree) { + if (!completer.isCompleted && !applyProcessingSteps) { + completer.complete(tree); + } + }, + finalCallback: (tree) { + if (!completer.isCompleted) { + completer.complete(tree); + } + }, + ), + ], + style: styles, + ), + )); + + return completer.future; +} + +class TestExtension extends HtmlExtension { + final void Function(StyledElement)? beforeStyleCallback; + final void Function(StyledElement)? beforeProcessingCallback; + final void Function(StyledElement)? finalCallback; + + TestExtension({ + this.beforeStyleCallback, + this.beforeProcessingCallback, + this.finalCallback, + }); + + @override + Set get supportedTags => {}; + + @override + bool matches(ExtensionContext context) { + return context.currentStep != CurrentStep.preparing && + context.elementName == "html"; + } + + @override + void beforeStyle(ExtensionContext context) { + beforeStyleCallback?.call(context.styledElement!); + } + + @override + void beforeProcessing(ExtensionContext context) { + beforeProcessingCallback?.call(context.styledElement!); + } + + @override + InlineSpan build(ExtensionContext context, buildChildren) { + finalCallback?.call(context.styledElement!); + return context.parser.buildFromExtension( + context, + buildChildren, + extensionsToIgnore: {this}, + ); + } +} diff --git a/test/whitespace_test.dart b/test/whitespace_test.dart new file mode 100644 index 0000000000..39fe5fc42e --- /dev/null +++ b/test/whitespace_test.dart @@ -0,0 +1,138 @@ +import 'package:flutter_html/flutter_html.dart'; +import 'package:flutter_test/flutter_test.dart'; + +import 'test_utils.dart'; + +void main() { + testWidgets( + 'test that lots of unnecessary whitespace is removed', + (tester) async { + await tester.pumpWidget(TestApp( + child: Html( + data: """\n

Hello \t \t \n\n \t\t\t World!

\n """, + ), + )); + expect(find.text("Hello World!", findRichText: true), findsOneWidget); + }, + ); + + // See https://github.com/Sub6Resources/flutter_html/issues/1007 + testWidgets( + 'test that whitespace parser does not remove inline whitespace', + (tester) async { + final tree = await generateStyledElementTreeFromHtml( + tester, + html: "Harry Potter.", + ); + + expect(tree.getWhitespace(), + equals("HarryPotter.")); + }, + ); + + // See https://github.com/Sub6Resurces/flutter_html/issues/1146 + testWidgets( + "test that extra newlines aren't added unnecessarily", + (tester) async { + final tree = await generateStyledElementTreeFromHtml( + tester, + html: """ +
+
+
+
+
center of nested div tree
+
+
+
+
+ bottom of nested div tree +
+
+
+
center of div list
+
+
+
+ bottom of div list + + """, + ); + + expect( + tree.getWhitespace(), + equals( + "
center◦of◦nested◦div◦tree
bottom◦of◦nested◦div◦tree
center◦of◦div◦list
bottom◦of◦div◦list")); + }, + ); + + // See https://github.com/Sub6Resources/flutter_html/issues/1251 + testWidgets( + 'test that preserved whitespace is actually preserved', + (tester) async { + final tree = await generateStyledElementTreeFromHtml( + tester, + html: """

test1

test2

""", + styles: { + "p": Style( + whiteSpace: WhiteSpace.pre, + ), + }, + ); + + expect(tree.getWhitespace(), + equals("

◦test1

◦test2

")); + }, + ); + + // Note that line-breaks in code between inline elements are converted into + // spaces, except when immediately following or preceding a
+ testWidgets( + "test that
doesn't cause extra space that should be removed", + (tester) async { + final tree = await generateStyledElementTreeFromHtml( + tester, + html: """ + ... +
+ 3x + log2(x)
+ 3x + log2(x) + """, + ); + + expect( + tree.getWhitespace(), + equals( + "...
3xlog2(x)
3xlog2(x)")); + }, + ); +} + +extension PrintWhitespace on StyledElement { + String getWhitespace() { + String whitespace = ""; + + if (this is TextContentElement) { + whitespace += (this as TextContentElement) + .text + ?.replaceAll("\n", "⏎") + .replaceAll("\t", "⇥") + .replaceAll(" ", "◦") ?? + ""; + } + + for (final child in children) { + if (child.name != "[text]") { + whitespace += "<${child.name}>"; + } + whitespace += child.getWhitespace(); + if (child.name != "[text]" && child.name != "br") { + whitespace += ""; + } + } + + return whitespace; + } +}