From 0f3426c3b68e92754eeed9ff37d571c31e55b07c Mon Sep 17 00:00:00 2001 From: Bernie Reiter Date: Tue, 13 Dec 2022 15:39:15 +0100 Subject: [PATCH 01/54] Remove ssr.mjs --- package.json | 6 ++---- ssr.mjs | 31 ------------------------------- 2 files changed, 2 insertions(+), 35 deletions(-) delete mode 100644 ssr.mjs diff --git a/package.json b/package.json index dff3a556..2044acd9 100644 --- a/package.json +++ b/package.json @@ -10,8 +10,7 @@ "test": "jest", "test:watch": "jest --watch", "plugin-zip": "wp-scripts plugin-zip", - "wp-env": "wp-env", - "ssr": "node ssr.mjs" + "wp-env": "wp-env" }, "prettier": { "useTabs": true, @@ -39,7 +38,6 @@ "dependencies": { "@preact/signals": "^1.1.2", "hpq": "^1.3.0", - "preact": "^10.10.6", - "ultrahtml": "^0.4.0" + "preact": "^10.10.6" } } diff --git a/ssr.mjs b/ssr.mjs deleted file mode 100644 index 03c4062d..00000000 --- a/ssr.mjs +++ /dev/null @@ -1,31 +0,0 @@ -import { transform, html } from 'ultrahtml'; -import { readFile, writeFile } from 'fs/promises'; - -const file = '/blocks/tabs/render.php'; - -const propsToArray = (props) => { - let result = '['; - Object.entries(props).forEach(([key, value]) => { - result += `["${key}", "${value}"]`; - }); - result += ']'; - return result; -}; - -const start = async () => { - const php = await readFile('./src' + file, { - encoding: 'utf8', - }); - const output = await transform(php, { - components: { - 'wp-show': (props, children) => - html` - ${children} - `, - }, - }); - await writeFile('./build' + file, output); - console.log('done!'); -}; - -start(); From 2618b45e1f3c3c4f01210e6b50c830cc4634f10a Mon Sep 17 00:00:00 2001 From: Bernie Reiter Date: Tue, 13 Dec 2022 15:59:38 +0100 Subject: [PATCH 02/54] Remove now-obsolete SSR helpers --- wp-directives.php | 55 ----------------------------------------------- 1 file changed, 55 deletions(-) diff --git a/wp-directives.php b/wp-directives.php index 60fb3e11..076ee967 100644 --- a/wp-directives.php +++ b/wp-directives.php @@ -142,32 
+142,6 @@ function wp_directives_client_site_transitions_option() * SSR in PHP */ -// wp-show -function wpx_show_open_tag($prop_entries) -{ - global $wpx; - $props = wpx_prop_entries_to_array($prop_entries); - $attributes = wpx_prop_entries_to_attributes($prop_entries); - $value = wpx_get_state($props['when']); - if ($value) { - echo ""; - } else { - echo "'; - } -} - // Utils $GLOBALS['wpx'] = []; function wpx($data) @@ -175,32 +149,3 @@ function wpx($data) global $wpx; $wpx = array_merge_recursive($wpx, $data); } - -function wpx_get_state($path) -{ - global $wpx; - $current = $wpx; - $array = explode('.', $path); - foreach ($array as $p) { - $current = $current[$p]; - } - return $current; -} - -function wpx_prop_entries_to_array($prop_entries) -{ - $array = []; - foreach ($prop_entries as list($key, $value)) { - $array[$key] = $value; - } - return $array; -} - -function wpx_prop_entries_to_attributes($prop_entries) -{ - $attributes = ''; - foreach ($prop_entries as list($key, $value)) { - $attributes .= "$key='$value'"; - } - return $attributes; -} From d246f7084813634619889898b61cea48513774fa Mon Sep 17 00:00:00 2001 From: Bernie Reiter Date: Tue, 13 Dec 2022 16:29:30 +0100 Subject: [PATCH 03/54] Copy WP_HTML_Tag_Processor code from Gutenberg --- src/html/class-wp-html-attribute-token.php | 89 + src/html/class-wp-html-span.php | 52 + src/html/class-wp-html-tag-processor.php | 1823 +++++++++++++++++++ src/html/class-wp-html-text-replacement.php | 59 + src/html/index.php | 12 + 5 files changed, 2035 insertions(+) create mode 100644 src/html/class-wp-html-attribute-token.php create mode 100644 src/html/class-wp-html-span.php create mode 100644 src/html/class-wp-html-tag-processor.php create mode 100644 src/html/class-wp-html-text-replacement.php create mode 100644 src/html/index.php diff --git a/src/html/class-wp-html-attribute-token.php b/src/html/class-wp-html-attribute-token.php new file mode 100644 index 00000000..7b3d5718 --- /dev/null +++ 
b/src/html/class-wp-html-attribute-token.php @@ -0,0 +1,89 @@ +name = $name; + $this->value_starts_at = $value_start; + $this->value_length = $value_length; + $this->start = $start; + $this->end = $end; + $this->is_true = $is_true; + } +} diff --git a/src/html/class-wp-html-span.php b/src/html/class-wp-html-span.php new file mode 100644 index 00000000..39e60366 --- /dev/null +++ b/src/html/class-wp-html-span.php @@ -0,0 +1,52 @@ +start = $start; + $this->end = $end; + } +} diff --git a/src/html/class-wp-html-tag-processor.php b/src/html/class-wp-html-tag-processor.php new file mode 100644 index 00000000..affbb6fb --- /dev/null +++ b/src/html/class-wp-html-tag-processor.php @@ -0,0 +1,1823 @@ + "c" not " c" + * @TODO: Skip over `/` in attributes area, split attribute names by `/` + * @TODO: Decode HTML references/entities in class names when matching. + * E.g. match having class `1<"2` needs to recognize `class="1<"2"`. + * @TODO: Decode character references in `get_attribute()` + * @TODO: Properly escape attribute value in `set_attribute()` + * @TODO: Add slow mode to escape character entities in CSS class names? + * (This requires a custom decoder since `html_entity_decode()` + * doesn't handle attribute character reference decoding rules. + * + * @package WordPress + * @subpackage HTML + * @since 6.2.0 + */ + +/** + * Processes an input HTML document by applying a specified set + * of patches to that input. Tokenizes HTML but does not fully + * parse the input document. + * + * ## Usage + * + * Use of this class requires three steps: + * + * 1. Create a new class instance with your input HTML document. + * 2. Find the tag(s) you are looking for. + * 3. Request changes to the attributes in those tag(s). 
+ * + * Example: + * ```php + * $tags = new WP_HTML_Tag_Processor( $html ); + * if ( $tags->next_tag( [ 'tag_name' => 'option' ] ) ) { + * $tags->set_attribute( 'selected', true ); + * } + * ``` + * + * ### Finding tags + * + * The `next_tag()` function moves the internal cursor through + * your input HTML document until it finds a tag meeting any of + * the supplied restrictions in the optional query argument. If + * no argument is provided then it will find the next HTML tag, + * regardless of what kind it is. + * + * If you want to _find whatever the next tag is_: + * ```php + * $tags->next_tag(); + * ``` + * + * | Goal | Query | + * |-----------------------------------------------------------|----------------------------------------------------------------------------| + * | Find any tag. | `$tags->next_tag();` | + * | Find next image tag. | `$tags->next_tag( [ 'tag_name' => 'img' ] );` | + * | Find next tag containing the `fullwidth` CSS class. | `$tags->next_tag( [ 'class_name' => 'fullwidth' ] );` | + * | Find next image tag containing the `fullwidth` CSS class. | `$tags->next_tag( [ 'tag_name' => 'img', 'class_name' => 'fullwidth' ] );` | + * + * If a tag was found meeting your criteria then `next_tag()` + * will return `true` and you can proceed to modify it. If it + * returns `false`, however, it failed to find the tag and + * moved the cursor to the end of the file. + * + * Once the cursor reaches the end of the file the processor + * is done and if you want to reach an earlier tag you will + * need to recreate the processor and start over. The internal + * cursor can only proceed forward, never backing up. + * + * #### Custom queries + * + * Sometimes it's necessary to further inspect an HTML tag than + * the query syntax here permits. In these cases one may further + * inspect the search results using the read-only functions + * provided by the processor or external state or variables. 
+ * + * Example: + * ```php + * // Paint up to the first five DIV or SPAN tags marked with the "jazzy" style. + * $remaining_count = 5; + * while ( $remaining_count > 0 && $tags->next_tag() ) { + * if ( + * ( 'DIV' === $tags->get_tag() || 'SPAN' === $tags->get_tag() ) && + * 'jazzy' === $tags->get_attribute( 'data-style' ) + * ) { + * $tags->add_class( 'theme-style-everest-jazz' ); + * $remaining_count--; + * } + * } + * ``` + * + * `get_attribute()` will return `null` if the attribute wasn't present + * on the tag when it was called. It may return `""` (the empty string) + * in cases where the attribute was present but its value was empty. + * For boolean attributes, those whose name is present but no value is + * given, it will return `true` (the only way to set `false` for an + * attribute is to remove it). + * + * ### Modifying HTML attributes for a found tag + * + * Once you've found the start of an opening tag you can modify + * any number of the attributes on that tag. You can set a new + * value for an attribute, remove the entire attribute, or do + * nothing and move on to the next opening tag. + * + * Example: + * ```php + * if ( $tags->next_tag( [ 'class_name' => 'wp-group-block' ] ) ) { + * $tags->set_attribute( 'title', 'This groups the contained content.' ); + * $tags->remove_attribute( 'data-test-id' ); + * } + * ``` + * + * If `set_attribute()` is called for an existing attribute it will + * overwrite the existing value. Similarly, calling `remove_attribute()` + * for a non-existing attribute has no effect on the document. Both + * of these methods are safe to call without knowing if a given attribute + * exists beforehand. + * + * ### Modifying CSS classes for a found tag + * + * The tag processor treats the `class` attribute as a special case. + * Because it's a common operation to add or remove CSS classes you + * can do so using this interface.
+ * + * As with attribute values, adding or removing CSS classes is a safe + * operation that doesn't require checking if the attribute or class + * exists before making changes. If removing the only class then the + * entire `class` attribute will be removed. + * + * Example: + * ```php + * // from `Yippee!` + * // to `Yippee!` + * $tags->add_class( 'is-active' ); + * + * // from `Yippee!` + * // to `Yippee!` + * $tags->add_class( 'is-active' ); + * + * // from `Yippee!` + * // to `Yippee!` + * $tags->add_class( 'is-active' ); + * + * // from `` + * // to ` + * $tags->remove_class( 'rugby' ); + * + * // from `` + * // to ` + * $tags->remove_class( 'rugby' ); + * + * // from `` + * // to ` + * $tags->remove_class( 'rugby' ); + * ``` + * + * ## Design limitations + * + * @TODO: Expand this section + * + * - no nesting: cannot match open and close tag + * - only move forward, never backward + * - class names not decoded if they contain character references + * - only secures against HTML escaping issues; requires + * manually sanitizing or escaping values based on the needs of + * each individual attribute, since different attributes have + * different needs. + * + * @since 6.2.0 + */ +class WP_HTML_Tag_Processor { + /** + * The maximum number of bookmarks allowed to exist at + * any given time. + * + * @see set_bookmark(); + * @since 6.2.0 + * @var int + */ + const MAX_BOOKMARKS = 10; + + /** + * Maximum number of times seek() can be called. + * Prevents accidental infinite loops. + * + * @see seek() + * @since 6.2.0 + * @var int + */ + const MAX_SEEK_OPS = 1000; + + /** + * The HTML document to parse. + * + * @since 6.2.0 + * @var string + */ + private $html; + + /** + * The last query passed to next_tag(). + * + * @since 6.2.0 + * @var array|null + */ + private $last_query; + + /** + * The tag name this processor currently scans for. 
+ * + * @since 6.2.0 + * @var string|null + */ + private $sought_tag_name; + + /** + * The CSS class name this processor currently scans for. + * + * @since 6.2.0 + * @var string|null + */ + private $sought_class_name; + + /** + * The match offset this processor currently scans for. + * + * @since 6.2.0 + * @var int|null + */ + private $sought_match_offset; + + /** + * Whether to visit tag closers, e.g. , when walking an input document. + * + * @since 6.2.0 + * @var boolean + */ + private $stop_on_tag_closers; + + /** + * The updated HTML document. + * + * @since 6.2.0 + * @var string + */ + private $updated_html = ''; + + /** + * How many bytes from the original HTML document were already read. + * + * @since 6.2.0 + * @var int + */ + private $parsed_bytes = 0; + + /** + * How many bytes from the original HTML document were already treated + * with the requested replacements. + * + * @since 6.2.0 + * @var int + */ + private $updated_bytes = 0; + + /** + * Byte offset in input document where current tag name starts. + * + * Example: + * ``` + *
... + * 01234 + * - tag name starts at 1 + * ``` + * + * @since 6.2.0 + * @var ?int + */ + private $tag_name_starts_at; + + /** + * Byte length of current tag name. + * + * Example: + * ``` + *
... + * 01234 + * --- tag name length is 3 + * ``` + * + * @since 6.2.0 + * @var ?int + */ + private $tag_name_length; + + /** + * Byte offset in input document where current tag token ends. + * + * Example: + * ``` + *
... + * 0 1 | + * 01234567890123456 + * --- tag ends at 14 + * ``` + * + * @since 6.2.0 + * @var ?int + */ + private $tag_ends_at; + + /** + * Whether the current tag is an opening tag, e.g.
, or a closing tag, e.g.
. + * + * @var boolean + */ + private $is_closing_tag; + + /** + * Lazily-built index of attributes found within an HTML tag, keyed by the attribute name. + * + * Example: + * + * // supposing the parser is working through this content + * // and stops after recognizing the `id` attribute + * //
+ * // ^ parsing will continue from this point + * $this->attributes = [ + * 'id' => new WP_HTML_Attribute_Match( 'id', null, 6, 17 ) + * ]; + * + * // when picking up parsing again, or when asking to find the + * // `class` attribute we will continue and add to this array + * $this->attributes = [ + * 'id' => new WP_HTML_Attribute_Match( 'id', null, 6, 17 ), + * 'class' => new WP_HTML_Attribute_Match( 'class', 'outline', 18, 32 ) + * ]; + * + * // Note that only the `class` attribute value is stored in the index. + * // That's because it is the only value used by this class at the moment. + * + * + * @since 6.2.0 + * @var WP_HTML_Attribute_Token[] + */ + private $attributes = array(); + + /** + * Which class names to add or remove from a tag. + * + * These are tracked separately from attribute updates because they are + * semantically distinct, whereas this interface exists for the common + * case of adding and removing class names while other attributes are + * generally modified as with DOM `setAttribute` calls. + * + * When modifying an HTML document these will eventually be collapsed + * into a single lexical update to replace the `class` attribute. + * + * Example: + * + * // Add the `wp-block-group` class, remove the `wp-group` class. + * $classname_updates = [ + * // Indexed by a comparable class name + * 'wp-block-group' => WP_HTML_Tag_Processor::ADD_CLASS, + * 'wp-group' => WP_HTML_Tag_Processor::REMOVE_CLASS + * ]; + * + * + * @since 6.2.0 + * @var bool[] + */ + private $classname_updates = array(); + + /** + * Tracks a semantic location in the original HTML which + * shifts with updates as they are applied to the document. + * + * @since 6.2.0 + * @var WP_HTML_Span[] + */ + private $bookmarks = array(); + + const ADD_CLASS = true; + const REMOVE_CLASS = false; + const SKIP_CLASS = null; + + /** + * Lexical replacements to apply to input HTML document. 
+ * + * HTML modifications collapse into lexical replacements in order to + * provide an efficient mechanism to update documents lazily and in + * order to support a variety of semantic modifications without + * building a complicated parsing machinery. That is, it's up to + * the calling class to generate the lexical modification from the + * semantic change requested. + * + * Example: + * + * // Replace an attribute stored with a new value, indices + * // sourced from the lazily-parsed HTML recognizer. + * $start = $attributes['src']->start; + * $end = $attributes['src']->end; + * $modifications[] = new WP_HTML_Text_Replacement( $start, $end, get_the_post_thumbnail_url() ); + * + * // Correspondingly, something like this + * // will appear in the replacements array. + * $replacements = [ + * WP_HTML_Text_Replacement( 14, 28, 'https://my-site.my-domain/wp-content/uploads/2014/08/kittens.jpg' ) + * ]; + * + * + * @since 6.2.0 + * @var WP_HTML_Text_Replacement[] + */ + private $attribute_updates = array(); + + /** + * Tracks how many times we've performed a `seek()` + * so that we can prevent accidental infinite loops. + * + * @see seek + * @since 6.2.0 + * @var int + */ + private $seek_count = 0; + + /** + * Constructor. + * + * @since 6.2.0 + * + * @param string $html HTML to process. + */ + public function __construct( $html ) { + $this->html = $html; + } + + /** + * Finds the next tag matching the $query. + * + * @since 6.2.0 + * + * @param array|string $query { + * Which tag name to find, having which class, etc. + * + * @type string|null $tag_name Which tag to find, or `null` for "any tag." + * @type int|null $match_offset Find the Nth tag matching all search criteria. + * 0 for "first" tag, 2 for "third," etc. + * Defaults to first tag. + * @type string|null $class_name Tag must contain this whole class name to match. + * } + * @return boolean Whether a tag was matched. 
+ */ + public function next_tag( $query = null ) { + $this->parse_query( $query ); + $already_found = 0; + + do { + if ( $this->parsed_bytes >= strlen( $this->html ) ) { + return false; + } + + /* + * Unfortunately we can't try to search for only the tag name we want because that might + * lead us to skip over other tags and lose track of our place. So we need to search for + * _every_ tag and then check after we find one if it's the one we are looking for. + */ + if ( false === $this->parse_next_tag() ) { + $this->parsed_bytes = strlen( $this->html ); + + return false; + } + + while ( $this->parse_next_attribute() ) { + continue; + } + + $tag_ends_at = strpos( $this->html, '>', $this->parsed_bytes ); + if ( false === $tag_ends_at ) { + return false; + } + $this->tag_ends_at = $tag_ends_at; + $this->parsed_bytes = $tag_ends_at; + + if ( $this->matches() ) { + ++$already_found; + } + + // Avoid copying the tag name string when possible. + $t = $this->html[ $this->tag_name_starts_at ]; + if ( 's' === $t || 'S' === $t || 't' === $t || 'T' === $t ) { + $tag_name = $this->get_tag(); + + if ( 'SCRIPT' === $tag_name && ! $this->skip_script_data() ) { + $this->parsed_bytes = strlen( $this->html ); + return false; + } elseif ( + ( 'TEXTAREA' === $tag_name || 'TITLE' === $tag_name ) && + ! $this->skip_rcdata( $tag_name ) + ) { + $this->parsed_bytes = strlen( $this->html ); + return false; + } + } + } while ( $already_found < $this->sought_match_offset ); + + return true; + } + + + /** + * Sets a bookmark in the HTML document. + * + * Bookmarks represent specific places or tokens in the HTML + * document, such as a tag opener or closer. When applying + * edits to a document, such as setting an attribute, the + * text offsets of that token may shift; the bookmark is + * kept updated with those shifts and remains stable unless + * the entire span of text in which the token sits is removed. + * + * Release bookmarks when they are no longer needed. + * + * Example: + * ``` + *

Surprising fact you may not know!

+ * ^ ^ + * \-|-- this `H2` opener bookmark tracks the token + * + *

Surprising fact you may no… + * ^ ^ + * \-|-- it shifts with edits + * ``` + * + * Bookmarks provide the ability to seek to a previously-scanned + * place in the HTML document. This avoids the need to re-scan + * the entire thing. + * + * Example: + * ``` + *
  • One
  • Two
  • Three
+ * ^^^^ + * want to note this last item + * + * $p = new WP_HTML_Tag_Processor( $html ); + * $in_list = false; + * while ( $p->next_tag( [ 'tag_closers' => $in_list ? 'visit' : 'skip' ] ) ) { + * if ( 'UL' === $p->get_tag() ) { + * if ( $p->is_tag_closer() ) { + * $in_list = false; + * $p->set_bookmark( 'resume' ); + * if ( $p->seek( 'last-li' ) ) { + * $p->add_class( 'last-li' ); + * } + * $p->seek( 'resume' ); + * $p->release_bookmark( 'last-li' ); + * $p->release_bookmark( 'resume' ); + * } else { + * $in_list = true; + * } + * } + * + * if ( 'LI' === $p->get_tag() ) { + * $p->set_bookmark( 'last-li' ); + * } + * } + * ``` + * + * Because bookmarks maintain their position they don't + * expose any internal offsets for the HTML document + * and can't be used with normal string functions. + * + * Because bookmarks allocate memory and require processing + * for every applied update they are limited and require + * a name. They should not be created inside a loop. + * + * Bookmarks are a powerful tool to enable complicated behavior; + * consider double-checking that you need this tool if you are + * reaching for it, as inappropriate use could lead to broken + * HTML structure or unwanted processing overhead. + * + * @param string $name Identifies this particular bookmark. + * @return bool Whether the bookmark was successfully created. + * @throws Exception Throws when the bookmark limit would be exceeded, if WP_DEBUG set. + */ + public function set_bookmark( $name ) { + if ( null === $this->tag_name_starts_at ) { + return false; + } + + if ( ! array_key_exists( $name, $this->bookmarks ) && count( $this->bookmarks ) >= self::MAX_BOOKMARKS ) { + if ( defined( 'WP_DEBUG' ) && WP_DEBUG ) { + throw new Exception( "Tried to jump to a non-existent HTML bookmark {$name}." ); + } + return false; + } + + $this->bookmarks[ $name ] = new WP_HTML_Span( + $this->tag_name_starts_at - 1, + $this->tag_ends_at + ); + + return true; + } + + + /** + * Removes a bookmark if you no longer need to use it.
+ * + * Releasing a bookmark frees up the small performance + * overhead they require, mainly in the form of compute + * costs when modifying the document. + * + * @param string $name Name of the bookmark to remove. + * @return bool + */ + public function release_bookmark( $name ) { + if ( ! array_key_exists( $name, $this->bookmarks ) ) { + return false; + } + + unset( $this->bookmarks[ $name ] ); + + return true; + } + + + /** + * Skips the contents of the title and textarea tags until an appropriate + * tag closer is found. + * + * @see https://html.spec.whatwg.org/multipage/parsing.html#rcdata-state + * @param string $tag_name – the uppercase tag name which will close the RCDATA region. + * @since 6.2.0 + */ + private function skip_rcdata( $tag_name ) { + $html = $this->html; + $doc_length = strlen( $html ); + $tag_length = strlen( $tag_name ); + + $at = $this->parsed_bytes; + + while ( false !== $at && $at < $doc_length ) { + $at = strpos( $this->html, '= $doc_length ) { + $this->parsed_bytes = $doc_length; + return false; + } + + $at += 2; + + /* + * We have to find a case-insensitive match to the tag name. + * Note also that since tag names are limited to US-ASCII + * characters we can ignore any kind of Unicode normalizing + * forms when comparing. If we get a non-ASCII character it + * will never be a match. + */ + for ( $i = 0; $i < $tag_length; $i++ ) { + $tag_char = $tag_name[ $i ]; + $html_char = $html[ $at + $i ]; + + if ( $html_char !== $tag_char && strtoupper( $html_char ) !== $tag_char ) { + $at += $i; + continue 2; + } + } + + $at += $tag_length; + $this->parsed_bytes = $at; + + /* + * Ensure we terminate the tag name, otherwise we might, + * for example, accidentally match the sequence + * "" for "".
+ */ + $c = $html[ $at ]; + if ( ' ' !== $c && "\t" !== $c && "\r" !== $c && "\n" !== $c && '/' !== $c && '>' !== $c ) { + continue; + } + + while ( $this->parse_next_attribute() ) { + continue; + } + $at = $this->parsed_bytes; + if ( $at >= strlen( $this->html ) ) { + return false; + } + + if ( '>' === $html[ $at ] || '/' === $html[ $at ] ) { + ++$this->parsed_bytes; + return true; + } + } + + return false; + } + + /** + * Skips the contents of