@@ -1174,7 +1174,10 @@ public function has_class( $wanted_class ) {
11741174 */
11751175 public function set_bookmark ( $ name ) {
11761176 // It only makes sense to set a bookmark if the parser has paused on a concrete token.
1177- if ( self ::STATE_INCOMPLETE === $ this ->parser_state ) {
1177+ if (
1178+ self ::STATE_COMPLETE === $ this ->parser_state ||
1179+ self ::STATE_INCOMPLETE === $ this ->parser_state
1180+ ) {
11781181 return false ;
11791182 }
11801183
@@ -1555,12 +1558,12 @@ private function parse_next_tag() {
15551558 }
15561559
15571560 /*
1558- * <! transitions to markup declaration open state
1561+ * `<!` transitions to markup declaration open state
15591562 * https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
15601563 */
15611564 if ( '! ' === $ html [ $ at + 1 ] ) {
15621565 /*
1563- * <!-- transitions to a bogus comment state – skip to the nearest -->
1566+ * `<!--` transitions to a comment state – apply further comment rules.
15641567 * https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
15651568 */
15661569 if (
@@ -1579,7 +1582,14 @@ private function parse_next_tag() {
15791582 // Abruptly-closed empty comments are a sequence of dashes followed by `>`.
15801583 $ span_of_dashes = strspn ( $ html , '- ' , $ closer_at );
15811584 if ( '> ' === $ html [ $ closer_at + $ span_of_dashes ] ) {
1582- // @todo This could go wrong if the closer is shorter than `<!---->` because there's no inside content.
1585+ /*
1586+ * @todo When implementing `set_modifiable_text()` ensure that updates to this token
1587+ * don't break the syntax for short comments, e.g. `<!--->`. Unlike other comment
1588+ * and bogus comment syntax, these leave no clear insertion point for text and
1589+ * they need to be modified specially in order to contain text. E.g. to store
1590+ * `?` as the modifiable text, the `<!--->` needs to become `<!--?-->`, which
1591+ * involves inserting an additional `-` into the token after the modifiable text.
1592+ */
15831593 $ this ->parser_state = self ::STATE_COMMENT ;
15841594 $ this ->token_length = $ closer_at + $ span_of_dashes + 1 - $ this ->token_starts_at ;
15851595 $ this ->text_starts_at = $ this ->token_starts_at + 4 ;
@@ -1628,7 +1638,7 @@ private function parse_next_tag() {
16281638 }
16291639
16301640 /*
1631- * <!DOCTYPE transitions to DOCTYPE state – skip to the nearest >
1641+ * `<!DOCTYPE` transitions to DOCTYPE state – skip to the nearest >
16321642 * These are ASCII-case-insensitive.
16331643 * https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
16341644 */
@@ -1726,7 +1736,7 @@ private function parse_next_tag() {
17261736 }
17271737
17281738 /*
1729- * <? transitions to a bogus comment state – skip to the nearest >
1739+ * `<?` transitions to a bogus comment state – skip to the nearest >
17301740 * See https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
17311741 */
17321742 if ( '? ' === $ html [ $ at + 1 ] ) {
@@ -1789,6 +1799,9 @@ private function parse_next_tag() {
17891799 * If a non-alpha starts the tag name in a tag closer it's a comment.
17901800 * Find the first `>`, which closes the comment.
17911801 *
1802+ * This parser classifies these particular comments as special "funky comments"
1803+ * which are made available for further processing.
1804+ *
17921805 * See https://html.spec.whatwg.org/#parse-error-invalid-first-character-of-tag-name
17931806 */
17941807 if ( $ this ->is_closing_tag ) {
@@ -2576,6 +2589,7 @@ public function is_tag_closer() {
25762589 * - `#cdata-section` when matched on a CDATA node.
25772590 * - `#processing-instruction` when matched on a processing instruction.
25782591 * - `#comment` when matched on a comment.
2592+ * - `#doctype` when matched on a DOCTYPE declaration.
25792593 * - `#presumptuous-tag` when matched on an empty tag closer.
25802594 * - `#funky-comment` when matched on a funky comment.
25812595 *
@@ -2667,20 +2681,25 @@ public function get_token_name() {
26672681 * @return string
26682682 */
26692683 public function get_modifiable_text () {
2670- $ at = $ this ->text_starts_at ;
2671- $ length = $ this ->text_length ;
2672- $ text = substr ( $ this ->html , $ at , $ length );
2684+ if ( null === $ this ->text_starts_at ) {
2685+ return '' ;
2686+ }
2687+
2688+ $ text = substr ( $ this ->html , $ this ->text_starts_at , $ this ->text_length );
26732689
26742690 if (
26752691 self ::STATE_CDATA_NODE === $ this ->parser_state ||
2676- self ::STATE_PI_NODE === $ this ->parser_state
2692+ self ::STATE_COMMENT === $ this ->parser_state ||
2693+ self ::STATE_DOCTYPE === $ this ->parser_state ||
2694+ self ::STATE_PI_NODE === $ this ->parser_state ||
2695+ self ::STATE_FUNKY_COMMENT === $ this ->parser_state
26772696 ) {
26782697 return $ text ;
26792698 }
26802699
2681- $ text = html_entity_decode ( $ text , ENT_QUOTES | ENT_HTML5 | ENT_SUBSTITUTE );
2700+ $ decoded = html_entity_decode ( $ text , ENT_QUOTES | ENT_HTML5 | ENT_SUBSTITUTE );
26822701
2683- if ( empty ( $ text ) ) {
2702+ if ( empty ( $ decoded ) ) {
26842703 return '' ;
26852704 }
26862705
@@ -2694,14 +2713,14 @@ public function get_modifiable_text() {
26942713 switch ( $ this ->get_tag () ) {
26952714 case 'PRE ' :
26962715 case 'TEXTAREA ' :
2697- if ( "\n" === $ text [0 ] ) {
2698- return substr ( $ text , 1 );
2716+ if ( "\n" === $ decoded [0 ] ) {
2717+ return substr ( $ decoded , 1 );
26992718 }
27002719 break ;
27012720 }
27022721 }
27032722
2704- return $ text ;
2723+ return $ decoded ;
27052724 }
27062725
27072726 /**
@@ -3286,7 +3305,8 @@ private function matches() {
32863305 const STATE_DOCTYPE = 'STATE_DOCTYPE ' ;
32873306
32883307 /**
3289- * Indicates that the parser has found an empty tag closer.
3308+ * Indicates that the parser has found an empty tag closer `</>`.
3309+ *
32903310 * Note that in HTML there are no empty tag closers, and they
32913311 * are ignored. Nonetheless, the Tag Processor still
32923312 * recognizes them as they appear in the HTML stream.
@@ -3305,8 +3325,14 @@ private function matches() {
33053325 * Indicates that the parser has found a "funky comment"
33063326 * and it's possible to read and modify its modifiable text.
33073327 *
3328+ * Example:
3329+ *
3330+ * </%url>
3331+ * </{"wp-bit":"query/post-author"}>
3332+ * </2>
3333+ *
33083334 * Funky comments are tag closers with invalid tag names. Note
3309- * that in HTML these are treated as HTML comments. Nonetheless,
3335+ * that in HTML these are turned into bogus comments. Nonetheless,
33103336 * the Tag Processor recognizes them in a stream of HTML and
33113337 * exposes them for inspection and modification.
33123338 *
0 commit comments