Skip to content

Commit 9f29920

Browse files
dmsnellsirreal
andcommitted
PR Feedback
Co-authored-by: Jon Surrell <sirreal@users.noreply.github.com>
1 parent 4639ff8 commit 9f29920

1 file changed

Lines changed: 43 additions & 17 deletions

File tree

src/wp-includes/html-api/class-wp-html-tag-processor.php

Lines changed: 43 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1174,7 +1174,10 @@ public function has_class( $wanted_class ) {
11741174
*/
11751175
public function set_bookmark( $name ) {
11761176
// It only makes sense to set a bookmark if the parser has paused on a concrete token.
1177-
if ( self::STATE_INCOMPLETE === $this->parser_state ) {
1177+
if (
1178+
self::STATE_COMPLETE === $this->parser_state ||
1179+
self::STATE_INCOMPLETE === $this->parser_state
1180+
) {
11781181
return false;
11791182
}
11801183

@@ -1555,12 +1558,12 @@ private function parse_next_tag() {
15551558
}
15561559

15571560
/*
1558-
* <! transitions to markup declaration open state
1561+
* `<!` transitions to markup declaration open state
15591562
* https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
15601563
*/
15611564
if ( '!' === $html[ $at + 1 ] ) {
15621565
/*
1563-
* <!-- transitions to a bogus comment state – skip to the nearest -->
1566+
* `<!--` transitions to a comment state – apply further comment rules.
15641567
* https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
15651568
*/
15661569
if (
@@ -1579,7 +1582,14 @@ private function parse_next_tag() {
15791582
// Abruptly-closed empty comments are a sequence of dashes followed by `>`.
15801583
$span_of_dashes = strspn( $html, '-', $closer_at );
15811584
if ( '>' === $html[ $closer_at + $span_of_dashes ] ) {
1582-
// @todo This could go wrong if the closer is shorter than `<!---->` because there's no inside content.
1585+
/*
1586+
* @todo When implementing `set_modifiable_text()` ensure that updates to this token
1587+
* don't break the syntax for short comments, e.g. `<!--->`. Unlike other comment
1588+
* and bogus comment syntax, these leave no clear insertion point for text and
1589+
* they need to be modified specially in order to contain text. E.g. to store
1590+
* `?` as the modifiable text, the `<!--->` needs to become `<!--?-->`, which
1591+
* involves inserting an additional `-` into the token after the modifiable text.
1592+
*/
15831593
$this->parser_state = self::STATE_COMMENT;
15841594
$this->token_length = $closer_at + $span_of_dashes + 1 - $this->token_starts_at;
15851595
$this->text_starts_at = $this->token_starts_at + 4;
@@ -1628,7 +1638,7 @@ private function parse_next_tag() {
16281638
}
16291639

16301640
/*
1631-
* <!DOCTYPE transitions to DOCTYPE state – skip to the nearest >
1641+
* `<!DOCTYPE` transitions to DOCTYPE state – skip to the nearest >
16321642
* These are ASCII-case-insensitive.
16331643
* https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
16341644
*/
@@ -1726,7 +1736,7 @@ private function parse_next_tag() {
17261736
}
17271737

17281738
/*
1729-
* <? transitions to a bogus comment state – skip to the nearest >
1739+
* `<?` transitions to a bogus comment state – skip to the nearest >
17301740
* See https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
17311741
*/
17321742
if ( '?' === $html[ $at + 1 ] ) {
@@ -1789,6 +1799,9 @@ private function parse_next_tag() {
17891799
* If a non-alpha starts the tag name in a tag closer it's a comment.
17901800
* Find the first `>`, which closes the comment.
17911801
*
1802+
* This parser classifies these particular comments as special "funky comments"
1803+
* which are made available for further processing.
1804+
*
17921805
* See https://html.spec.whatwg.org/#parse-error-invalid-first-character-of-tag-name
17931806
*/
17941807
if ( $this->is_closing_tag ) {
@@ -2576,6 +2589,7 @@ public function is_tag_closer() {
25762589
* - `#cdata-section` when matched on a CDATA node.
25772590
* - `#processing-instruction` when matched on a processing instruction.
25782591
* - `#comment` when matched on a comment.
2592+
* - `#doctype` when matched on a DOCTYPE declaration.
25792593
* - `#presumptuous-tag` when matched on an empty tag closer.
25802594
* - `#funky-comment` when matched on a funky comment.
25812595
*
@@ -2667,20 +2681,25 @@ public function get_token_name() {
26672681
* @return string
26682682
*/
26692683
public function get_modifiable_text() {
2670-
$at = $this->text_starts_at;
2671-
$length = $this->text_length;
2672-
$text = substr( $this->html, $at, $length );
2684+
if ( null === $this->text_starts_at ) {
2685+
return '';
2686+
}
2687+
2688+
$text = substr( $this->html, $this->text_starts_at, $this->text_length );
26732689

26742690
if (
26752691
self::STATE_CDATA_NODE === $this->parser_state ||
2676-
self::STATE_PI_NODE === $this->parser_state
2692+
self::STATE_COMMENT === $this->parser_state ||
2693+
self::STATE_DOCTYPE === $this->parser_state ||
2694+
self::STATE_PI_NODE === $this->parser_state ||
2695+
self::STATE_FUNKY_COMMENT === $this->parser_state
26772696
) {
26782697
return $text;
26792698
}
26802699

2681-
$text = html_entity_decode( $text, ENT_QUOTES | ENT_HTML5 | ENT_SUBSTITUTE );
2700+
$decoded = html_entity_decode( $text, ENT_QUOTES | ENT_HTML5 | ENT_SUBSTITUTE );
26822701

2683-
if ( empty( $text ) ) {
2702+
if ( empty( $decoded ) ) {
26842703
return '';
26852704
}
26862705

@@ -2694,14 +2713,14 @@ public function get_modifiable_text() {
26942713
switch ( $this->get_tag() ) {
26952714
case 'PRE':
26962715
case 'TEXTAREA':
2697-
if ( "\n" === $text[0] ) {
2698-
return substr( $text, 1 );
2716+
if ( "\n" === $decoded[0] ) {
2717+
return substr( $decoded, 1 );
26992718
}
27002719
break;
27012720
}
27022721
}
27032722

2704-
return $text;
2723+
return $decoded;
27052724
}
27062725

27072726
/**
@@ -3286,7 +3305,8 @@ private function matches() {
32863305
const STATE_DOCTYPE = 'STATE_DOCTYPE';
32873306

32883307
/**
3289-
* Indicates that the parser has found an empty tag closer.
3308+
* Indicates that the parser has found an empty tag closer `</>`.
3309+
*
32903310
* Note that in HTML there are no empty tag closers, and they
32913311
* are ignored. Nonetheless, the Tag Processor still
32923312
* recognizes them as they appear in the HTML stream.
@@ -3305,8 +3325,14 @@ private function matches() {
33053325
* Indicates that the parser has found a "funky comment"
33063326
* and it's possible to read and modify its modifiable text.
33073327
*
3328+
* Example:
3329+
*
3330+
* </%url>
3331+
* </{"wp-bit":"query/post-author"}>
3332+
* </2>
3333+
*
33083334
* Funky comments are tag closers with invalid tag names. Note
3309-
* that in HTML these are treated as HTML comments. Nonetheless,
3335+
* that in HTML these are turned into bogus comments. Nonetheless,
33103336
* the Tag Processor recognizes them in a stream of HTML and
33113337
* exposes them for inspection and modification.
33123338
*

0 commit comments

Comments
 (0)