Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions src/wp-includes/formatting.php
Original file line number Diff line number Diff line change
Expand Up @@ -4054,14 +4054,22 @@ function wp_trim_words( $text, $num_words = 55, $more = null ) {
$original_text = $text;
$text = wp_strip_all_tags( $text );
$num_words = (int) $num_words;
$is_utf8 = preg_match( '/^utf\-?8$/i', get_option( 'blog_charset' ) );

if ( str_starts_with( wp_get_word_count_type(), 'characters' ) && preg_match( '/^utf\-?8$/i', get_option( 'blog_charset' ) ) ) {
$text = trim( preg_replace( "/[\n\r\t ]+/", ' ', $text ), ' ' );
/*
* When the charset is UTF-8, use a Unicode-aware whitespace pattern
* so that non-breaking spaces (U+00A0), ideographic spaces (U+3000), and
* other Unicode whitespace characters are treated as word separators.
*/
$spaces = $is_utf8 ? '/\s+/u' : "/[\n\r\t ]+/";

if ( str_starts_with( wp_get_word_count_type(), 'characters' ) && $is_utf8 ) {
$text = trim( preg_replace( $spaces, ' ', $text ), ' ' );
preg_match_all( '/./u', $text, $words_array );
$words_array = array_slice( $words_array[0], 0, $num_words + 1 );
$sep = '';
} else {
$words_array = preg_split( "/[\n\r\t ]+/", $text, $num_words + 1, PREG_SPLIT_NO_EMPTY );
$words_array = preg_split( $spaces, $text, $num_words + 1, PREG_SPLIT_NO_EMPTY );
$sep = ' ';
}

Expand Down
37 changes: 37 additions & 0 deletions tests/phpunit/tests/formatting/wpTrimWords.php
Original file line number Diff line number Diff line change
Expand Up @@ -87,4 +87,41 @@ public function test_works_with_non_numeric_num_words() {
$this->assertSame( '', wp_trim_words( $this->long_text, null, '' ) );
$this->assertSame( 'Lorem ipsum dolor', wp_trim_words( $this->long_text, '3', '' ) );
}

/**
 * Ensures U+3000 (ideographic space) acts as a word boundary when trimming.
 *
 * Six words joined solely by ideographic spaces are trimmed to three, and the
 * result is expected to be the first three words joined by single ASCII spaces.
 *
 * @ticket 64552
 */
public function test_splits_on_ideographic_space() {
	// UTF-8 byte sequence for U+3000 Ideographic Space.
	$separator = "\xe3\x80\x80";
	$words     = array( 'one', 'two', 'three', 'four', 'five', 'six' );
	$input     = implode( $separator, $words );

	$trimmed = wp_trim_words( $input, 3, '' );

	$this->assertSame( 'one two three', $trimmed );
}

/**
 * Ensures U+00A0 (non-breaking space) acts as a word boundary when trimming.
 *
 * Four words joined solely by non-breaking spaces are trimmed to two, and the
 * result is expected to be the first two words joined by a single ASCII space.
 *
 * @ticket 64552
 */
public function test_splits_on_non_breaking_space() {
	// UTF-8 byte sequence for U+00A0 Non-Breaking Space.
	$separator = "\xc2\xa0";
	$input     = implode( $separator, array( 'one', 'two', 'three', 'four' ) );

	$trimmed = wp_trim_words( $input, 2, '' );

	$this->assertSame( 'one two', $trimmed );
}

/**
 * Ensures a mix of ASCII and Unicode whitespace separators is handled.
 *
 * The input separates five words with, in order, an ASCII space, an
 * ideographic space, a non-breaking space, and a tab. Trimming to four words
 * is expected to yield the first four joined by single ASCII spaces.
 *
 * @ticket 64552
 */
public function test_splits_on_mixed_unicode_whitespace() {
	$ideographic_space  = "\xe3\x80\x80"; // U+3000 Ideographic Space.
	$non_breaking_space = "\xc2\xa0";     // U+00A0 Non-Breaking Space.

	// Each separator appears exactly once, in a fixed order.
	$input = 'alpha bravo'
		. $ideographic_space
		. 'charlie'
		. $non_breaking_space
		. 'delta'
		. "\t"
		. 'echo';

	$trimmed = wp_trim_words( $input, 4, '' );

	$this->assertSame( 'alpha bravo charlie delta', $trimmed );
}
}
Loading