Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 96 additions & 5 deletions lib/Cleantalk/ApbctWP/ContactsEncoder/Shortcodes/EncodeContentSC.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
namespace Cleantalk\ApbctWP\ContactsEncoder\Shortcodes;

use Cleantalk\ApbctWP\ContactsEncoder\ContactsEncoder;
use Cleantalk\ApbctWP\Escape;
use Cleantalk\ApbctWP\Variables\Cookie;
use Cleantalk\Common\ContactsEncoder\Dto\Params;
use Cleantalk\Common\ContactsEncoder\Exclusions\ExclusionsService;
Expand Down Expand Up @@ -102,24 +103,114 @@ public function changeContentBeforeEncoderModify($content)
return $content;
}

if ($this->isShortcodeInsideHtmlTag($content)) {
return $content;
}
Comment thread
alexandergull marked this conversation as resolved.
Comment thread
alexandergull marked this conversation as resolved.

// skip encoding if the content is already encoded with hook
// Extract shortcode content to protect it from email encoding
$shortcode_exist_pattern = sprintf('/\[%s\](.*?)\[\/%s\]/s', $this->public_name, $this->public_name);
// Extract shortcode content to protect it from email encoding, supports sc attributes(!)
$shortcode_exist_pattern = sprintf('/(\[%s(?:\s[^\]]*)?\])([\s\S]*?)(\[\/%s\])/s', $this->public_name, $this->public_name);
$content = preg_replace_callback($shortcode_exist_pattern, function ($matches) {
$placeholder = preg_replace('/EE\_\d+/', 'EE_' . (string)$this->shortcode_counter++, $this->exclusion_wrapper);
if (is_null($placeholder)) {
$placeholder = $this->exclusion_wrapper;
}
if (isset($matches[0])) {
$this->shortcode_replacements[$placeholder] = $matches[0];
if (isset($matches[1], $matches[2], $matches[3])) {
$prefix = $matches[1];
$entity = $matches[2];
$suffix = $matches[3];
$entity = Escape::escKsesPost($entity);
$this->shortcode_replacements[$placeholder] = $prefix . $entity . $suffix;
}

return $placeholder;
}, $content);

return $content;
}

/**
 * Tells whether at least one shortcode tag (opening or closing) starts
 * inside an HTML tag.
 *
 * Shortcodes placed in an HTML attribute context, e.g.
 *
 * <a title="[apbct_encode_data]...[/apbct_encode_data]">
 *
 * must not be extracted: once WordPress content filters (e.g. wptexturize())
 * mutate the surrounding content, the markup may become malformed, which may
 * potentially lead to attribute injection or mutation-XSS issues.
 *
 * Every opening and closing shortcode tag is located together with its
 * offset, and each offset is handed to isOffsetInsideHtmlTag().
 *
 * @param string $content The content to validate.
 *
 * @return bool True as soon as one shortcode boundary is found inside an HTML tag,
 * false when none is.
 */
protected function isShortcodeInsideHtmlTag($content)
{
    // Matches both "[name ...]" and "[/name]"; the opening form may carry shortcode attributes.
    $tag_pattern = sprintf(
        '/\[\/?%s(?:\s[^\]]*)?\]/',
        preg_quote($this->public_name, '/')
    );

    // PREG_OFFSET_CAPTURE turns every hit into a [text, offset] pair.
    preg_match_all($tag_pattern, $content, $found, PREG_OFFSET_CAPTURE);

    if (!isset($found[0])) {
        return false;
    }

    foreach ($found[0] as $hit) {
        // Skip malformed entries that carry no offset.
        if (!isset($hit[1])) {
            continue;
        }

        if ($this->isOffsetInsideHtmlTag($content, $hit[1])) {
            return true;
        }
    }

    return false;
}


/**
 * Reports whether the given offset falls inside an HTML tag.
 *
 * This is a lightweight context check: only the part of the content that
 * precedes the offset is inspected. The offset counts as "inside a tag"
 * when that part contains a "<" that is not followed by any ">".
 *
 * Example:
 *
 * <a href="value [OFFSET HERE]
 *
 * Here the last "<" has no ">" after it, so the offset is inside the
 * opening <a> tag.
 *
 * @param string $content The full content string.
 * @param int $offset Offset to validate (passed to substr() as the length of the inspected prefix).
 *
 * @return bool True if the offset is located inside an HTML tag,
 * false otherwise.
 */
public function isOffsetInsideHtmlTag($content, $offset)
{
    $preceding = substr($content, 0, $offset);
    $open_pos = strrpos($preceding, '<');

    // No "<" before the offset at all: cannot be inside a tag.
    if ($open_pos === false) {
        return false;
    }

    $close_pos = strrpos($preceding, '>');

    // Inside a tag when the last "<" has not been closed yet.
    return $close_pos === false || $close_pos < $open_pos;
}

/**
* Modifies the content after the encoder processes it.
*
Expand Down
143 changes: 143 additions & 0 deletions tests/ApbctWP/ContactsEncoder/TestContactsEncoderShortCodeEncode.php
Original file line number Diff line number Diff line change
Expand Up @@ -69,4 +69,147 @@ public function testChangeContentAfterEncoderModifyRestoresShortcodes()

$this->assertEquals('Test content', $result);
}

/**
 * A shortcode placed inside an HTML attribute must be left untouched.
 */
public function testShortcodeInsideHtmlAttributeIsNotProcessed()
{
    // The whole shortcode lives inside the title attribute of the <a> tag.
    $markup = '<a title="[apbct_encode_data]test[/apbct_encode_data]">X</a>';

    // No placeholder substitution is expected: the content comes back as is.
    $this->assertEquals(
        $markup,
        $this->shortcode->changeContentBeforeEncoderModify($markup)
    );
}

/**
 * A shortcode in plain text context is swapped for the first placeholder.
 */
public function testShortcodeOutsideHtmlIsProcessed()
{
    $source = '[apbct_encode_data]Test content[/apbct_encode_data]';
    $processed = $this->shortcode->changeContentBeforeEncoderModify($source);

    // The first extracted shortcode gets the placeholder with index 0.
    $this->assertStringContainsString('%%APBCT_SHORT_CODE_INCLUDE_EE_0%%', $processed);
    $this->assertNotEquals($source, $processed);
}

/**
 * Two shortcodes in the same content receive consecutive placeholders.
 */
public function testMultipleShortcodesAreHandled()
{
    $source = implode(' middle ', [
        '[apbct_encode_data]A[/apbct_encode_data]',
        '[apbct_encode_data]B[/apbct_encode_data]',
    ]);

    $processed = $this->shortcode->changeContentBeforeEncoderModify($source);

    // One placeholder per shortcode, numbered from 0.
    foreach (['%%APBCT_SHORT_CODE_INCLUDE_EE_0%%', '%%APBCT_SHORT_CODE_INCLUDE_EE_1%%'] as $placeholder) {
        $this->assertStringContainsString($placeholder, $processed);
    }
}

/**
 * A lone closing shortcode tag inside an HTML attribute must not corrupt
 * the markup and must not trigger any placeholder substitution.
 */
public function testHtmlAttributeBreakPayloadDoesNotExplode()
{
    $content = '<a href="http://x" title="[/apbct_encode_data]">Test</a>';

    $result = $this->shortcode->changeContentBeforeEncoderModify($content);

    // must remain stable: the surrounding tag survives ...
    $this->assertStringContainsString('<a', $result);
    $this->assertStringContainsString('</a>', $result);

    // ... no placeholder is injected anywhere (checked by the common placeholder prefix) ...
    $this->assertStringNotContainsString('%%APBCT_SHORT_CODE_INCLUDE_EE_', $result);

    // ... and nothing else is altered: the closing tag sits inside the <a> tag,
    // so the content is expected back unchanged.
    $this->assertEquals($content, $result);
}

/**
 * The offset of a shortcode tag placed in an attribute is reported as inside an HTML tag.
 */
public function testOffsetDetectionInsideHtmlTag()
{
    $markup = '<a title="[apbct_encode_data]">X</a>';
    $tag_offset = strpos($markup, '[apbct_encode_data]');

    // The "<" of the <a> tag is still unclosed at this offset.
    $inside = $this->shortcode->isOffsetInsideHtmlTag($markup, $tag_offset);

    $this->assertTrue($inside);
}

/**
 * The offset of a shortcode tag in plain text is reported as outside any HTML tag.
 */
public function testOffsetDetectionOutsideHtmlTag()
{
    $text = '[apbct_encode_data]test[/apbct_encode_data]';
    $tag_offset = strpos($text, '[apbct_encode_data]');

    // Nothing precedes the shortcode, so there is no "<" before the offset.
    $inside = $this->shortcode->isOffsetInsideHtmlTag($text, $tag_offset);

    $this->assertFalse($inside);
}

/**
 * An opening shortcode tag carrying attributes is still extracted.
 */
public function testShortcodeWithAttributesIsProcessed()
{
    $source = '[apbct_encode_data mode="blur"]Test[/apbct_encode_data]';
    $processed = $this->shortcode->changeContentBeforeEncoderModify($source);

    // The shortcode, attributes included, is replaced with the first placeholder.
    $this->assertStringContainsString('%%APBCT_SHORT_CODE_INCLUDE_EE_0%%', $processed);
    $this->assertNotEquals($source, $processed);
}

/**
 * A shortcode with attributes that sits inside an HTML attribute must be
 * left untouched, exactly like the attribute-less form.
 */
public function testShortcodeWithAttributesIsDetectedInHtmlContext()
{
    // Single quotes around the shortcode attribute value: inside a single-quoted
    // PHP literal "\"" would stay a literal backslash + quote and pollute the fixture.
    $content = '<a title="[apbct_encode_data mode=\'blur\']test[/apbct_encode_data]">X</a>';

    $result = $this->shortcode->changeContentBeforeEncoderModify($content);

    // must be blocked due to HTML attribute context
    $this->assertEquals($content, $result);
}

/**
 * One shortcode in an unsafe (HTML attribute) position blocks processing
 * of every shortcode in the content, including the safe ones.
 */
public function testMixedShortcodesSafeAndUnsafe()
{
    $safe_part = '[apbct_encode_data]SAFE[/apbct_encode_data]';
    $unsafe_part = '<a title="[apbct_encode_data]BAD[/apbct_encode_data]">X</a>';
    $source = $safe_part . $unsafe_part;

    $processed = $this->shortcode->changeContentBeforeEncoderModify($source);

    // Current design: the whole content is skipped if ANY HTML-unsafe shortcode exists.
    $this->assertEquals($source, $processed);
}

/**
 * No placeholder may be emitted for a shortcode located in an HTML attribute.
 */
public function testPlaceholderNeverAppearsInsideHtmlAttribute()
{
    $markup = '<a title="[apbct_encode_data]Test[/apbct_encode_data]">X</a>';

    // The first placeholder would be the one with index 0; it must be absent.
    $this->assertStringNotContainsString(
        '%%APBCT_SHORT_CODE_INCLUDE_EE_0%%',
        $this->shortcode->changeContentBeforeEncoderModify($markup)
    );
}

/**
 * A script tag passed as the "replacing_text" attribute must not reach the output verbatim.
 */
public function testCallbackEscapesReplacingText()
{
    $attributes = ['replacing_text' => '<script>alert(1)</script>'];

    $output = $this->shortcode->callback($attributes, 'content', 'apbct_encode_data');

    $this->assertStringNotContainsString('<script>', $output);
}

/**
 * Several placeholders present in the same content are all restored.
 */
public function testRestoreIntegrityWithMultiplePlaceholders()
{
    $first_placeholder = '%%APBCT_SHORT_CODE_INCLUDE_EE_0%%';
    $second_placeholder = '%%APBCT_SHORT_CODE_INCLUDE_EE_1%%';

    $this->shortcode->shortcode_replacements = [
        $first_placeholder => '[apbct_encode_data]A[/apbct_encode_data]',
        $second_placeholder => '[apbct_encode_data]B[/apbct_encode_data]',
    ];

    $content = $first_placeholder . ' and ' . $second_placeholder;

    $result = $this->shortcode->changeContentAfterEncoderModify($content);

    // "A" and "B" are also substrings of "APBCT", so the checks below would pass
    // on untouched content; assert first that the placeholders are really gone.
    // NOTE(review): assumes changeContentAfterEncoderModify() removes every
    // registered placeholder from the content - confirm against the implementation.
    $this->assertStringNotContainsString($first_placeholder, $result);
    $this->assertStringNotContainsString($second_placeholder, $result);

    $this->assertStringContainsString('A', $result);
    $this->assertStringContainsString('B', $result);
}

}
Loading