Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TextStrings::getCompleteTextString(): add BC for incorrectly tokenized double quoted strings #317

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
46 changes: 46 additions & 0 deletions PHPCSUtils/Utils/TextStrings.php
Expand Up @@ -33,6 +33,10 @@ class TextStrings
* where the content matching might result in false positives/false negatives if the text
* were to be examined line by line.
*
* Additionally, this method correctly handles a particular type of double quoted string
* with an embedded expression which is incorrectly tokenized in PHPCS itself prior to
* PHPCS version 3.x.x.
*
* @since 1.0.0
*
* @param \PHP_CodeSniffer\Files\File $phpcsFile The file where this token was found.
Expand Down Expand Up @@ -100,6 +104,48 @@ public static function getCompleteTextString(File $phpcsFile, $stackPtr, $stripQ
++$current;
} while (isset($tokens[$current]) && $tokens[$current]['code'] === $targetType);

if ($targetType === \T_DOUBLE_QUOTED_STRING) {
/*
* BC for PHPCS < ??.
* Prior to PHPCS 3.x.x, when a select group of embedded variables/expressions was encountered
* in a double quoted string, the embed would not be tokenized as part of the T_DOUBLE_QUOTED_STRING,
* but would still have the PHP native tokenization.
*/
if (isset($tokens[$current]) && $tokens[$current]['code'] === \T_DOLLAR_OPEN_CURLY_BRACES) {
$embeddedContent = $tokens[$current]['content'];
$nestedVars = [$current];
$foundEnd = false;

for ($current = ($current + 1); $current < $phpcsFile->numTokens; $current++) {
if ($tokens[$current]['code'] === \T_DOUBLE_QUOTED_STRING
&& empty($nestedVars) === true
) {
$embeddedContent .= self::getCompleteTextString($phpcsFile, $current, false);
$foundEnd = true;
break;
}

$embeddedContent .= $tokens[$current]['content'];

if (\strpos($tokens[$current]['content'], '{') !== false) {
$nestedVars[] = $current;
}

if (\strpos($tokens[$current]['content'], '}') !== false) {
\array_pop($nestedVars);
}
}

/*
* Only accept this as one of the broken tokenizations if this is not a parse error
* or if we reached the end of the file.
*/
if ($foundEnd === true || $current === $phpcsFile->numTokens) {
$string .= $embeddedContent;
}
}
}

if ($stripNewline === true) {
// Heredoc/nowdoc: strip the new line at the end of the string to emulate how PHP sees the string.
$string = \rtrim($string, "\r\n");
Expand Down
53 changes: 53 additions & 0 deletions Tests/Utils/TextStrings/GetCompleteTextString3604Test.inc
@@ -0,0 +1,53 @@
<?php

// These tests mirror the upstream Core\Tokenizer\DoubleQuotedStringsTest.
// Test source: https://gist.github.com/iluuu1994/72e2154fc4150f2258316b0255b698f2#file-test-php
//
// Each "/* test... */" marker comment prefaces the double quoted string which is the target
// of the data set with the same marker in GetCompleteTextString3604Test.php.

// Plain variable embeds: simple, curly brace and dollar-curly brace syntax.
/* testSimple1 */
"$foo";
/* testSimple2 */
"{$foo}";
/* testSimple3 */
"${foo}";

// Array access embeds.
/* testDIM1 */
"$foo[bar]";
/* testDIM2 */
"{$foo['bar']}";
/* testDIM3 */
"${foo['bar']}";

// Property access embeds.
/* testProperty1 */
"$foo->bar";
/* testProperty2 */
"{$foo->bar}";

// Method call embed.
/* testMethod1 */
"{$foo->bar()}";

// Closure call embed.
/* testClosure1 */
"{$foo()}";

// Chained array access -> method call -> call embed.
/* testChain1 */
"{$foo['bar']->baz()()}";

// Variable variable embeds using the dollar-curly brace syntax.
/* testVariableVar1 */
"${$bar}";
/* testVariableVar2 */
"${(foo)}";
/* testVariableVar3 */
"${foo->bar}";

// Embeds nested within other embeds.
/* testNested1 */
"${foo["${bar}"]}";
/* testNested2 */
"${foo["${bar['baz']}"]}";
/* testNested3 */
"${foo->{$baz}}";
/* testNested4 */
"${foo->{${'a'}}}";
/* testNested5 */
"${foo->{"${'a'}"}}";

// Intentional parse error: the string below is never closed.
// Keep this as the last test in the file and do not add anything after it.
/* testParseError */
"${foo["${bar
140 changes: 140 additions & 0 deletions Tests/Utils/TextStrings/GetCompleteTextString3604Test.php
@@ -0,0 +1,140 @@
<?php
/**
* PHPCSUtils, utility functions and classes for PHP_CodeSniffer sniff developers.
*
* @package PHPCSUtils
* @copyright 2019-2020 PHPCSUtils Contributors
* @license https://opensource.org/licenses/LGPL-3.0 LGPL3
* @link https://github.com/PHPCSStandards/PHPCSUtils
*/

namespace PHPCSUtils\Tests\Utils\TextStrings;

use PHPCSUtils\TestUtils\UtilityMethodTestCase;
use PHPCSUtils\Utils\TextStrings;

/**
 * Tests for the \PHPCSUtils\Utils\TextStrings::getCompleteTextString() method covering a specific tokenizer
 * issue as reported upstream in {@link https://github.com/squizlabs/PHP_CodeSniffer/pull/3604 PHPCS 3604}.
 *
 * Every data set points at a double quoted string in the accompanying test case file via
 * a marker comment and declares the text the method is expected to return for that string.
 *
 * @covers \PHPCSUtils\Utils\TextStrings::getCompleteTextString
 *
 * @group textstrings
 *
 * @since 1.0.0
 */
class GetCompleteTextString3604Test extends UtilityMethodTestCase
{

    /**
     * Test correctly retrieving the contents of a double quoted text string with embedded variables/expressions.
     *
     * @dataProvider dataGetCompleteTextString
     *
     * @param string $testMarker      The comment which prefaces the target token in the test file.
     * @param string $expectedContent The expected function return value.
     *
     * @return void
     */
    public function testGetCompleteTextString($testMarker, $expectedContent)
    {
        // Locate the first double quoted string token after the marker comment.
        $stackPtr = $this->getTargetToken($testMarker, \T_DOUBLE_QUOTED_STRING);

        $result = TextStrings::getCompleteTextString(self::$phpcsFile, $stackPtr);
        $this->assertSame($expectedContent, $result);
    }

    /**
     * Data provider.
     *
     * Note: the data set names are array keys and must be unique. PHP silently keeps only
     * the last value for a duplicated key, which would result in a data set never being run.
     *
     * @see testGetCompleteTextString() For the array format.
     *
     * @return array
     */
    public function dataGetCompleteTextString()
    {
        return [
            'Simple embedded variable 1' => [
                'testMarker'      => '/* testSimple1 */',
                'expectedContent' => '$foo',
            ],
            'Simple embedded variable 2' => [
                'testMarker'      => '/* testSimple2 */',
                'expectedContent' => '{$foo}',
            ],
            'Simple embedded variable 3' => [
                'testMarker'      => '/* testSimple3 */',
                'expectedContent' => '${foo}',
            ],
            'Embedded array access 1' => [
                'testMarker'      => '/* testDIM1 */',
                'expectedContent' => '$foo[bar]',
            ],
            'Embedded array access 2' => [
                'testMarker'      => '/* testDIM2 */',
                'expectedContent' => '{$foo[\'bar\']}',
            ],
            'Embedded array access 3' => [
                'testMarker'      => '/* testDIM3 */',
                'expectedContent' => '${foo[\'bar\']}',
            ],
            'Embedded property access 1' => [
                'testMarker'      => '/* testProperty1 */',
                'expectedContent' => '$foo->bar',
            ],
            'Embedded property access 2' => [
                'testMarker'      => '/* testProperty2 */',
                'expectedContent' => '{$foo->bar}',
            ],
            'Embedded method call 1' => [
                'testMarker'      => '/* testMethod1 */',
                'expectedContent' => '{$foo->bar()}',
            ],
            'Embedded closure call 1' => [
                'testMarker'      => '/* testClosure1 */',
                'expectedContent' => '{$foo()}',
            ],
            'Embedded chained array access -> method call -> call' => [
                'testMarker'      => '/* testChain1 */',
                'expectedContent' => '{$foo[\'bar\']->baz()()}',
            ],
            'Embedded variable variable 1' => [
                'testMarker'      => '/* testVariableVar1 */',
                'expectedContent' => '${$bar}',
            ],
            'Embedded variable variable 2' => [
                'testMarker'      => '/* testVariableVar2 */',
                'expectedContent' => '${(foo)}',
            ],
            'Embedded variable variable 3' => [
                'testMarker'      => '/* testVariableVar3 */',
                'expectedContent' => '${foo->bar}',
            ],
            'Embedded nested variable variable 1' => [
                'testMarker'      => '/* testNested1 */',
                'expectedContent' => '${foo["${bar}"]}',
            ],
            'Embedded nested variable variable 2' => [
                'testMarker'      => '/* testNested2 */',
                'expectedContent' => '${foo["${bar[\'baz\']}"]}',
            ],
            'Embedded nested variable variable 3' => [
                'testMarker'      => '/* testNested3 */',
                'expectedContent' => '${foo->{$baz}}',
            ],
            'Embedded nested variable variable 4' => [
                'testMarker'      => '/* testNested4 */',
                'expectedContent' => '${foo->{${\'a\'}}}',
            ],
            'Embedded nested variable variable 5' => [
                'testMarker'      => '/* testNested5 */',
                'expectedContent' => '${foo->{"${\'a\'}"}}',
            ],
            // The string is unterminated, so the expectation includes the opening quote and
            // the line break. The closing quote of the literal below must stay at column 0.
            'Parse error at end of file' => [
                'testMarker'      => '/* testParseError */',
                'expectedContent' => '"${foo["${bar
',
            ],
        ];
    }
}