From 99b6dd8c6a8a2a6da49bdb78400a4ea13726dde2 Mon Sep 17 00:00:00 2001 From: jrfnl Date: Thu, 2 Jun 2022 15:39:03 +0200 Subject: [PATCH 1/2] Tokenizer/PHP: bug fix for double quoted strings using `${` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While creating a sniff for PHPCompatibility to detect the PHP 8.2 deprecation of two of the four syntaxes to embed variables/expressions within text strings, I realized that for select examples of the "type 4" syntax - "Variable variables (“${expr}”, equivalent to (string) ${expr})" -, PHPCS did not, and probably never did, tokenize those correctly. Double quoted strings in PHPCS are normally tokenized as one token (per line), including any and all embedded variables and expressions to prevent problems in the scope map caused by braces within the string/expression. However, for some double quoted strings using `${` syntax, this was not the case and these were tokenized as outlined below, which would still lead to the problems in the scope map. Luckily, these syntax variants aren't used that frequently. Though I suspect if they were, this bug would have been discovered a lot sooner. Either way, fixed now. Including adding a dedicated test file based on examples related to the PHP 8.2 RFC. Note: this test file tests that these embedding syntaxes are tokenized correctly, it doesn't test the other parts of the related `Tokenizers\PHP` code block (re-tokenizing to one token per line, handling of binary casts). Example of the code for which the tokenizer was buggy: ```php "${foo["${bar}"]}"; ``` Was originally tokenized like so: ``` Ptr | Ln | Col | Cond | ( #) | Token Type | [len]: Content ------------------------------------------------------------------------- 2 | L3 | C 1 | CC 0 | ( 0) | T_ECHO | [ 4]: echo 3 | L3 | C 5 | CC 0 | ( 0) | T_WHITESPACE | [ 1]: ⸱ 4 | L3 | C 6 | CC 0 | ( 0) | T_DOUBLE_QUOTED_STRING | [ 8]: "${foo[" 5 | L3 | C 14 | CC 0 | ( 0) | T_DOLLAR_OPEN_CURLY_BRACES | [ 2]: ${ 6 | L3 | C 16 | CC 0 | ( 0) | T_STRING_VARNAME | [ 3]: bar 7 | L3 | C 19 | CC 0 | ( 0) | T_CLOSE_CURLY_BRACKET | [ 1]: } 8 | L3 | C 20 | CC 0 | ( 0) | T_DOUBLE_QUOTED_STRING | [ 4]: "]}" 9 | L3 | C 24 | CC 0 | ( 0) | T_SEMICOLON | [ 1]: ; 10 | L3 | C 25 | CC 0 | ( 0) | T_WHITESPACE | [ 0]: ``` Will be tokenized (correctly) like so with the fix included in this PR: ``` Ptr | Ln | Col | Cond | ( #) | Token Type | [len]: Content ------------------------------------------------------------------------- 2 | L3 | C 1 | CC 0 | ( 0) | T_ECHO | [ 4]: echo 3 | L3 | C 5 | CC 0 | ( 0) | T_WHITESPACE | [ 1]: ⸱ 4 | L3 | C 6 | CC 0 | ( 0) | T_DOUBLE_QUOTED_STRING | [ 18]: "${foo["${bar}"]}" 5 | L3 | C 24 | CC 0 | ( 0) | T_SEMICOLON | [ 1]: ; 6 | L3 | C 25 | CC 0 | ( 0) | T_WHITESPACE | [ 0]: ``` Refs: * https://www.php.net/manual/en/language.types.string.php#language.types.string.parsing * https://wiki.php.net/rfc/deprecate_dollar_brace_string_interpolation * https://gist.github.com/iluuu1994/72e2154fc4150f2258316b0255b698f2 --- package.xml | 6 + src/Tokenizers/PHP.php | 3 +- .../Core/Tokenizer/DoubleQuotedStringTest.inc | 49 +++++++ .../Core/Tokenizer/DoubleQuotedStringTest.php | 131 ++++++++++++++++++ 4 files changed, 188 insertions(+), 1 deletion(-) create mode 100644 tests/Core/Tokenizer/DoubleQuotedStringTest.inc create mode 100644 tests/Core/Tokenizer/DoubleQuotedStringTest.php diff --git a/package.xml b/package.xml index 0370ecb045..983391b5a9 100644 --- a/package.xml +++ b/package.xml @@ -182,6 +182,8 @@ http://pear.php.net/dtd/package-2.0.xsd"> + + @@ -2153,6 +2155,8 @@ http://pear.php.net/dtd/package-2.0.xsd"> + + @@ -2255,6 +2259,8 @@ http://pear.php.net/dtd/package-2.0.xsd"> + + diff --git a/src/Tokenizers/PHP.php b/src/Tokenizers/PHP.php index 4b0bf76550..5f2dc2c8c5 100644 --- a/src/Tokenizers/PHP.php +++ b/src/Tokenizers/PHP.php @@ -789,7 +789,8 @@ protected function tokenize($string) if ($subTokenIsArray === true) { $tokenContent .= $subToken[1]; - if ($subToken[1] === '{' + if (($subToken[1] === '{' + || $subToken[1] === '${') && $subToken[0] !== T_ENCAPSED_AND_WHITESPACE ) { $nestedVars[] = $i; diff --git a/tests/Core/Tokenizer/DoubleQuotedStringTest.inc b/tests/Core/Tokenizer/DoubleQuotedStringTest.inc new file mode 100644 index 0000000000..52253619ed --- /dev/null +++ b/tests/Core/Tokenizer/DoubleQuotedStringTest.inc @@ -0,0 +1,49 @@ +bar"; +/* testProperty2 */ +"{$foo->bar}"; + +/* testMethod1 */ +"{$foo->bar()}"; + +/* testClosure1 */ +"{$foo()}"; + +/* testChain1 */ +"{$foo['bar']->baz()()}"; + +/* testVariableVar1 */ +"${$bar}"; +/* testVariableVar2 */ +"${(foo)}"; +/* testVariableVar3 */ +"${foo->bar}"; + +/* testNested1 */ +"${foo["${bar}"]}"; +/* testNested2 */ +"${foo["${bar['baz']}"]}"; +/* testNested3 */ +"${foo->{$baz}}"; +/* testNested4 */ +"${foo->{${'a'}}}"; +/* testNested5 */ +"${foo->{"${'a'}"}}"; diff --git a/tests/Core/Tokenizer/DoubleQuotedStringTest.php b/tests/Core/Tokenizer/DoubleQuotedStringTest.php new file mode 100644 index 0000000000..f455fe7172 --- /dev/null +++ b/tests/Core/Tokenizer/DoubleQuotedStringTest.php @@ -0,0 +1,131 @@ + + * @copyright 2022 Squiz Pty Ltd (ABN 77 084 670 600) + * @license https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence + */ + +namespace PHP_CodeSniffer\Tests\Core\Tokenizer; + +use PHP_CodeSniffer\Tests\Core\AbstractMethodUnitTest; + +class DoubleQuotedStringTest extends AbstractMethodUnitTest +{ + + + /** + * Test that double quoted strings contain the complete string. + * + * @param string $testMarker The comment which prefaces the target token in the test file. + * @param string $expectedContent The expected content of the double quoted string. + * + * @dataProvider dataDoubleQuotedString + * @covers PHP_CodeSniffer\Tokenizers\PHP::tokenize + * + * @return void + */ + public function testDoubleQuotedString($testMarker, $expectedContent) + { + $tokens = self::$phpcsFile->getTokens(); + + $target = $this->getTargetToken($testMarker, T_DOUBLE_QUOTED_STRING); + $this->assertSame($expectedContent, $tokens[$target]['content']); + + }//end testDoubleQuotedString() + + + /** + * Data provider. + * + * @see testDoubleQuotedString() + * + * @return array + */ + public function dataDoubleQuotedString() + { + return [ + [ + 'testMarker' => '/* testSimple1 */', + 'expectedContent' => '"$foo"', + ], + [ + 'testMarker' => '/* testSimple2 */', + 'expectedContent' => '"{$foo}"', + ], + [ + 'testMarker' => '/* testSimple3 */', + 'expectedContent' => '"${foo}"', + ], + [ + 'testMarker' => '/* testDIM1 */', + 'expectedContent' => '"$foo[bar]"', + ], + [ + 'testMarker' => '/* testDIM2 */', + 'expectedContent' => '"{$foo[\'bar\']}"', + ], + [ + 'testMarker' => '/* testDIM3 */', + 'expectedContent' => '"${foo[\'bar\']}"', + ], + [ + 'testMarker' => '/* testProperty1 */', + 'expectedContent' => '"$foo->bar"', + ], + [ + 'testMarker' => '/* testProperty2 */', + 'expectedContent' => '"{$foo->bar}"', + ], + [ + 'testMarker' => '/* testMethod1 */', + 'expectedContent' => '"{$foo->bar()}"', + ], + [ + 'testMarker' => '/* testClosure1 */', + 'expectedContent' => '"{$foo()}"', + ], + [ + 'testMarker' => '/* testChain1 */', + 'expectedContent' => '"{$foo[\'bar\']->baz()()}"', + ], + [ + 'testMarker' => '/* testVariableVar1 */', + 'expectedContent' => '"${$bar}"', + ], + [ + 'testMarker' => '/* testVariableVar2 */', + 'expectedContent' => '"${(foo)}"', + ], + [ + 'testMarker' => '/* testVariableVar3 */', + 'expectedContent' => '"${foo->bar}"', + ], + [ + 'testMarker' => '/* testNested1 */', + 'expectedContent' => '"${foo["${bar}"]}"', + ], + [ + 'testMarker' => '/* testNested2 */', + 'expectedContent' => '"${foo["${bar[\'baz\']}"]}"', + ], + [ + 'testMarker' => '/* testNested3 */', + 'expectedContent' => '"${foo->{$baz}}"', + ], + [ + 'testMarker' => '/* testNested4 */', + 'expectedContent' => '"${foo->{${\'a\'}}}"', + ], + [ + 'testMarker' => '/* testNested5 */', + 'expectedContent' => '"${foo->{"${\'a\'}"}}"', + ], + ]; + + }//end dataDoubleQuotedString() + + +}//end class From 150c8c408f1e741b823bd9988a1c7bf8e350dec4 Mon Sep 17 00:00:00 2001 From: jrfnl Date: Thu, 2 Jun 2022 21:04:22 +0200 Subject: [PATCH 2/2] Tokenizer/PHP/DoubleQuotedStringTest: document how parse errors are handled --- tests/Core/Tokenizer/DoubleQuotedStringTest.inc | 3 +++ tests/Core/Tokenizer/DoubleQuotedStringTest.php | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/tests/Core/Tokenizer/DoubleQuotedStringTest.inc b/tests/Core/Tokenizer/DoubleQuotedStringTest.inc index 52253619ed..62535b1e41 100644 --- a/tests/Core/Tokenizer/DoubleQuotedStringTest.inc +++ b/tests/Core/Tokenizer/DoubleQuotedStringTest.inc @@ -47,3 +47,6 @@ "${foo->{${'a'}}}"; /* testNested5 */ "${foo->{"${'a'}"}}"; + +/* testParseError */ +"${foo["${bar diff --git a/tests/Core/Tokenizer/DoubleQuotedStringTest.php b/tests/Core/Tokenizer/DoubleQuotedStringTest.php index f455fe7172..cc9fe49ec4 100644 --- a/tests/Core/Tokenizer/DoubleQuotedStringTest.php +++ b/tests/Core/Tokenizer/DoubleQuotedStringTest.php @@ -123,6 +123,11 @@ public function dataDoubleQuotedString() 'testMarker' => '/* testNested5 */', 'expectedContent' => '"${foo->{"${\'a\'}"}}"', ], + [ + 'testMarker' => '/* testParseError */', + 'expectedContent' => '"${foo["${bar +', + ], ]; }//end dataDoubleQuotedString()