From 964a38c7cd4322daa085764979726b8d4a55f66e Mon Sep 17 00:00:00 2001 From: jrfnl Date: Wed, 12 May 2021 04:57:39 +0200 Subject: [PATCH 1/2] Tokenizer/PHP: bug fix - fix performance issue Happened to come across this while investigating something else. As it was, as long as no open parenthesis or variable was encountered, this snippet would loop to the end of the file for each `T_ARRAY` token encountered as it would only `break` in the `if/elseif` and there was no `else` clause. Basically, we only want the `array` keyword to be tokenized as `T_ARRAY` if it is an actual array declaration. In all other cases, it should be tokenized as `T_STRING`. This fixes the performance leak by only looping to the first non-empty token after the keyword, checking if it's an open parenthesis and retokenizing the `T_ARRAY` to `T_STRING` in all other cases. It also removes the need for the separate _return type_ retokenization of the array keyword. Includes adding unit tests specifically for the array keyword. --- package.xml | 6 + src/Tokenizers/PHP.php | 40 ++--- tests/Core/Tokenizer/ArrayKeywordTest.inc | 35 +++++ tests/Core/Tokenizer/ArrayKeywordTest.php | 170 ++++++++++++++++++++++ 4 files changed, 225 insertions(+), 26 deletions(-) create mode 100644 tests/Core/Tokenizer/ArrayKeywordTest.inc create mode 100644 tests/Core/Tokenizer/ArrayKeywordTest.php diff --git a/package.xml b/package.xml index ecd920dfa5..ce682c585c 100644 --- a/package.xml +++ b/package.xml @@ -150,6 +150,8 @@ http://pear.php.net/dtd/package-2.0.xsd"> + + @@ -2098,6 +2100,8 @@ http://pear.php.net/dtd/package-2.0.xsd"> + + @@ -2186,6 +2190,8 @@ http://pear.php.net/dtd/package-2.0.xsd"> + + diff --git a/src/Tokenizers/PHP.php b/src/Tokenizers/PHP.php index fec0e9b124..49b232cbc5 100644 --- a/src/Tokenizers/PHP.php +++ b/src/Tokenizers/PHP.php @@ -1769,23 +1769,6 @@ function return types. 
We want to keep the parenthesis map clean, break; }//end for - - // Any T_ARRAY tokens we find between here and the next - // token that can't be part of the return type, need to be - // converted to T_STRING tokens. - for ($x; $x < $numTokens; $x++) { - if ((is_array($tokens[$x]) === false && $tokens[$x] !== '|') - || (is_array($tokens[$x]) === true && isset($allowed[$tokens[$x][0]]) === false) - ) { - break; - } else if (is_array($tokens[$x]) === true && $tokens[$x][0] === T_ARRAY) { - $tokens[$x][0] = T_STRING; - - if (PHP_CODESNIFFER_VERBOSITY > 1) { - echo "\t\t* token $x changed from T_ARRAY to T_STRING".PHP_EOL; - } - } - } }//end if }//end if }//end if @@ -2066,20 +2049,25 @@ function return types. We want to keep the parenthesis map clean, } }//end if - // This is a special condition for T_ARRAY tokens used for - // type hinting function arguments as being arrays. We want to keep - // the parenthesis map clean, so let's tag these tokens as + // This is a special condition for T_ARRAY tokens used for anything else + // but array declarations, like type hinting function arguments as + // being arrays. + // We want to keep the parenthesis map clean, so let's tag these tokens as // T_STRING. if ($newToken['code'] === T_ARRAY) { - for ($i = $stackPtr; $i < $numTokens; $i++) { - if ($tokens[$i] === '(') { - break; - } else if ($tokens[$i][0] === T_VARIABLE) { - $newToken['code'] = T_STRING; - $newToken['type'] = 'T_STRING'; + for ($i = ($stackPtr + 1); $i < $numTokens; $i++) { + if (is_array($tokens[$i]) === false + || isset(Util\Tokens::$emptyTokens[$tokens[$i][0]]) === false + ) { + // Non-empty content. 
break; } } + + if ($i !== $numTokens && $tokens[$i] !== '(') { + $newToken['code'] = T_STRING; + $newToken['type'] = 'T_STRING'; + } } // This is a special case when checking PHP 5.5+ code in PHP < 5.5 diff --git a/tests/Core/Tokenizer/ArrayKeywordTest.inc b/tests/Core/Tokenizer/ArrayKeywordTest.inc new file mode 100644 index 0000000000..ce5c553cf6 --- /dev/null +++ b/tests/Core/Tokenizer/ArrayKeywordTest.inc @@ -0,0 +1,35 @@ + 10); + +/* testArrayWithComment */ +$var = Array /*comment*/ (1 => 10); + +/* testNestingArray */ +$var = array( + /* testNestedArray */ + array( + 'key' => 'value', + + /* testClosureReturnType */ + 'closure' => function($a) use($global) : Array {}, + ), +); + +/* testFunctionDeclarationParamType */ +function foo(array $a) {} + +/* testFunctionDeclarationReturnType */ +function foo($a) : int|array|null {} + +class Bar { + /* testClassConst */ + const ARRAY = []; + + /* testClassMethod */ + public function array() {} +} diff --git a/tests/Core/Tokenizer/ArrayKeywordTest.php b/tests/Core/Tokenizer/ArrayKeywordTest.php new file mode 100644 index 0000000000..237258a62a --- /dev/null +++ b/tests/Core/Tokenizer/ArrayKeywordTest.php @@ -0,0 +1,170 @@ + + * @copyright 2021 Squiz Pty Ltd (ABN 77 084 670 600) + * @license https://github.com/squizlabs/PHP_CodeSniffer/blob/master/licence.txt BSD Licence + */ + +namespace PHP_CodeSniffer\Tests\Core\Tokenizer; + +use PHP_CodeSniffer\Tests\Core\AbstractMethodUnitTest; + +class ArrayKeywordTest extends AbstractMethodUnitTest +{ + + + /** + * Test that the array keyword is correctly tokenized as `T_ARRAY`. + * + * @param string $testMarker The comment prefacing the target token. + * @param string $testContent Optional. The token content to look for. 
+ * + * @dataProvider dataArrayKeyword + * @covers PHP_CodeSniffer\Tokenizers\PHP::tokenize + * @covers PHP_CodeSniffer\Tokenizers\Tokenizer::createTokenMap + * + * @return void + */ + public function testArrayKeyword($testMarker, $testContent='array') + { + $tokens = self::$phpcsFile->getTokens(); + + $token = $this->getTargetToken($testMarker, [T_ARRAY, T_STRING], $testContent); + $tokenArray = $tokens[$token]; + + $this->assertSame(T_ARRAY, $tokenArray['code'], 'Token tokenized as '.$tokenArray['type'].', not T_ARRAY (code)'); + $this->assertSame('T_ARRAY', $tokenArray['type'], 'Token tokenized as '.$tokenArray['type'].', not T_ARRAY (type)'); + + $this->assertArrayHasKey('parenthesis_owner', $tokenArray, 'Parenthesis owner is not set'); + $this->assertArrayHasKey('parenthesis_opener', $tokenArray, 'Parenthesis opener is not set'); + $this->assertArrayHasKey('parenthesis_closer', $tokenArray, 'Parenthesis closer is not set'); + + }//end testArrayKeyword() + + + /** + * Data provider. + * + * @see testArrayKeyword() + * + * @return array + */ + public function dataArrayKeyword() + { + return [ + 'empty array' => ['/* testEmptyArray */'], + 'array with space before parenthesis' => ['/* testArrayWithSpace */'], + 'array with comment before parenthesis' => [ + '/* testArrayWithComment */', + 'Array', + ], + 'nested: outer array' => ['/* testNestingArray */'], + 'nested: inner array' => ['/* testNestedArray */'], + ]; + + }//end dataArrayKeyword() + + + /** + * Test that the array keyword when used in a type declaration is correctly tokenized as `T_STRING`. + * + * @param string $testMarker The comment prefacing the target token. + * @param string $testContent Optional. The token content to look for. 
+ * + * @dataProvider dataArrayType + * @covers PHP_CodeSniffer\Tokenizers\PHP::tokenize + * @covers PHP_CodeSniffer\Tokenizers\Tokenizer::createTokenMap + * + * @return void + */ + public function testArrayType($testMarker, $testContent='array') + { + $tokens = self::$phpcsFile->getTokens(); + + $token = $this->getTargetToken($testMarker, [T_ARRAY, T_STRING], $testContent); + $tokenArray = $tokens[$token]; + + $this->assertSame(T_STRING, $tokenArray['code'], 'Token tokenized as '.$tokenArray['type'].', not T_STRING (code)'); + $this->assertSame('T_STRING', $tokenArray['type'], 'Token tokenized as '.$tokenArray['type'].', not T_STRING (type)'); + + $this->assertArrayNotHasKey('parenthesis_owner', $tokenArray, 'Parenthesis owner is set'); + $this->assertArrayNotHasKey('parenthesis_opener', $tokenArray, 'Parenthesis opener is set'); + $this->assertArrayNotHasKey('parenthesis_closer', $tokenArray, 'Parenthesis closer is set'); + + }//end testArrayType() + + + /** + * Data provider. + * + * @see testArrayType() + * + * @return array + */ + public function dataArrayType() + { + return [ + 'closure return type' => [ + '/* testClosureReturnType */', + 'Array', + ], + 'function param type' => ['/* testFunctionDeclarationParamType */'], + 'function union return type' => ['/* testFunctionDeclarationReturnType */'], + ]; + + }//end dataArrayType() + + + /** + * Verify that the retokenization of `T_ARRAY` tokens to `T_STRING` is handled correctly + * for tokens with the contents 'array' which aren't in actual fact the array keyword. + * + * @param string $testMarker The comment prefacing the target token. + * @param string $testContent The token content to look for. 
+ * + * @dataProvider dataNotArrayKeyword + * @covers PHP_CodeSniffer\Tokenizers\PHP::tokenize + * @covers PHP_CodeSniffer\Tokenizers\Tokenizer::createTokenMap + * + * @return void + */ + public function testNotArrayKeyword($testMarker, $testContent='array') + { + $tokens = self::$phpcsFile->getTokens(); + + $token = $this->getTargetToken($testMarker, [T_ARRAY, T_STRING], $testContent); + $tokenArray = $tokens[$token]; + + $this->assertSame(T_STRING, $tokenArray['code'], 'Token tokenized as '.$tokenArray['type'].', not T_STRING (code)'); + $this->assertSame('T_STRING', $tokenArray['type'], 'Token tokenized as '.$tokenArray['type'].', not T_STRING (type)'); + + $this->assertArrayNotHasKey('parenthesis_owner', $tokenArray, 'Parenthesis owner is set'); + $this->assertArrayNotHasKey('parenthesis_opener', $tokenArray, 'Parenthesis opener is set'); + $this->assertArrayNotHasKey('parenthesis_closer', $tokenArray, 'Parenthesis closer is set'); + + }//end testNotArrayKeyword() + + + /** + * Data provider. + * + * @see testNotArrayKeyword() + * + * @return array + */ + public function dataNotArrayKeyword() + { + return [ + 'class-constant-name' => [ + '/* testClassConst */', + 'ARRAY', + ], + 'class-method-name' => ['/* testClassMethod */'], + ]; + + }//end dataNotArrayKeyword() + + +}//end class From a6daa05b1160873a661f28eee8d3937299564569 Mon Sep 17 00:00:00 2001 From: jrfnl Date: Wed, 12 May 2021 06:04:19 +0200 Subject: [PATCH 2/2] Sniff test: adjust two test files to match This removes two expected errors due to the tokenizer change made in the previous commit. ### `Generic.Arrays.DisallowLongArraySyntax` The code on line 13 is a parse error, so the sniff no longer throwing an error for it should not be our concern. ```php $var = array; ``` ### `Squiz.PHP.CommentedOutCode` The code which triggered the warning on line 35 was a docblock written as a block comment and not really commented out code anyway, so this could be considered a fix for a false positive. 
```php /* * The listeners array. * * @var array(PHP_CodeSniffer_Sniff) */ ``` --- .../Generic/Tests/Arrays/DisallowLongArraySyntaxUnitTest.php | 1 - src/Standards/Squiz/Tests/PHP/CommentedOutCodeUnitTest.php | 1 - 2 files changed, 2 deletions(-) diff --git a/src/Standards/Generic/Tests/Arrays/DisallowLongArraySyntaxUnitTest.php b/src/Standards/Generic/Tests/Arrays/DisallowLongArraySyntaxUnitTest.php index 0297681061..af1d9c9a86 100644 --- a/src/Standards/Generic/Tests/Arrays/DisallowLongArraySyntaxUnitTest.php +++ b/src/Standards/Generic/Tests/Arrays/DisallowLongArraySyntaxUnitTest.php @@ -35,7 +35,6 @@ public function getErrorList($testFile='') 6 => 1, 7 => 1, 12 => 1, - 13 => 1, ]; case 'DisallowLongArraySyntaxUnitTest.2.inc': return [ diff --git a/src/Standards/Squiz/Tests/PHP/CommentedOutCodeUnitTest.php b/src/Standards/Squiz/Tests/PHP/CommentedOutCodeUnitTest.php index d51f23ca3b..36c556d8c2 100644 --- a/src/Standards/Squiz/Tests/PHP/CommentedOutCodeUnitTest.php +++ b/src/Standards/Squiz/Tests/PHP/CommentedOutCodeUnitTest.php @@ -49,7 +49,6 @@ public function getWarningList($testFile='CommentedOutCodeUnitTest.inc') 8 => 1, 15 => 1, 19 => 1, - 35 => 1, 87 => 1, 91 => 1, 97 => 1,