From b66b23a03f7db6c09c101a7805dbfa896f38b374 Mon Sep 17 00:00:00 2001 From: jrfnl Date: Sat, 21 Aug 2021 00:16:46 +0200 Subject: [PATCH 1/7] ClassMapGeneratorTest: add test with consecutive duplicate heredoc markers ... as well as a test with heredoc markers with only a newline character between the start and end marker. --- .../Test/Autoload/ClassMapGeneratorTest.php | 3 +++ .../Autoload/Fixtures/classmap/StripNoise.php | 24 +++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/tests/Composer/Test/Autoload/ClassMapGeneratorTest.php b/tests/Composer/Test/Autoload/ClassMapGeneratorTest.php index 261fb7cede6e..d367d72e0f3f 100644 --- a/tests/Composer/Test/Autoload/ClassMapGeneratorTest.php +++ b/tests/Composer/Test/Autoload/ClassMapGeneratorTest.php @@ -55,6 +55,9 @@ public function getTestCreateMapTests() 'Foo\\LargeGap' => realpath(__DIR__) . '/Fixtures/classmap/LargeGap.php', 'Foo\\MissingSpace' => realpath(__DIR__) . '/Fixtures/classmap/MissingSpace.php', 'Foo\\StripNoise' => realpath(__DIR__) . '/Fixtures/classmap/StripNoise.php', + 'Foo\\First' => realpath(__DIR__) . '/Fixtures/classmap/StripNoise.php', + 'Foo\\Second' => realpath(__DIR__) . '/Fixtures/classmap/StripNoise.php', + 'Foo\\Third' => realpath(__DIR__) . '/Fixtures/classmap/StripNoise.php', 'Foo\\SlashedA' => realpath(__DIR__) . '/Fixtures/classmap/BackslashLineEndingString.php', 'Foo\\SlashedB' => realpath(__DIR__) . '/Fixtures/classmap/BackslashLineEndingString.php', 'Unicode\\↑\\↑' => realpath(__DIR__) . '/Fixtures/classmap/Unicode.php', diff --git a/tests/Composer/Test/Autoload/Fixtures/classmap/StripNoise.php b/tests/Composer/Test/Autoload/Fixtures/classmap/StripNoise.php index 02da0a6c435c..17c3aaeeef66 100644 --- a/tests/Composer/Test/Autoload/Fixtures/classmap/StripNoise.php +++ b/tests/Composer/Test/Autoload/Fixtures/classmap/StripNoise.php @@ -90,3 +90,27 @@ public function test_simple_string() return 'class FailSimpleString {}'; } } + +// Issue #10067. 
+abstract class First { + public function heredocDuplicateMarker(): void { + echo << Date: Sat, 21 Aug 2021 15:20:16 +0200 Subject: [PATCH 2/7] ClassMapGenerator: stabilize the heredoc/nowdoc stripping I've looked into 10067 and have come to the conclusion that using a single regex to strip the heredoc/nowdocs is always going to run into trouble as: * Either the matching will be too greedy (issue 10067); * Or the matching will run into backtrack limits for large heredoc/nowdocs. We cannot solve both within a single regex. So, I'm proposing a slightly different solution which should support both and should also improve performance for files containing large heredoc/nowdocs. The `stripHereNowDocs()` function will find a start marker and remember the offset of the start marker. It will then find the end marker and strip the contents between the two (replace with `null`). The function will then call itself recursively until all heredocs/nowdocs in a file have been removed. --- src/Composer/Autoload/ClassMapGenerator.php | 33 ++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/src/Composer/Autoload/ClassMapGenerator.php b/src/Composer/Autoload/ClassMapGenerator.php index 085136f0e151..6d9c00338958 100644 --- a/src/Composer/Autoload/ClassMapGenerator.php +++ b/src/Composer/Autoload/ClassMapGenerator.php @@ -246,7 +246,7 @@ private static function findClasses($path) } // strip heredocs/nowdocs - $contents = preg_replace('{<<<[ \t]*([\'"]?)(\w+)\\1(?:\r\n|\n|\r)(?:.*(?=[\r\n]+[ \t]*\\2))[\r\n]+[ \t]*\\2(?=\s*[;,.)])}s', 'null', $contents); + $contents = self::stripHereNowDocs($contents); // strip strings $contents = preg_replace('{"[^"\\\\]*+(\\\\.[^"\\\\]*+)*+"|\'[^\'\\\\]*+(\\\\.[^\'\\\\]*+)*+\'}s', 'null', $contents); // strip leading non-php code if needed @@ -303,4 +303,35 @@ private static function findClasses($path) return $classes; } + + /** + * Strip heredoc and nowdoc blocks from the contents of a file. 
+ * + * @param string $contents File contents. + * + * @return string The cleaned up file contents. + */ + private static function stripHereNowDocs($contents) + { + // Find a heredoc/nowdoc start marker an its offset in the file. + $result = preg_match('{<<<[ \t]*([\'"]?)(?P<marker>\w+)\\1[\r\n]}', $contents, $startMatches, PREG_OFFSET_CAPTURE); + if ($result < 1) { + return $contents; + } + + $offset = ($startMatches['marker'][1] + strlen($startMatches['marker'][0])); + $pattern = '`[\r\n]+[ \t]*' . preg_quote($startMatches['marker'][0], '`') . '(?=\s*[;,.)])`'; + + // Find the corresponding heredoc/nowdoc end marker an its offset in the file. + $result = preg_match($pattern, $contents, $endMatches, PREG_OFFSET_CAPTURE, $offset); + if ($result < 1) { + return $contents; + } + + // Strip the complete heredoc/nowdoc and replace it with "null". + $contents = substr_replace($contents, 'null', $startMatches[0][1], (($endMatches[0][1] + strlen($endMatches[0][0])) - $startMatches[0][1])); + + // Recurse to strip the next heredoc/nowdoc until there are none left. 
+ return self::stripHereNowDocs($contents); + } } From c44be998ab707f070a41316a7d23dfec55a5df06 Mon Sep 17 00:00:00 2001 From: Jordi Boggiano Date: Sat, 21 Aug 2021 17:45:32 +0200 Subject: [PATCH 3/7] Undo new stripping code and fix regex --- src/Composer/Autoload/ClassMapGenerator.php | 55 +++++++++------------ 1 file changed, 23 insertions(+), 32 deletions(-) diff --git a/src/Composer/Autoload/ClassMapGenerator.php b/src/Composer/Autoload/ClassMapGenerator.php index 6d9c00338958..b493e1ddce1c 100644 --- a/src/Composer/Autoload/ClassMapGenerator.php +++ b/src/Composer/Autoload/ClassMapGenerator.php @@ -246,7 +246,29 @@ private static function findClasses($path) } // strip heredocs/nowdocs - $contents = self::stripHereNowDocs($contents); + $contents = preg_replace('{ + # opening heredoc/nowdoc delimiter (word-chars) + <<<[ \t]*([\'"]?)(\w+)\\1 + # needs to be followed by a newline + (?:\r\n|\n|\r) + # the meat of it, matching line by line until end delimiter + (?: + # a valid line is either.. + (?: + # non-word or non-space char, then anything goes for the rest of the line + [^\s\w][^\r\n]+ + # white-space (possessive match) not followed by the delimiter, then anything goes for the rest of the line + | \s*+(?!\\2)[^\r\n]+ + # white-space but no new lines + | [\t\f\v ]+ + ) + # end of line(s) + [\r\n]+ + )* + # end delimiter + \s* \\2 (?=\s*[;,.)]) + }x', 'null', $contents); + // strip strings $contents = preg_replace('{"[^"\\\\]*+(\\\\.[^"\\\\]*+)*+"|\'[^\'\\\\]*+(\\\\.[^\'\\\\]*+)*+\'}s', 'null', $contents); // strip leading non-php code if needed @@ -303,35 +325,4 @@ private static function findClasses($path) return $classes; } - - /** - * Strip heredoc and nowdoc blocks from the contents of a file. - * - * @param string $contents File contents. - * - * @return string The cleaned up file contents. - */ - private static function stripHereNowDocs($contents) - { - // Find a heredoc/nowdoc start marker an its offset in the file. 
- $result = preg_match('{<<<[ \t]*([\'"]?)(?P<marker>\w+)\\1[\r\n]}', $contents, $startMatches, PREG_OFFSET_CAPTURE); - if ($result < 1) { - return $contents; - } - - $offset = ($startMatches['marker'][1] + strlen($startMatches['marker'][0])); - $pattern = '`[\r\n]+[ \t]*' . preg_quote($startMatches['marker'][0], '`') . '(?=\s*[;,.)])`'; - - // Find the corresponding heredoc/nowdoc end marker an its offset in the file. - $result = preg_match($pattern, $contents, $endMatches, PREG_OFFSET_CAPTURE, $offset); - if ($result < 1) { - return $contents; - } - - // Strip the complete heredoc/nowdoc and replace it with "null". - $contents = substr_replace($contents, 'null', $startMatches[0][1], (($endMatches[0][1] + strlen($endMatches[0][0])) - $startMatches[0][1])); - - // Recurse to strip the next heredoc/nowdoc until there are none left. - return self::stripHereNowDocs($contents); - } } From f6c446bdd71d568c04e3c02a0aac0169e9575f99 Mon Sep 17 00:00:00 2001 From: Jordi Boggiano Date: Sun, 22 Aug 2021 12:13:25 +0200 Subject: [PATCH 4/7] Tweak to allow matching delimiter within the string --- src/Composer/Autoload/ClassMapGenerator.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Composer/Autoload/ClassMapGenerator.php b/src/Composer/Autoload/ClassMapGenerator.php index b493e1ddce1c..b37fb25ca272 100644 --- a/src/Composer/Autoload/ClassMapGenerator.php +++ b/src/Composer/Autoload/ClassMapGenerator.php @@ -257,8 +257,8 @@ private static function findClasses($path) (?: # non-word or non-space char, then anything goes for the rest of the line [^\s\w][^\r\n]+ - # white-space (possessive match) not followed by the delimiter, then anything goes for the rest of the line - | \s*+(?!\\2)[^\r\n]+ + # white-space (possessive match) not followed by the end delimiter, then anything goes for the rest of the line + | \s*+(?!\\2 \s*[;,.)])[^\r\n]+ # white-space but no new lines | [\t\f\v ]+ ) From 3f79e59f69da9faba32aaeeb548112ad6e1b2a9f Mon Sep 17 00:00:00 2001 From: 
Jordi Boggiano Date: Sun, 22 Aug 2021 13:49:03 +0200 Subject: [PATCH 5/7] Add test assertions for heredoc marker inside the text --- .../Composer/Test/Autoload/Fixtures/classmap/StripNoise.php | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/Composer/Test/Autoload/Fixtures/classmap/StripNoise.php b/tests/Composer/Test/Autoload/Fixtures/classmap/StripNoise.php index 17c3aaeeef66..c1d6c3df4c07 100644 --- a/tests/Composer/Test/Autoload/Fixtures/classmap/StripNoise.php +++ b/tests/Composer/Test/Autoload/Fixtures/classmap/StripNoise.php @@ -19,8 +19,14 @@ class FailHeredocWhitespace } WHITESPACE . <<< MARKERINTEXT In PHP < 7.3, the docblock marker could occur in the text as long as it did not occur at the very start of the line. +class FailHeredocMarkerInText +{ +} But, what are you blind McFly, it's there. How else do you explain that wreck out there? Doc, Doc. Oh, no. You're alive. Bullet proof vest, how did you know, I never got a chance to tell you. About all that talk about screwing up future events, the space time continuum. Okay, alright, I'll prove it to you. MARKERINTEXT +class FailHeredocMarkerInText2 +{ +} Look at my driver's license, expires 1987. Look at my birthday, for crying out load I haven't even been born yet. And, look at this picture, my brother, my sister, and me. Look at the sweatshirt, Doc, class of 1984. Why do you keep following me around? Hey beat it, spook, this don't concern you. MARKERINTEXT . 
<<<"DOUBLEQUOTES" class FailHeredocDoubleQuotes From 6ab1b6a7d2d4a9cd708030b85181d70883d50b24 Mon Sep 17 00:00:00 2001 From: Jordi Boggiano Date: Mon, 23 Aug 2021 22:18:03 +0200 Subject: [PATCH 6/7] Regex simplifications --- src/Composer/Autoload/ClassMapGenerator.php | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/src/Composer/Autoload/ClassMapGenerator.php b/src/Composer/Autoload/ClassMapGenerator.php index b37fb25ca272..2c46d000ef84 100644 --- a/src/Composer/Autoload/ClassMapGenerator.php +++ b/src/Composer/Autoload/ClassMapGenerator.php @@ -253,20 +253,13 @@ private static function findClasses($path) (?:\r\n|\n|\r) # the meat of it, matching line by line until end delimiter (?: - # a valid line is either.. - (?: - # non-word or non-space char, then anything goes for the rest of the line - [^\s\w][^\r\n]+ - # white-space (possessive match) not followed by the end delimiter, then anything goes for the rest of the line - | \s*+(?!\\2 \s*[;,.)])[^\r\n]+ - # white-space but no new lines - | [\t\f\v ]+ - ) + # a valid line is optional white-space (possessive match) not followed by the end delimiter, then anything goes for the rest of the line + [\t ]*+(?!\\2 [\t \r\n]*[;,.)])[^\r\n]* # end of line(s) [\r\n]+ )* # end delimiter - \s* \\2 (?=\s*[;,.)]) + [\t ]* \\2 (?=[\t \r\n]*[;,.)]) }x', 'null', $contents); // strip strings From d8054d1d2f2c55f26a032fd6c6b43c9b340bb5b8 Mon Sep 17 00:00:00 2001 From: Jordi Boggiano Date: Sun, 29 Aug 2021 12:19:33 +0200 Subject: [PATCH 7/7] Add more possessive quantifiers, unicode flag and support for more post-heredoc syntax, fix test file syntax being invalid --- src/Composer/Autoload/ClassMapGenerator.php | 10 +++---- .../Autoload/Fixtures/classmap/StripNoise.php | 29 ++++++++++++++++++- 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/src/Composer/Autoload/ClassMapGenerator.php b/src/Composer/Autoload/ClassMapGenerator.php index 2c46d000ef84..bad8b56f9ff3 100644 --- 
a/src/Composer/Autoload/ClassMapGenerator.php +++ b/src/Composer/Autoload/ClassMapGenerator.php @@ -248,19 +248,19 @@ private static function findClasses($path) // strip heredocs/nowdocs $contents = preg_replace('{ # opening heredoc/nowdoc delimiter (word-chars) - <<<[ \t]*([\'"]?)(\w+)\\1 + <<<[ \t]*+([\'"]?)(\w++)\\1 # needs to be followed by a newline (?:\r\n|\n|\r) # the meat of it, matching line by line until end delimiter (?: # a valid line is optional white-space (possessive match) not followed by the end delimiter, then anything goes for the rest of the line - [\t ]*+(?!\\2 [\t \r\n]*[;,.)])[^\r\n]* + [\t ]*+(?!\\2 \b)[^\r\n]*+ # end of line(s) - [\r\n]+ + [\r\n]++ )* # end delimiter - [\t ]* \\2 (?=[\t \r\n]*[;,.)]) - }x', 'null', $contents); + [\t ]*+ \\2 (?=\b) + }xu', 'null', $contents); // strip strings $contents = preg_replace('{"[^"\\\\]*+(\\\\.[^"\\\\]*+)*+"|\'[^\'\\\\]*+(\\\\.[^\'\\\\]*+)*+\'}s', 'null', $contents); diff --git a/tests/Composer/Test/Autoload/Fixtures/classmap/StripNoise.php b/tests/Composer/Test/Autoload/Fixtures/classmap/StripNoise.php index c1d6c3df4c07..caa50e41d42a 100644 --- a/tests/Composer/Test/Autoload/Fixtures/classmap/StripNoise.php +++ b/tests/Composer/Test/Autoload/Fixtures/classmap/StripNoise.php @@ -23,7 +23,7 @@ class FailHeredocMarkerInText { } But, what are you blind McFly, it's there. How else do you explain that wreck out there? Doc, Doc. Oh, no. You're alive. Bullet proof vest, how did you know, I never got a chance to tell you. About all that talk about screwing up future events, the space time continuum. Okay, alright, I'll prove it to you. - MARKERINTEXT + . MARKERINTEXT class FailHeredocMarkerInText2 { } @@ -95,6 +95,33 @@ public function test_simple_string() { return 'class FailSimpleString {}'; } + + public function test_unicode_heredoc() + { + return array(1, 2, <<<öéçив必 + class FailUnicode + { + } + öéçив必, 3, 4); + } + + public function test_wrapped_in_curly_brackets() + { + return ${<<