Skip to content

Commit

Permalink
Drop Lexer::getTokens() method
Browse files Browse the repository at this point in the history
This doesn't make a lot of sense now that Lexer::tokenize() returns
the tokens.

The tokens for the last parse should be fetched via
Parser::getTokens() instead.
  • Loading branch information
nikic committed Sep 16, 2023
1 parent 263fa80 commit 06c7ab5
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 35 deletions.
39 changes: 15 additions & 24 deletions lib/PhpParser/Lexer.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,6 @@
require __DIR__ . '/compatibility_tokens.php';

class Lexer {
/** @var list<Token> List of tokens */
protected array $tokens;

/**
* Tokenize the provided source code.
*
Expand All @@ -31,14 +28,14 @@ public function tokenize(string $code, ?ErrorHandler $errorHandler = null): arra

$scream = ini_set('xdebug.scream', '0');

$this->tokens = @Token::tokenize($code);
$this->postprocessTokens($errorHandler);
$tokens = @Token::tokenize($code);
$this->postprocessTokens($tokens, $errorHandler);

if (false !== $scream) {
ini_set('xdebug.scream', $scream);
}

return $this->tokens;
return $tokens;
}

private function handleInvalidCharacter(Token $token, ErrorHandler $errorHandler): void {
Expand Down Expand Up @@ -66,41 +63,44 @@ private function isUnterminatedComment(Token $token): bool {
&& substr($token->text, -2) !== '*/';
}

protected function postprocessTokens(ErrorHandler $errorHandler): void {
/**
* @param list<Token> $tokens
*/
protected function postprocessTokens(array &$tokens, ErrorHandler $errorHandler): void {
// This function reports errors (bad characters and unterminated comments) in the token
// array, and performs certain canonicalizations:
// * Use PHP 8.1 T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG and
// T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG tokens used to disambiguate intersection types.
// * Add a sentinel token with ID 0.

$numTokens = \count($this->tokens);
$numTokens = \count($tokens);
if ($numTokens === 0) {
// Empty input edge case: Just add the sentinel token.
$this->tokens[] = new Token(0, "\0", 1, 0);
$tokens[] = new Token(0, "\0", 1, 0);
return;
}

for ($i = 0; $i < $numTokens; $i++) {
$token = $this->tokens[$i];
$token = $tokens[$i];
if ($token->id === \T_BAD_CHARACTER) {
$this->handleInvalidCharacter($token, $errorHandler);
}

if ($token->id === \ord('&')) {
$next = $i + 1;
while (isset($this->tokens[$next]) && $this->tokens[$next]->id === \T_WHITESPACE) {
while (isset($tokens[$next]) && $tokens[$next]->id === \T_WHITESPACE) {
$next++;
}
$followedByVarOrVarArg = isset($this->tokens[$next]) &&
$this->tokens[$next]->is([\T_VARIABLE, \T_ELLIPSIS]);
$followedByVarOrVarArg = isset($tokens[$next]) &&
$tokens[$next]->is([\T_VARIABLE, \T_ELLIPSIS]);
$token->id = $followedByVarOrVarArg
? \T_AMPERSAND_FOLLOWED_BY_VAR_OR_VARARG
: \T_AMPERSAND_NOT_FOLLOWED_BY_VAR_OR_VARARG;
}
}

// Check for unterminated comment
$lastToken = $this->tokens[$numTokens - 1];
$lastToken = $tokens[$numTokens - 1];
if ($this->isUnterminatedComment($lastToken)) {
$errorHandler->handleError(new Error('Unterminated comment', [
'startLine' => $lastToken->line,
Expand All @@ -111,15 +111,6 @@ protected function postprocessTokens(ErrorHandler $errorHandler): void {
}

// Add sentinel token.
$this->tokens[] = new Token(0, "\0", $lastToken->getEndLine(), $lastToken->getEndPos());
}

/**
* Returns the token array for the last tokenized source code.
*
* @return Token[] Array of tokens
*/
public function getTokens(): array {
return $this->tokens;
$tokens[] = new Token(0, "\0", $lastToken->getEndLine(), $lastToken->getEndPos());
}
}
24 changes: 15 additions & 9 deletions lib/PhpParser/Lexer/Emulative.php
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
use PhpParser\Lexer\TokenEmulator\ReverseEmulator;
use PhpParser\Lexer\TokenEmulator\TokenEmulator;
use PhpParser\PhpVersion;
use PhpParser\Token;

class Emulative extends Lexer {
/** @var array{int, string, string}[] Patches used to reverse changes introduced in the code */
Expand Down Expand Up @@ -81,9 +82,9 @@ public function tokenize(string $code, ?ErrorHandler $errorHandler = null): arra
}

$collector = new ErrorHandler\Collecting();
parent::tokenize($code, $collector);
$tokens = parent::tokenize($code, $collector);
$this->sortPatches();
$this->fixupTokens();
$tokens = $this->fixupTokens($tokens);

$errors = $collector->getErrors();
if (!empty($errors)) {
Expand All @@ -94,10 +95,10 @@ public function tokenize(string $code, ?ErrorHandler $errorHandler = null): arra
}

foreach ($emulators as $emulator) {
$this->tokens = $emulator->emulate($code, $this->tokens);
$tokens = $emulator->emulate($code, $tokens);
}

return $this->tokens;
return $tokens;
}

private function isForwardEmulationNeeded(PhpVersion $emulatorPhpVersion): bool {
Expand All @@ -118,9 +119,13 @@ private function sortPatches(): void {
});
}

private function fixupTokens(): void {
/**
* @param list<Token> $tokens
* @return list<Token>
*/
private function fixupTokens(array $tokens): array {
if (\count($this->patches) === 0) {
return;
return $tokens;
}

// Load first patch
Expand All @@ -130,8 +135,8 @@ private function fixupTokens(): void {
// We use a manual loop over the tokens, because we modify the array on the fly
$posDelta = 0;
$lineDelta = 0;
for ($i = 0, $c = \count($this->tokens); $i < $c; $i++) {
$token = $this->tokens[$i];
for ($i = 0, $c = \count($tokens); $i < $c; $i++) {
$token = $tokens[$i];
$pos = $token->pos;
$token->pos += $posDelta;
$token->line += $lineDelta;
Expand All @@ -142,7 +147,7 @@ private function fixupTokens(): void {
if ($patchType === 'remove') {
if ($patchPos === $pos && $patchTextLen === $len) {
// Remove token entirely
array_splice($this->tokens, $i, 1, []);
array_splice($tokens, $i, 1, []);
$i--;
$c--;
} else {
Expand Down Expand Up @@ -182,6 +187,7 @@ private function fixupTokens(): void {

$posDelta += $localPosDelta;
}
return $tokens;
}

/**
Expand Down
4 changes: 2 additions & 2 deletions test/PhpParser/PrettyPrinterTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ public function testFormatPreservingPrint($name, $code, $modification, $expected
$printer = new PrettyPrinter\Standard();

$oldStmts = $parser->parse($code);
$oldTokens = $lexer->getTokens();
$oldTokens = $parser->getTokens();

$newStmts = $traverser->traverse($oldStmts);

Expand Down Expand Up @@ -241,7 +241,7 @@ public function testRoundTripPrint($name, $code, $expected, $modeLine) {
return;
}

$oldTokens = $lexer->getTokens();
$oldTokens = $parser->getTokens();

$newStmts = $traverser->traverse($oldStmts);

Expand Down

0 comments on commit 06c7ab5

Please sign in to comment.