Skip to content

Commit

Permalink
Replace startLexing() with tokenize()
Browse files Browse the repository at this point in the history
For now Lexer::getTokens() still exists, but should probably be
removed.
  • Loading branch information
nikic committed Aug 13, 2023
1 parent d1d784a commit ba85124
Show file tree
Hide file tree
Showing 8 changed files with 58 additions and 68 deletions.
11 changes: 2 additions & 9 deletions doc/component/Pretty_printing.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -66,21 +66,14 @@ Use of the formatting-preservation functionality requires some additional prepar
```php
use PhpParser\{Lexer, NodeTraverser, NodeVisitor, ParserFactory, PrettyPrinter};

$lexerOptions = new [
'usedAttributes' => [
'comments',
'startLine', 'endLine',
'startTokenPos', 'endTokenPos',
],
];
$parser = (new ParserFactory())->createForHostVersion($lexerOptions);
$parser = (new ParserFactory())->createForHostVersion();

$traverser = new NodeTraverser(new NodeVisitor\CloningVisitor());

$printer = new PrettyPrinter\Standard();

$oldStmts = $parser->parse($code);
$oldTokens = $parser->getLexer()->getTokens();
$oldTokens = $parser->getTokens();

$newStmts = $traverser->traverse($oldStmts);

Expand Down
30 changes: 15 additions & 15 deletions lib/PhpParser/Lexer.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,22 @@ class Lexer {
protected $tokens;

/**
* Initializes the lexer for lexing the provided source code.
* Tokenize the provided source code.
*
* This function does not throw if lexing errors occur. Instead, errors may be retrieved using
* the getErrors() method.
* The token array is in the same format as provided by the PhpToken::tokenize() method in
* PHP 8.0. The tokens are instances of PhpParser\Token, to abstract over a polyfill
* implementation in earlier PHP versions.
*
* @param string $code The source code to lex
* The token array is terminated by a sentinel token with token ID 0.
* The token array does not discard any tokens (i.e. whitespace and comments are included).
* The token position attributes are indices into this token array.
*
* @param string $code The source code to tokenize.
* @param ErrorHandler|null $errorHandler Error handler to use for lexing errors. Defaults to
* ErrorHandler\Throwing
* ErrorHandler\Throwing.
* @return Token[] Tokens
*/
public function startLexing(string $code, ?ErrorHandler $errorHandler = null): void {
public function tokenize(string $code, ?ErrorHandler $errorHandler = null): array {
if (null === $errorHandler) {
$errorHandler = new ErrorHandler\Throwing();
}
Expand All @@ -31,6 +37,8 @@ public function startLexing(string $code, ?ErrorHandler $errorHandler = null): v
if (false !== $scream) {
ini_set('xdebug.scream', $scream);
}

return $this->tokens;
}

private function handleInvalidCharacter(Token $token, ErrorHandler $errorHandler): void {
Expand Down Expand Up @@ -107,15 +115,7 @@ protected function postprocessTokens(ErrorHandler $errorHandler): void {
}

/**
* Returns the token array for current code.
*
* The token array is in the same format as provided by the PhpToken::tokenize() method in
* PHP 8.0. The tokens are instances of PhpParser\Token, to abstract over a polyfill
* implementation in earlier PHP version.
*
* The token array is terminated by a sentinel token with token ID 0.
* The token array does not discard any tokens (i.e. whitespace and comments are included).
* The token position attributes are against this token array.
* Returns the token array for the last tokenized source code.
*
* @return Token[] Array of tokens
*/
Expand Down
9 changes: 5 additions & 4 deletions lib/PhpParser/Lexer/Emulative.php
Original file line number Diff line number Diff line change
Expand Up @@ -65,15 +65,14 @@ public function __construct(?PhpVersion $phpVersion = null) {
}
}

public function startLexing(string $code, ?ErrorHandler $errorHandler = null): void {
public function tokenize(string $code, ?ErrorHandler $errorHandler = null): array {
$emulators = array_filter($this->emulators, function ($emulator) use ($code) {
return $emulator->isEmulationNeeded($code);
});

if (empty($emulators)) {
// Nothing to emulate, yay
parent::startLexing($code, $errorHandler);
return;
return parent::tokenize($code, $errorHandler);
}

if ($errorHandler === null) {
Expand All @@ -86,7 +85,7 @@ public function startLexing(string $code, ?ErrorHandler $errorHandler = null): v
}

$collector = new ErrorHandler\Collecting();
parent::startLexing($code, $collector);
parent::tokenize($code, $collector);
$this->sortPatches();
$this->fixupTokens();

Expand All @@ -101,6 +100,8 @@ public function startLexing(string $code, ?ErrorHandler $errorHandler = null): v
foreach ($emulators as $emulator) {
$this->tokens = $emulator->emulate($code, $this->tokens);
}

return $this->tokens;
}

private function isForwardEmulationNeeded(PhpVersion $emulatorPhpVersion): bool {
Expand Down
6 changes: 3 additions & 3 deletions lib/PhpParser/Parser.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ interface Parser {
public function parse(string $code, ?ErrorHandler $errorHandler = null): ?array;

/**
* Return the lexer used by this parser instance.
* Return tokens for the last parse.
*
* @return Lexer
* @return Token[]
*/
public function getLexer(): Lexer;
public function getTokens(): array;
}
8 changes: 3 additions & 5 deletions lib/PhpParser/ParserAbstract.php
Original file line number Diff line number Diff line change
Expand Up @@ -179,8 +179,7 @@ public function parse(string $code, ?ErrorHandler $errorHandler = null): ?array
$this->errorHandler = $errorHandler ?: new ErrorHandler\Throwing();
$this->createdArrays = new \SplObjectStorage();

$this->lexer->startLexing($code, $this->errorHandler);
$this->tokens = $this->lexer->getTokens();
$this->tokens = $this->lexer->tokenize($code, $this->errorHandler);
$result = $this->doParse();

// Report errors for any empty elements used inside arrays. This is delayed until after the main parse,
Expand All @@ -197,7 +196,6 @@ public function parse(string $code, ?ErrorHandler $errorHandler = null): ?array

// Clear out some of the interior state, so we don't hold onto unnecessary
// memory between uses of the parser
$this->tokens = [];
$this->tokenStartStack = [];
$this->tokenEndStack = [];
$this->semStack = [];
Expand All @@ -207,8 +205,8 @@ public function parse(string $code, ?ErrorHandler $errorHandler = null): ?array
return $result;
}

public function getLexer(): Lexer {
return $this->lexer;
public function getTokens(): array {
return $this->tokens;
}

/** @return Stmt[]|null */
Expand Down
36 changes: 14 additions & 22 deletions test/PhpParser/Lexer/EmulativeTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,11 @@ protected function getLexer() {
public function testReplaceKeywords(string $keyword, int $expectedToken) {
$lexer = $this->getLexer();
$code = '<?php ' . $keyword;
$lexer->startLexing($code);
$this->assertEquals([
new Token(\T_OPEN_TAG, '<?php ', 1, 0),
new Token($expectedToken, $keyword, 1, 6),
new Token(0, "\0", 1, \strlen($code)),
], $lexer->getTokens());
], $lexer->tokenize($code));
}

/**
Expand All @@ -36,13 +35,12 @@ public function testReplaceKeywords(string $keyword, int $expectedToken) {
public function testReplaceKeywordsUppercase(string $keyword, int $expectedToken) {
$lexer = $this->getLexer();
$code = '<?php ' . strtoupper($keyword);
$lexer->startLexing($code);

$this->assertEquals([
new Token(\T_OPEN_TAG, '<?php ', 1, 0),
new Token($expectedToken, \strtoupper($keyword), 1, 6),
new Token(0, "\0", 1, \strlen($code)),
], $lexer->getTokens());
], $lexer->tokenize($code));
}

/**
Expand All @@ -51,14 +49,13 @@ public function testReplaceKeywordsUppercase(string $keyword, int $expectedToken
public function testNoReplaceKeywordsAfterObjectOperator(string $keyword) {
$lexer = $this->getLexer();
$code = '<?php ->' . $keyword;
$lexer->startLexing($code);

$this->assertEquals([
new Token(\T_OPEN_TAG, '<?php ', 1, 0),
new Token(\T_OBJECT_OPERATOR, '->', 1, 6),
new Token(\T_STRING, $keyword, 1, 8),
new Token(0, "\0", 1, \strlen($code)),
], $lexer->getTokens());
], $lexer->tokenize($code));
}

/**
Expand All @@ -67,15 +64,14 @@ public function testNoReplaceKeywordsAfterObjectOperator(string $keyword) {
public function testNoReplaceKeywordsAfterObjectOperatorWithSpaces(string $keyword) {
$lexer = $this->getLexer();
$code = '<?php -> ' . $keyword;
$lexer->startLexing($code);

$this->assertEquals([
new Token(\T_OPEN_TAG, '<?php ', 1, 0),
new Token(\T_OBJECT_OPERATOR, '->', 1, 6),
new Token(\T_WHITESPACE, ' ', 1, 8),
new Token(\T_STRING, $keyword, 1, 12),
new Token(0, "\0", 1, \strlen($code)),
], $lexer->getTokens());
], $lexer->tokenize($code));
}

/**
Expand All @@ -84,14 +80,13 @@ public function testNoReplaceKeywordsAfterObjectOperatorWithSpaces(string $keywo
public function testNoReplaceKeywordsAfterNullsafeObjectOperator(string $keyword) {
$lexer = $this->getLexer();
$code = '<?php ?->' . $keyword;
$lexer->startLexing($code);

$this->assertEquals([
new Token(\T_OPEN_TAG, '<?php ', 1, 0),
new Token(\T_NULLSAFE_OBJECT_OPERATOR, '?->', 1, 6),
new Token(\T_STRING, $keyword, 1, 9),
new Token(0, "\0", 1, \strlen($code)),
], $lexer->getTokens());
], $lexer->tokenize($code));
}

public function provideTestReplaceKeywords() {
Expand Down Expand Up @@ -120,24 +115,23 @@ public function provideTestReplaceKeywords() {
];
}

private function assertSameTokens(array $expectedTokens, Lexer $lexer) {
$tokens = [];
foreach ($lexer->getTokens() as $token) {
private function assertSameTokens(array $expectedTokens, array $tokens) {
$reducedTokens = [];
foreach ($tokens as $token) {
if ($token->id === 0 || $token->isIgnorable()) {
continue;
}
$tokens[] = [$token->id, $token->text];
$reducedTokens[] = [$token->id, $token->text];
}
$this->assertSame($expectedTokens, $tokens);
$this->assertSame($expectedTokens, $reducedTokens);
}

/**
* @dataProvider provideTestLexNewFeatures
*/
public function testLexNewFeatures(string $code, array $expectedTokens) {
$lexer = $this->getLexer();
$lexer->startLexing('<?php ' . $code);
$this->assertSameTokens($expectedTokens, $lexer);
$this->assertSameTokens($expectedTokens, $lexer->tokenize('<?php ' . $code));
}

/**
Expand All @@ -148,13 +142,12 @@ public function testLeaveStuffAloneInStrings(string $code) {

$lexer = $this->getLexer();
$fullCode = '<?php ' . $stringifiedToken;
$lexer->startLexing($fullCode);

$this->assertEquals([
new Token(\T_OPEN_TAG, '<?php ', 1, 0),
new Token(\T_CONSTANT_ENCAPSED_STRING, $stringifiedToken, 1, 6),
new Token(0, "\0", \substr_count($fullCode, "\n") + 1, \strlen($fullCode)),
], $lexer->getTokens());
], $lexer->tokenize($fullCode));
}

/**
Expand All @@ -163,7 +156,7 @@ public function testLeaveStuffAloneInStrings(string $code) {
public function testErrorAfterEmulation($code) {
$errorHandler = new ErrorHandler\Collecting();
$lexer = $this->getLexer();
$lexer->startLexing('<?php ' . $code . "\0", $errorHandler);
$lexer->tokenize('<?php ' . $code . "\0", $errorHandler);

$errors = $errorHandler->getErrors();
$this->assertCount(1, $errors);
Expand Down Expand Up @@ -405,8 +398,7 @@ public function provideTestLexNewFeatures() {
*/
public function testTargetVersion(string $phpVersion, string $code, array $expectedTokens) {
$lexer = new Emulative(PhpVersion::fromString($phpVersion));
$lexer->startLexing('<?php ' . $code);
$this->assertSameTokens($expectedTokens, $lexer);
$this->assertSameTokens($expectedTokens, $lexer->tokenize('<?php ' . $code));
}

public function provideTestTargetVersion() {
Expand Down
10 changes: 4 additions & 6 deletions test/PhpParser/LexerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ public function testError($code, $messages) {

$errorHandler = new ErrorHandler\Collecting();
$lexer = $this->getLexer();
$lexer->startLexing($code, $errorHandler);
$lexer->tokenize($code, $errorHandler);
$errors = $errorHandler->getErrors();

$this->assertCount(count($messages), $errors);
Expand Down Expand Up @@ -49,16 +49,15 @@ public function testDefaultErrorHandler() {
$this->expectException(Error::class);
$this->expectExceptionMessage('Unterminated comment on line 1');
$lexer = $this->getLexer();
$lexer->startLexing("<?php readonly /*");
$lexer->tokenize("<?php readonly /*");
}

/**
* @dataProvider provideTestLex
*/
public function testLex($code, $expectedTokens) {
$lexer = $this->getLexer();
$lexer->startLexing($code);
$tokens = $lexer->getTokens();
$tokens = $lexer->tokenize($code);
foreach ($tokens as $token) {
if ($token->id === 0 || $token->isIgnorable()) {
continue;
Expand Down Expand Up @@ -115,7 +114,6 @@ public function testGetTokens() {
];

$lexer = $this->getLexer();
$lexer->startLexing($code);
$this->assertEquals($expectedTokens, $lexer->getTokens());
$this->assertEquals($expectedTokens, $lexer->tokenize($code));
}
}
16 changes: 12 additions & 4 deletions test/PhpParser/ParserTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -189,16 +189,24 @@ public function testListKindAttribute() {
$this->assertSame($stmts[1]->expr->var->items[0]->value->getAttribute('kind'), Expr\List_::KIND_ARRAY);
}

public function testGetLexer() {
public function testGetTokens() {
$lexer = new Lexer();
$parser = $this->getParser($lexer);
$this->assertSame($lexer, $parser->getLexer());
$parser->parse('<?php echo "Foo";');
$this->assertEquals([
new Token(\T_OPEN_TAG, '<?php ', 1, 0),
new Token(\T_ECHO, 'echo', 1, 6),
new Token(\T_WHITESPACE, ' ', 1, 10),
new Token(\T_CONSTANT_ENCAPSED_STRING, '"Foo"', 1, 11),
new Token(ord(';'), ';', 1, 16),
new Token(0, "\0", 1, 17),
], $parser->getTokens());
}
}

class InvalidTokenLexer extends Lexer {
public function startLexing(string $code, ?ErrorHandler $errorHandler = null): void {
$this->tokens = [
public function tokenize(string $code, ?ErrorHandler $errorHandler = null): array {
return [
new Token(999, 'foobar', 42),
];
}
Expand Down

0 comments on commit ba85124

Please sign in to comment.