Skip to content

Commit

Permalink
Move attribute handling into parser
Browse files Browse the repository at this point in the history
The Lexer now only provides the tokens to the parser, while the
parser is responsible for determining which attributes are placed
on nodes. This only needs to be done when the attributes are
actually needed, rather than for all tokens.

This removes the usedAttributes lexer option (and lexer options
entirely). The attributes are now enabled unconditionally. They
have less overhead now, and the need to explicitly enable them for
some use cases (e.g. formatting-preserving printing) doesn't seem
like a good tradeoff anymore.

There are some additional changes to the Lexer interface that
should be done after this, and the docs / upgrading guide haven't
been adjusted yet.
  • Loading branch information
nikic committed Aug 13, 2023
1 parent b20267c commit 4b49704
Show file tree
Hide file tree
Showing 18 changed files with 963 additions and 1,135 deletions.
5 changes: 1 addition & 4 deletions bin/php-parse
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,7 @@ if (empty($files)) {
showHelp("Must specify at least one file.");
}

$lexerOptions = ['usedAttributes' => [
'startLine', 'endLine', 'startFilePos', 'endFilePos', 'comments'
]];
$parser = (new PhpParser\ParserFactory())->createForVersion($attributes['version'], $lexerOptions);
$parser = (new PhpParser\ParserFactory())->createForVersion($attributes['version']);
$dumper = new PhpParser\NodeDumper([
'dumpComments' => true,
'dumpPositions' => $attributes['with-positions'],
Expand Down
19 changes: 11 additions & 8 deletions grammar/php.y
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ top_statement_list_ex:

top_statement_list:
top_statement_list_ex
{ makeZeroLengthNop($nop, $this->lookaheadStartAttributes);
{ makeZeroLengthNop($nop);
if ($nop !== null) { $1[] = $nop; } $$ = $1; }
;

Expand Down Expand Up @@ -237,7 +237,7 @@ top_statement:
| function_declaration_statement
| class_declaration_statement
| T_HALT_COMPILER '(' ')' ';'
{ $$ = Stmt\HaltCompiler[$this->lexer->handleHaltCompiler()]; }
{ $$ = Stmt\HaltCompiler[$this->handleHaltCompiler()]; }
| T_NAMESPACE namespace_declaration_name semi
{ $$ = Stmt\Namespace_[$2, null];
$$->setAttribute('kind', Stmt\Namespace_::KIND_SEMICOLON);
Expand Down Expand Up @@ -353,7 +353,7 @@ inner_statement_list_ex:

inner_statement_list:
inner_statement_list_ex
{ makeZeroLengthNop($nop, $this->lookaheadStartAttributes);
{ makeZeroLengthNop($nop);
if ($nop !== null) { $1[] = $nop; } $$ = $1; }
;

Expand All @@ -371,7 +371,7 @@ non_empty_statement:
if ($2) {
$$ = $2; prependLeadingComments($$);
} else {
makeNop($$, $this->startAttributeStack[#1], $this->endAttributes);
makeNop($$);
if (null === $$) { $$ = array(); }
}
}
Expand All @@ -390,7 +390,10 @@ non_empty_statement:
| T_GLOBAL global_var_list semi { $$ = Stmt\Global_[$2]; }
| T_STATIC static_var_list semi { $$ = Stmt\Static_[$2]; }
| T_ECHO expr_list_forbid_comma semi { $$ = Stmt\Echo_[$2]; }
| T_INLINE_HTML { $$ = Stmt\InlineHTML[$1]; }
| T_INLINE_HTML {
$$ = Stmt\InlineHTML[$1];
$$->setAttribute('hasLeadingNewline', $this->inlineHtmlHasLeadingNewline(#1));
}
| expr semi {
$e = $1;
if ($e instanceof Expr\Throw_) {
Expand Down Expand Up @@ -419,7 +422,7 @@ non_empty_statement:
statement:
non_empty_statement
| ';'
{ makeNop($$, $this->startAttributeStack[#1], $this->endAttributes);
{ makeNop($$);
if ($$ === null) $$ = array(); /* means: no statement */ }
;

Expand Down Expand Up @@ -834,7 +837,7 @@ class_statement_list_ex:

class_statement_list:
class_statement_list_ex
{ makeZeroLengthNop($nop, $this->lookaheadStartAttributes);
{ makeZeroLengthNop($nop);
if ($nop !== null) { $1[] = $nop; } $$ = $1; }
;

Expand Down Expand Up @@ -1337,7 +1340,7 @@ array_pair:
| /* empty */
{ /* Create an Error node now to remember the position. We'll later either report an error,
or convert this into a null element, depending on whether this is a creation or destructuring context. */
$attrs = $this->createEmptyElemAttributes($this->lookaheadStartAttributes);
$attrs = $this->createEmptyElemAttributes($this->tokenPos);
$$ = new Node\ArrayItem(new Expr\Error($attrs), null, false, $attrs); }
;

Expand Down
26 changes: 10 additions & 16 deletions grammar/phpyLang.php
Original file line number Diff line number Diff line change
Expand Up @@ -65,13 +65,13 @@ function ($matches) {

if ('attributes' === $name) {
assertArgs(0, $args, $name);
return '$this->startAttributeStack[#1] + $this->endAttributes';
return '$this->getAttributes($this->tokenStartStack[#1], $this->tokenEndStack[$stackPos])';
}

if ('stackAttributes' === $name) {
assertArgs(1, $args, $name);
return '$this->startAttributeStack[' . $args[0] . ']'
. ' + $this->endAttributeStack[' . $args[0] . ']';
return '$this->getAttributes($this->tokenStartStack[' . $args[0] . '], '
. ' $this->tokenEndStack[' . $args[0] . '])';
}

if ('init' === $name) {
Expand Down Expand Up @@ -111,30 +111,24 @@ function ($matches) {
}

if ('makeNop' === $name) {
assertArgs(3, $args, $name);
assertArgs(1, $args, $name);

return '$startAttributes = ' . $args[1] . ';'
. ' if (isset($startAttributes[\'comments\']))'
. ' { ' . $args[0] . ' = new Stmt\Nop($startAttributes + ' . $args[2] . '); }'
. ' else { ' . $args[0] . ' = null; }';
return $args[0] . ' = $this->maybeCreateNop($this->tokenStartStack[#1], $this->tokenEndStack[$stackPos])';
}

if ('makeZeroLengthNop' == $name) {
assertArgs(2, $args, $name);
assertArgs(1, $args, $name);

return '$startAttributes = ' . $args[1] . ';'
. ' if (isset($startAttributes[\'comments\']))'
. ' { ' . $args[0] . ' = new Stmt\Nop($this->createCommentNopAttributes($startAttributes[\'comments\'])); }'
. ' else { ' . $args[0] . ' = null; }';
return $args[0] . ' = $this->maybeCreateZeroLengthNop($this->tokenPos);';
}

if ('prependLeadingComments' === $name) {
assertArgs(1, $args, $name);

return '$attrs = $this->startAttributeStack[#1]; $stmts = ' . $args[0] . '; '
. 'if (!empty($attrs[\'comments\'])) {'
return '$comments = $this->getCommentsBeforeToken($this->tokenStartStack[#1]); $stmts = ' . $args[0] . '; '
. 'if (!empty($comments)) {'
. '$stmts[0]->setAttribute(\'comments\', '
. 'array_merge($attrs[\'comments\'], $stmts[0]->getAttribute(\'comments\', []))); }';
. 'array_merge($comments, $stmts[0]->getAttribute(\'comments\', []))); }';
}

return $matches[0];
Expand Down
151 changes: 0 additions & 151 deletions lib/PhpParser/Lexer.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,60 +5,8 @@
require __DIR__ . '/compatibility_tokens.php';

class Lexer {
/** @var string Code being tokenized */
protected $code;
/** @var list<Token> List of tokens */
protected $tokens;
/** @var int Current position in the token array */
protected $pos;
/** @var bool Whether the preceding closing PHP tag has a trailing newline */
protected $prevCloseTagHasNewline;
/** @var array<int, int> Map of tokens that should be dropped (like T_WHITESPACE) */
protected $dropTokens;

/** @var bool Whether to use the startLine attribute */
private $attributeStartLineUsed;
/** @var bool Whether to use the endLine attribute */
private $attributeEndLineUsed;
/** @var bool Whether to use the startTokenPos attribute */
private $attributeStartTokenPosUsed;
/** @var bool Whether to use the endTokenPos attribute */
private $attributeEndTokenPosUsed;
/** @var bool Whether to use the startFilePos attribute */
private $attributeStartFilePosUsed;
/** @var bool Whether to use the endFilePos attribute */
private $attributeEndFilePosUsed;
/** @var bool Whether to use the comments attribute */
private $attributeCommentsUsed;

/**
* Creates a Lexer.
*
* @param array{usedAttributes?: string[]} $options Options array. Currently only the
* 'usedAttributes' option is supported, which is an array of attributes to add to the
* AST nodes. Possible attributes are: 'comments', 'startLine', 'endLine', 'startTokenPos',
* 'endTokenPos', 'startFilePos', 'endFilePos'. The option defaults to the first three.
* For more info see getNextToken() docs.
*/
public function __construct(array $options = []) {
// map of tokens to drop while lexing (the map is only used for isset lookup,
// that's why the value is simply set to 1; the value is never actually used.)
$this->dropTokens = array_fill_keys(
[\T_WHITESPACE, \T_OPEN_TAG, \T_COMMENT, \T_DOC_COMMENT, \T_BAD_CHARACTER], 1
);

$defaultAttributes = ['comments', 'startLine', 'endLine'];
$usedAttributes = array_fill_keys($options['usedAttributes'] ?? $defaultAttributes, true);

// Create individual boolean properties to make these checks faster.
$this->attributeStartLineUsed = isset($usedAttributes['startLine']);
$this->attributeEndLineUsed = isset($usedAttributes['endLine']);
$this->attributeStartTokenPosUsed = isset($usedAttributes['startTokenPos']);
$this->attributeEndTokenPosUsed = isset($usedAttributes['endTokenPos']);
$this->attributeStartFilePosUsed = isset($usedAttributes['startFilePos']);
$this->attributeEndFilePosUsed = isset($usedAttributes['endFilePos']);
$this->attributeCommentsUsed = isset($usedAttributes['comments']);
}

/**
* Initializes the lexer for lexing the provided source code.
Expand All @@ -75,13 +23,6 @@ public function startLexing(string $code, ?ErrorHandler $errorHandler = null): v
$errorHandler = new ErrorHandler\Throwing();
}

$this->code = $code; // keep the code around for __halt_compiler() handling
$this->pos = -1;

// If inline HTML occurs without preceding code, treat it as if it had a leading newline.
// This ensures proper composability, because having a newline is the "safe" assumption.
$this->prevCloseTagHasNewline = true;

$scream = ini_set('xdebug.scream', '0');

$this->tokens = @Token::tokenize($code);
Expand Down Expand Up @@ -165,84 +106,6 @@ protected function postprocessTokens(ErrorHandler $errorHandler): void {
$this->tokens[] = new Token(0, "\0", $lastToken->getEndLine(), $lastToken->getEndPos());
}

/**
* Fetches the next token.
*
* The available attributes are determined by the 'usedAttributes' option, which can
* be specified in the constructor. The following attributes are supported:
*
* * 'comments' => Array of PhpParser\Comment or PhpParser\Comment\Doc instances,
* representing all comments that occurred between the previous
* non-discarded token and the current one.
* * 'startLine' => Line in which the node starts.
* * 'endLine' => Line in which the node ends.
* * 'startTokenPos' => Offset into the token array of the first token in the node.
* * 'endTokenPos' => Offset into the token array of the last token in the node.
* * 'startFilePos' => Offset into the code string of the first character that is part of the node.
* * 'endFilePos' => Offset into the code string of the last character that is part of the node.
*
* @param mixed $value Variable to store token content in
* @param mixed $startAttributes Variable to store start attributes in
* @param mixed $endAttributes Variable to store end attributes in
*
* @return int Token id
*/
public function getNextToken(&$value = null, &$startAttributes = null, &$endAttributes = null): int {
$startAttributes = [];
$endAttributes = [];

while (1) {
$token = $this->tokens[++$this->pos];

$id = $token->id;
if (isset($this->dropTokens[$id])) {
if (\T_COMMENT === $id || \T_DOC_COMMENT === $id) {
if ($this->attributeCommentsUsed) {
$comment = \T_DOC_COMMENT === $id
? new Comment\Doc($token->text, $token->line, $token->pos, $this->pos,
$token->getEndLine(), $token->getEndPos() - 1, $this->pos)
: new Comment($token->text, $token->line, $token->pos, $this->pos,
$token->getEndLine(), $token->getEndPos() - 1, $this->pos);
$startAttributes['comments'][] = $comment;
}
}
continue;
}

if ($this->attributeStartLineUsed) {
$startAttributes['startLine'] = $token->line;
}
if ($this->attributeStartTokenPosUsed) {
$startAttributes['startTokenPos'] = $this->pos;
}
if ($this->attributeStartFilePosUsed) {
$startAttributes['startFilePos'] = $token->pos;
}

$value = $token->text;
if (\T_CLOSE_TAG === $token->id) {
$this->prevCloseTagHasNewline = false !== strpos($value, "\n")
|| false !== strpos($value, "\r");
} elseif (\T_INLINE_HTML === $token->id) {
$startAttributes['hasLeadingNewline'] = $this->prevCloseTagHasNewline;
}

// Fetch the end line/pos from the next token (if available) instead of recomputing it.
$nextToken = $this->tokens[$this->pos + 1] ?? null;
if ($this->attributeEndLineUsed) {
$endAttributes['endLine'] = $nextToken ? $nextToken->line : $token->getEndLine();
}
if ($this->attributeEndTokenPosUsed) {
$endAttributes['endTokenPos'] = $this->pos;
}
if ($this->attributeEndFilePosUsed) {
$endAttributes['endFilePos'] = ($nextToken ? $nextToken->pos : $token->getEndPos()) - 1;
}

return $id;
}
}

/**
* Returns the token array for current code.
*
Expand All @@ -259,18 +122,4 @@ public function getNextToken(&$value = null, &$startAttributes = null, &$endAttr
public function getTokens(): array {
return $this->tokens;
}

/**
* Handles __halt_compiler() by returning the text after it.
*
* @return string Remaining text
*/
public function handleHaltCompiler(): string {
// Prevent the lexer from returning any further tokens.
$nextToken = $this->tokens[$this->pos + 1];
$this->pos = \count($this->tokens) - 2;

// Return text after __halt_compiler.
return $nextToken->id === \T_INLINE_HTML ? $nextToken->text : '';
}
}
15 changes: 3 additions & 12 deletions lib/PhpParser/Lexer/Emulative.php
Original file line number Diff line number Diff line change
Expand Up @@ -33,20 +33,11 @@ class Emulative extends Lexer {
private $hostPhpVersion;

/**
* @param array{usedAttributes?: string[], phpVersion?: PhpVersion|string} $options Lexer options.
* In addition to the usual options, accepts a 'phpVersion' (PhpVersion object or string)
* that specifies the version to emulate. Defaults to newest supported.
* @param PhpVersion|null $phpVersion PHP version to emulate. Defaults to newest supported.
*/
public function __construct(array $options = []) {
$version = $options['phpVersion'] ?? PhpVersion::getNewestSupported();
if (!$version instanceof PhpVersion) {
$version = PhpVersion::fromString($version);
}
$this->targetPhpVersion = $version;
public function __construct(?PhpVersion $phpVersion = null) {
$this->targetPhpVersion = $phpVersion ?? PhpVersion::getNewestSupported();
$this->hostPhpVersion = PhpVersion::getHostVersion();
unset($options['phpVersion']);

parent::__construct($options);

$emulators = [
new FlexibleDocStringEmulator(),
Expand Down

0 comments on commit 4b49704

Please sign in to comment.