Skip to content

Commit

Permalink
Don't parse unicode escapes for PHP < 7.0
Browse files Browse the repository at this point in the history
We still had the option to control whether unicode escape sequences are parsed, but the grammar was hardcoding it to true.
Make it conditional on the configured PHP version instead.
  • Loading branch information
nikic committed Sep 23, 2023
1 parent f4961b8 commit d8e8065
Show file tree
Hide file tree
Showing 7 changed files with 102 additions and 40 deletions.
1 change: 1 addition & 0 deletions UPGRADE-5.0.md
Expand Up @@ -13,6 +13,7 @@ In particular, if an older `PhpVersion` is specified, then:

* For versions before PHP 7.0, `$foo =& new Bar()` assignments are allowed without error.
* For versions before PHP 7.0, invalid octal literals `089` are allowed without error.
* For versions before PHP 7.0, unicode escape sequences `\u{123}` in strings are not parsed.
* Type hints are interpreted as a class `Name` or as a built-in `Identifier` depending on PHP
version, for example `int` is treated as a class name on PHP 5.6 and as a built-in on PHP 7.0.

Expand Down
44 changes: 22 additions & 22 deletions grammar/parser.template
Expand Up @@ -28,68 +28,68 @@ class #(-p) extends \PhpParser\ParserAbstract
public const %s = %n;
#endtokenval

protected $tokenToSymbolMapSize = #(YYMAXLEX);
protected $actionTableSize = #(YYLAST);
protected $gotoTableSize = #(YYGLAST);
protected int $tokenToSymbolMapSize = #(YYMAXLEX);
protected int $actionTableSize = #(YYLAST);
protected int $gotoTableSize = #(YYGLAST);

protected $invalidSymbol = #(YYBADCH);
protected $errorSymbol = #(YYINTERRTOK);
protected $defaultAction = #(YYDEFAULT);
protected $unexpectedTokenRule = #(YYUNEXPECTED);
protected int $invalidSymbol = #(YYBADCH);
protected int $errorSymbol = #(YYINTERRTOK);
protected int $defaultAction = #(YYDEFAULT);
protected int $unexpectedTokenRule = #(YYUNEXPECTED);

protected $YY2TBLSTATE = #(YY2TBLSTATE);
protected $numNonLeafStates = #(YYNLSTATES);
protected int $YY2TBLSTATE = #(YY2TBLSTATE);
protected int $numNonLeafStates = #(YYNLSTATES);

protected $symbolToName = array(
protected array $symbolToName = array(
#listvar terminals
);

protected $tokenToSymbol = array(
protected array $tokenToSymbol = array(
#listvar yytranslate
);

protected $action = array(
protected array $action = array(
#listvar yyaction
);

protected $actionCheck = array(
protected array $actionCheck = array(
#listvar yycheck
);

protected $actionBase = array(
protected array $actionBase = array(
#listvar yybase
);

protected $actionDefault = array(
protected array $actionDefault = array(
#listvar yydefault
);

protected $goto = array(
protected array $goto = array(
#listvar yygoto
);

protected $gotoCheck = array(
protected array $gotoCheck = array(
#listvar yygcheck
);

protected $gotoBase = array(
protected array $gotoBase = array(
#listvar yygbase
);

protected $gotoDefault = array(
protected array $gotoDefault = array(
#listvar yygdefault
);

protected $ruleToNonTerminal = array(
protected array $ruleToNonTerminal = array(
#listvar yylhs
);

protected $ruleToLength = array(
protected array $ruleToLength = array(
#listvar yylen
);
#if -t

protected $productions = array(
protected array $productions = array(
#production-strings;
);
#endif
Expand Down
9 changes: 5 additions & 4 deletions grammar/php.y
Expand Up @@ -1152,8 +1152,8 @@ exit_expr:
backticks_expr:
/* empty */ { $$ = array(); }
| T_ENCAPSED_AND_WHITESPACE
{ $$ = array(Node\InterpolatedStringPart[Scalar\String_::parseEscapeSequences($1, '`')]); }
| encaps_list { parseEncapsed($1, '`', true); $$ = $1; }
{ $$ = array(Node\InterpolatedStringPart[Scalar\String_::parseEscapeSequences($1, '`', $this->phpVersion->supportsUnicodeEscapes())]); }
| encaps_list { parseEncapsed($1, '`', $this->phpVersion->supportsUnicodeEscapes()); $$ = $1; }
;

ctor_arguments:
Expand Down Expand Up @@ -1196,10 +1196,11 @@ dereferencable_scalar:
$$ = new Expr\Array_($3, $attrs);
$this->createdArrays->attach($$); }
| array_short_syntax { $$ = $1; $this->createdArrays->attach($$); }
| T_CONSTANT_ENCAPSED_STRING { $$ = Scalar\String_::fromString($1, attributes()); }
| T_CONSTANT_ENCAPSED_STRING
{ $$ = Scalar\String_::fromString($1, attributes(), $this->phpVersion->supportsUnicodeEscapes()); }
| '"' encaps_list '"'
{ $attrs = attributes(); $attrs['kind'] = Scalar\String_::KIND_DOUBLE_QUOTED;
parseEncapsed($2, '"', true); $$ = new Scalar\InterpolatedString($2, $attrs); }
parseEncapsed($2, '"', $this->phpVersion->supportsUnicodeEscapes()); $$ = new Scalar\InterpolatedString($2, $attrs); }
;

scalar:
Expand Down
8 changes: 4 additions & 4 deletions lib/PhpParser/Parser/Php7.php
Expand Up @@ -2455,10 +2455,10 @@ protected function initReduceCallbacks(): void {
$this->semValue = array();
},
511 => function ($stackPos) {
$this->semValue = array(new Node\InterpolatedStringPart(Scalar\String_::parseEscapeSequences($this->semStack[$stackPos-(1-1)], '`'), $this->getAttributes($this->tokenStartStack[$stackPos-(1-1)], $this->tokenEndStack[$stackPos])));
$this->semValue = array(new Node\InterpolatedStringPart(Scalar\String_::parseEscapeSequences($this->semStack[$stackPos-(1-1)], '`', $this->phpVersion->supportsUnicodeEscapes()), $this->getAttributes($this->tokenStartStack[$stackPos-(1-1)], $this->tokenEndStack[$stackPos])));
},
512 => function ($stackPos) {
foreach ($this->semStack[$stackPos-(1-1)] as $s) { if ($s instanceof Node\InterpolatedStringPart) { $s->value = Node\Scalar\String_::parseEscapeSequences($s->value, '`', true); } }; $this->semValue = $this->semStack[$stackPos-(1-1)];
foreach ($this->semStack[$stackPos-(1-1)] as $s) { if ($s instanceof Node\InterpolatedStringPart) { $s->value = Node\Scalar\String_::parseEscapeSequences($s->value, '`', $this->phpVersion->supportsUnicodeEscapes()); } }; $this->semValue = $this->semStack[$stackPos-(1-1)];
},
513 => function ($stackPos) {
$this->semValue = array();
Expand Down Expand Up @@ -2513,11 +2513,11 @@ protected function initReduceCallbacks(): void {
$this->semValue = $this->semStack[$stackPos-(1-1)]; $this->createdArrays->attach($this->semValue);
},
530 => function ($stackPos) {
$this->semValue = Scalar\String_::fromString($this->semStack[$stackPos-(1-1)], $this->getAttributes($this->tokenStartStack[$stackPos-(1-1)], $this->tokenEndStack[$stackPos]));
$this->semValue = Scalar\String_::fromString($this->semStack[$stackPos-(1-1)], $this->getAttributes($this->tokenStartStack[$stackPos-(1-1)], $this->tokenEndStack[$stackPos]), $this->phpVersion->supportsUnicodeEscapes());
},
531 => function ($stackPos) {
$attrs = $this->getAttributes($this->tokenStartStack[$stackPos-(3-1)], $this->tokenEndStack[$stackPos]); $attrs['kind'] = Scalar\String_::KIND_DOUBLE_QUOTED;
foreach ($this->semStack[$stackPos-(3-2)] as $s) { if ($s instanceof Node\InterpolatedStringPart) { $s->value = Node\Scalar\String_::parseEscapeSequences($s->value, '"', true); } }; $this->semValue = new Scalar\InterpolatedString($this->semStack[$stackPos-(3-2)], $attrs);
foreach ($this->semStack[$stackPos-(3-2)] as $s) { if ($s instanceof Node\InterpolatedStringPart) { $s->value = Node\Scalar\String_::parseEscapeSequences($s->value, '"', $this->phpVersion->supportsUnicodeEscapes()); } }; $this->semValue = new Scalar\InterpolatedString($this->semStack[$stackPos-(3-2)], $attrs);
},
532 => function ($stackPos) {
$this->semValue = $this->parseLNumber($this->semStack[$stackPos-(1-1)], $this->getAttributes($this->tokenStartStack[$stackPos-(1-1)], $this->tokenEndStack[$stackPos]), $this->phpVersion->allowsInvalidOctals());
Expand Down
8 changes: 4 additions & 4 deletions lib/PhpParser/Parser/Php8.php
Expand Up @@ -2463,10 +2463,10 @@ protected function initReduceCallbacks(): void {
$this->semValue = array();
},
511 => function ($stackPos) {
$this->semValue = array(new Node\InterpolatedStringPart(Scalar\String_::parseEscapeSequences($this->semStack[$stackPos-(1-1)], '`'), $this->getAttributes($this->tokenStartStack[$stackPos-(1-1)], $this->tokenEndStack[$stackPos])));
$this->semValue = array(new Node\InterpolatedStringPart(Scalar\String_::parseEscapeSequences($this->semStack[$stackPos-(1-1)], '`', $this->phpVersion->supportsUnicodeEscapes()), $this->getAttributes($this->tokenStartStack[$stackPos-(1-1)], $this->tokenEndStack[$stackPos])));
},
512 => function ($stackPos) {
foreach ($this->semStack[$stackPos-(1-1)] as $s) { if ($s instanceof Node\InterpolatedStringPart) { $s->value = Node\Scalar\String_::parseEscapeSequences($s->value, '`', true); } }; $this->semValue = $this->semStack[$stackPos-(1-1)];
foreach ($this->semStack[$stackPos-(1-1)] as $s) { if ($s instanceof Node\InterpolatedStringPart) { $s->value = Node\Scalar\String_::parseEscapeSequences($s->value, '`', $this->phpVersion->supportsUnicodeEscapes()); } }; $this->semValue = $this->semStack[$stackPos-(1-1)];
},
513 => function ($stackPos) {
$this->semValue = array();
Expand Down Expand Up @@ -2521,11 +2521,11 @@ protected function initReduceCallbacks(): void {
$this->semValue = $this->semStack[$stackPos-(1-1)]; $this->createdArrays->attach($this->semValue);
},
530 => function ($stackPos) {
$this->semValue = Scalar\String_::fromString($this->semStack[$stackPos-(1-1)], $this->getAttributes($this->tokenStartStack[$stackPos-(1-1)], $this->tokenEndStack[$stackPos]));
$this->semValue = Scalar\String_::fromString($this->semStack[$stackPos-(1-1)], $this->getAttributes($this->tokenStartStack[$stackPos-(1-1)], $this->tokenEndStack[$stackPos]), $this->phpVersion->supportsUnicodeEscapes());
},
531 => function ($stackPos) {
$attrs = $this->getAttributes($this->tokenStartStack[$stackPos-(3-1)], $this->tokenEndStack[$stackPos]); $attrs['kind'] = Scalar\String_::KIND_DOUBLE_QUOTED;
foreach ($this->semStack[$stackPos-(3-2)] as $s) { if ($s instanceof Node\InterpolatedStringPart) { $s->value = Node\Scalar\String_::parseEscapeSequences($s->value, '"', true); } }; $this->semValue = new Scalar\InterpolatedString($this->semStack[$stackPos-(3-2)], $attrs);
foreach ($this->semStack[$stackPos-(3-2)] as $s) { if ($s instanceof Node\InterpolatedStringPart) { $s->value = Node\Scalar\String_::parseEscapeSequences($s->value, '"', $this->phpVersion->supportsUnicodeEscapes()); } }; $this->semValue = new Scalar\InterpolatedString($this->semStack[$stackPos-(3-2)], $attrs);
},
532 => function ($stackPos) {
$this->semValue = $this->parseLNumber($this->semStack[$stackPos-(1-1)], $this->getAttributes($this->tokenStartStack[$stackPos-(1-1)], $this->tokenEndStack[$stackPos]), $this->phpVersion->allowsInvalidOctals());
Expand Down
9 changes: 8 additions & 1 deletion lib/PhpParser/PhpVersion.php
Expand Up @@ -149,9 +149,16 @@ public function allowsDelInIdentifiers(): bool {
}

/**
* Whether this version support yield in expression context without parentheses.
* Whether this version supports yield in expression context without parentheses.
*/
public function supportsYieldWithoutParentheses(): bool {
return $this->id >= 70000;
}

/**
* Whether this version supports unicode escape sequences in strings.
*
* Returns true when the version id is at least 70000 (i.e. PHP 7.0 or newer). The parser
* passes this flag to the string escape-sequence handling so that sequences such as
* `\u{123}` are only parsed for versions that support them.
*/
public function supportsUnicodeEscapes(): bool {
return $this->id >= 70000;
}
}
63 changes: 58 additions & 5 deletions test/code/parser/scalar/unicodeEscape.test
Expand Up @@ -3,8 +3,8 @@ Unicode escape sequence
<?php

"\u{0}";
"\u{114}";
"\u{1F602}";
"\u{114}$foo";
`\u{1F602}$bar`;
-----
array(
0: Stmt_Expression(
Expand All @@ -13,13 +13,66 @@ array(
)
)
1: Stmt_Expression(
expr: Scalar_String(
value: Ĕ
expr: Scalar_InterpolatedString(
parts: array(
0: InterpolatedStringPart(
value: Ĕ
)
1: Expr_Variable(
name: foo
)
)
)
)
2: Stmt_Expression(
expr: Expr_ShellExec(
parts: array(
0: InterpolatedStringPart(
value: @@{"\xF0\x9F\x98\x82"}@@
)
1: Expr_Variable(
name: bar
)
)
)
)
)
-----
<?php

"\u{0}";
"\u{114}$foo";
`\u{1F602}$bar`;
-----
!!version=5.6
array(
0: Stmt_Expression(
expr: Scalar_String(
value: @@{"\xF0\x9F\x98\x82"}@@
value: \u{0}
)
)
1: Stmt_Expression(
expr: Scalar_InterpolatedString(
parts: array(
0: InterpolatedStringPart(
value: \u{114}
)
1: Expr_Variable(
name: foo
)
)
)
)
2: Stmt_Expression(
expr: Expr_ShellExec(
parts: array(
0: InterpolatedStringPart(
value: \u{1F602}
)
1: Expr_Variable(
name: bar
)
)
)
)
)
Expand Down

0 comments on commit d8e8065

Please sign in to comment.