Skip to content

Commit 4b49704

Browse files
committed
Move attribute handling into parser
The Lexer now only provides the tokens to the parser, while the parser is responsible for determining which attributes are placed on nodes. This only needs to be done when the attributes are actually needed, rather than for all tokens. This removes the usedAttributes lexer option (and lexer options entirely). The attributes are now enabled unconditionally. They have less overhead now, and the need to explicitly enable them for some use cases (e.g. formatting-preserving printing) doesn't seem like a good tradeoff anymore. There are some additional changes to the Lexer interface that should be done after this, and the docs / upgrading guide haven't been adjusted yet.
1 parent b20267c commit 4b49704

18 files changed

+963
-1135
lines changed

bin/php-parse

+1-4
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,7 @@ if (empty($files)) {
2626
showHelp("Must specify at least one file.");
2727
}
2828

29-
$lexerOptions = ['usedAttributes' => [
30-
'startLine', 'endLine', 'startFilePos', 'endFilePos', 'comments'
31-
]];
32-
$parser = (new PhpParser\ParserFactory())->createForVersion($attributes['version'], $lexerOptions);
29+
$parser = (new PhpParser\ParserFactory())->createForVersion($attributes['version']);
3330
$dumper = new PhpParser\NodeDumper([
3431
'dumpComments' => true,
3532
'dumpPositions' => $attributes['with-positions'],

grammar/php.y

+11-8
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ top_statement_list_ex:
134134

135135
top_statement_list:
136136
top_statement_list_ex
137-
{ makeZeroLengthNop($nop, $this->lookaheadStartAttributes);
137+
{ makeZeroLengthNop($nop);
138138
if ($nop !== null) { $1[] = $nop; } $$ = $1; }
139139
;
140140

@@ -237,7 +237,7 @@ top_statement:
237237
| function_declaration_statement
238238
| class_declaration_statement
239239
| T_HALT_COMPILER '(' ')' ';'
240-
{ $$ = Stmt\HaltCompiler[$this->lexer->handleHaltCompiler()]; }
240+
{ $$ = Stmt\HaltCompiler[$this->handleHaltCompiler()]; }
241241
| T_NAMESPACE namespace_declaration_name semi
242242
{ $$ = Stmt\Namespace_[$2, null];
243243
$$->setAttribute('kind', Stmt\Namespace_::KIND_SEMICOLON);
@@ -353,7 +353,7 @@ inner_statement_list_ex:
353353

354354
inner_statement_list:
355355
inner_statement_list_ex
356-
{ makeZeroLengthNop($nop, $this->lookaheadStartAttributes);
356+
{ makeZeroLengthNop($nop);
357357
if ($nop !== null) { $1[] = $nop; } $$ = $1; }
358358
;
359359

@@ -371,7 +371,7 @@ non_empty_statement:
371371
if ($2) {
372372
$$ = $2; prependLeadingComments($$);
373373
} else {
374-
makeNop($$, $this->startAttributeStack[#1], $this->endAttributes);
374+
makeNop($$);
375375
if (null === $$) { $$ = array(); }
376376
}
377377
}
@@ -390,7 +390,10 @@ non_empty_statement:
390390
| T_GLOBAL global_var_list semi { $$ = Stmt\Global_[$2]; }
391391
| T_STATIC static_var_list semi { $$ = Stmt\Static_[$2]; }
392392
| T_ECHO expr_list_forbid_comma semi { $$ = Stmt\Echo_[$2]; }
393-
| T_INLINE_HTML { $$ = Stmt\InlineHTML[$1]; }
393+
| T_INLINE_HTML {
394+
$$ = Stmt\InlineHTML[$1];
395+
$$->setAttribute('hasLeadingNewline', $this->inlineHtmlHasLeadingNewline(#1));
396+
}
394397
| expr semi {
395398
$e = $1;
396399
if ($e instanceof Expr\Throw_) {
@@ -419,7 +422,7 @@ non_empty_statement:
419422
statement:
420423
non_empty_statement
421424
| ';'
422-
{ makeNop($$, $this->startAttributeStack[#1], $this->endAttributes);
425+
{ makeNop($$);
423426
if ($$ === null) $$ = array(); /* means: no statement */ }
424427
;
425428

@@ -834,7 +837,7 @@ class_statement_list_ex:
834837

835838
class_statement_list:
836839
class_statement_list_ex
837-
{ makeZeroLengthNop($nop, $this->lookaheadStartAttributes);
840+
{ makeZeroLengthNop($nop);
838841
if ($nop !== null) { $1[] = $nop; } $$ = $1; }
839842
;
840843

@@ -1337,7 +1340,7 @@ array_pair:
13371340
| /* empty */
13381341
{ /* Create an Error node now to remember the position. We'll later either report an error,
13391342
or convert this into a null element, depending on whether this is a creation or destructuring context. */
1340-
$attrs = $this->createEmptyElemAttributes($this->lookaheadStartAttributes);
1343+
$attrs = $this->createEmptyElemAttributes($this->tokenPos);
13411344
$$ = new Node\ArrayItem(new Expr\Error($attrs), null, false, $attrs); }
13421345
;
13431346

grammar/phpyLang.php

+10-16
Original file line numberDiff line numberDiff line change
@@ -65,13 +65,13 @@ function ($matches) {
6565

6666
if ('attributes' === $name) {
6767
assertArgs(0, $args, $name);
68-
return '$this->startAttributeStack[#1] + $this->endAttributes';
68+
return '$this->getAttributes($this->tokenStartStack[#1], $this->tokenEndStack[$stackPos])';
6969
}
7070

7171
if ('stackAttributes' === $name) {
7272
assertArgs(1, $args, $name);
73-
return '$this->startAttributeStack[' . $args[0] . ']'
74-
. ' + $this->endAttributeStack[' . $args[0] . ']';
73+
return '$this->getAttributes($this->tokenStartStack[' . $args[0] . '], '
74+
. ' $this->tokenEndStack[' . $args[0] . '])';
7575
}
7676

7777
if ('init' === $name) {
@@ -111,30 +111,24 @@ function ($matches) {
111111
}
112112

113113
if ('makeNop' === $name) {
114-
assertArgs(3, $args, $name);
114+
assertArgs(1, $args, $name);
115115

116-
return '$startAttributes = ' . $args[1] . ';'
117-
. ' if (isset($startAttributes[\'comments\']))'
118-
. ' { ' . $args[0] . ' = new Stmt\Nop($startAttributes + ' . $args[2] . '); }'
119-
. ' else { ' . $args[0] . ' = null; }';
116+
return $args[0] . ' = $this->maybeCreateNop($this->tokenStartStack[#1], $this->tokenEndStack[$stackPos])';
120117
}
121118

122119
if ('makeZeroLengthNop' == $name) {
123-
assertArgs(2, $args, $name);
120+
assertArgs(1, $args, $name);
124121

125-
return '$startAttributes = ' . $args[1] . ';'
126-
. ' if (isset($startAttributes[\'comments\']))'
127-
. ' { ' . $args[0] . ' = new Stmt\Nop($this->createCommentNopAttributes($startAttributes[\'comments\'])); }'
128-
. ' else { ' . $args[0] . ' = null; }';
122+
return $args[0] . ' = $this->maybeCreateZeroLengthNop($this->tokenPos);';
129123
}
130124

131125
if ('prependLeadingComments' === $name) {
132126
assertArgs(1, $args, $name);
133127

134-
return '$attrs = $this->startAttributeStack[#1]; $stmts = ' . $args[0] . '; '
135-
. 'if (!empty($attrs[\'comments\'])) {'
128+
return '$comments = $this->getCommentsBeforeToken($this->tokenStartStack[#1]); $stmts = ' . $args[0] . '; '
129+
. 'if (!empty($comments)) {'
136130
. '$stmts[0]->setAttribute(\'comments\', '
137-
. 'array_merge($attrs[\'comments\'], $stmts[0]->getAttribute(\'comments\', []))); }';
131+
. 'array_merge($comments, $stmts[0]->getAttribute(\'comments\', []))); }';
138132
}
139133

140134
return $matches[0];

lib/PhpParser/Lexer.php

-151
Original file line numberDiff line numberDiff line change
@@ -5,60 +5,8 @@
55
require __DIR__ . '/compatibility_tokens.php';
66

77
class Lexer {
8-
/** @var string Code being tokenized */
9-
protected $code;
108
/** @var list<Token> List of tokens */
119
protected $tokens;
12-
/** @var int Current position in the token array */
13-
protected $pos;
14-
/** @var bool Whether the preceding closing PHP tag has a trailing newline */
15-
protected $prevCloseTagHasNewline;
16-
/** @var array<int, int> Map of tokens that should be dropped (like T_WHITESPACE) */
17-
protected $dropTokens;
18-
19-
/** @var bool Whether to use the startLine attribute */
20-
private $attributeStartLineUsed;
21-
/** @var bool Whether to use the endLine attribute */
22-
private $attributeEndLineUsed;
23-
/** @var bool Whether to use the startTokenPos attribute */
24-
private $attributeStartTokenPosUsed;
25-
/** @var bool Whether to use the endTokenPos attribute */
26-
private $attributeEndTokenPosUsed;
27-
/** @var bool Whether to use the startFilePos attribute */
28-
private $attributeStartFilePosUsed;
29-
/** @var bool Whether to use the endFilePos attribute */
30-
private $attributeEndFilePosUsed;
31-
/** @var bool Whether to use the comments attribute */
32-
private $attributeCommentsUsed;
33-
34-
/**
35-
* Creates a Lexer.
36-
*
37-
* @param array{usedAttributes?: string[]} $options Options array. Currently only the
38-
* 'usedAttributes' option is supported, which is an array of attributes to add to the
39-
* AST nodes. Possible attributes are: 'comments', 'startLine', 'endLine', 'startTokenPos',
40-
* 'endTokenPos', 'startFilePos', 'endFilePos'. The option defaults to the first three.
41-
* For more info see getNextToken() docs.
42-
*/
43-
public function __construct(array $options = []) {
44-
// map of tokens to drop while lexing (the map is only used for isset lookup,
45-
// that's why the value is simply set to 1; the value is never actually used.)
46-
$this->dropTokens = array_fill_keys(
47-
[\T_WHITESPACE, \T_OPEN_TAG, \T_COMMENT, \T_DOC_COMMENT, \T_BAD_CHARACTER], 1
48-
);
49-
50-
$defaultAttributes = ['comments', 'startLine', 'endLine'];
51-
$usedAttributes = array_fill_keys($options['usedAttributes'] ?? $defaultAttributes, true);
52-
53-
// Create individual boolean properties to make these checks faster.
54-
$this->attributeStartLineUsed = isset($usedAttributes['startLine']);
55-
$this->attributeEndLineUsed = isset($usedAttributes['endLine']);
56-
$this->attributeStartTokenPosUsed = isset($usedAttributes['startTokenPos']);
57-
$this->attributeEndTokenPosUsed = isset($usedAttributes['endTokenPos']);
58-
$this->attributeStartFilePosUsed = isset($usedAttributes['startFilePos']);
59-
$this->attributeEndFilePosUsed = isset($usedAttributes['endFilePos']);
60-
$this->attributeCommentsUsed = isset($usedAttributes['comments']);
61-
}
6210

6311
/**
6412
* Initializes the lexer for lexing the provided source code.
@@ -75,13 +23,6 @@ public function startLexing(string $code, ?ErrorHandler $errorHandler = null): v
7523
$errorHandler = new ErrorHandler\Throwing();
7624
}
7725

78-
$this->code = $code; // keep the code around for __halt_compiler() handling
79-
$this->pos = -1;
80-
81-
// If inline HTML occurs without preceding code, treat it as if it had a leading newline.
82-
// This ensures proper composability, because having a newline is the "safe" assumption.
83-
$this->prevCloseTagHasNewline = true;
84-
8526
$scream = ini_set('xdebug.scream', '0');
8627

8728
$this->tokens = @Token::tokenize($code);
@@ -165,84 +106,6 @@ protected function postprocessTokens(ErrorHandler $errorHandler): void {
165106
$this->tokens[] = new Token(0, "\0", $lastToken->getEndLine(), $lastToken->getEndPos());
166107
}
167108

168-
/**
169-
* Fetches the next token.
170-
*
171-
* The available attributes are determined by the 'usedAttributes' option, which can
172-
* be specified in the constructor. The following attributes are supported:
173-
*
174-
* * 'comments' => Array of PhpParser\Comment or PhpParser\Comment\Doc instances,
175-
* representing all comments that occurred between the previous
176-
* non-discarded token and the current one.
177-
* * 'startLine' => Line in which the node starts.
178-
* * 'endLine' => Line in which the node ends.
179-
* * 'startTokenPos' => Offset into the token array of the first token in the node.
180-
* * 'endTokenPos' => Offset into the token array of the last token in the node.
181-
* * 'startFilePos' => Offset into the code string of the first character that is part of the node.
182-
* * 'endFilePos' => Offset into the code string of the last character that is part of the node.
183-
*
184-
* @param mixed $value Variable to store token content in
185-
* @param mixed $startAttributes Variable to store start attributes in
186-
* @param mixed $endAttributes Variable to store end attributes in
187-
*
188-
* @return int Token id
189-
*/
190-
public function getNextToken(&$value = null, &$startAttributes = null, &$endAttributes = null): int {
191-
$startAttributes = [];
192-
$endAttributes = [];
193-
194-
while (1) {
195-
$token = $this->tokens[++$this->pos];
196-
197-
$id = $token->id;
198-
if (isset($this->dropTokens[$id])) {
199-
if (\T_COMMENT === $id || \T_DOC_COMMENT === $id) {
200-
if ($this->attributeCommentsUsed) {
201-
$comment = \T_DOC_COMMENT === $id
202-
? new Comment\Doc($token->text, $token->line, $token->pos, $this->pos,
203-
$token->getEndLine(), $token->getEndPos() - 1, $this->pos)
204-
: new Comment($token->text, $token->line, $token->pos, $this->pos,
205-
$token->getEndLine(), $token->getEndPos() - 1, $this->pos);
206-
$startAttributes['comments'][] = $comment;
207-
}
208-
}
209-
continue;
210-
}
211-
212-
if ($this->attributeStartLineUsed) {
213-
$startAttributes['startLine'] = $token->line;
214-
}
215-
if ($this->attributeStartTokenPosUsed) {
216-
$startAttributes['startTokenPos'] = $this->pos;
217-
}
218-
if ($this->attributeStartFilePosUsed) {
219-
$startAttributes['startFilePos'] = $token->pos;
220-
}
221-
222-
$value = $token->text;
223-
if (\T_CLOSE_TAG === $token->id) {
224-
$this->prevCloseTagHasNewline = false !== strpos($value, "\n")
225-
|| false !== strpos($value, "\r");
226-
} elseif (\T_INLINE_HTML === $token->id) {
227-
$startAttributes['hasLeadingNewline'] = $this->prevCloseTagHasNewline;
228-
}
229-
230-
// Fetch the end line/pos from the next token (if available) instead of recomputing it.
231-
$nextToken = $this->tokens[$this->pos + 1] ?? null;
232-
if ($this->attributeEndLineUsed) {
233-
$endAttributes['endLine'] = $nextToken ? $nextToken->line : $token->getEndLine();
234-
}
235-
if ($this->attributeEndTokenPosUsed) {
236-
$endAttributes['endTokenPos'] = $this->pos;
237-
}
238-
if ($this->attributeEndFilePosUsed) {
239-
$endAttributes['endFilePos'] = ($nextToken ? $nextToken->pos : $token->getEndPos()) - 1;
240-
}
241-
242-
return $id;
243-
}
244-
}
245-
246109
/**
247110
* Returns the token array for current code.
248111
*
@@ -259,18 +122,4 @@ public function getNextToken(&$value = null, &$startAttributes = null, &$endAttr
259122
public function getTokens(): array {
260123
return $this->tokens;
261124
}
262-
263-
/**
264-
* Handles __halt_compiler() by returning the text after it.
265-
*
266-
* @return string Remaining text
267-
*/
268-
public function handleHaltCompiler(): string {
269-
// Prevent the lexer from returning any further tokens.
270-
$nextToken = $this->tokens[$this->pos + 1];
271-
$this->pos = \count($this->tokens) - 2;
272-
273-
// Return text after __halt_compiler.
274-
return $nextToken->id === \T_INLINE_HTML ? $nextToken->text : '';
275-
}
276125
}

lib/PhpParser/Lexer/Emulative.php

+3-12
Original file line numberDiff line numberDiff line change
@@ -33,20 +33,11 @@ class Emulative extends Lexer {
3333
private $hostPhpVersion;
3434

3535
/**
36-
* @param array{usedAttributes?: string[], phpVersion?: PhpVersion|string} $options Lexer options.
37-
* In addition to the usual options, accepts a 'phpVersion' (PhpVersion object or string)
38-
* that specifies the version to emulate. Defaults to newest supported.
36+
* @param PhpVersion|null $phpVersion PHP version to emulate. Defaults to newest supported.
3937
*/
40-
public function __construct(array $options = []) {
41-
$version = $options['phpVersion'] ?? PhpVersion::getNewestSupported();
42-
if (!$version instanceof PhpVersion) {
43-
$version = PhpVersion::fromString($version);
44-
}
45-
$this->targetPhpVersion = $version;
38+
public function __construct(?PhpVersion $phpVersion = null) {
39+
$this->targetPhpVersion = $phpVersion ?? PhpVersion::getNewestSupported();
4640
$this->hostPhpVersion = PhpVersion::getHostVersion();
47-
unset($options['phpVersion']);
48-
49-
parent::__construct($options);
5041

5142
$emulators = [
5243
new FlexibleDocStringEmulator(),

0 commit comments

Comments
 (0)