5
5
require __DIR__ . '/compatibility_tokens.php ' ;
6
6
7
7
class Lexer {
8
- /** @var string Code being tokenized */
9
- protected $ code ;
10
8
/** @var list<Token> List of tokens */
11
9
protected $ tokens ;
12
- /** @var int Current position in the token array */
13
- protected $ pos ;
14
- /** @var bool Whether the preceding closing PHP tag has a trailing newline */
15
- protected $ prevCloseTagHasNewline ;
16
- /** @var array<int, int> Map of tokens that should be dropped (like T_WHITESPACE) */
17
- protected $ dropTokens ;
18
-
19
- /** @var bool Whether to use the startLine attribute */
20
- private $ attributeStartLineUsed ;
21
- /** @var bool Whether to use the endLine attribute */
22
- private $ attributeEndLineUsed ;
23
- /** @var bool Whether to use the startTokenPos attribute */
24
- private $ attributeStartTokenPosUsed ;
25
- /** @var bool Whether to use the endTokenPos attribute */
26
- private $ attributeEndTokenPosUsed ;
27
- /** @var bool Whether to use the startFilePos attribute */
28
- private $ attributeStartFilePosUsed ;
29
- /** @var bool Whether to use the endFilePos attribute */
30
- private $ attributeEndFilePosUsed ;
31
- /** @var bool Whether to use the comments attribute */
32
- private $ attributeCommentsUsed ;
33
-
34
- /**
35
- * Creates a Lexer.
36
- *
37
- * @param array{usedAttributes?: string[]} $options Options array. Currently only the
38
- * 'usedAttributes' option is supported, which is an array of attributes to add to the
39
- * AST nodes. Possible attributes are: 'comments', 'startLine', 'endLine', 'startTokenPos',
40
- * 'endTokenPos', 'startFilePos', 'endFilePos'. The option defaults to the first three.
41
- * For more info see getNextToken() docs.
42
- */
43
- public function __construct (array $ options = []) {
44
- // map of tokens to drop while lexing (the map is only used for isset lookup,
45
- // that's why the value is simply set to 1; the value is never actually used.)
46
- $ this ->dropTokens = array_fill_keys (
47
- [\T_WHITESPACE , \T_OPEN_TAG , \T_COMMENT , \T_DOC_COMMENT , \T_BAD_CHARACTER ], 1
48
- );
49
-
50
- $ defaultAttributes = ['comments ' , 'startLine ' , 'endLine ' ];
51
- $ usedAttributes = array_fill_keys ($ options ['usedAttributes ' ] ?? $ defaultAttributes , true );
52
-
53
- // Create individual boolean properties to make these checks faster.
54
- $ this ->attributeStartLineUsed = isset ($ usedAttributes ['startLine ' ]);
55
- $ this ->attributeEndLineUsed = isset ($ usedAttributes ['endLine ' ]);
56
- $ this ->attributeStartTokenPosUsed = isset ($ usedAttributes ['startTokenPos ' ]);
57
- $ this ->attributeEndTokenPosUsed = isset ($ usedAttributes ['endTokenPos ' ]);
58
- $ this ->attributeStartFilePosUsed = isset ($ usedAttributes ['startFilePos ' ]);
59
- $ this ->attributeEndFilePosUsed = isset ($ usedAttributes ['endFilePos ' ]);
60
- $ this ->attributeCommentsUsed = isset ($ usedAttributes ['comments ' ]);
61
- }
62
10
63
11
/**
64
12
* Initializes the lexer for lexing the provided source code.
@@ -75,13 +23,6 @@ public function startLexing(string $code, ?ErrorHandler $errorHandler = null): v
75
23
$ errorHandler = new ErrorHandler \Throwing ();
76
24
}
77
25
78
- $ this ->code = $ code ; // keep the code around for __halt_compiler() handling
79
- $ this ->pos = -1 ;
80
-
81
- // If inline HTML occurs without preceding code, treat it as if it had a leading newline.
82
- // This ensures proper composability, because having a newline is the "safe" assumption.
83
- $ this ->prevCloseTagHasNewline = true ;
84
-
85
26
$ scream = ini_set ('xdebug.scream ' , '0 ' );
86
27
87
28
$ this ->tokens = @Token::tokenize ($ code );
@@ -165,84 +106,6 @@ protected function postprocessTokens(ErrorHandler $errorHandler): void {
165
106
$ this ->tokens [] = new Token (0 , "\0" , $ lastToken ->getEndLine (), $ lastToken ->getEndPos ());
166
107
}
167
108
168
- /**
169
- * Fetches the next token.
170
- *
171
- * The available attributes are determined by the 'usedAttributes' option, which can
172
- * be specified in the constructor. The following attributes are supported:
173
- *
174
- * * 'comments' => Array of PhpParser\Comment or PhpParser\Comment\Doc instances,
175
- * representing all comments that occurred between the previous
176
- * non-discarded token and the current one.
177
- * * 'startLine' => Line in which the node starts.
178
- * * 'endLine' => Line in which the node ends.
179
- * * 'startTokenPos' => Offset into the token array of the first token in the node.
180
- * * 'endTokenPos' => Offset into the token array of the last token in the node.
181
- * * 'startFilePos' => Offset into the code string of the first character that is part of the node.
182
- * * 'endFilePos' => Offset into the code string of the last character that is part of the node.
183
- *
184
- * @param mixed $value Variable to store token content in
185
- * @param mixed $startAttributes Variable to store start attributes in
186
- * @param mixed $endAttributes Variable to store end attributes in
187
- *
188
- * @return int Token id
189
- */
190
- public function getNextToken (&$ value = null , &$ startAttributes = null , &$ endAttributes = null ): int {
191
- $ startAttributes = [];
192
- $ endAttributes = [];
193
-
194
- while (1 ) {
195
- $ token = $ this ->tokens [++$ this ->pos ];
196
-
197
- $ id = $ token ->id ;
198
- if (isset ($ this ->dropTokens [$ id ])) {
199
- if (\T_COMMENT === $ id || \T_DOC_COMMENT === $ id ) {
200
- if ($ this ->attributeCommentsUsed ) {
201
- $ comment = \T_DOC_COMMENT === $ id
202
- ? new Comment \Doc ($ token ->text , $ token ->line , $ token ->pos , $ this ->pos ,
203
- $ token ->getEndLine (), $ token ->getEndPos () - 1 , $ this ->pos )
204
- : new Comment ($ token ->text , $ token ->line , $ token ->pos , $ this ->pos ,
205
- $ token ->getEndLine (), $ token ->getEndPos () - 1 , $ this ->pos );
206
- $ startAttributes ['comments ' ][] = $ comment ;
207
- }
208
- }
209
- continue ;
210
- }
211
-
212
- if ($ this ->attributeStartLineUsed ) {
213
- $ startAttributes ['startLine ' ] = $ token ->line ;
214
- }
215
- if ($ this ->attributeStartTokenPosUsed ) {
216
- $ startAttributes ['startTokenPos ' ] = $ this ->pos ;
217
- }
218
- if ($ this ->attributeStartFilePosUsed ) {
219
- $ startAttributes ['startFilePos ' ] = $ token ->pos ;
220
- }
221
-
222
- $ value = $ token ->text ;
223
- if (\T_CLOSE_TAG === $ token ->id ) {
224
- $ this ->prevCloseTagHasNewline = false !== strpos ($ value , "\n" )
225
- || false !== strpos ($ value , "\r" );
226
- } elseif (\T_INLINE_HTML === $ token ->id ) {
227
- $ startAttributes ['hasLeadingNewline ' ] = $ this ->prevCloseTagHasNewline ;
228
- }
229
-
230
- // Fetch the end line/pos from the next token (if available) instead of recomputing it.
231
- $ nextToken = $ this ->tokens [$ this ->pos + 1 ] ?? null ;
232
- if ($ this ->attributeEndLineUsed ) {
233
- $ endAttributes ['endLine ' ] = $ nextToken ? $ nextToken ->line : $ token ->getEndLine ();
234
- }
235
- if ($ this ->attributeEndTokenPosUsed ) {
236
- $ endAttributes ['endTokenPos ' ] = $ this ->pos ;
237
- }
238
- if ($ this ->attributeEndFilePosUsed ) {
239
- $ endAttributes ['endFilePos ' ] = ($ nextToken ? $ nextToken ->pos : $ token ->getEndPos ()) - 1 ;
240
- }
241
-
242
- return $ id ;
243
- }
244
- }
245
-
246
109
/**
247
110
* Returns the token array for current code.
248
111
*
@@ -259,18 +122,4 @@ public function getNextToken(&$value = null, &$startAttributes = null, &$endAttr
259
122
public function getTokens (): array {
260
123
return $ this ->tokens ;
261
124
}
262
-
263
- /**
264
- * Handles __halt_compiler() by returning the text after it.
265
- *
266
- * @return string Remaining text
267
- */
268
- public function handleHaltCompiler (): string {
269
- // Prevent the lexer from returning any further tokens.
270
- $ nextToken = $ this ->tokens [$ this ->pos + 1 ];
271
- $ this ->pos = \count ($ this ->tokens ) - 2 ;
272
-
273
- // Return text after __halt_compiler.
274
- return $ nextToken ->id === \T_INLINE_HTML ? $ nextToken ->text : '' ;
275
- }
276
125
}
0 commit comments