Scanner.php
Same filename in this branch
Namespace
Peast\Syntax
File
-
vendor/
mck89/ peast/ lib/ Peast/ Syntax/ Scanner.php
View source
<?php
/**
* This file is part of the Peast package
*
* (c) Marco Marchiò <marco.mm89@gmail.com>
*
* For the full copyright and license information refer to the LICENSE file
* distributed with this source code
*/
namespace Peast\Syntax;
/**
* Base class for scanners.
*
* @author Marco Marchiò <marco.mm89@gmail.com>
*/
class Scanner {
use JSX\Scanner;
/**
* Scanner features
*
* @var Features
*/
protected $features;
/**
* Current column
*
* @var int
*/
protected $column = 0;
/**
* Current line
*
* @var int
*/
protected $line = 1;
/**
* Current index
*
* @var int
*/
protected $index = 0;
/**
* Source length
*
* @var int
*/
protected $length;
/**
* Source characters
*
* @var array
*/
protected $source;
/**
* Consumed position
*
* @var Position
*/
protected $position;
/**
* Current token
*
* @var Token
*/
protected $currentToken;
/**
* Next token
*
* @var Token
*/
protected $nextToken;
/**
* Strict mode flag
*
* @var bool
*/
protected $strictMode = false;
/**
* True to register tokens in the tokens array
*
* @var bool
*/
protected $registerTokens = false;
/**
* Module mode
*
* @var bool
*/
protected $isModule = false;
/**
* Comments handling
*
* @var bool
*/
protected $comments = false;
/**
* Internal JSX scan flag
*
* @var bool
*/
protected $jsx = false;
/**
* Registered tokens array
*
* @var array
*/
protected $tokens = array();
/**
* Comments to tokens map
*
* @var array
*/
protected $commentsMap = array();
/**
* Events emitter
*
* @var EventsEmitter
*/
protected $eventsEmitter;
/**
* Regex to match identifiers starts
*
* @var string
*/
protected $idStartRegex = "/[\\p{Lu}\\p{Ll}\\p{Lt}\\p{Lm}\\p{Lo}\\p{Nl}\\x{1885}\\x{1886}\\x{2118}\\x{212E}\\x{309B}\\x{309C}]/u";
/**
* Regex to match identifiers parts
*
* @var string
*/
protected $idPartRegex = "/[\\p{Lu}\\p{Ll}\\p{Lt}\\p{Lm}\\p{Lo}\\p{Nl}\\x{1885}\\x{1886}\\x{2118}\\x{212E}\\x{309B}\\x{309C}\\p{Mn}\\p{Mc}\\p{Nd}\\p{Pc}\\x{00B7}\\x{0387}\\x{1369}\\x{136A}\\x{136B}\\x{136C}\\x{136D}\\x{136E}\\x{136F}\\x{1370}\\x{1371}\\x{19DA}\\x{200C}\\x{200D}]/u";
/**
* Keywords array
*
* @var array
*/
protected $keywords = array(
"break",
"do",
"in",
"typeof",
"case",
"else",
"instanceof",
"var",
"catch",
"export",
"new",
"void",
"class",
"extends",
"return",
"while",
"const",
"finally",
"super",
"with",
"continue",
"for",
"switch",
"debugger",
"function",
"this",
"default",
"if",
"throw",
"delete",
"import",
"try",
"enum",
"await",
);
/**
* Array of words that are keywords only in strict mode
*
* @var array
*/
protected $strictModeKeywords = array(
"implements",
"interface",
"package",
"private",
"protected",
"public",
"static",
"let",
"yield",
);
/**
* Punctuators array
*
* @var array
*/
protected $punctuators = array(
".",
";",
",",
"<",
">",
"<=",
">=",
"==",
"!=",
"===",
"!==",
"+",
"-",
"*",
"%",
"++",
"--",
"<<",
">>",
">>>",
"&",
"|",
"^",
"!",
"~",
"&&",
"||",
"?",
":",
"=",
"+=",
"-=",
"*=",
"%=",
"<<=",
">>=",
">>>=",
"&=",
"|=",
"^=",
"=>",
"...",
"/",
"/=",
"**",
"**=",
"??",
"?.",
"&&=",
"||=",
"??=",
);
/**
* Punctuators LSM
*
* @var LSM
*/
protected $punctuatorsLSM;
/**
* Strings stops LSM
*
* @var LSM
*/
protected $stringsStopsLSM;
/**
* Brackets array
*
* @var array
*/
protected $brackets = array(
"(" => "",
"[" => "",
"{" => "",
")" => "(",
"]" => "[",
"}" => "{",
);
/**
* Open brackets array
*
* @var array
*/
protected $openBrackets = array();
/**
* Open templates array
*
* @var array
*/
protected $openTemplates = array();
/**
 * Whitespaces array. Integer entries are Unicode code points; the class
 * constructor replaces them with the corresponding UTF-8 character.
 *
 * @var array
 */
protected $whitespaces = array(
    " ",
    "\t",
    "\n",
    "\r",
    "\f",
    "\v",
    0xa0,
    0xfeff,
    0x1680,
    0x2000,
    0x2001,
    0x2002,
    0x2003,
    0x2004,
    0x2005,
    0x2006,
    0x2007,
    0x2008,
    0x2009,
    0x200a,
    0x202f,
    0x205f,
    0x3000,
    0x2028,
    0x2029,
);
/**
* Line terminators characters array
*
* @var array
*
* @static
*/
public static $lineTerminatorsChars = array(
"\n",
"\r",
0x2028,
0x2029,
);
/**
* Line terminators sequences array
*
* @var array
*
* @static
*/
public static $lineTerminatorsSequences = array(
"\r\n",
);
/**
* Regex to split texts using valid ES line terminators
*
* @var string
*/
protected $linesSplitter;
/**
* Concatenation of line terminators characters and line terminators
* sequences
*
* @var array
*/
protected $lineTerminators;
/**
* Properties to copy when getting the scanner state
*
* @var array
*/
protected $stateProps = array(
"position",
"index",
"column",
"line",
"currentToken",
"nextToken",
"strictMode",
"openBrackets",
"openTemplates",
"commentsMap",
);
/**
* Decimal numbers
*
* @var array
*/
protected $numbers = array(
"0",
"1",
"2",
"3",
"4",
"5",
"6",
"7",
"8",
"9",
);
/**
* Hexadecimal numbers
*
* @var array
*/
protected $xnumbers = array(
"0",
"1",
"2",
"3",
"4",
"5",
"6",
"7",
"8",
"9",
"a",
"b",
"c",
"d",
"e",
"f",
"A",
"B",
"C",
"D",
"E",
"F",
);
/**
* Octal numbers
*
* @var array
*/
protected $onumbers = array(
"0",
"1",
"2",
"3",
"4",
"5",
"6",
"7",
);
/**
* Binary numbers
*
* @var array
*/
protected $bnumbers = array(
"0",
"1",
);
/**
 * Builds a scanner for the given source code.
 *
 * The BOM is stripped from the source, the source is converted to UTF-8
 * when a different encoding is declared (or detected from the BOM) and it
 * is then split into an array of characters. Punctuators, keywords and
 * string stops are finally adjusted according to the enabled features.
 *
 * @param string   $source   Source code
 * @param Features $features Scanner features
 * @param array    $options  Parsing options; the "sourceEncoding" and
 *                           "strictEncoding" keys are read here
 */
function __construct($source, Features $features, $options) {
    $this->features = $features;

    //Encoding declared by the caller, if any; stripBOM fills it in from
    //the BOM when it is missing
    $encoding = null;
    if (isset($options["sourceEncoding"])) {
        $encoding = $options["sourceEncoding"];
    }
    $this->stripBOM($source, $encoding);

    //Anything that is not already UTF-8 gets converted
    if ($encoding && !preg_match("/UTF-?8/i", $encoding)) {
        $source = mb_convert_encoding($source, "UTF-8", $encoding);
    }

    //Work on an array of UTF-8 characters instead of calling mb_substr
    //for every character, for performance reasons
    $strictEncoding = !isset($options["strictEncoding"]) || $options["strictEncoding"];
    $this->source = Utils::stringToUTF8Array($source, $strictEncoding);
    $this->length = count($this->source);

    //Replace the code points listed in whitespaces and line terminators
    //with the matching UTF-8 characters
    $this->lineTerminators = array_merge(self::$lineTerminatorsSequences, self::$lineTerminatorsChars);
    foreach (array("whitespaces", "lineTerminators") as $listName) {
        foreach ($this->{$listName} as $offset => $entry) {
            if (is_int($entry)) {
                $this->{$listName}[$offset] = Utils::unicodeToUtf8($entry);
            }
        }
    }

    //Collect the punctuators that belong to disabled features and drop
    //them before building the punctuators LSM
    $disabledPunctuators = array();
    if (!$this->features->exponentiationOperator) {
        $disabledPunctuators[] = "**";
        $disabledPunctuators[] = "**=";
    }
    if (!$this->features->optionalChaining) {
        $disabledPunctuators[] = "?.";
    }
    if (!$this->features->logicalAssignmentOperators) {
        $disabledPunctuators[] = "&&=";
        $disabledPunctuators[] = "||=";
        $disabledPunctuators[] = "??=";
    }
    foreach ($disabledPunctuators as $punctuator) {
        Utils::removeArrayValue($this->punctuators, $punctuator);
    }
    $this->punctuatorsLSM = new LSM($this->punctuators);

    //Strings stop at line terminators, except for paragraph and line
    //separators when the related feature allows them inside strings
    $this->stringsStopsLSM = new LSM($this->lineTerminators, true);
    if ($this->features->paragraphLineSepInStrings) {
        foreach (array(0x2028, 0x2029) as $separator) {
            $this->stringsStopsLSM->remove(Utils::unicodeToUtf8($separator));
        }
    }

    //"await" is not handled as a keyword when async/await is enabled
    if ($this->features->asyncAwait) {
        Utils::removeArrayValue($this->keywords, "await");
    }

    $this->linesSplitter = "/" . implode("|", $this->lineTerminators) . "/uS";
    $this->position = new Position(0, 0, 0);
}
/**
 * Removes a leading byte order mark from the source. When the caller did
 * not specify an encoding, the one implied by the BOM is stored in
 * $encoding.
 *
 * @param string $source   Source, modified in place
 * @param string $encoding User specified encoding, filled in place when
 *                         empty and a BOM is found
 */
public function stripBOM(&$source, &$encoding) {
    if (!isset($source[0])) {
        return;
    }
    //Each entry holds the BOM bytes, their length and the implied encoding
    $signatures = array(
        array("\xef\xbb\xbf", 3, "UTF-8"),
        array("\xfe\xff", 2, "UTF-16BE"),
        array("\xff\xfe", 2, "UTF-16LE"),
    );
    foreach ($signatures as $entry) {
        list($signature, $size, $detected) = $entry;
        if (substr($source, 0, $size) !== $signature) {
            continue;
        }
        $source = substr($source, $size);
        if (!$encoding) {
            $encoding = $detected;
        }
        return;
    }
}
/**
 * Turns module scanning mode on or off
 *
 * @param bool $enable True to scan the source as a module, false to scan
 *                     it as a script
 *
 * @return $this
 */
public function enableModuleMode($enable = true) {
    $this->isModule = $enable;

    return $this;
}
/**
 * Turns comments handling on or off
 *
 * @param bool $enable True to handle comments, false to ignore them
 *
 * @return $this
 */
public function enableComments($enable = true) {
    $this->comments = $enable;

    return $this;
}
/**
 * Turns the registration of consumed tokens in the tokens array on or off
 *
 * @param bool $enable True to register consumed tokens, false otherwise
 *
 * @return $this
 */
public function enableTokenRegistration($enable = true) {
    $this->registerTokens = $enable;

    return $this;
}
/**
 * Returns the tokens registered so far
 *
 * @return array
 */
public function getTokens() {
    $registered = $this->tokens;

    return $registered;
}
/**
 * Returns the scanner's event emitter, creating it on first use
 *
 * @return EventsEmitter
 */
public function getEventsEmitter() {
    //The emitter is built lazily so that it does not exist unless someone
    //asks for it
    $this->eventsEmitter = $this->eventsEmitter ?: new EventsEmitter();

    return $this->eventsEmitter;
}
/**
 * Sets the strict mode state
 *
 * @param bool $strictMode True to enable strict mode, false to disable it
 *
 * @return $this
 */
public function setStrictMode($strictMode) {
    $this->strictMode = $strictMode;

    return $this;
}
/**
 * Returns the strict mode state
 *
 * @return bool
 */
public function getStrictMode() {
    $state = $this->strictMode;

    return $state;
}
/**
 * Tells whether the given token is a keyword, taking the current strict
 * mode state into account
 *
 * @param Token $token Token to check
 *
 * @return bool
 */
public function isStrictModeKeyword($token) {
    if ($token->type !== Token::TYPE_KEYWORD) {
        return false;
    }
    //Words in the keywords array are keywords regardless of strict mode
    if (in_array($token->value, $this->keywords)) {
        return true;
    }
    //The remaining words are keywords in strict mode only
    return $this->strictMode && in_array($token->value, $this->strictModeKeywords);
}
/**
 * Captures the current scanner state so that it can be restored later with
 * setState
 *
 * @return array
 */
public function getState() {
    //Scan the current and next tokens so that they won't be parsed again
    //if the state is restored. When the current token is a slash the next
    //token is not scanned: this avoids edge cases where a regexp
    //containing something that looks like a comment would have its
    //content parsed as a real comment too
    $pending = $this->currentToken;
    if (!$pending) {
        $pending = $this->getToken();
    }
    if ($pending && $pending->value !== "/") {
        $this->getNextToken();
    }

    //Copy every property listed in stateProps
    $state = array();
    foreach ($this->stateProps as $name) {
        $state[$name] = $this->{$name};
    }
    if ($this->registerTokens) {
        $state["tokensNum"] = count($this->tokens);
    }

    //Fire the FreezeState event passing the state by reference, so that
    //listeners can add their own data to it
    if ($this->eventsEmitter) {
        $this->eventsEmitter->fire("FreezeState", array(
            &$state,
        ));
    }

    return $state;
}
/**
 * Restores a scanner state previously returned by getState
 *
 * The "tokensNum" entry, present only when the state was captured while
 * token registration was enabled, is used to drop the tokens registered
 * after the capture. It is always removed before the remaining entries are
 * copied onto the scanner, so it never becomes a property of the scanner.
 *
 * @param array $state State
 *
 * @return $this
 */
public function setState($state) {
    if ($this->registerTokens
        && isset($state["tokensNum"])
        && isset($this->tokens[$state["tokensNum"]])
    ) {
        //Tokens have been registered after the capture: remove them
        array_splice($this->tokens, $state["tokensNum"]);
    }
    unset($state["tokensNum"]);

    //Emit the ResetState event and pass the given state
    if ($this->eventsEmitter) {
        $this->eventsEmitter->fire("ResetState", array(
            &$state,
        ));
    }

    foreach ($state as $key => $value) {
        $this->{$key} = $value;
    }

    return $this;
}
/**
 * Returns the scanner position
 *
 * @param bool $scanPosition By default the consumed position is returned;
 *                           when this parameter is true a new position
 *                           built from the current scan line, column and
 *                           index is returned instead
 *
 * @return Position
 */
public function getPosition($scanPosition = false) {
    return $scanPosition
        ? new Position($this->line, $this->column, $this->index)
        : $this->position;
}
/**
 * Moves the scan position to the given one
 *
 * @param Position $position Position whose index, column and line become
 *                           the current scan position
 *
 * @return $this
 */
public function setScanPosition(Position $position) {
    $this->index = $position->getIndex();
    $this->column = $position->getColumn();
    $this->line = $position->getLine();

    return $this;
}
/**
 * Returns the character at the given index in the source code, or null if
 * the index is outside the source (before its beginning or past its end).
 *
 * @param int $index Index, if not given the current index is used
 *
 * @return string|null
 */
public function charAt($index = null) {
    if ($index === null) {
        $index = $this->index;
    }
    //Negative indexes are rejected explicitly: callers that look behind
    //the current position may ask for the character before index 0
    if ($index < 0 || $index >= $this->length) {
        return null;
    }
    return $this->source[$index];
}
/**
 * Throws a syntax error located at the current scan position
 *
 * @param string $message Error message; when empty, a message reporting the
 *                        character at the current index is used
 *
 * @return void
 *
 * @throws Exception
 */
protected function error($message = null) {
    $text = $message ?: "Unexpected " . $this->charAt();

    throw new Exception($text, $this->getPosition(true));
}
/**
 * Consumes the current token and makes the next token, if already scanned,
 * the current one
 *
 * @return $this
 */
public function consumeToken() {
    //The consumed position becomes the end of the token being consumed
    $this->position = $this->currentToken->location->end;

    //Comments associated with the token are consumed before the token
    if ($this->comments) {
        $this->consumeCommentsForCurrentToken();
    }

    if ($this->registerTokens) {
        $this->tokens[] = $this->currentToken;
    }

    //Notify listeners that the token has been consumed
    if ($this->eventsEmitter) {
        $this->eventsEmitter->fire("TokenConsumed", array(
            $this->currentToken,
        ));
    }

    //Shift the tokens
    $this->currentToken = $this->nextToken;
    $this->nextToken = null;

    return $this;
}
/**
 * Consumes the current token if its value is the given string
 *
 * @param string $expected String to check
 *
 * @return Token|null The consumed token, or null if it does not match
 */
public function consume($expected) {
    //getToken is called only when there is no pending token, for
    //performance reasons
    $token = $this->currentToken;
    if (!$token) {
        $token = $this->getToken();
    }
    if (!$token || $token->value !== $expected) {
        return null;
    }
    $this->consumeToken();

    return $token;
}
/**
 * Checks if one of the given strings is matched, if so it consumes the
 * token
 *
 * Token values are compared with strict (type and value) equality, so that
 * numeric looking strings are never matched by PHP's loose comparison
 * rules.
 *
 * @param array $expected Strings to check
 *
 * @return Token|null The consumed token, or null if none of the strings
 *                    matches
 */
public function consumeOneOf($expected) {
    //Do not call getToken if there's already a pending token for
    //performance reasons
    $token = $this->currentToken ?: $this->getToken();
    if ($token && in_array($token->value, $expected, true)) {
        $this->consumeToken();
        return $token;
    }
    return null;
}
/**
 * Checks that the current token starts on a reference line, that is, that
 * no line terminators separate it from the reference
 *
 * @param bool $nextToken By default the reference is the line of the
 *                        consumed position; if this parameter is true the
 *                        reference is the line on which the next token
 *                        ends
 *
 * @return bool False also when there is no current token or, with
 *              $nextToken, no next token
 */
public function noLineTerminators($nextToken = false) {
    if ($nextToken) {
        $following = $this->getNextToken();
        $refLine = $following ? $following->location->end->getLine() : null;
    } else {
        $refLine = $this->getPosition()->getLine();
    }

    $token = $this->currentToken ?: $this->getToken();

    return $token && $token->location->start->getLine() === $refLine;
}
/**
 * Checks if one of the given strings follows the current scan position
 *
 * Each entry of $expected is either a string, compared with the current
 * token value, or a two elements array checked only when $nextToken is
 * true: its first element is compared with the current token value, and
 * its second element is either true (the current token must not be
 * followed by line terminators before the next token) or the value the
 * next token must have.
 *
 * @param string|array $expected  String or array of strings to check
 * @param bool         $nextToken This parameter must be true if the first
 *                                parameter contains arrays so that next
 *                                tokens are checked too
 *
 * @return bool
 */
public function isBefore($expected, $nextToken = false) {
    //A single string is accepted as documented: in_array requires an array
    if (!is_array($expected)) {
        $expected = array($expected);
    }
    $token = $this->currentToken ?: $this->getToken();
    if (!$token) {
        return false;
    }
    //Strict comparison: token values are strings and must not be matched
    //through loose type juggling
    if (in_array($token->value, $expected, true)) {
        return true;
    }
    if (!$nextToken || !$this->getNextToken()) {
        return false;
    }
    foreach ($expected as $val) {
        if (!is_array($val) || $val[0] !== $token->value) {
            continue;
        }
        //If the second value in the array is true check that the current
        //token is not followed by line terminators, otherwise compare its
        //value to the next token
        $matches = $val[1] === true
            ? $this->noLineTerminators(true)
            : $val[1] === $this->nextToken->value;
        if ($matches) {
            return true;
        }
    }
    return false;
}
/**
 * Returns the token that follows the current one, scanning it if needed
 *
 * @return Token|null
 */
public function getNextToken() {
    if ($this->nextToken) {
        return $this->nextToken;
    }
    //The current token is put aside while scanning, otherwise getToken
    //would simply return it again
    $current = $this->currentToken ?: $this->getToken();
    $this->currentToken = null;
    $this->nextToken = $this->getToken(true);
    $this->currentToken = $current;

    return $this->nextToken;
}
/**
* Returns the current token, scanning a new one from the source when no
* token is pending
*
* @param bool $skipEOFChecks True to skip the unclosed brackets and
* unterminated template checks when the end
* of the source is reached
*
* @return Token|null Null when the end of the source is reached, or when
* the token is not recognized but follows a slash
*
* @throws Exception
*/
public function getToken($skipEOFChecks = false) {
//The current token is returned until consumed
if ($this->currentToken) {
return $this->currentToken;
}
$comments = $this->skipWhitespacesAndComments();
//Emit the TokenCreated event for all the comments found
if ($comments) {
foreach ($comments as $comment) {
$this->eventsEmitter && $this->eventsEmitter
->fire("TokenCreated", array(
$comment,
));
}
}
//When the end of the source is reached
if ($this->index >= $this->length) {
//Check if there are open brackets
if (!$skipEOFChecks) {
foreach ($this->openBrackets as $bracket => $num) {
if ($num) {
$this->error("Unclosed {$bracket}");
}
}
//Check if there are open templates
if (count($this->openTemplates)) {
$this->error("Unterminated template");
}
}
//Register the trailing comments. There is no current token at this
//point, so they are stored under the empty key of the comments map
if ($this->comments && $comments) {
$this->commentsForCurrentToken($comments);
}
//Emit the EndReached event when at the end of the source
$this->eventsEmitter && $this->eventsEmitter
->fire("EndReached");
return null;
}
$startPosition = $this->getPosition(true);
$origException = null;
try {
//Try to match a token. The scan methods are tried in this exact order
//and the first one that returns a token wins; JSX specific methods are
//tried only when the internal JSX flag is set
if ($this->jsx && ($token = $this->scanJSXIdentifier()) || ($token = $this->scanTemplate()) || ($token = $this->scanNumber()) || $this->jsx && ($token = $this->scanJSXPunctuator()) || ($token = $this->scanPunctuator()) || ($token = $this->scanKeywordOrIdentifier()) || $this->jsx && ($token = $this->scanJSXString()) || ($token = $this->scanString())) {
//Set the token start and end positions
$token->location->start = $startPosition;
$token->location->end = $this->getPosition(true);
$this->currentToken = $token;
//Register comments if required
if ($this->comments && $comments) {
$this->commentsForCurrentToken($comments);
}
//Emit the TokenCreated event for the token just created
$this->eventsEmitter && $this->eventsEmitter
->fire("TokenCreated", array(
$this->currentToken,
));
return $this->currentToken;
}
} catch (Exception $e) {
//Keep the scan error: it is thrown again below unless the failed token
//follows a slash
$origException = $e;
}
//If last token was "/" do not throw an error if the token has not been
//recognized since it can be the first character in a regexp and it will
//be consumed when the current token will be reconsumed as a regexp
if ($this->isAfterSlash($startPosition)) {
$this->setScanPosition($startPosition);
return null;
}
//No valid token found. If there was a scan error, throw the same
//exception again, otherwise throw a new error
if ($origException) {
throw $origException;
}
$this->error();
}
/**
 * Performs the operations required when the scanning of the source ends:
 * pending comments are consumed and the EndReached event is fired
 *
 * @return $this
 */
public function consumeEnd() {
    //Consume final comments
    if ($this->comments) {
        $this->consumeCommentsForCurrentToken();
    }

    //Notify listeners that the end of the source has been reached
    if ($this->eventsEmitter) {
        $this->eventsEmitter->fire("EndReached");
    }

    return $this;
}
/**
 * Gets or sets the comments for the current token. When an array is given
 * it is stored in the comments map for the current token; otherwise the
 * comments stored for the current token are removed from the map and
 * returned. When there is no current token the empty string is used as map
 * key.
 *
 * @param array $comments Comments array
 *
 * @return array|null
 */
protected function commentsForCurrentToken($comments = null) {
    $key = $this->currentToken ? spl_object_hash($this->currentToken) : "";

    //Setter
    if ($comments !== null) {
        $this->commentsMap[$key] = $comments;
        return $comments;
    }

    //Getter: the entry is removed from the map once read
    if (!isset($this->commentsMap[$key])) {
        return null;
    }
    $stored = $this->commentsMap[$key];
    unset($this->commentsMap[$key]);

    return $stored;
}
/**
 * Consumes the comment tokens associated with the current token: each of
 * them is registered in the tokens array, if token registration is
 * enabled, and notified through the TokenConsumed event, if an events
 * emitter exists
 *
 * @return $this
 */
protected function consumeCommentsForCurrentToken() {
    //The comments are always taken out of the comments map, even when
    //nobody is interested in them
    $comments = $this->commentsForCurrentToken();
    if (!$comments || !($this->registerTokens || $this->eventsEmitter)) {
        return $this;
    }

    foreach ($comments as $comment) {
        if ($this->registerTokens) {
            $this->tokens[] = $comment;
        }
        if ($this->eventsEmitter) {
            $this->eventsEmitter->fire("TokenConsumed", array(
                $comment,
            ));
        }
    }

    return $this;
}
/**
 * Checks if the given position follows a slash, optionally separated from
 * it by whitespaces that are not line terminators. A "/=" sequence counts
 * as a slash too, while the slash that closes a multiline comment does
 * not.
 *
 * @param Position $position Position to check
 *
 * @return bool
 */
protected function isAfterSlash($position) {
    //Start from the previous index and loop until the beginning of the
    //source is reached
    $idx = $position->getIndex() - 1;
    while ($idx >= 0) {
        $char = $this->charAt($idx);
        if ($char === "/") {
            //A real slash, unless it closes a multiline comment
            return $idx === 0 || $this->charAt($idx - 1) !== "*";
        }
        if (in_array($char, $this->whitespaces) && !in_array($char, $this->lineTerminators)) {
            //Skip whitespaces that do not break the line
            $idx--;
            continue;
        }
        //The only other accepted character is the "=" of a "/=" sequence.
        //The index is checked first so that the character before the
        //beginning of the source is never requested
        return $char === "=" && $idx > 0 && $this->charAt($idx - 1) === "/";
    }
    return false;
}
/**
* Tries to reconsume the current token as a regexp if possible. The scan
* position is moved back to the start of the current token and the source
* is scanned again from there as a regular expression literal, including
* its flags.
*
* @return Token|null The regexp token, which also becomes the current
* token, or null if the current token does not start
* with "/"
*
* @throws Exception
*/
public function reconsumeCurrentTokenAsRegexp() {
$token = $this->currentToken ?: $this->getToken();
$value = $token ? $token->value : null;
//Check if the token starts with "/"
if (!$value || $value[0] !== "/") {
return null;
}
//Reset the scanner position to the token's start position
$startPosition = $token->location->start;
$this->setScanPosition($startPosition);
//Step over the opening slash
$buffer = "/";
$this->index++;
$this->column++;
$inClass = false;
while (true) {
//In a characters class the delimiter "/" is allowed without escape,
//so the characters class must be closed before closing the regexp
$stops = $inClass ? array(
"]",
) : array(
"/",
"[",
);
$tempBuffer = $this->consumeUntil($stops);
if ($tempBuffer === null) {
if ($inClass) {
$this->error("Unterminated character class in regexp");
}
else {
$this->error("Unterminated regexp");
}
}
//The first element is appended to the regexp text, the second one is
//compared with the stop characters to decide how to continue
$buffer .= $tempBuffer[0];
if ($tempBuffer[1] === "/") {
break;
}
else {
$inClass = $tempBuffer[1] === "[";
}
}
//Flags: every following character that lowercases to a-z is appended
while (($char = $this->charAt()) !== null) {
$lower = strtolower($char);
if ($lower >= "a" && $lower <= "z") {
$buffer .= $char;
$this->index++;
$this->column++;
}
else {
break;
}
}
//If next token has already been parsed and it's a bracket exclude it
//from the count of open brackets
//NOTE(review): scanPunctuator updates the counters under the opening
//bracket key; if counters are only ever keyed by opening brackets the
//increment branch below is never reached - confirm before changing it,
//since scanPunctuator also tolerates unmatched closing brackets after a
//slash
if ($this->nextToken) {
$nextVal = $this->nextToken->value;
if (isset($this->brackets[$nextVal]) && isset($this->openBrackets[$nextVal])) {
if ($this->brackets[$nextVal]) {
$this->openBrackets[$nextVal]++;
}
else {
$this->openBrackets[$nextVal]--;
}
}
//The next token is discarded
$this->nextToken = null;
}
//If comments handling is enabled, get the comments associated with the
//current token
$comments = $this->comments ? $this->commentsForCurrentToken() : null;
//Replace the current token with a regexp token
$token = new Token(Token::TYPE_REGULAR_EXPRESSION, $buffer);
$token->location->start = $startPosition;
$token->location->end = $this->getPosition(true);
$this->currentToken = $token;
if ($comments) {
//Attach the comments to the new current token
$this->commentsForCurrentToken($comments);
}
return $this->currentToken;
}
/**
* Skips whitespaces and comments from the current scan position. If
* comments handling is enabled, the array of parsed comments is returned,
* otherwise the returned array is empty.
*
* The following comment forms are recognized: inline comments, multiline
* comments, hashbang comments (only at index 0 and if the related feature
* is enabled) and, when not in module mode, html-like open and close
* comments.
*
* @return array
*
* @throws Exception
*/
protected function skipWhitespacesAndComments() {
$comments = [];
//Text skipped so far and not yet handed to adjustColumnAndLine
$content = "";
//Index at which this skipped section starts
$secStartIdx = $this->index;
while (($char = $this->charAt()) !== null) {
//Whitespace
if (in_array($char, $this->whitespaces)) {
$content .= $char;
$this->index++;
}
elseif ($char === "/" || $char === "#") {
$nextChar = $this->charAt($this->index + 1);
if ($char === "#") {
//Hashbang comment. This will be parsed only if hashbang comments are enabled
//and if it appears at the beginning of the code
$valid = $nextChar === "!" && $this->features->hashbangComments && !$this->index;
}
else {
$valid = $nextChar === "/" || $nextChar === "*";
}
//Comment
if ($valid) {
//If comments must be handled, empty the current content too
//and get the comment start position
if ($this->comments) {
if ($content !== "") {
$this->adjustColumnAndLine($content);
$content = "";
}
$start = $this->getPosition(true);
}
//Everything that does not start with "/*" ends at the line terminator
$inline = $nextChar !== "*";
$this->index += 2;
$content .= $char . $nextChar;
while (true) {
$char = $this->charAt();
if ($char === null) {
if (!$inline) {
//If the end of the source has been reached and
//a multiline comment is still open, it's an
//error
$this->error("Unterminated comment");
}
$isEnd = true;
}
else {
$content .= $char;
$this->index++;
//Inline comments end at a line terminator, multiline comments at "*/"
$isEnd = $inline ? in_array($char, $this->lineTerminators) : $char === "*" && $this->charAt() === "/";
}
if ($isEnd) {
if (!$inline) {
//Include the closing slash of the multiline comment
$content .= "/";
$this->index++;
}
if ($this->comments) {
//For inline comments the closing line
//terminator must be excluded from comment text
if ($inline && $char !== null) {
$this->index--;
$content = substr($content, 0, -strlen($char));
}
$this->adjustColumnAndLine($content);
$token = new Token(Token::TYPE_COMMENT, $content);
$token->location->start = $start;
$token->location->end = $this->getPosition(true);
$comments[] = $token;
//For inline comments the new content contains
//the closing line terminator since the char has
//already been processed
$content = "";
if ($inline && $char !== null) {
$content = $char;
$this->index++;
}
}
break;
}
}
}
else {
//Not a comment: the character belongs to the next token
break;
}
}
elseif (!$this->isModule && $char === "<" && $this->charAt($this->index + 1) === "!" && $this->charAt($this->index + 2) === "-" && $this->charAt($this->index + 3) === "-") {
//If comments must be handled, empty the current content too
//and get the comment start position
if ($this->comments) {
if ($content !== "") {
$this->adjustColumnAndLine($content);
$content = "";
}
$start = $this->getPosition(true);
}
//Open html comment
$this->index += 4;
$content .= "<!--";
//The comment runs up to the next line terminator or the end of the source
while (true) {
$char = $this->charAt();
if ($char === null) {
$isEnd = true;
}
else {
$content .= $char;
$this->index++;
$isEnd = in_array($char, $this->lineTerminators);
}
if ($isEnd) {
if ($this->comments) {
//Remove the closing line terminator from the
//comment text
if ($char !== null) {
$this->index--;
$content = substr($content, 0, -strlen($char));
}
$this->adjustColumnAndLine($content);
$token = new Token(Token::TYPE_COMMENT, $content);
$token->location->start = $start;
$token->location->end = $this->getPosition(true);
$comments[] = $token;
//The line terminator starts the new content
$content = "";
if ($char !== null) {
$content = $char;
$this->index++;
}
}
break;
}
}
}
elseif (!$this->isModule && $char === "-" && $this->charAt($this->index + 1) === "-" && $this->charAt($this->index + 2) === ">") {
//Close html comment
//Check if it is on its own line: it is allowed if this skipped section
//starts at index 0 or if a line terminator has been found between the
//start of the section and the current index
$allow = false;
if (!$secStartIdx) {
$allow = true;
}
else {
for ($index = $this->index - 1; $index >= $secStartIdx; $index--) {
if (in_array($this->charAt($index), $this->lineTerminators)) {
$allow = true;
break;
}
}
}
if ($allow) {
//If comments must be handled, empty the current content too
//and get the comment start position
if ($this->comments) {
if ($content !== "") {
$this->adjustColumnAndLine($content);
$content = "";
}
$start = $this->getPosition(true);
}
$this->index += 3;
$content .= "-->";
//The comment runs up to the next line terminator or the end of the source
while (true) {
$char = $this->charAt();
if ($char === null) {
$isEnd = true;
}
else {
$content .= $char;
$this->index++;
$isEnd = in_array($char, $this->lineTerminators);
}
if ($isEnd) {
if ($this->comments) {
//Remove the closing line terminator from the
//comment text
if ($char !== null) {
$this->index--;
$content = substr($content, 0, -strlen($char));
}
$this->adjustColumnAndLine($content);
$token = new Token(Token::TYPE_COMMENT, $content);
$token->location->start = $start;
$token->location->end = $this->getPosition(true);
$comments[] = $token;
//The line terminator starts the new content
$content = "";
if ($char !== null) {
$content = $char;
$this->index++;
}
}
break;
}
}
}
else {
//Not on its own line: the characters belong to the next tokens
break;
}
}
else {
//Neither a whitespace nor the start of a comment
break;
}
}
//Hand the remaining skipped text to adjustColumnAndLine
if ($content !== "") {
$this->adjustColumnAndLine($content);
}
return $comments;
}
/**
 * Scans a string literal delimited by single or double quotes
 *
 * @param bool $handleEscape True to handle escaping
 *
 * @return Token|null The string token, or null if the character at the
 *                    current index is not a quote
 *
 * @throws Exception
 */
protected function scanString($handleEscape = true) {
    $quote = $this->charAt();
    if ($quote !== "'" && $quote !== '"') {
        return null;
    }
    //Step over the opening quote
    $this->index++;
    $this->column++;

    //The quote is a stop character only while this string is scanned
    $this->stringsStopsLSM->add($quote);
    $result = $this->consumeUntil($this->stringsStopsLSM, $handleEscape);
    $this->stringsStopsLSM->remove($quote);

    //The scan must stop at the same quote that opened the string
    if ($result === null || $result[1] !== $quote) {
        $this->error("Unterminated string");
    }

    return new Token(Token::TYPE_STRING_LITERAL, $quote . $result[0]);
}
/**
 * Scans a template part. A part starts with a backtick, or with the curly
 * bracket that closes a template expression, and ends at the closing
 * backtick or at the "${" that opens a new expression.
 *
 * @return Token|null
 *
 * @throws Exception
 */
protected function scanTemplate() {
    $char = $this->charAt();
    //Current number of open curly brackets
    $openCurly = isset($this->openBrackets["{"]) ? $this->openBrackets["{"] : 0;

    //A closing curly bracket ends a template expression when the number of
    //open curly brackets matches the one registered on top of the open
    //templates stack
    $closesExpression = false;
    if ($char === "}") {
        $depth = count($this->openTemplates);
        if ($depth && $this->openTemplates[$depth - 1] === $openCurly) {
            $closesExpression = true;
            array_pop($this->openTemplates);
        }
    }
    if ($char !== "`" && !$closesExpression) {
        return null;
    }

    $this->index++;
    $this->column++;
    $buffer = $char;
    //Consume until a backtick or a "$" followed by "{" is found; a "$"
    //alone is part of the template text
    do {
        $chunk = $this->consumeUntil(array(
            "`",
            "\$",
        ));
        if (!$chunk) {
            $this->error("Unterminated template");
        }
        $buffer .= $chunk[0];
        $stoppedAtDollar = $chunk[1] === "\$";
        $finished = !$stoppedAtDollar || $this->charAt() === "{";
    } while (!$finished);

    //If "${" is found it's a new template expression: consume the curly
    //bracket and register the current number of open curly brackets in
    //the open templates stack
    if ($stoppedAtDollar) {
        $this->index++;
        $this->column++;
        $buffer .= "{";
        $this->openTemplates[] = $openCurly;
    }

    return new Token(Token::TYPE_TEMPLATE, $buffer);
}
/**
 * Number scanning method. Handles decimal numbers with optional decimal
 * and exponent parts, hexadecimal (0x), octal (0o) and binary (0b) forms
 * and, if the related feature is enabled, BigInt literals ("n" suffix).
 *
 * @return Token|null The number token, or null if the source at the
 *                    current index is not a number
 *
 * @throws Exception
 */
protected function scanNumber() {
    //Numbers can start with a decimal number or with a dot (.5)
    $char = $this->charAt();
    if (!($char >= "0" && $char <= "9" || $char === ".")) {
        return null;
    }
    $buffer = "";
    $allowedDecimals = true;
    //Parse the integer part
    if ($char !== ".") {
        //Consume all decimal numbers
        $buffer = $this->consumeNumbers();
        $char = $this->charAt();
        if ($this->features->bigInt && $char === "n") {
            $this->index++;
            $this->column++;
            return new Token(Token::TYPE_BIGINT_LITERAL, $buffer . $char);
        }
        $lower = $char !== null ? strtolower($char) : null;
        //Handle hexadecimal (0x), octal (0o) and binary (0b) forms: the
        //digits allowed after the prefix are stored in the property named
        //after the prefix letter (xnumbers, onumbers, bnumbers)
        if ($buffer === "0" && $lower !== null && isset($this->{$lower . "numbers"})) {
            $this->index++;
            $this->column++;
            $tempBuffer = $this->consumeNumbers($lower);
            if ($tempBuffer === null) {
                $this->error("Missing numbers after 0{$char}");
            }
            $buffer .= $char . $tempBuffer;
            //Check that there are not numbers left
            if ($this->consumeNumbers() !== null) {
                $this->error();
            }
            if ($this->features->bigInt && $this->charAt() === "n") {
                $this->index++;
                $this->column++;
                //Append the BigInt suffix. $char still holds the radix
                //letter here, so it must not be used as suffix
                return new Token(Token::TYPE_BIGINT_LITERAL, $buffer . "n");
            }
            return new Token(Token::TYPE_NUMERIC_LITERAL, $buffer);
        }
        //Consume exponent part if present. No decimal part is allowed
        //after an exponent
        if ($tempBuffer = $this->consumeExponentPart()) {
            $buffer .= $tempBuffer;
            $allowedDecimals = false;
        }
    }
    //Parse the decimal part
    if ($allowedDecimals && $this->charAt() === ".") {
        //Consume the dot
        $this->index++;
        $this->column++;
        $buffer .= ".";
        //Consume all decimal numbers
        $tempBuffer = $this->consumeNumbers();
        $buffer .= $tempBuffer;
        //If the buffer contains only the dot it should be parsed as
        //punctuator
        if ($buffer === ".") {
            $this->index--;
            $this->column--;
            return null;
        }
        //Consume exponent part if present
        if (($tempBuffer = $this->consumeExponentPart()) !== null) {
            $buffer .= $tempBuffer;
        }
    }
    return new Token(Token::TYPE_NUMERIC_LITERAL, $buffer);
}
/**
 * Consumes the longest run of digits available at the current position.
 *
 * The set of accepted digits is read from the property named
 * "<type>numbers". When the numericLiteralSeparator feature is enabled an
 * underscore is accepted too, but never as the first character; a run
 * that ends with an underscore is reported through error().
 *
 * @param string $type Digits type, used as prefix of the "numbers"
 *                     property to read (empty string for decimal digits)
 * @param int    $max  Maximum number of characters to consume, null for
 *                     no limit
 *
 * @return string|null The consumed characters or null if nothing matched
 */
protected function consumeNumbers($type = "", $max = null) {
    $current = $this->charAt();
    $separator = $this->features->numericLiteralSeparator ? "_" : "";
    $consumed = "";
    $length = 0;
    while (true) {
        $accepted = in_array($current, $this->{$type . "numbers"});
        if (!$accepted) {
            //Separators are valid only after at least one character
            $accepted = $length && $current === $separator;
        }
        if (!$accepted) {
            break;
        }
        $consumed .= $current;
        $this->index++;
        $this->column++;
        $length++;
        //Stop as soon as the requested number of characters is reached
        if ($length === $max) {
            break;
        }
        $current = $this->charAt();
    }
    if (!$length) {
        return null;
    }
    if (substr($consumed, -1) === "_") {
        $this->error("Numeric separators are not allowed at the end of a number");
    }
    return $consumed;
}
/**
 * Consumes the exponent part of a number: an "e" or "E", an optional
 * sign and at least one decimal digit. Missing digits are reported
 * through error().
 *
 * @return string The consumed exponent, or an empty string when the
 *                current character does not start an exponent
 */
protected function consumeExponentPart() {
    $marker = $this->charAt();
    //Nothing to consume if the current character is not "e" or "E"
    if ($marker === null || strtolower($marker) !== "e") {
        return "";
    }
    $this->index++;
    $this->column++;
    $exponent = $marker;
    //Optional sign
    $sign = $this->charAt();
    if ($sign === "+" || $sign === "-") {
        $this->index++;
        $this->column++;
        $exponent .= $sign;
    }
    //Mandatory digits
    $digits = $this->consumeNumbers();
    if ($digits === null) {
        $this->error("Missing exponent");
    }
    return $exponent . $digits;
}
/**
 * Punctuator scanning method.
 *
 * Brackets are handled first so that the count of open brackets can be
 * kept up to date; every other punctuator is resolved with the
 * punctuators LSM.
 *
 * @return Token|null The punctuator token or null if no punctuator is
 *                    found at the current position
 */
protected function scanPunctuator() {
    $char = $this->charAt();
    if (!isset($this->brackets[$char])) {
        //Not a bracket, look for the longest matching punctuator
        $match = $this->punctuatorsLSM->match($this, $this->index, $char);
        if (!$match) {
            return null;
        }
        //Optional chaining punctuator cannot appear before a number, in
        //this case only the question mark must be consumed
        if ($match[1] === "?.") {
            $following = $this->charAt($this->index + $match[0]);
            if ($following !== null && $following >= "0" && $following <= "9") {
                $match = array(1, "?");
            }
        }
        $this->index += $match[0];
        $this->column += $match[0];
        return new Token(Token::TYPE_PUNCTUATOR, $match[1]);
    }
    //In the brackets map closing brackets point to their opening
    //bracket, while opening brackets have an empty value
    $opening = $this->brackets[$char];
    if (!$opening) {
        //Opening bracket: increase its counter
        if (!isset($this->openBrackets[$char])) {
            $this->openBrackets[$char] = 0;
        }
        $this->openBrackets[$char]++;
    }
    elseif (isset($this->openBrackets[$opening]) && $this->openBrackets[$opening]) {
        //Closing bracket with a corresponding open bracket
        $this->openBrackets[$opening]--;
    }
    elseif (!$this->isAfterSlash($this->getPosition(true))) {
        //Unbalanced closing bracket, tolerated only when isAfterSlash
        //returns true for the current position
        $this->error();
    }
    $this->index++;
    $this->column++;
    return new Token(Token::TYPE_PUNCTUATOR, $char);
}
/**
 * Keywords and identifiers scanning method.
 *
 * Consumes the longest sequence of identifier characters and unicode
 * escape sequences, then classifies it as private identifier, null
 * literal, boolean literal, keyword or identifier.
 *
 * @return Token|null The token or null if no identifier starts at the
 *                    current position
 */
protected function scanKeywordOrIdentifier() {
    //Check private identifier start character
    $private = $this->features->privateMethodsAndFields && $this->charAt() === "#";
    if ($private) {
        $this->index++;
        $this->column++;
    }
    //Consume the maximum number of characters that are unicode escape
    //sequences or valid identifier starts (only the first character) or
    //parts
    $buffer = "";
    $start = true;
    while (($char = $this->charAt()) !== null) {
        if ($char >= "a" && $char <= "z" || $char >= "A" && $char <= "Z" || $char === "_" || $char === "\$" || !$start && $char >= "0" && $char <= "9" || $this->isIdentifierChar($char, $start)) {
            $buffer .= $char;
            $this->index++;
            $this->column++;
        }
        elseif ($char === "\\") {
            //Remember where the escape sequence begins so that it can be
            //unconsumed if it does not belong to the identifier
            $escapeIndex = $this->index;
            $escapeColumn = $this->column;
            $seq = $this->consumeUnicodeEscapeSequence();
            if (!$seq) {
                break;
            }
            //Verify that it's a valid character
            if (!$this->isIdentifierChar($seq[1], $start)) {
                //The sequence is not part of the identifier: restore the
                //position, otherwise it would be silently skipped
                $this->index = $escapeIndex;
                $this->column = $escapeColumn;
                break;
            }
            //The buffer keeps the sequence as written in the source
            $buffer .= $seq[0];
        }
        else {
            break;
        }
        $start = false;
    }
    //Identify token type
    if ($buffer === "") {
        //Unconsume the hash if nothing was found after that
        if ($private) {
            $this->index--;
            $this->column--;
        }
        return null;
    }
    elseif ($private) {
        $type = Token::TYPE_PRIVATE_IDENTIFIER;
        $buffer = "#" . $buffer;
    }
    elseif ($buffer === "null") {
        $type = Token::TYPE_NULL_LITERAL;
    }
    elseif ($buffer === "true" || $buffer === "false") {
        $type = Token::TYPE_BOOLEAN_LITERAL;
    }
    elseif (in_array($buffer, $this->keywords) || in_array($buffer, $this->strictModeKeywords)) {
        $type = Token::TYPE_KEYWORD;
    }
    else {
        $type = Token::TYPE_IDENTIFIER;
    }
    return new Token($type, $buffer);
}
/**
 * Consumes an unicode escape sequence in one of the two supported forms:
 * "\uFFFF" (exactly 4 hexadecimal digits) or "\u{FFF}" (hexadecimal
 * digits wrapped in curly brackets).
 *
 * If the sequence is malformed nothing is consumed: index and column are
 * restored to the values they had when the method was called.
 *
 * @return array|null Null if there is no valid sequence at the current
 *                    position, otherwise an array where the first element
 *                    is the sequence as written in the source and the
 *                    second one is the decoded character
 */
protected function consumeUnicodeEscapeSequence() {
    if ($this->charAt() !== "\\" || $this->charAt($this->index + 1) !== "u") {
        return null;
    }
    $startIndex = $this->index;
    $startColumn = $this->column;
    //Skip the backslash and the "u"
    $this->index += 2;
    $this->column += 2;
    $brackets = false;
    if ($this->charAt() === "{") {
        //\u{FFF}
        $brackets = true;
        $this->index++;
        $this->column++;
        $code = $this->consumeNumbers("x");
        //The code must be compared with null explicitly because "0" is a
        //valid code but it is a falsy string
        if ($code !== null) {
            if ($this->charAt() === "}") {
                $this->index++;
                $this->column++;
            }
            else {
                $code = null;
            }
        }
    }
    else {
        //\uFFFF
        $code = $this->consumeNumbers("x", 4);
        if ($code !== null && strlen($code) !== 4) {
            $code = null;
        }
    }
    //consumeNumbers accepts numeric separators when the corresponding
    //feature is enabled, but they are not hexadecimal digits
    if ($code !== null && strpos($code, "_") !== false) {
        $code = null;
    }
    //Unconsume everything if the format is invalid
    if ($code === null) {
        $this->index = $startIndex;
        $this->column = $startColumn;
        return null;
    }
    //Return an array where the first element is the matched sequence
    //and the second one is the decoded character
    return array(
        $brackets ? "\\u{" . $code . "}" : "\\u" . $code,
        Utils::unicodeToUtf8(hexdec($code)),
    );
}
/**
 * Tells if a character can be part of an identifier.
 *
 * ASCII letters, underscore, dollar sign and (when not at the start)
 * decimal digits are checked with plain comparisons; anything else is
 * tested against the identifier start or identifier part regex.
 *
 * @param string $char  Character to check
 * @param bool   $start If true it will check that the character is
 *                      valid to start an identifier
 *
 * @return bool
 */
protected function isIdentifierChar($char, $start = true) {
    //Lowercase ASCII letters
    if ($char >= "a" && $char <= "z") {
        return true;
    }
    //Uppercase ASCII letters
    if ($char >= "A" && $char <= "Z") {
        return true;
    }
    if ($char === "_" || $char === "\$") {
        return true;
    }
    //Digits are allowed everywhere but at the start
    if (!$start && $char >= "0" && $char <= "9") {
        return true;
    }
    //Fall back to the unicode aware regex
    $regex = $start ? $this->idStartRegex : $this->idPartRegex;
    return (bool) preg_match($regex, $char);
}
/**
 * Updates line and column counters according to the given string.
 *
 * The string is split with the lines splitter regex: every line
 * terminator found increases the line counter. The column counter is
 * reset to the length of the last line if at least one terminator is
 * found, otherwise it is increased by the length of the string.
 *
 * @param string $buffer String to analyze
 *
 * @return void
 */
protected function adjustColumnAndLine($buffer) {
    $parts = preg_split($this->linesSplitter, $buffer);
    //Number of line terminators found in the string
    $breaks = count($parts) - 1;
    //Length in characters (not bytes) of the text after the last one
    $tailLength = mb_strlen($parts[$breaks], "UTF-8");
    $this->line += $breaks;
    $this->column = $breaks ? $tailLength : $this->column + $tailLength;
}
/**
 * Consumes characters until one of the given stops is found.
 *
 * Stops can be given as an array of characters or as a LSM instance, in
 * the latter case a stop can be longer than one character. If escape
 * handling is enabled a stop that follows an unescaped backslash is
 * treated as a normal character.
 *
 * Line and column counters are adjusted only when a stop is found; the
 * index is advanced while characters are consumed.
 *
 * @param array|LSM $stops        Characters to search
 * @param bool      $handleEscape True to handle escaping
 * @param bool      $collectStop  True to include the stop character
 *
 * @return array|null Array with the consumed string and the stop that
 *                    ended it, or null if the source ends before a stop
 *                    is found or if, with $collectStop disabled, nothing
 *                    precedes the stop
 */
protected function consumeUntil($stops, $handleEscape = true, $collectStop = true) {
    $useLSM = $stops instanceof LSM;
    $consumed = "";
    $escaped = false;
    for (;;) {
        $current = $this->charAt();
        //End of source reached without finding a stop
        if ($current === null) {
            return null;
        }
        //Find out if the current position contains a stop and how many
        //characters it occupies
        $step = 1;
        if ($useLSM) {
            $found = $stops->match($this, $this->index, $current);
            $stopHit = (bool) $found;
            if ($stopHit) {
                $step = $found[0];
                $current = $found[1];
            }
        }
        else {
            $stopHit = in_array($current, $stops);
        }
        if ($stopHit && !$escaped) {
            if ($collectStop) {
                $this->index += $step;
                $consumed .= $current;
            }
            elseif ($consumed === "") {
                return null;
            }
            $this->adjustColumnAndLine($consumed);
            return array(
                $consumed,
                $current,
            );
        }
        //Normal character or escaped stop
        $this->index += $step;
        $consumed .= $current;
        //A backslash escapes the next character only if it is not
        //escaped itself
        $escaped = !$escaped && $current === "\\" && $handleEscape;
    }
}
}
Classes
Title | Deprecated | Summary |
---|---|---|
Scanner | | Base class for scanners. |