class Scanner
Same name in this branch
- 11.1.x vendor/masterminds/html5/src/HTML5/Parser/Scanner.php \Masterminds\HTML5\Parser\Scanner
Base class for scanners.
@author Marco Marchiò <marco.mm89@gmail.com>
Hierarchy
- class \Peast\Syntax\Scanner uses \JSX\Scanner
Expanded class hierarchy of Scanner
File
- vendor/mck89/peast/lib/Peast/Syntax/Scanner.php, line 17
Namespace
Peast\Syntax
View source
class Scanner {
use JSX\Scanner;
/**
 * Scanner features. Assigned in the constructor and consulted to enable or
 * disable optional syntax (exponentiation operator, BigInt, etc.)
 *
 * @var Features
 */
protected $features;
/**
 * Column of the current scan position
 *
 * @var int
 */
protected $column = 0;
/**
 * Line of the current scan position. The initial value is 1
 *
 * @var int
 */
protected $line = 1;
/**
 * Index of the current scan position inside the source characters array
 *
 * @var int
 */
protected $index = 0;
/**
 * Source length, expressed as the number of entries in the source
 * characters array
 *
 * @var int
 */
protected $length;
/**
 * Source characters. The constructor splits the source code into an array
 * of UTF-8 characters so that each character can be read by index
 *
 * @var array
 */
protected $source;
/**
 * Consumed position. It is moved to the end of a token every time that
 * token is consumed
 *
 * @var Position
 */
protected $position;
/**
 * Current token, the one returned by getToken until it is consumed
 *
 * @var Token
 */
protected $currentToken;
/**
 * Next token, the one that follows the current token. It is filled by
 * getNextToken
 *
 * @var Token
 */
protected $nextToken;
/**
 * Strict mode flag
 *
 * @var bool
 */
protected $strictMode = false;
/**
 * True to register consumed tokens in the tokens array
 *
 * @var bool
 */
protected $registerTokens = false;
/**
 * Module mode. When enabled html-like comments are not recognized
 *
 * @var bool
 */
protected $isModule = false;
/**
 * Comments handling. When enabled comments are turned into tokens and
 * associated with the token that follows them
 *
 * @var bool
 */
protected $comments = false;
/**
 * Internal JSX scan flag. When true getToken tries the JSX scanning methods
 * too
 *
 * @var bool
 */
protected $jsx = false;
/**
 * Registered tokens array, filled only when token registration is enabled
 *
 * @var array
 */
protected $tokens = array();
/**
 * Comments to tokens map. Keys are the object hashes of the tokens (or an
 * empty string when there is no current token), values are arrays of
 * comment tokens
 *
 * @var array
 */
protected $commentsMap = array();
/**
 * Events emitter. It is created on demand by getEventsEmitter, so it is
 * empty until someone requests it
 *
 * @var EventsEmitter
 */
protected $eventsEmitter;
/**
 * Regex to match a character that can start an identifier
 *
 * @var string
 */
protected $idStartRegex = "/[\\p{Lu}\\p{Ll}\\p{Lt}\\p{Lm}\\p{Lo}\\p{Nl}\\x{1885}\\x{1886}\\x{2118}\\x{212E}\\x{309B}\\x{309C}]/u";
/**
 * Regex to match a character that can appear inside an identifier. It
 * extends the identifier start regex with marks, digits and connectors
 *
 * @var string
 */
protected $idPartRegex = "/[\\p{Lu}\\p{Ll}\\p{Lt}\\p{Lm}\\p{Lo}\\p{Nl}\\x{1885}\\x{1886}\\x{2118}\\x{212E}\\x{309B}\\x{309C}\\p{Mn}\\p{Mc}\\p{Nd}\\p{Pc}\\x{00B7}\\x{0387}\\x{1369}\\x{136A}\\x{136B}\\x{136C}\\x{136D}\\x{136E}\\x{136F}\\x{1370}\\x{1371}\\x{19DA}\\x{200C}\\x{200D}]/u";
/**
 * Words that are always keywords. "await" is removed by the constructor
 * when the async/await feature is enabled
 *
 * @var array
 */
protected $keywords = [
  "break", "do", "in", "typeof",
  "case", "else", "instanceof", "var",
  "catch", "export", "new", "void",
  "class", "extends", "return", "while",
  "const", "finally", "super", "with",
  "continue", "for", "switch",
  "debugger", "function", "this",
  "default", "if", "throw",
  "delete", "import", "try",
  "enum", "await",
];
/**
 * Words that are keywords only when strict mode is enabled
 *
 * @var array
 */
protected $strictModeKeywords = [
  "implements", "interface", "package",
  "private", "protected", "public",
  "static", "let", "yield",
];
/**
 * Punctuators list. The constructor removes the entries that belong to
 * disabled features before building the punctuators LSM
 *
 * @var array
 */
protected $punctuators = [
  ".", ";", ",", "<", ">", "<=", ">=", "==", "!=", "===", "!==",
  "+", "-", "*", "%", "++", "--", "<<", ">>", ">>>",
  "&", "|", "^", "!", "~", "&&", "||", "?", ":",
  "=", "+=", "-=", "*=", "%=", "<<=", ">>=", ">>>=", "&=", "|=", "^=",
  "=>", "...", "/", "/=", "**", "**=", "??", "?.", "&&=", "||=", "??=",
];
/**
 * LSM built from the punctuators list
 *
 * @var LSM
 */
protected $punctuatorsLSM;
/**
 * LSM built from the line terminators, used to find where a string stops
 *
 * @var LSM
 */
protected $stringsStopsLSM;
/**
 * Brackets map. Open brackets map to an empty string, closing brackets map
 * to their corresponding open bracket
 *
 * @var array
 */
protected $brackets = [
  "(" => "",
  "[" => "",
  "{" => "",
  ")" => "(",
  "]" => "[",
  "}" => "{",
];
/**
 * Count of currently open brackets, keyed by open bracket character
 *
 * @var array
 */
protected $openBrackets = [];
/**
 * Stack of open templates. Each entry is the number of open curly brackets
 * at the moment the template expression was opened
 *
 * @var array
 */
protected $openTemplates = [];
/**
 * Whitespaces array. Entries are either literal characters or integer code
 * points; the constructor converts the integer code points to UTF-8
 * characters. Line terminators are part of this list too.
 *
 * Each code point is listed exactly once (0xa0 was previously repeated,
 * which only added a redundant conversion and comparison).
 *
 * @var array
 */
protected $whitespaces = array(
  " ",
  "\t",
  "\n",
  "\r",
  "\f",
  "\v",
  0xa0,
  0xfeff,
  0x1680,
  0x2000,
  0x2001,
  0x2002,
  0x2003,
  0x2004,
  0x2005,
  0x2006,
  0x2007,
  0x2008,
  0x2009,
  0x200a,
  0x202f,
  0x205f,
  0x3000,
  0x2028,
  0x2029,
);
/**
 * Single characters that terminate a line. Integer entries are code points
 * that the constructor converts to UTF-8 characters in its own copy
 *
 * @var array
 *
 * @static
 */
public static $lineTerminatorsChars = ["\n", "\r", 0x2028, 0x2029];
/**
 * Multi character sequences that terminate a line
 *
 * @var array
 *
 * @static
 */
public static $lineTerminatorsSequences = ["\r\n"];
/**
 * Regex that splits texts on valid ES line terminators. The constructor
 * builds it as a string from the line terminators list
 *
 * @var string
 */
protected $linesSplitter;
/**
 * Line terminators sequences followed by line terminators characters,
 * merged by the constructor
 *
 * @var array
 */
protected $lineTerminators;
/**
 * Names of the properties copied when the scanner state is read
 *
 * @var array
 */
protected $stateProps = [
  "position", "index", "column", "line",
  "currentToken", "nextToken", "strictMode",
  "openBrackets", "openTemplates", "commentsMap",
];
/**
 * Decimal digits
 *
 * @var array
 */
protected $numbers = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"];
/**
 * Hexadecimal digits, lower and upper case
 *
 * @var array
 */
protected $xnumbers = [
  "0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
  "a", "b", "c", "d", "e", "f",
  "A", "B", "C", "D", "E", "F",
];
/**
 * Octal digits
 *
 * @var array
 */
protected $onumbers = ["0", "1", "2", "3", "4", "5", "6", "7"];
/**
 * Binary digits
 *
 * @var array
 */
protected $bnumbers = ["0", "1"];
/**
 * Builds the scanner: normalizes the source to an array of UTF-8
 * characters, then prepares the lookup structures (whitespaces, line
 * terminators, punctuators and keywords) according to the given features
 *
 * @param string   $source   Source code
 * @param Features $features Scanner features
 * @param array    $options  Parsing options. The keys read here are
 *                           "sourceEncoding" and "strictEncoding"
 */
function __construct($source, Features $features, $options) {
  $this->features = $features;

  //The BOM is stripped first because it can also reveal the encoding when
  //the user did not specify one
  $encoding = null;
  if (isset($options["sourceEncoding"])) {
    $encoding = $options["sourceEncoding"];
  }
  $this->stripBOM($source, $encoding);

  //Everything that is not already UTF-8 gets converted
  if ($encoding && !preg_match("/UTF-?8/i", $encoding)) {
    $source = mb_convert_encoding($source, "UTF-8", $encoding);
  }

  //Reading characters from an array is faster than calling mb_substr for
  //each of them. Strict encoding is on unless explicitly disabled
  $strictEncoding = !isset($options["strictEncoding"]) || $options["strictEncoding"];
  $this->source = Utils::stringToUTF8Array($source, $strictEncoding);
  $this->length = count($this->source);

  //Whitespaces and line terminators can be declared as code points, turn
  //them into UTF-8 characters
  $this->lineTerminators = array_merge(self::$lineTerminatorsSequences, self::$lineTerminatorsChars);
  foreach ($this->whitespaces as $i => $char) {
    if (is_int($char)) {
      $this->whitespaces[$i] = Utils::unicodeToUtf8($char);
    }
  }
  foreach ($this->lineTerminators as $i => $char) {
    if (is_int($char)) {
      $this->lineTerminators[$i] = Utils::unicodeToUtf8($char);
    }
  }

  //Collect the punctuators that belong to disabled features and drop them
  $disabledPunctuators = [];
  if (!$this->features->exponentiationOperator) {
    $disabledPunctuators[] = "**";
    $disabledPunctuators[] = "**=";
  }
  if (!$this->features->optionalChaining) {
    $disabledPunctuators[] = "?.";
  }
  if (!$this->features->logicalAssignmentOperators) {
    $disabledPunctuators[] = "&&=";
    $disabledPunctuators[] = "||=";
    $disabledPunctuators[] = "??=";
  }
  foreach ($disabledPunctuators as $punctuator) {
    Utils::removeArrayValue($this->punctuators, $punctuator);
  }

  //LSMs for punctuators and for the characters that stop a string
  $this->punctuatorsLSM = new LSM($this->punctuators);
  $this->stringsStopsLSM = new LSM($this->lineTerminators, true);

  //Paragraph and line separators can be allowed inside strings
  if ($this->features->paragraphLineSepInStrings) {
    $this->stringsStopsLSM->remove(Utils::unicodeToUtf8(0x2028));
    $this->stringsStopsLSM->remove(Utils::unicodeToUtf8(0x2029));
  }

  //With async/await enabled "await" is not a plain keyword anymore
  if ($this->features->asyncAwait) {
    Utils::removeArrayValue($this->keywords, "await");
  }

  $this->linesSplitter = "/" . implode("|", $this->lineTerminators) . "/uS";
  $this->position = new Position(0, 0, 0);
}
/**
 * Removes a leading UTF-8, UTF-16BE or UTF-16LE byte order mark from the
 * source. When a BOM is found and no encoding was specified, the encoding
 * is set to the one identified by the BOM. Both arguments are modified in
 * place
 *
 * @param string $source   Source
 * @param string $encoding User specified encoding
 */
public function stripBOM(&$source, &$encoding) {
  //Nothing to do on an empty source
  if (!isset($source[0])) {
    return;
  }
  //Byte order marks and the encodings they identify. Every BOM starts with
  //a different byte, so at most one of them can match
  $boms = [
    "\xef\xbb\xbf" => "UTF-8",
    "\xfe\xff" => "UTF-16BE",
    "\xff\xfe" => "UTF-16LE",
  ];
  foreach ($boms as $bom => $bomEncoding) {
    $bomLength = strlen($bom);
    if (strncmp($source, $bom, $bomLength) !== 0) {
      continue;
    }
    $source = substr($source, $bomLength);
    if (!$encoding) {
      $encoding = $bomEncoding;
    }
    return;
  }
}
/**
 * Enables or disables module scanning mode. While module mode is enabled
 * the scanner does not recognize html-like comments
 *
 * @param bool $enable True to enable module scanning mode, false to disable it
 *
 * @return $this
 */
public function enableModuleMode($enable = true) {
$this->isModule = $enable;
return $this;
}
/**
 * Enables or disables comments handling. When enabled, comments are
 * turned into tokens and associated with the token that follows them
 * instead of being simply skipped
 *
 * @param bool $enable True to enable comments handling, false to disable it
 *
 * @return $this
 */
public function enableComments($enable = true) {
$this->comments = $enable;
return $this;
}
/**
 * Enables or disables tokens registration in the token array. When
 * enabled every consumed token is appended to the array returned by
 * getTokens
 *
 * @param bool $enable True to enable token registration, false to disable it
 *
 * @return $this
 */
public function enableTokenRegistration($enable = true) {
$this->registerTokens = $enable;
return $this;
}
/**
 * Return registered tokens. The array is filled only while token
 * registration is enabled
 *
 * @return array
 */
public function getTokens() {
return $this->tokens;
}
/**
 * Returns the scanner's event emitter, creating it on the first request
 *
 * @return EventsEmitter
 */
public function getEventsEmitter() {
  if ($this->eventsEmitter) {
    return $this->eventsEmitter;
  }
  //The emitter is instantiated lazily so that it does not exist at all
  //when nobody asks for it
  $this->eventsEmitter = new EventsEmitter();
  return $this->eventsEmitter;
}
/**
 * Enables or disables strict mode. The flag is part of the scanner state
 * and changes which words are considered keywords
 *
 * @param bool $strictMode Strict mode state
 *
 * @return $this
 */
public function setStrictMode($strictMode) {
$this->strictMode = $strictMode;
return $this;
}
/**
 * Return strict mode state
 *
 * @return bool True if strict mode is currently enabled
 */
public function getStrictMode() {
return $this->strictMode;
}
/**
 * Tells if the given token is a keyword in the current strict mode state:
 * it must be a keyword token whose value is either an unconditional
 * keyword or, when strict mode is on, a strict mode keyword
 *
 * @param Token $token Token to check
 *
 * @return bool
 */
public function isStrictModeKeyword($token) {
  if ($token->type !== Token::TYPE_KEYWORD) {
    return false;
  }
  if (in_array($token->value, $this->keywords)) {
    return true;
  }
  return $this->strictMode && in_array($token->value, $this->strictModeKeywords);
}
/**
 * Takes a snapshot of the scanner state that can later be passed to
 * setState
 *
 * @return array
 */
public function getState() {
  //Current and next tokens are scanned before taking the snapshot so that
  //they are not parsed again after a restore. The next token is left
  //alone when the current one is a slash: it may be the start of a regexp
  //and its content could be wrongly scanned as a comment
  $current = $this->currentToken;
  if (!$current) {
    $current = $this->getToken();
  }
  if ($current && $current->value !== "/") {
    $this->getNextToken();
  }
  $state = [];
  foreach ($this->stateProps as $name) {
    $state[$name] = $this->{$name};
  }
  if ($this->registerTokens) {
    $state["tokensNum"] = count($this->tokens);
  }
  //Listeners of the FreezeState event receive the state by reference so
  //that they can store their own data in it
  if ($this->eventsEmitter) {
    $this->eventsEmitter->fire("FreezeState", [&$state]);
  }
  return $state;
}
/**
 * Restores a state previously returned by getState
 *
 * @param array $state State
 *
 * @return $this
 */
public function setState($state) {
  if ($this->registerTokens) {
    //Tokens registered after the snapshot must be discarded
    if (isset($this->tokens[$state["tokensNum"]])) {
      while (count($this->tokens) > $state["tokensNum"]) {
        array_pop($this->tokens);
      }
    }
    //The tokens count is not a scanner property
    unset($state["tokensNum"]);
  }
  //Listeners of the ResetState event receive the state by reference
  if ($this->eventsEmitter) {
    $this->eventsEmitter->fire("ResetState", [&$state]);
  }
  //Copy every remaining entry back on the matching property
  foreach ($state as $name => $stored) {
    $this->{$name} = $stored;
  }
  return $this;
}
/**
 * Returns the scanner position
 *
 * @param bool $scanPosition False (default) to get the consumed position,
 *                           true to get a new Position built from the
 *                           current scan line, column and index
 *
 * @return Position
 */
public function getPosition($scanPosition = false) {
  if (!$scanPosition) {
    return $this->position;
  }
  return new Position($this->line, $this->column, $this->index);
}
/**
 * Sets the current scan position at the given one. Only line, column and
 * index are copied, the consumed position is left untouched
 *
 * @param Position $position Position at which the scan position will be set
 *
 * @return $this
 */
public function setScanPosition(Position $position) {
$this->line = $position->getLine();
$this->column = $position->getColumn();
$this->index = $position->getIndex();
return $this;
}
/**
 * Reads a character of the source code
 *
 * @param int $index Index of the character, the current scan index is used
 *                   when omitted
 *
 * @return string|null The character, or null if the index is at or beyond
 *                     the end of the source
 */
public function charAt($index = null) {
  $target = $index === null ? $this->index : $index;
  if ($target < $this->length) {
    return $this->source[$target];
  }
  return null;
}
/**
 * Throws a syntax error located at the current scan position
 *
 * @param string $message Error message, when empty a message reporting
 *                        the character at the current index is used
 *
 * @return void
 *
 * @throws Exception
 */
protected function error($message = null) {
  $text = $message ? $message : ("Unexpected " . $this->charAt());
  throw new Exception($text, $this->getPosition(true));
}
/**
 * Consumes the current token: the consumed position moves to its end and
 * the next token, if already scanned, becomes the current one
 *
 * @return $this
 */
public function consumeToken() {
  $consumed = $this->currentToken;
  //The consumed position is now the end of the token
  $this->position = $consumed->location->end;
  //Comments associated with the token are consumed before the token
  if ($this->comments) {
    $this->consumeCommentsForCurrentToken();
  }
  if ($this->registerTokens) {
    $this->tokens[] = $consumed;
  }
  //Notify listeners about the consumed token
  if ($this->eventsEmitter) {
    $this->eventsEmitter->fire("TokenConsumed", [$consumed]);
  }
  //Shift the look-ahead token into the current slot
  $this->currentToken = $this->nextToken;
  $this->nextToken = null;
  return $this;
}
/**
 * Consumes the current token if its value is the given string
 *
 * @param string $expected String to check
 *
 * @return Token|null The consumed token, or null if there is no match
 */
public function consume($expected) {
  //getToken is called only when there is no pending token, for
  //performance reasons
  $token = $this->currentToken;
  if (!$token) {
    $token = $this->getToken();
  }
  if (!$token || $token->value !== $expected) {
    return null;
  }
  $this->consumeToken();
  return $token;
}
/**
 * Consumes the current token if its value is one of the given strings
 *
 * @param array $expected Strings to check
 *
 * @return Token|null The consumed token, or null if there is no match
 */
public function consumeOneOf($expected) {
  //getToken is called only when there is no pending token, for
  //performance reasons
  $token = $this->currentToken;
  if (!$token) {
    $token = $this->getToken();
  }
  if (!$token || !in_array($token->value, $expected)) {
    return null;
  }
  $this->consumeToken();
  return $token;
}
/**
 * Checks that the current token starts on a reference line, that is there
 * are no line terminators in between
 *
 * @param bool $nextToken False (default) to use the line of the consumed
 *                        position as reference, true to use the line
 *                        where the next token ends
 *
 * @return bool
 */
public function noLineTerminators($nextToken = false) {
  if ($nextToken) {
    $following = $this->getNextToken();
    $refLine = null;
    if ($following) {
      $refLine = $following->location->end->getLine();
    }
  }
  else {
    $refLine = $this->getPosition()->getLine();
  }
  $token = $this->currentToken;
  if (!$token) {
    $token = $this->getToken();
  }
  if (!$token) {
    return false;
  }
  return $token->location->start->getLine() === $refLine;
}
/**
 * Checks if one of the given values follows the current scan position
 *
 * @param string|array $expected  Values to check. Plain entries are
 *                                compared with the current token value.
 *                                Array entries are pairs: the first item
 *                                is compared with the current token, the
 *                                second one is either true (no line
 *                                terminators allowed before the next
 *                                token) or the expected next token value
 * @param bool         $nextToken Must be true to check array entries too
 *
 * @return bool
 */
public function isBefore($expected, $nextToken = false) {
  $token = $this->currentToken;
  if (!$token) {
    $token = $this->getToken();
  }
  if (!$token) {
    return false;
  }
  if (in_array($token->value, $expected)) {
    return true;
  }
  //Pairs are evaluated only on request and if a next token exists
  if (!$nextToken || !$this->getNextToken()) {
    return false;
  }
  foreach ($expected as $pair) {
    if (!is_array($pair) || $pair[0] !== $token->value) {
      continue;
    }
    $second = $pair[1];
    //True means "not followed by line terminators", any other value is
    //compared with the next token
    $matches = $second === true
      ? $this->noLineTerminators(true)
      : $second === $this->nextToken->value;
    if ($matches) {
      return true;
    }
  }
  return false;
}
/**
 * Returns the token that follows the current one, scanning it if needed
 *
 * @return Token|null
 */
public function getNextToken() {
  if ($this->nextToken) {
    return $this->nextToken;
  }
  //getToken keeps returning the current token while it is set, so it is
  //temporarily removed to scan the following one and then put back
  $current = $this->currentToken;
  if (!$current) {
    $current = $this->getToken();
  }
  $this->currentToken = null;
  $this->nextToken = $this->getToken(true);
  $this->currentToken = $current;
  return $this->nextToken;
}
/**
 * Returns the current token, scanning a new one from the current scan
 * position if there is no pending token
 *
 * @param bool $skipEOFChecks True to skip end of file checks
 *                            even if the end is reached
 *
 * @return Token|null The token, or null if the end of the source has been
 *                    reached or if an unrecognized sequence follows a
 *                    slash (possible regexp)
 *
 * @throws Exception If no valid token can be scanned or, at the end of
 *                   the source, if brackets or templates are still open
 */
public function getToken($skipEOFChecks = false) {
//The current token is returned until consumed
if ($this->currentToken) {
return $this->currentToken;
}
//Move to the first meaningful character, collecting comments on the way
$comments = $this->skipWhitespacesAndComments();
//Emit the TokenCreated event for all the comments found
if ($comments) {
foreach ($comments as $comment) {
$this->eventsEmitter && $this->eventsEmitter
->fire("TokenCreated", array(
$comment,
));
}
}
//When the end of the source is reached
if ($this->index >= $this->length) {
//Check if there are open brackets
if (!$skipEOFChecks) {
foreach ($this->openBrackets as $bracket => $num) {
if ($num) {
$this->error("Unclosed {$bracket}");
}
}
//Check if there are open templates
if (count($this->openTemplates)) {
$this->error("Unterminated template");
}
}
//Register comments and consume them. There is no current token here,
//so they are stored under the empty key of the comments map
if ($this->comments && $comments) {
$this->commentsForCurrentToken($comments);
}
//Emit the EndReached event when at the end of the source
$this->eventsEmitter && $this->eventsEmitter
->fire("EndReached");
return null;
}
$startPosition = $this->getPosition(true);
$origException = null;
try {
//Try to match a token. Scanners are tried in this exact order and the
//first one that returns a token wins; JSX scanners run only when the
//internal JSX flag is set
if ($this->jsx && ($token = $this->scanJSXIdentifier()) || ($token = $this->scanTemplate()) || ($token = $this->scanNumber()) || $this->jsx && ($token = $this->scanJSXPunctuator()) || ($token = $this->scanPunctuator()) || ($token = $this->scanKeywordOrIdentifier()) || $this->jsx && ($token = $this->scanJSXString()) || ($token = $this->scanString())) {
//Set the token start and end positions
$token->location->start = $startPosition;
$token->location->end = $this->getPosition(true);
$this->currentToken = $token;
//Register comments if required
if ($this->comments && $comments) {
$this->commentsForCurrentToken($comments);
}
//Emit the TokenCreated event for the token just created
$this->eventsEmitter && $this->eventsEmitter
->fire("TokenCreated", array(
$this->currentToken,
));
return $this->currentToken;
}
} catch (Exception $e) {
//Keep the exception, it is thrown again below unless the failure can be
//explained by a regexp
$origException = $e;
}
//If last token was "/" do not throw an error if the token has not be
//recognized since it can be the first character in a regexp and it will
//be consumed when the current token will be reconsumed as a regexp
if ($this->isAfterSlash($startPosition)) {
$this->setScanPosition($startPosition);
return null;
}
//No valid token found. If there was a scan error, throw the same
//exception again, otherwise throw a new error
if ($origException) {
throw $origException;
}
$this->error();
}
/**
 * Performs the operations required when the end of the source is reached
 *
 * @return $this
 */
public function consumeEnd() {
  //Trailing comments are consumed like the ones attached to tokens
  if ($this->comments) {
    $this->consumeCommentsForCurrentToken();
  }
  //Notify listeners that the end of the source has been reached
  if ($this->eventsEmitter) {
    $this->eventsEmitter->fire("EndReached");
  }
  return $this;
}
/**
 * Gets or sets comments for the current token. With an array argument the
 * comments are associated with the current token; without arguments the
 * comments of the current token are returned and removed from the map.
 * When there is no current token an empty string is used as map key
 *
 * @param array $comments Comments array
 *
 * @return array
 */
protected function commentsForCurrentToken($comments = null) {
  $key = "";
  if ($this->currentToken) {
    $key = spl_object_hash($this->currentToken);
  }
  //Setter
  if ($comments !== null) {
    $this->commentsMap[$key] = $comments;
    return $comments;
  }
  //Getter, nothing stored for this token
  if (!isset($this->commentsMap[$key])) {
    return null;
  }
  //Getter, the stored comments are returned only once
  $stored = $this->commentsMap[$key];
  unset($this->commentsMap[$key]);
  return $stored;
}
/**
 * Consumes the comment tokens associated with the current token,
 * registering them and emitting the TokenConsumed event when required
 *
 * @return $this
 */
protected function consumeCommentsForCurrentToken() {
  //Comments are always extracted from the map, even if nobody needs them
  $comments = $this->commentsForCurrentToken();
  $needed = $this->registerTokens || $this->eventsEmitter;
  if (!$comments || !$needed) {
    return $this;
  }
  foreach ($comments as $comment) {
    if ($this->registerTokens) {
      $this->tokens[] = $comment;
    }
    if ($this->eventsEmitter) {
      $this->eventsEmitter->fire("TokenConsumed", [$comment]);
    }
  }
  return $this;
}
/**
 * Checks if the given position follows a slash, that is if going backwards
 * from the position and skipping whitespaces that are not line terminators
 * a "/" or a "/=" is found. A slash that closes a multiline comment does
 * not count.
 *
 * @param Position $position Position to check
 *
 * @return bool
 */
protected function isAfterSlash($position) {
  //Start from the previous index and loop until the begin of the file is reached
  $idx = $position->getIndex() - 1;
  while ($idx >= 0) {
    //Get the char at the index to check
    $char = $this->charAt($idx);
    //If the char is actually a slash check that it's not a multiline comment closing slash
    if ($char === "/") {
      return $idx === 0 || $this->charAt($idx - 1) !== "*";
    }
    elseif (in_array($char, $this->whitespaces) && !in_array($char, $this->lineTerminators)) {
      //Whitespaces on the same line are skipped
      $idx--;
    }
    elseif ($char === "=" && $idx > 0 && $this->charAt($idx - 1) === "/") {
      //Division assignment "/=". The previous character is read only if it
      //exists, otherwise a negative index would be used on the source
      //array when "=" is the first character of the source
      return true;
    }
    else {
      break;
    }
  }
  return false;
}
/**
 * Tries to reconsume the current token as a regexp if possible. The scan
 * position is moved back to the start of the current token, the regexp
 * body and flags are scanned from there and the resulting token replaces
 * the current one. An already scanned next token is discarded.
 *
 * @return Token|null The regexp token, or null if the current token does
 *                    not start with "/"
 *
 * @throws Exception If the regexp or one of its character classes is not
 *                   terminated
 */
public function reconsumeCurrentTokenAsRegexp() {
$token = $this->currentToken ?: $this->getToken();
$value = $token ? $token->value : null;
//Check if the token starts with "/"
if (!$value || $value[0] !== "/") {
return null;
}
//Reset the scanner position to the token's start position
$startPosition = $token->location->start;
$this->setScanPosition($startPosition);
//Consume the opening slash
$buffer = "/";
$this->index++;
$this->column++;
$inClass = false;
while (true) {
//In a characters class the delimiter "/" is allowed without escape,
//so the characters class must be closed before closing the regexp
$stops = $inClass ? array(
"]",
) : array(
"/",
"[",
);
$tempBuffer = $this->consumeUntil($stops);
if ($tempBuffer === null) {
if ($inClass) {
$this->error("Unterminated character class in regexp");
}
else {
$this->error("Unterminated regexp");
}
}
//The first item is appended to the regexp source, the second one is
//compared with the stop characters to decide how to continue
$buffer .= $tempBuffer[0];
if ($tempBuffer[1] === "/") {
break;
}
else {
$inClass = $tempBuffer[1] === "[";
}
}
//Flags: every following ASCII letter is appended to the regexp
while (($char = $this->charAt()) !== null) {
$lower = strtolower($char);
if ($lower >= "a" && $lower <= "z") {
$buffer .= $char;
$this->index++;
$this->column++;
}
else {
break;
}
}
//If next token has already been parsed and it's a bracket exclude it
//from the count of open brackets
//NOTE(review): the open brackets array appears to be keyed by open
//bracket characters only, so for a closing bracket the isset check below
//looks like it can never pass and the increment branch would be
//unreachable - confirm against scanPunctuator before changing it
if ($this->nextToken) {
$nextVal = $this->nextToken->value;
if (isset($this->brackets[$nextVal]) && isset($this->openBrackets[$nextVal])) {
if ($this->brackets[$nextVal]) {
$this->openBrackets[$nextVal]++;
}
else {
$this->openBrackets[$nextVal]--;
}
}
$this->nextToken = null;
}
//If comments handling is enabled, get the comments associated with the
//current token
$comments = $this->comments ? $this->commentsForCurrentToken() : null;
//Replace the current token with a regexp token
$token = new Token(Token::TYPE_REGULAR_EXPRESSION, $buffer);
$token->location->start = $startPosition;
$token->location->end = $this->getPosition(true);
$this->currentToken = $token;
if ($comments) {
//Attach the comments to the new current token
$this->commentsForCurrentToken($comments);
}
return $this->currentToken;
}
/**
 * Skips whitespaces and comments from the current scan position. Line
 * comments, multiline comments, hashbang comments (if the feature is
 * enabled and only at the beginning of the source) and, outside module
 * mode, html-like comments are recognized. If comments handling is
 * enabled, the array of parsed comment tokens is returned, otherwise the
 * returned array is empty
 *
 * @return array
 *
 * @throws Exception If a multiline comment is not terminated
 */
protected function skipWhitespacesAndComments() {
$comments = [];
//Skipped text not yet applied to the column and line counters
$content = "";
//Index at which this skipped section starts, used by the html close
//comment check
$secStartIdx = $this->index;
while (($char = $this->charAt()) !== null) {
//Whitespace
if (in_array($char, $this->whitespaces)) {
$content .= $char;
$this->index++;
}
elseif ($char === "/" || $char === "#") {
$nextChar = $this->charAt($this->index + 1);
if ($char === "#") {
//Hashbang comment. This will be parsed only if hashbang comments are enabled
//and if it appears at the beginning of the code
$valid = $nextChar === "!" && $this->features->hashbangComments && !$this->index;
}
else {
$valid = $nextChar === "/" || $nextChar === "*";
}
//Comment
if ($valid) {
//If comments must be handled, empty the current content too
//and get the comment start position
if ($this->comments) {
if ($content !== "") {
$this->adjustColumnAndLine($content);
$content = "";
}
$start = $this->getPosition(true);
}
//Everything that does not start with "/*" ends at the end of the line
$inline = $nextChar !== "*";
$this->index += 2;
$content .= $char . $nextChar;
while (true) {
$char = $this->charAt();
if ($char === null) {
if (!$inline) {
//If the end of the source has been reached and
//a multiline comment is still open, it's an
//error
$this->error("Unterminated comment");
}
$isEnd = true;
}
else {
$content .= $char;
$this->index++;
//Inline comments end at a line terminator, multiline comments at "*/"
$isEnd = $inline ? in_array($char, $this->lineTerminators) : $char === "*" && $this->charAt() === "/";
}
if ($isEnd) {
if (!$inline) {
//Consume the slash that closes the multiline comment
$content .= "/";
$this->index++;
}
if ($this->comments) {
//For inline comments the closing line
//terminator must be excluded from comment text
if ($inline && $char !== null) {
$this->index--;
$content = substr($content, 0, -strlen($char));
}
$this->adjustColumnAndLine($content);
$token = new Token(Token::TYPE_COMMENT, $content);
$token->location->start = $start;
$token->location->end = $this->getPosition(true);
$comments[] = $token;
//For inline comments the new content contains
//the closing line terminator since the char has
//already been processed
$content = "";
if ($inline && $char !== null) {
$content = $char;
$this->index++;
}
}
break;
}
}
}
else {
//A slash or hash that does not start a comment is left to the token
//scanners
break;
}
}
elseif (!$this->isModule && $char === "<" && $this->charAt($this->index + 1) === "!" && $this->charAt($this->index + 2) === "-" && $this->charAt($this->index + 3) === "-") {
//If comments must be handled, empty the current content too
//and get the comment start position
if ($this->comments) {
if ($content !== "") {
$this->adjustColumnAndLine($content);
$content = "";
}
$start = $this->getPosition(true);
}
//Open html comment
$this->index += 4;
$content .= "<!--";
//The comment continues until a line terminator or the end of the source
while (true) {
$char = $this->charAt();
if ($char === null) {
$isEnd = true;
}
else {
$content .= $char;
$this->index++;
$isEnd = in_array($char, $this->lineTerminators);
}
if ($isEnd) {
if ($this->comments) {
//Remove the closing line terminator from the
//comment text
if ($char !== null) {
$this->index--;
$content = substr($content, 0, -strlen($char));
}
$this->adjustColumnAndLine($content);
$token = new Token(Token::TYPE_COMMENT, $content);
$token->location->start = $start;
$token->location->end = $this->getPosition(true);
$comments[] = $token;
//The line terminator becomes the start of the new skipped content
$content = "";
if ($char !== null) {
$content = $char;
$this->index++;
}
}
break;
}
}
}
elseif (!$this->isModule && $char === "-" && $this->charAt($this->index + 1) === "-" && $this->charAt($this->index + 2) === ">") {
//Close html comment
//Check if it is on it's own line: it is allowed when the skipped
//section starts at the beginning of the source or when a line
//terminator has been skipped before it
$allow = false;
if (!$secStartIdx) {
$allow = true;
}
else {
for ($index = $this->index - 1; $index >= $secStartIdx; $index--) {
if (in_array($this->charAt($index), $this->lineTerminators)) {
$allow = true;
break;
}
}
}
if ($allow) {
//If comments must be handled, empty the current content too
//and get the comment start position
if ($this->comments) {
if ($content !== "") {
$this->adjustColumnAndLine($content);
$content = "";
}
$start = $this->getPosition(true);
}
$this->index += 3;
$content .= "-->";
//The comment continues until a line terminator or the end of the source
while (true) {
$char = $this->charAt();
if ($char === null) {
$isEnd = true;
}
else {
$content .= $char;
$this->index++;
$isEnd = in_array($char, $this->lineTerminators);
}
if ($isEnd) {
if ($this->comments) {
//Remove the closing line terminator from the
//comment text
if ($char !== null) {
$this->index--;
$content = substr($content, 0, -strlen($char));
}
$this->adjustColumnAndLine($content);
$token = new Token(Token::TYPE_COMMENT, $content);
$token->location->start = $start;
$token->location->end = $this->getPosition(true);
$comments[] = $token;
//The line terminator becomes the start of the new skipped content
$content = "";
if ($char !== null) {
$content = $char;
$this->index++;
}
}
break;
}
}
}
else {
//Not on its own line, leave "-->" to the token scanners
break;
}
}
else {
//First character that is neither a whitespace nor a comment start
break;
}
}
//Apply the remaining skipped text to the column and line counters
if ($content !== "") {
$this->adjustColumnAndLine($content);
}
return $comments;
}
/**
 * Scans a string literal delimited by single or double quotes
 *
 * @param bool $handleEscape True to handle escaping
 *
 * @return Token|null The string token, or null if the character at the
 *                    current index is not a quote
 *
 * @throws Exception If the string is not terminated
 */
protected function scanString($handleEscape = true) {
  $quote = $this->charAt();
  if ($quote !== "'" && $quote !== '"') {
    return null;
  }
  //Consume the opening quote
  $this->index++;
  $this->column++;
  //The quote is a valid stop only for this string, so it is added to the
  //LSM just for the time needed to consume the content
  $this->stringsStopsLSM->add($quote);
  $result = $this->consumeUntil($this->stringsStopsLSM, $handleEscape);
  $this->stringsStopsLSM->remove($quote);
  //Any stop different from the opening quote means that the string has
  //not been closed
  if ($result === null || $result[1] !== $quote) {
    $this->error("Unterminated string");
  }
  return new Token(Token::TYPE_STRING_LITERAL, $quote . $result[0]);
}
/**
 * Scans a template part: it starts at a backtick or at the curly bracket
 * that closes a template expression and it ends at a backtick or at the
 * "${" that opens a new template expression
 *
 * @return Token|null The template token, or null if a template part does
 *                    not start at the current index
 *
 * @throws Exception If the template is not terminated
 */
protected function scanTemplate() {
  $char = $this->charAt();
  //Number of curly brackets currently open
  $openCurly = 0;
  if (isset($this->openBrackets["{"])) {
    $openCurly = $this->openBrackets["{"];
  }
  //A closing curly bracket ends a template expression when the number of
  //open curly brackets is the same registered when the expression started
  $endExpression = false;
  if ($char === "}") {
    $depth = count($this->openTemplates);
    if ($depth && $this->openTemplates[$depth - 1] === $openCurly) {
      array_pop($this->openTemplates);
      $endExpression = true;
    }
  }
  if ($char !== "`" && !$endExpression) {
    return null;
  }
  $this->index++;
  $this->column++;
  $buffer = $char;
  $stops = ["`", "\$"];
  while (true) {
    $chunk = $this->consumeUntil($stops);
    if (!$chunk) {
      $this->error("Unterminated template");
    }
    $buffer .= $chunk[0];
    //Any stop that is not a dollar sign ends the template part
    if ($chunk[1] !== "\$") {
      break;
    }
    //A dollar sign ends the part only if followed by a curly bracket: in
    //that case a template expression starts and the current number of
    //open curly brackets is pushed on the open templates stack
    if ($this->charAt() === "{") {
      $this->index++;
      $this->column++;
      $buffer .= "{";
      $this->openTemplates[] = $openCurly;
      break;
    }
  }
  return new Token(Token::TYPE_TEMPLATE, $buffer);
}
/**
 * Number scanning method. It handles decimal numbers with optional
 * fractional and exponent parts, hexadecimal (0x), octal (0o) and binary
 * (0b) integers and, if the BigInt feature is enabled, integers followed
 * by the "n" suffix.
 *
 * @return Token|null The numeric or BigInt token, or null if a number does
 *                    not start at the current index
 *
 * @throws Exception If digits are missing after a radix prefix or an
 *                   exponent, or if decimal digits directly follow a
 *                   prefixed number
 */
protected function scanNumber() {
  //Numbers can start with a decimal number or with a dot (.5)
  $char = $this->charAt();
  if (!($char >= "0" && $char <= "9" || $char === ".")) {
    return null;
  }
  $buffer = "";
  $allowedDecimals = true;
  //Parse the integer part
  if ($char !== ".") {
    //Consume all decimal numbers
    $buffer = $this->consumeNumbers();
    $char = $this->charAt();
    //Decimal BigInt, here $char is the "n" suffix
    if ($this->features->bigInt && $char === "n") {
      $this->index++;
      $this->column++;
      return new Token(Token::TYPE_BIGINT_LITERAL, $buffer . $char);
    }
    $lower = $char !== null ? strtolower($char) : null;
    //Handle hexadecimal (0x), octal (0o) and binary (0b) forms. The
    //character is a valid prefix if a digits table exists for it
    if ($buffer === "0" && $lower !== null && isset($this->{$lower . "numbers"})) {
      $this->index++;
      $this->column++;
      $tempBuffer = $this->consumeNumbers($lower);
      if ($tempBuffer === null) {
        $this->error("Missing numbers after 0{$char}");
      }
      $buffer .= $char . $tempBuffer;
      //Check that there are not numbers left
      if ($this->consumeNumbers() !== null) {
        $this->error();
      }
      if ($this->features->bigInt && $this->charAt() === "n") {
        $this->index++;
        $this->column++;
        //Append the "n" suffix that has just been consumed. $char still
        //contains the radix letter here, so it must not be used
        return new Token(Token::TYPE_BIGINT_LITERAL, $buffer . "n");
      }
      return new Token(Token::TYPE_NUMERIC_LITERAL, $buffer);
    }
    //Consume exponent part if present. A fractional part can't follow
    //an exponent
    if ($tempBuffer = $this->consumeExponentPart()) {
      $buffer .= $tempBuffer;
      $allowedDecimals = false;
    }
  }
  //Parse the decimal part
  if ($allowedDecimals && $this->charAt() === ".") {
    //Consume the dot
    $this->index++;
    $this->column++;
    $buffer .= ".";
    //Consume all decimal numbers
    $tempBuffer = $this->consumeNumbers();
    $buffer .= $tempBuffer;
    //If the buffer contains only the dot it should be parsed as
    //punctuator
    if ($buffer === ".") {
      $this->index--;
      $this->column--;
      return null;
    }
    //Consume exponent part if present
    if (($tempBuffer = $this->consumeExponentPart()) !== null) {
      $buffer .= $tempBuffer;
    }
  }
  return new Token(Token::TYPE_NUMERIC_LITERAL, $buffer);
}
/**
 * Consumes as many digits as possible from the current index. If the
 * numeric literal separator feature is enabled, underscores are accepted
 * after the first digit
 *
 * @param string $type Digits type: an empty string for decimal digits or
 *                     the prefix of one of the digits tables ("x", "o",
 *                     "b")
 * @param int    $max  Maximum number of characters to match
 *
 * @return string|null The consumed characters, or null if nothing matched
 *
 * @throws Exception If the consumed characters end with a separator
 */
protected function consumeNumbers($type = "", $max = null) {
  $digits = $this->{$type . "numbers"};
  $separator = $this->features->numericLiteralSeparator ? "_" : "";
  $buffer = "";
  $consumed = 0;
  while (true) {
    $char = $this->charAt();
    $isDigit = in_array($char, $digits);
    //The separator is valid only after at least one character
    $isSeparator = $consumed && $char === $separator;
    if (!$isDigit && !$isSeparator) {
      break;
    }
    $buffer .= $char;
    $this->index++;
    $this->column++;
    $consumed++;
    if ($consumed === $max) {
      break;
    }
  }
  if ($consumed && substr($buffer, -1) === "_") {
    $this->error("Numeric separators are not allowed at the end of a number");
  }
  if (!$consumed) {
    return null;
  }
  return $buffer;
}
/**
 * Consumes the exponent part of a number ("e" or "E", an optional sign and
 * at least one decimal digit).
 *
 * A syntax error is raised when the exponent marker is not followed by any
 * digit.
 *
 * @return string The consumed exponent text, or an empty string if the
 *                current character is not an exponent marker
 */
protected function consumeExponentPart() {
    $marker = $this->charAt();
    if ($marker === null || strtolower($marker) !== "e") {
        //No exponent here, nothing is consumed
        return "";
    }
    $this->index++;
    $this->column++;
    $exponent = $marker;
    //Optional sign
    $sign = $this->charAt();
    if ($sign === "+" || $sign === "-") {
        $this->index++;
        $this->column++;
        $exponent .= $sign;
    }
    //Mandatory digits
    $digits = $this->consumeNumbers();
    if ($digits === null) {
        $this->error("Missing exponent");
    }
    return $exponent . $digits;
}
/**
 * Punctuator scanning method
 *
 * Brackets are handled first so that the count of currently open brackets
 * can be kept up to date; every other punctuator is matched through the
 * punctuators LSM.
 *
 * @return Token|null The punctuator token, or null if no punctuator starts
 *                    at the current scan position
 */
protected function scanPunctuator() {
    $char = $this->charAt();
    if (isset($this->brackets[$char])) {
        //For a closing bracket the map holds the matching open bracket,
        //for an open bracket it holds a falsy value
        $opening = $this->brackets[$char];
        if ($opening) {
            $hasOpen = isset($this->openBrackets[$opening]) && $this->openBrackets[$opening];
            if ($hasOpen) {
                $this->openBrackets[$opening]--;
            }
            elseif (!$this->isAfterSlash($this->getPosition(true))) {
                //Unbalanced closing bracket that does not follow a slash
                $this->error();
            }
        }
        else {
            if (!isset($this->openBrackets[$char])) {
                $this->openBrackets[$char] = 0;
            }
            $this->openBrackets[$char]++;
        }
        $this->index++;
        $this->column++;
        return new Token(Token::TYPE_PUNCTUATOR, $char);
    }
    $match = $this->punctuatorsLSM
        ->match($this, $this->index, $char);
    if (!$match) {
        return null;
    }
    //Optional chaining punctuator cannot appear before a number, in this
    //case only the question mark must be consumed
    if ($match[1] === "?.") {
        $following = $this->charAt($this->index + $match[0]);
        if ($following !== null && $following >= "0" && $following <= "9") {
            $match = array(
                1,
                "?",
            );
        }
    }
    $this->index += $match[0];
    $this->column += $match[0];
    return new Token(Token::TYPE_PUNCTUATOR, $match[1]);
}
/**
 * Keywords and identifiers scanning method
 *
 * Consumes the longest run of identifier characters and unicode escape
 * sequences starting at the current scan position, optionally preceded by
 * "#" when private methods and fields are enabled, and classifies the
 * result as private identifier, null literal, boolean literal, keyword or
 * plain identifier.
 *
 * @return Token|null The scanned token, or null (with nothing consumed) if
 *                    no identifier starts at the current scan position
 */
protected function scanKeywordOrIdentifier() {
    //Check private identifier start character
    $private = $this->features->privateMethodsAndFields && $this->charAt() === "#";
    if ($private) {
        $this->index++;
        $this->column++;
    }
    //Consume the maximum number of characters that are unicode escape
    //sequences or valid identifier starts (only the first character) or
    //parts
    $buffer = "";
    $start = true;
    while (($char = $this->charAt()) !== null) {
        //The inline ASCII checks are a fast path that avoids a method call
        //for the most common characters
        if ($char >= "a" && $char <= "z" || $char >= "A" && $char <= "Z" || $char === "_" || $char === "\$" || !$start && $char >= "0" && $char <= "9" || $this->isIdentifierChar($char, $start)) {
            $buffer .= $char;
            $this->index++;
            $this->column++;
        }
        elseif ($char === "\\") {
            //Remember where the escape starts so that it can be unconsumed
            $seqIndex = $this->index;
            $seqColumn = $this->column;
            $seq = $this->consumeUnicodeEscapeSequence();
            if (!$seq) {
                break;
            }
            //Verify that it's a valid character
            if (!$this->isIdentifierChar($seq[1], $start)) {
                //The escape is well formed but decodes to a character that
                //cannot be part of an identifier: give it back to the
                //source instead of silently dropping it
                $this->index = $seqIndex;
                $this->column = $seqColumn;
                break;
            }
            $buffer .= $seq[0];
        }
        else {
            break;
        }
        $start = false;
    }
    //Identify token type
    if ($buffer === "") {
        //Unconsume the hash if nothing was found after that
        if ($private) {
            $this->index--;
            $this->column--;
        }
        return null;
    }
    elseif ($private) {
        $type = Token::TYPE_PRIVATE_IDENTIFIER;
        $buffer = "#" . $buffer;
    }
    elseif ($buffer === "null") {
        $type = Token::TYPE_NULL_LITERAL;
    }
    elseif ($buffer === "true" || $buffer === "false") {
        $type = Token::TYPE_BOOLEAN_LITERAL;
    }
    elseif (in_array($buffer, $this->keywords) || in_array($buffer, $this->strictModeKeywords)) {
        $type = Token::TYPE_KEYWORD;
    }
    else {
        $type = Token::TYPE_IDENTIFIER;
    }
    return new Token($type, $buffer);
}
/**
 * Consumes a unicode escape sequence, either in the fixed form \uFFFF
 * (exactly four hexadecimal digits) or in the braced form \u{FFF}.
 *
 * If the text at the current scan position is not a well formed escape
 * sequence nothing is consumed.
 *
 * @return array|null Array where the first element is the matched sequence
 *                    and the second one is the decoded character, or null
 *                    if no valid sequence is present
 */
protected function consumeUnicodeEscapeSequence() {
    if ($this->charAt() !== "\\" || $this->charAt($this->index + 1) !== "u") {
        return null;
    }
    $startIndex = $this->index;
    $startColumn = $this->column;
    $this->index += 2;
    $this->column += 2;
    $brackets = false;
    if ($this->charAt() === "{") {
        //\u{FFF}
        $brackets = true;
        $this->index++;
        $this->column++;
        $code = $this->consumeNumbers("x");
        //Compare against null explicitly: the code "0" is a falsy string
        //but it is a perfectly valid code
        if ($code !== null && $this->charAt() === "}") {
            $this->index++;
            $this->column++;
        }
        else {
            $code = null;
        }
    }
    else {
        //\uFFFF
        $code = $this->consumeNumbers("x", 4);
        if ($code !== null && strlen($code) !== 4) {
            $code = null;
        }
    }
    //Numeric separators can be consumed by consumeNumbers when the
    //feature is enabled, but they are not allowed in escape sequences
    if ($code !== null && strpos($code, "_") !== false) {
        $code = null;
    }
    //Unconsume everything if the format is invalid
    if ($code === null) {
        $this->index = $startIndex;
        $this->column = $startColumn;
        return null;
    }
    //Return an array where the first element is the matched sequence
    //and the second one is the decoded character
    return array(
        $brackets ? "\\u{" . $code . "}" : "\\u" . $code,
        Utils::unicodeToUtf8(hexdec($code)),
    );
}
/**
 * Checks if the given character is valid for an identifier
 *
 * ASCII letters, underscore and dollar are always accepted and ASCII digits
 * are accepted only when the character is not the identifier start; any
 * other character is tested against the identifier start or identifier
 * part regex.
 *
 * @param string $char  Character to check
 * @param bool   $start If true it will check that the character is
 *                      valid to start an identifier
 *
 * @return bool
 */
protected function isIdentifierChar($char, $start = true) {
    //ASCII letters
    if ($char >= "a" && $char <= "z" || $char >= "A" && $char <= "Z") {
        return true;
    }
    if ($char === "_" || $char === "\$") {
        return true;
    }
    //Digits are allowed everywhere except at the start
    if (!$start && $char >= "0" && $char <= "9") {
        return true;
    }
    $pattern = $start ? $this->idStartRegex : $this->idPartRegex;
    return (bool) preg_match($pattern, $char);
}
/**
 * Increases columns and lines count according to the given string
 *
 * The string is split with the lines splitter regex: the line counter
 * grows by the number of line breaks found. If there is at least one line
 * break the column becomes the length of the text after the last break,
 * otherwise the length of the string is added to the current column.
 *
 * @param string $buffer String to analyze
 *
 * @return void
 */
protected function adjustColumnAndLine($buffer) {
    $segments = preg_split($this->linesSplitter, $buffer);
    $breaks = count($segments) - 1;
    //Length in characters (not bytes) of the text after the last break
    $lastWidth = mb_strlen($segments[$breaks], "UTF-8");
    $this->line += $breaks;
    $this->column = $breaks ? $lastWidth : $this->column + $lastWidth;
}
/**
 * Consumes characters until one of the given characters is found
 *
 * A stop that immediately follows an unescaped backslash is not treated as
 * a stop when escape handling is enabled. Column and line are updated only
 * when a valid stop is reached.
 *
 * @param array|LSM $stops        Characters to search
 * @param bool      $handleEscape True to handle escaping
 * @param bool      $collectStop  True to include the stop character
 *
 * @return array|null Array containing the consumed text and the stop that
 *                    ended it, or null if the end of the source is reached
 *                    without finding a stop or if the stop is not collected
 *                    and nothing precedes it
 */
protected function consumeUntil($stops, $handleEscape = true, $collectStop = true) {
    $useLSM = $stops instanceof LSM;
    $collected = "";
    $afterBackslash = false;
    for ($char = $this->charAt(); $char !== null; $char = $this->charAt()) {
        $step = 1;
        if ($useLSM) {
            $found = $stops->match($this, $this->index, $char);
            $hit = (bool) $found;
            if ($hit) {
                //The matched stop can be longer than one character
                $step = $found[0];
                $char = $found[1];
            }
        }
        else {
            $hit = in_array($char, $stops);
        }
        $terminates = $hit && !$afterBackslash;
        if ($collectStop || !$terminates) {
            $this->index += $step;
            $collected .= $char;
        }
        if ($terminates) {
            if (!$collectStop && $collected === "") {
                return null;
            }
            $this->adjustColumnAndLine($collected);
            return array(
                $collected,
                $char,
            );
        }
        //A backslash escapes the next character only if it is not escaped
        //itself
        $afterBackslash = !$afterBackslash && $handleEscape && $char === "\\";
    }
    return null;
}
}
Members
Title | Modifiers | Object type | Summary |
---|---|---|---|
Scanner::$bnumbers | protected | property | Binary numbers |
Scanner::$brackets | protected | property | Brackets array |
Scanner::$column | protected | property | Current column |
Scanner::$comments | protected | property | Comments handling |
Scanner::$commentsMap | protected | property | Comments to tokens map |
Scanner::$currentToken | protected | property | Current token |
Scanner::$eventsEmitter | protected | property | Events emitter |
Scanner::$features | protected | property | Scanner features |
Scanner::$idPartRegex | protected | property | Regex to match identifiers parts |
Scanner::$idStartRegex | protected | property | Regex to match identifiers starts |
Scanner::$index | protected | property | Current index |
Scanner::$isModule | protected | property | Module mode |
Scanner::$jsx | protected | property | Internal JSX scan flag |
Scanner::$keywords | protected | property | Keywords array |
Scanner::$length | protected | property | Source length |
Scanner::$line | protected | property | Current line |
Scanner::$linesSplitter | protected | property | Regex to split texts using valid ES line terminators |
Scanner::$lineTerminators | protected | property | Concatenation of line terminators characters and line terminators sequences |
Scanner::$lineTerminatorsChars | public static | property | Line terminators characters array |
Scanner::$lineTerminatorsSequences | public static | property | Line terminators sequences array |
Scanner::$nextToken | protected | property | Next token |
Scanner::$numbers | protected | property | Decimal numbers |
Scanner::$onumbers | protected | property | Octal numbers |
Scanner::$openBrackets | protected | property | Open brackets array |
Scanner::$openTemplates | protected | property | Open templates array |
Scanner::$position | protected | property | Consumed position |
Scanner::$punctuators | protected | property | Punctuators array |
Scanner::$punctuatorsLSM | protected | property | Punctuators LSM |
Scanner::$registerTokens | protected | property | True to register tokens in the tokens array |
Scanner::$source | protected | property | Source characters |
Scanner::$stateProps | protected | property | Properties to copy when getting the scanner state |
Scanner::$strictMode | protected | property | Strict mode flag |
Scanner::$strictModeKeywords | protected | property | Array of words that are keywords only in strict mode |
Scanner::$stringsStopsLSM | protected | property | Strings stops LSM |
Scanner::$tokens | protected | property | Registered tokens array |
Scanner::$whitespaces | protected | property | Whitespaces array |
Scanner::$xnumbers | protected | property | Hexadecimal numbers |
Scanner::adjustColumnAndLine | protected | function | Increases columns and lines count according to the given string |
Scanner::charAt | public | function | Return the character at the given index in the source code or null if the end is reached. |
Scanner::commentsForCurrentToken | protected | function | Gets or sets comments for the current token. If the parameter is an array it associates the given comments array to the current node, otherwise comments for the current token are returned |
Scanner::consume | public | function | Checks if the given string is matched, if so it consumes the token |
Scanner::consumeCommentsForCurrentToken | protected | function | Consumes comment tokens associated with the current token |
Scanner::consumeEnd | public | function | Executes the operations to handle the end of the source scanning |
Scanner::consumeExponentPart | protected | function | Consumes the exponent part of a number |
Scanner::consumeNumbers | protected | function | Consumes the maximum number of digits |
Scanner::consumeOneOf | public | function | Checks if one of the given strings is matched, if so it consumes the token |
Scanner::consumeToken | public | function | Consumes the current token |
Scanner::consumeUnicodeEscapeSequence | protected | function | Consumes an unicode escape sequence |
Scanner::consumeUntil | protected | function | Consumes characters until one of the given characters is found |
Scanner::enableComments | public | function | Enables or disables comments handling |
Scanner::enableModuleMode | public | function | Enables or disables module scanning mode |
Scanner::enableTokenRegistration | public | function | Enables or disables tokens registration in the token array |
Scanner::error | protected | function | Throws a syntax error |
Scanner::getEventsEmitter | public | function | Returns the scanner's event emitter |
Scanner::getNextToken | public | function | Returns the next token |
Scanner::getPosition | public | function | Returns current scanner state |
Scanner::getState | public | function | Returns the current scanner state |
Scanner::getStrictMode | public | function | Return strict mode state |
Scanner::getToken | public | function | Returns the current token |
Scanner::getTokens | public | function | Return registered tokens |
Scanner::isAfterSlash | protected | function | Checks if the given position follows a slash. |
Scanner::isBefore | public | function | Checks if one of the given strings follows the current scan position |
Scanner::isIdentifierChar | protected | function | Checks if the given character is valid for an identifier |
Scanner::isStrictModeKeyword | public | function | Checks if the given token is a keyword in the current strict mode state |
Scanner::noLineTerminators | public | function | Checks that there are not line terminators following the current scan position before next token |
Scanner::reconsumeCurrentTokenAsRegexp | public | function | Tries to reconsume the current token as a regexp if possible |
Scanner::scanKeywordOrIdentifier | protected | function | Keywords and identifiers scanning method |
Scanner::scanNumber | protected | function | Number scanning method |
Scanner::scanPunctuator | protected | function | Punctuator scanning method |
Scanner::scanString | protected | function | String scanning method |
Scanner::scanTemplate | protected | function | Template scanning method |
Scanner::setScanPosition | public | function | Sets the current scan position at the given one |
Scanner::setState | public | function | Sets the current scanner state |
Scanner::setStrictMode | public | function | Enables or disables strict mode |
Scanner::skipWhitespacesAndComments | protected | function | Skips whitespaces and comments from the current scan position. If comments handling is enabled, the array of parsed comments |
Scanner::stripBOM | public | function | Strips BOM characters from the source and detects source encoding if not given by the user |
Scanner::__construct | | function | Class constructor |