function TokenPolyfill::tokenize
Tokenize the given source code and return an array of tokens.
This performs certain canonicalizations to match the PHP 8.0 token format:
- Bad characters are represented using T_BAD_CHARACTER rather than omitted.
- T_COMMENT does not include trailing newlines; instead, the newline is part of a following T_WHITESPACE token.
- Namespaced names are represented using T_NAME_* tokens.
Return value
static[]
File
- vendor/nikic/php-parser/lib/PhpParser/Internal/TokenPolyfill.php, line 124
Class
- TokenPolyfill
- This is a polyfill for the PhpToken class introduced in PHP 8.0. We do not actually polyfill PhpToken, because composer might end up picking a different polyfill implementation, which does not meet our requirements.
Namespace
PhpParser\Internal
Code
/**
 * Tokenize the given source code and return an array of tokens.
 *
 * The raw output of token_get_all() is rewritten to match the PHP 8.0 token format:
 *  - T_COMMENT does not include a trailing newline; the newline becomes part of a
 *    following T_WHITESPACE token (an existing one, or a newly created one).
 *  - Sequences of T_NS_SEPARATOR and identifier tokens are merged into a single
 *    T_NAME_FULLY_QUALIFIED, T_NAME_RELATIVE or T_NAME_QUALIFIED token.
 *  - Single-character tokens (returned as plain strings by token_get_all()) are
 *    wrapped in token objects whose id is the character's ordinal value.
 *
 * Line numbers and byte offsets are tracked locally while walking the token list.
 * "\n", "\r\n" and a lone "\r" each count as exactly one line break, so a newline
 * advances the line counter the same way regardless of which token carries it.
 *
 * @param string $code  Source code to tokenize
 * @param int    $flags Flags passed through unchanged to token_get_all()
 *
 * @return static[] Tokens in source order
 */
public static function tokenize(string $code, int $flags = 0) : array {
    self::init();

    $tokens = [];
    $line = 1;
    $pos = 0;
    $origTokens = \token_get_all($code, $flags);
    $numTokens = \count($origTokens);

    for ($i = 0; $i < $numTokens; $i++) {
        $token = $origTokens[$i];

        if (\is_string($token)) {
            if (\strlen($token) === 2) {
                // b" and B" are tokenized as single-char tokens, even though they aren't.
                $tokens[] = new static(\ord('"'), $token, $line, $pos);
                $pos += 2;
            } else {
                $tokens[] = new static(\ord($token), $token, $line, $pos);
                $pos++;
            }
            continue;
        }

        $id = $token[0];
        $text = $token[1];

        // Emulate PHP 8.0 comment format, which does not include trailing whitespace anymore.
        // Only single-line comments are affected; block comments never carry a trailing newline.
        if ($id === \T_COMMENT && \substr($text, 0, 2) !== '/*' && \preg_match('/(\\r\\n|\\n|\\r)$/D', $text, $matches)) {
            $trailingNewline = $matches[0];
            $text = \substr($text, 0, -\strlen($trailingNewline));
            $tokens[] = new static($id, $text, $line, $pos);
            $pos += \strlen($text);

            if ($i + 1 < $numTokens && $origTokens[$i + 1][0] === \T_WHITESPACE) {
                // Move trailing newline into following T_WHITESPACE token, if it already exists.
                // The line counter is advanced when that whitespace token is processed.
                $origTokens[$i + 1][1] = $trailingNewline . $origTokens[$i + 1][1];
            } else {
                // Otherwise, we need to create a new T_WHITESPACE token.
                $tokens[] = new static(\T_WHITESPACE, $trailingNewline, $line, $pos);
                $line++;
                $pos += \strlen($trailingNewline);
            }
            continue;
        }

        // Emulate PHP 8.0 T_NAME_* tokens, by combining sequences of T_NS_SEPARATOR and
        // T_STRING into a single token.
        if ($id === \T_NS_SEPARATOR || isset(self::$identifierTokens[$id])) {
            $newText = $text;
            $lastWasSeparator = $id === \T_NS_SEPARATOR;
            // Consume tokens as long as separators and identifiers strictly alternate.
            for ($j = $i + 1; $j < $numTokens; $j++) {
                if ($lastWasSeparator) {
                    if (!isset(self::$identifierTokens[$origTokens[$j][0]])) {
                        break;
                    }
                    $lastWasSeparator = false;
                } else {
                    if ($origTokens[$j][0] !== \T_NS_SEPARATOR) {
                        break;
                    }
                    $lastWasSeparator = true;
                }
                $newText .= $origTokens[$j][1];
            }
            if ($lastWasSeparator) {
                // Trailing separator is not part of the name.
                $j--;
                $newText = \substr($newText, 0, -1);
            }
            // Only emit a combined token if at least one further token was merged.
            if ($j > $i + 1) {
                if ($id === \T_NS_SEPARATOR) {
                    $id = \T_NAME_FULLY_QUALIFIED;
                } elseif ($id === \T_NAMESPACE) {
                    $id = \T_NAME_RELATIVE;
                } else {
                    $id = \T_NAME_QUALIFIED;
                }
                $tokens[] = new static($id, $newText, $line, $pos);
                $pos += \strlen($newText);
                // Resume after the last merged token (the loop header adds one).
                $i = $j - 1;
                continue;
            }
        }

        $tokens[] = new static($id, $text, $line, $pos);
        // Count "\n", "\r\n" and lone "\r" as one line break each: every "\r\n" pair
        // contributes to both of the first two counts, so subtract it once.
        $line += \substr_count($text, "\n")
            + \substr_count($text, "\r")
            - \substr_count($text, "\r\n");
        $pos += \strlen($text);
    }

    return $tokens;
}