function TokenPolyfill::tokenize

Tokenize the given source code and return an array of tokens.

This performs certain canonicalizations to match the PHP 8.0 token format:

- Bad characters are represented using T_BAD_CHARACTER rather than omitted.
- T_COMMENT does not include trailing newlines; instead, the newline is part of a following T_WHITESPACE token.
- Namespaced names are represented using T_NAME_* tokens.

Return value

static[]

File

vendor/nikic/php-parser/lib/PhpParser/Internal/TokenPolyfill.php, line 124

Class

TokenPolyfill: This is a polyfill for the PhpToken class introduced in PHP 8.0. We do not actually polyfill PhpToken, because composer might end up picking a different polyfill implementation, which does not meet our requirements.

Namespace

PhpParser\Internal

Code

/**
 * Tokenize the given source code and return an array of tokens.
 *
 * Wraps token_get_all() and converts its output into instances of the called class, tracking the
 * line and byte offset of every token itself. Two rewrites are applied to match the PHP 8.0
 * token format:
 *  * A T_COMMENT that does not start with "/*" loses its trailing newline; the newline is moved
 *    into the following T_WHITESPACE token, or emitted as a new T_WHITESPACE token if none follows.
 *  * Alternating runs of T_NS_SEPARATOR and identifier tokens are merged into one T_NAME_* token.
 *
 * Line breaks are counted uniformly as "\n", "\r\n" or a lone "\r".
 *
 * @param string $code  Source code to tokenize.
 * @param int    $flags Flags passed through unchanged to token_get_all().
 *
 * @return static[] Tokens in source order.
 */
public static function tokenize(string $code, int $flags = 0) : array {
    self::init();
    $tokens = [];
    // Running line number (1-based) and byte offset (0-based) of the token being emitted.
    $line = 1;
    $pos = 0;
    $origTokens = \token_get_all($code, $flags);
    $numTokens = \count($origTokens);
    for ($i = 0; $i < $numTokens; $i++) {
        $token = $origTokens[$i];
        if (\is_string($token)) {
            // Plain-string tokens are emitted with the character's ordinal as their id.
            if (\strlen($token) === 2) {
                // b" and B" are tokenized as single-char tokens, even though they aren't.
                $tokens[] = new static(\ord('"'), $token, $line, $pos);
                $pos += 2;
            }
            else {
                $tokens[] = new static(\ord($token), $token, $line, $pos);
                $pos++;
            }
        }
        else {
            $id = $token[0];
            $text = $token[1];
            // Emulate PHP 8.0 comment format, which does not include trailing whitespace anymore.
            if ($id === \T_COMMENT && \substr($text, 0, 2) !== '/*' && \preg_match('/(\\r\\n|\\n|\\r)$/D', $text, $matches)) {
                $trailingNewline = $matches[0];
                $text = \substr($text, 0, -\strlen($trailingNewline));
                $tokens[] = new static($id, $text, $line, $pos);
                $pos += \strlen($text);
                if ($i + 1 < $numTokens && $origTokens[$i + 1][0] === \T_WHITESPACE) {
                    // Move trailing newline into following T_WHITESPACE token, if it already exists.
                    // $line is not advanced here: the newline is counted when that whitespace
                    // token is emitted on the next iteration.
                    $origTokens[$i + 1][1] = $trailingNewline . $origTokens[$i + 1][1];
                    $origTokens[$i + 1][2]--;
                }
                else {
                    // Otherwise, we need to create a new T_WHITESPACE token.
                    $tokens[] = new static(\T_WHITESPACE, $trailingNewline, $line, $pos);
                    $line++;
                    $pos += \strlen($trailingNewline);
                }
                continue;
            }
            // Emulate PHP 8.0 T_NAME_* tokens, by combining sequences of T_NS_SEPARATOR and
            // T_STRING into a single token.
            if ($id === \T_NS_SEPARATOR || isset(self::$identifierTokens[$id])) {
                $newText = $text;
                $lastWasSeparator = $id === \T_NS_SEPARATOR;
                // Consume tokens while they strictly alternate between separator and identifier.
                for ($j = $i + 1; $j < $numTokens; $j++) {
                    if ($lastWasSeparator) {
                        if (!isset(self::$identifierTokens[$origTokens[$j][0]])) {
                            break;
                        }
                        $lastWasSeparator = false;
                    }
                    else {
                        if ($origTokens[$j][0] !== \T_NS_SEPARATOR) {
                            break;
                        }
                        $lastWasSeparator = true;
                    }
                    $newText .= $origTokens[$j][1];
                }
                if ($lastWasSeparator) {
                    // Trailing separator is not part of the name.
                    $j--;
                    $newText = \substr($newText, 0, -1);
                }
                // Only rewrite when at least one additional token was merged into the name.
                if ($j > $i + 1) {
                    if ($id === \T_NS_SEPARATOR) {
                        $id = \T_NAME_FULLY_QUALIFIED;
                    }
                    elseif ($id === \T_NAMESPACE) {
                        $id = \T_NAME_RELATIVE;
                    }
                    else {
                        $id = \T_NAME_QUALIFIED;
                    }
                    $tokens[] = new static($id, $newText, $line, $pos);
                    $pos += \strlen($newText);
                    // Resume after the last merged token (the loop's $i++ lands on $j).
                    $i = $j - 1;
                    continue;
                }
            }
            $tokens[] = new static($id, $text, $line, $pos);
            // Count "\n", "\r\n" and a lone "\r" as one line break each, consistent with the
            // newline forms accepted for comments above. Every "\r\n" is seen by both the "\n"
            // and the "\r" count, so it is subtracted once.
            $line += \substr_count($text, "\n");
            if (\strpos($text, "\r") !== false) {
                $line += \substr_count($text, "\r") - \substr_count($text, "\r\n");
            }
            $pos += \strlen($text);
        }
    }
    return $tokens;
}