class TokenPolyfill

This is a polyfill for the PhpToken class introduced in PHP 8.0. We do not actually polyfill PhpToken, because composer might end up picking a different polyfill implementation, which does not meet our requirements.

@internal

Hierarchy

class \PhpParser\Internal\TokenPolyfill

Expanded class hierarchy of TokenPolyfill

File

vendor/nikic/php-parser/lib/PhpParser/Internal/TokenPolyfill.php, line 18

Namespace

PhpParser\Internal

View source

/**
 * Polyfill for the PhpToken class introduced in PHP 8.0.
 *
 * PhpToken itself is deliberately not polyfilled, because composer might end up picking a
 * different polyfill implementation, which does not meet our requirements.
 *
 * @internal
 */
class TokenPolyfill {

    /** @var int The ID of the token. Either a T_* constant or a character code < 256. */
    public int $id;

    /** @var string The textual content of the token. */
    public string $text;

    /** @var int The 1-based starting line of the token (or -1 if unknown). */
    public int $line;

    /** @var int The 0-based starting position of the token (or -1 if unknown). */
    public int $pos;

    /** @var array<int, bool> Tokens ignored by the PHP parser. */
    private const IGNORABLE_TOKENS = [
        \T_WHITESPACE => true,
        \T_COMMENT => true,
        \T_DOC_COMMENT => true,
        \T_OPEN_TAG => true,
    ];

    /** @var array<int, bool> Tokens that may be part of a T_NAME_* identifier. */
    private static array $identifierTokens;

    /**
     * Create a Token with the given ID and text, as well as optional line and position
     * information.
     *
     * @param int    $id   Token ID: a T_* constant or a character code < 256
     * @param string $text Textual content of the token
     * @param int    $line 1-based starting line, or -1 if unknown
     * @param int    $pos  0-based starting position, or -1 if unknown
     */
    public final function __construct(int $id, string $text, int $line = -1, int $pos = -1) {
        $this->id = $id;
        $this->text = $text;
        $this->line = $line;
        $this->pos = $pos;
    }

    /**
     * Get the name of the token. For single-char tokens this will be the token character.
     * Otherwise it will be a T_* style name, or null if the token ID is unknown.
     *
     * @return string|null Token character, T_* name, or null for an unknown ID
     */
    public function getTokenName() : ?string {
        if ($this->id < 256) {
            return \chr($this->id);
        }
        // token_name() reports IDs it does not know as the literal string 'UNKNOWN'.
        $name = \token_name($this->id);
        return $name === 'UNKNOWN' ? null : $name;
    }

    /**
     * Check whether the token is of the given kind. The kind may be either an integer that matches
     * the token ID, a string that matches the token text, or an array of integers/strings. In the
     * latter case, the function returns true if any of the kinds in the array match.
     *
     * @param int|string|(int|string)[] $kind
     *
     * @return bool Whether the token matches the given kind(s)
     *
     * @throws \TypeError If $kind, or an element of an array $kind that is reached before a
     *                    match is found, is neither an int nor a string
     */
    public function is($kind) : bool {
        if (\is_int($kind)) {
            return $this->id === $kind;
        }
        if (\is_string($kind)) {
            return $this->text === $kind;
        }
        if (\is_array($kind)) {
            foreach ($kind as $entry) {
                if (\is_int($entry)) {
                    if ($this->id === $entry) {
                        return true;
                    }
                }
                elseif (\is_string($entry)) {
                    if ($this->text === $entry) {
                        return true;
                    }
                }
                else {
                    throw new \TypeError('Argument #1 ($kind) must only have elements of type string|int, ' . \gettype($entry) . ' given');
                }
            }
            return false;
        }
        throw new \TypeError('Argument #1 ($kind) must be of type string|int|array, ' . \gettype($kind) . ' given');
    }

    /**
     * Check whether this token would be ignored by the PHP parser. Returns true for T_WHITESPACE,
     * T_COMMENT, T_DOC_COMMENT and T_OPEN_TAG, and false for everything else.
     */
    public function isIgnorable() : bool {
        return isset(self::IGNORABLE_TOKENS[$this->id]);
    }

    /**
     * Return the textual content of the token.
     */
    public function __toString() : string {
        return $this->text;
    }

    /**
     * Tokenize the given source code and return an array of tokens.
     *
     * This performs certain canonicalizations to match the PHP 8.0 token format:
     *  * Bad characters are represented using T_BAD_CHARACTER rather than omitted.
     *  * T_COMMENT does not include trailing newlines, instead the newline is part of a following
     *    T_WHITESPACE token.
     *  * Namespaced names are represented using T_NAME_* tokens.
     *
     * Line and position information is computed here from the token texts: positions are byte
     * offsets, and the line counter advances for every "\n", "\r\n" or bare "\r" in a token.
     *
     * @param string $code  Source code to tokenize
     * @param int    $flags Flags passed through to token_get_all()
     *
     * @return static[]
     */
    public static function tokenize(string $code, int $flags = 0) : array {
        self::init();
        $tokens = [];
        $line = 1;
        $pos = 0;
        $origTokens = \token_get_all($code, $flags);
        $numTokens = \count($origTokens);
        for ($i = 0; $i < $numTokens; $i++) {
            $token = $origTokens[$i];
            if (\is_string($token)) {
                if (\strlen($token) === 2) {
                    // b" and B" are tokenized as single-char tokens, even though they aren't.
                    $tokens[] = new static(\ord('"'), $token, $line, $pos);
                    $pos += 2;
                }
                else {
                    $tokens[] = new static(\ord($token), $token, $line, $pos);
                    $pos++;
                }
            }
            else {
                $id = $token[0];
                $text = $token[1];
                // Emulate PHP 8.0 comment format, which does not include trailing whitespace anymore.
                if ($id === \T_COMMENT && \substr($text, 0, 2) !== '/*' && \preg_match('/(\\r\\n|\\n|\\r)$/D', $text, $matches)) {
                    $trailingNewline = $matches[0];
                    $text = \substr($text, 0, -\strlen($trailingNewline));
                    $tokens[] = new static($id, $text, $line, $pos);
                    $pos += \strlen($text);
                    if ($i + 1 < $numTokens && $origTokens[$i + 1][0] === \T_WHITESPACE) {
                        // Move trailing newline into following T_WHITESPACE token, if it already
                        // exists. The line counter is advanced when that token is processed.
                        $origTokens[$i + 1][1] = $trailingNewline . $origTokens[$i + 1][1];
                    }
                    else {
                        // Otherwise, we need to create a new T_WHITESPACE token.
                        $tokens[] = new static(\T_WHITESPACE, $trailingNewline, $line, $pos);
                        $line++;
                        $pos += \strlen($trailingNewline);
                    }
                    continue;
                }
                // Emulate PHP 8.0 T_NAME_* tokens, by combining sequences of T_NS_SEPARATOR and
                // T_STRING into a single token.
                if ($id === \T_NS_SEPARATOR || isset(self::$identifierTokens[$id])) {
                    $newText = $text;
                    $lastWasSeparator = $id === \T_NS_SEPARATOR;
                    // Consume strictly alternating identifier / separator tokens.
                    for ($j = $i + 1; $j < $numTokens; $j++) {
                        if ($lastWasSeparator) {
                            if (!isset(self::$identifierTokens[$origTokens[$j][0]])) {
                                break;
                            }
                            $lastWasSeparator = false;
                        }
                        else {
                            if ($origTokens[$j][0] !== \T_NS_SEPARATOR) {
                                break;
                            }
                            $lastWasSeparator = true;
                        }
                        $newText .= $origTokens[$j][1];
                    }
                    if ($lastWasSeparator) {
                        // Trailing separator is not part of the name.
                        $j--;
                        $newText = \substr($newText, 0, -1);
                    }
                    // Only emit a T_NAME_* token if at least one further token was merged.
                    if ($j > $i + 1) {
                        if ($id === \T_NS_SEPARATOR) {
                            $id = \T_NAME_FULLY_QUALIFIED;
                        }
                        elseif ($id === \T_NAMESPACE) {
                            $id = \T_NAME_RELATIVE;
                        }
                        else {
                            $id = \T_NAME_QUALIFIED;
                        }
                        $tokens[] = new static($id, $newText, $line, $pos);
                        $pos += \strlen($newText);
                        // Resume after the last merged token (the loop's $i++ lands on $j).
                        $i = $j - 1;
                        continue;
                    }
                }
                $tokens[] = new static($id, $text, $line, $pos);
                // Count bare "\r" as well: the comment handling above already treats it as a
                // newline, so the general path has to agree with it.
                $line += self::countNewlines($text);
                $pos += \strlen($text);
            }
        }
        return $tokens;
    }

    /**
     * Count the line breaks in the given text, where "\n", "\r\n" and a bare "\r" each count as
     * exactly one line break.
     */
    private static function countNewlines(string $text) : int {
        // Every "\n" ends a line; a "\r" does too unless it is the first half of a "\r\n" pair.
        return \substr_count($text, "\n") + \substr_count($text, "\r") - \substr_count($text, "\r\n");
    }

    /** Initialize private static state needed by tokenize(). */
    private static function init() : void {
        if (isset(self::$identifierTokens)) {
            return;
        }
        // Based on semi_reserved production.
        self::$identifierTokens = \array_fill_keys([
            \T_STRING,
            \T_STATIC,
            \T_ABSTRACT,
            \T_FINAL,
            \T_PRIVATE,
            \T_PROTECTED,
            \T_PUBLIC,
            \T_READONLY,
            \T_INCLUDE,
            \T_INCLUDE_ONCE,
            \T_EVAL,
            \T_REQUIRE,
            \T_REQUIRE_ONCE,
            \T_LOGICAL_OR,
            \T_LOGICAL_XOR,
            \T_LOGICAL_AND,
            \T_INSTANCEOF,
            \T_NEW,
            \T_CLONE,
            \T_EXIT,
            \T_IF,
            \T_ELSEIF,
            \T_ELSE,
            \T_ENDIF,
            \T_ECHO,
            \T_DO,
            \T_WHILE,
            \T_ENDWHILE,
            \T_FOR,
            \T_ENDFOR,
            \T_FOREACH,
            \T_ENDFOREACH,
            \T_DECLARE,
            \T_ENDDECLARE,
            \T_AS,
            \T_TRY,
            \T_CATCH,
            \T_FINALLY,
            \T_THROW,
            \T_USE,
            \T_INSTEADOF,
            \T_GLOBAL,
            \T_VAR,
            \T_UNSET,
            \T_ISSET,
            \T_EMPTY,
            \T_CONTINUE,
            \T_GOTO,
            \T_FUNCTION,
            \T_CONST,
            \T_RETURN,
            \T_PRINT,
            \T_YIELD,
            \T_LIST,
            \T_SWITCH,
            \T_ENDSWITCH,
            \T_CASE,
            \T_DEFAULT,
            \T_BREAK,
            \T_ARRAY,
            \T_CALLABLE,
            \T_EXTENDS,
            \T_IMPLEMENTS,
            \T_NAMESPACE,
            \T_TRAIT,
            \T_INTERFACE,
            \T_CLASS,
            \T_CLASS_C,
            \T_TRAIT_C,
            \T_FUNC_C,
            \T_METHOD_C,
            \T_LINE,
            \T_FILE,
            \T_DIR,
            \T_NS_C,
            \T_HALT_COMPILER,
            \T_FN,
            \T_MATCH,
        ], true);
    }

}

Members

Title Sort descending	Modifiers	Object type	Summary
TokenPolyfill::$id	public	property	@var int The ID of the token. Either a T_* constant or a character code < 256.
TokenPolyfill::$identifierTokens	private static	property	@var array<int, bool> Tokens that may be part of a T_NAME_* identifier.
TokenPolyfill::$line	public	property	@var int The 1-based starting line of the token (or -1 if unknown).
TokenPolyfill::$pos	public	property	@var int The 0-based starting position of the token (or -1 if unknown).
TokenPolyfill::$text	public	property	@var string The textual content of the token.
TokenPolyfill::getTokenName	public	function	Get the name of the token. For single-char tokens this will be the token character. Otherwise it will be a T_* style name, or null if the token ID is unknown.
TokenPolyfill::IGNORABLE_TOKENS	private	constant	@var array<int, bool> Tokens ignored by the PHP parser.
TokenPolyfill::init	private static	function	Initialize private static state needed by tokenize().
TokenPolyfill::is	public	function	Check whether the token is of the given kind. The kind may be either an integer that matches the token ID, a string that matches the token text, or an array of integers/strings. In the latter case, the function returns true if any of the kinds in the…
TokenPolyfill::isIgnorable	public	function	Check whether this token would be ignored by the PHP parser. Returns true for T_WHITESPACE, T_COMMENT, T_DOC_COMMENT and T_OPEN_TAG, and false for everything else.
TokenPolyfill::tokenize	public static	function	Tokenize the given source code and return an array of tokens.
TokenPolyfill::__construct	final public	function	Create a Token with the given ID and text, as well as optional line and position information.
TokenPolyfill::__toString	public	function	Return the textual content of the token.