function CSS::tokenize

Creates an array of tokens when given some CSS code.
Uses the PHP tokenizer to do all the tricky work.
Parameters

string $string: The string to tokenize.
Return value

array
Overrides PHP::tokenize
File

vendor/squizlabs/php_codesniffer/src/Tokenizers/CSS.php, line 52
Class

CSS
Namespace

PHP_CodeSniffer\Tokenizers
Code

/**
 * Creates an array of tokens when given some CSS code.
 *
 * The CSS is wrapped in PHP open/close tags and handed to the parent
 * tokenizer, then the resulting tokens are adjusted in two passes:
 *
 * - 1st pass: converts a handful of PHP keyword tokens to T_STRING,
 *   collapses embedded PHP into a single T_EMBEDDED_PHP token, splits
 *   T_GOTO_LABEL into T_STRING + T_COLON and re-tokenizes the content of
 *   "#" and "//" comments (producing T_COLOUR, T_HASH or T_STRING tokens).
 * - 2nd pass: tracks style definitions and at-rules, joins strings and
 *   numbers that are connected with a minus sign, marks T_STYLE tokens and
 *   merges the content of "url(...)" into a single T_URL token.
 *
 * @param string $string The string to tokenize.
 *
 * @return array
 */
public function tokenize($string) {
    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t*** START CSS TOKENIZING 1ST PASS ***" . PHP_EOL;
    }
    // If the content doesn't have an EOL char on the end, add one so
    // the open and close tags we add are parsed correctly.
    $eolAdded = false;
    if (substr($string, strlen($this->eolChar) * -1) !== $this->eolChar) {
        $string .= $this->eolChar;
        $eolAdded = true;
    }
    // Hide real PHP tags behind placeholders so the PHP tokenizer treats
    // the whole string as a single block of PHP code.
    $string = str_replace('<?php', '^PHPCS_CSS_T_OPEN_TAG^', $string);
    $string = str_replace('?>', '^PHPCS_CSS_T_CLOSE_TAG^', $string);
    $tokens = parent::tokenize('<?php ' . $string . '?>');
    $finalTokens = [];
    $finalTokens[0] = [
        'code' => T_OPEN_TAG,
        'type' => 'T_OPEN_TAG',
        'content' => '',
    ];
    $newStackPtr = 1;
    $numTokens = count($tokens);
    $multiLineComment = false;
    // Start at 1 to skip the open tag we added; it is already in $finalTokens.
    for ($stackPtr = 1; $stackPtr < $numTokens; $stackPtr++) {
        $token = $tokens[$stackPtr];
        // CSS files don't have lists, breaks etc, so convert these to
        // standard strings early so they can be converted into T_STYLE
        // tokens and joined with other strings if needed.
        if ($token['code'] === T_BREAK || $token['code'] === T_LIST || $token['code'] === T_DEFAULT || $token['code'] === T_SWITCH || $token['code'] === T_FOR || $token['code'] === T_FOREACH || $token['code'] === T_WHILE || $token['code'] === T_DEC || $token['code'] === T_NEW) {
            $token['type'] = 'T_STRING';
            $token['code'] = T_STRING;
        }
        // Restore any placeholders that ended up inside a single token.
        $token['content'] = str_replace('^PHPCS_CSS_T_OPEN_TAG^', '<?php', $token['content']);
        $token['content'] = str_replace('^PHPCS_CSS_T_CLOSE_TAG^', '?>', $token['content']);
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            $type = $token['type'];
            $content = Util\Common::prepareForOutput($token['content']);
            echo "\tProcess token {$stackPtr}: {$type} => {$content}" . PHP_EOL;
        }
        if ($token['code'] === T_BITWISE_XOR && $tokens[$stackPtr + 1]['content'] === 'PHPCS_CSS_T_OPEN_TAG') {
            // The placeholder was tokenized as "^", the name and "^", so
            // skip those three tokens and collect everything up to the
            // close tag placeholder.
            $content = '<?php';
            for ($stackPtr += 3; $stackPtr < $numTokens; $stackPtr++) {
                if ($tokens[$stackPtr]['code'] === T_BITWISE_XOR && $tokens[$stackPtr + 1]['content'] === 'PHPCS_CSS_T_CLOSE_TAG') {
                    // Add the end tag and ignore the ^ we put at the end
                    // of the placeholder.
                    $content .= '?>';
                    $stackPtr += 2;
                    break;
                }
                else {
                    $content .= $tokens[$stackPtr]['content'];
                }
            }
            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                echo "\t\t=> Found embedded PHP code: ";
                $cleanContent = Util\Common::prepareForOutput($content);
                echo $cleanContent . PHP_EOL;
            }
            $finalTokens[$newStackPtr] = [
                'type' => 'T_EMBEDDED_PHP',
                'code' => T_EMBEDDED_PHP,
                'content' => $content,
            ];
            $newStackPtr++;
            continue;
        }

        //end if
        if ($token['code'] === T_GOTO_LABEL) {
            // Convert these back to T_STRING followed by T_COLON so we can
            // more easily process style definitions.
            $finalTokens[$newStackPtr] = [
                'type' => 'T_STRING',
                'code' => T_STRING,
                'content' => substr($token['content'], 0, -1),
            ];
            $newStackPtr++;
            $finalTokens[$newStackPtr] = [
                'type' => 'T_COLON',
                'code' => T_COLON,
                'content' => ':',
            ];
            $newStackPtr++;
            continue;
        }
        if ($token['code'] === T_FUNCTION) {
            // There are no functions in CSS, so convert this to a string.
            $finalTokens[$newStackPtr] = [
                'type' => 'T_STRING',
                'code' => T_STRING,
                'content' => $token['content'],
            ];
            $newStackPtr++;
            continue;
        }
        if ($token['code'] === T_COMMENT && substr($token['content'], 0, 2) === '/*') {
            // Multi-line comment. Record it so we can ignore other
            // comment tags until we get out of this one.
            $multiLineComment = true;
        }
        if ($token['code'] === T_COMMENT && $multiLineComment === false && (substr($token['content'], 0, 2) === '//' || $token['content'][0] === '#')) {
            $content = ltrim($token['content'], '#/');
            // Guard against PHP7+ syntax errors: content starting with a
            // zero could look like an invalid int, so temporarily prefix it
            // with a "1" and remove that prefix again after tokenizing.
            $leadingZero = false;
            if ($content !== '' && $content[0] === '0') {
                $content = '1' . $content;
                $leadingZero = true;
            }
            $commentTokens = parent::tokenize('<?php ' . $content . '?>');
            // The first and last tokens are the open/close tags.
            array_shift($commentTokens);
            $closeTag = array_pop($commentTokens);
            while ($closeTag['content'] !== '?' . '>') {
                $closeTag = array_pop($commentTokens);
            }
            if ($leadingZero === true) {
                $commentTokens[0]['content'] = substr($commentTokens[0]['content'], 1);
                $content = substr($content, 1);
            }
            if ($token['content'][0] === '#') {
                // The # character is not a comment in CSS files, so
                // determine what it means in this context.
                $firstContent = '';
                if (isset($commentTokens[0]) === true) {
                    $firstContent = $commentTokens[0]['content'];
                }
                // Number of leading tokens that make up $firstContent.
                $firstLength = 1;
                // If the first content is just a number, it is probably a
                // colour like 8FB7DB, which PHP splits into 8 and FB7DB.
                if (isset($commentTokens[0], $commentTokens[1]) === true && ($commentTokens[0]['code'] === T_LNUMBER || $commentTokens[0]['code'] === T_DNUMBER) && $commentTokens[1]['code'] === T_STRING) {
                    $firstContent .= $commentTokens[1]['content'];
                    $firstLength = 2;
                }
                // If the first content looks like a colour and not a class
                // definition, join the tokens together.
                $isColour = (preg_match('/^[ABCDEF0-9]+$/i', $firstContent) === 1);
                if ($isColour === true && isset($commentTokens[$firstLength]) === true && $commentTokens[$firstLength]['content'] === '-') {
                    $isColour = false;
                }
                if ($isColour === true) {
                    // Only now drop the tokens that were merged into the
                    // colour. Removing them before this decision would lose
                    // their content whenever the T_HASH branch is taken.
                    array_splice($commentTokens, 0, $firstLength);
                    // Work out what we trimmed off above and remember to re-add it.
                    $trimmed = substr($token['content'], 0, strlen($token['content']) - strlen($content));
                    $finalTokens[$newStackPtr] = [
                        'type' => 'T_COLOUR',
                        'code' => T_COLOUR,
                        'content' => $trimmed . $firstContent,
                    ];
                }
                else {
                    $finalTokens[$newStackPtr] = [
                        'type' => 'T_HASH',
                        'code' => T_HASH,
                        'content' => '#',
                    ];
                }
            }
            else {
                $finalTokens[$newStackPtr] = [
                    'type' => 'T_STRING',
                    'code' => T_STRING,
                    'content' => '//',
                ];
            }

            //end if
            $newStackPtr++;
            // Replace the comment token with the tokens found inside it and
            // step back one position so the loop processes them next.
            array_splice($tokens, $stackPtr, 1, $commentTokens);
            $numTokens = count($tokens);
            $stackPtr--;
            continue;
        }

        //end if
        if ($token['code'] === T_COMMENT && substr($token['content'], -2) === '*/') {
            // Multi-line comment is done.
            $multiLineComment = false;
        }
        $finalTokens[$newStackPtr] = $token;
        $newStackPtr++;
    }

    //end for
    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t*** END CSS TOKENIZING 1ST PASS ***" . PHP_EOL;
        echo "\t*** START CSS TOKENIZING 2ND PASS ***" . PHP_EOL;
    }
    // A flag to indicate if we are inside a style definition,
    // which is defined using curly braces.
    $inStyleDef = false;
    // A flag to indicate if an At-rule like "@media" is used, which will result
    // in nested curly brackets.
    $asperandStart = false;
    $numTokens = count($finalTokens);
    for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
        $token = $finalTokens[$stackPtr];
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            $type = $token['type'];
            $content = Util\Common::prepareForOutput($token['content']);
            echo "\tProcess token {$stackPtr}: {$type} => {$content}" . PHP_EOL;
        }
        switch ($token['code']) {
            case T_OPEN_CURLY_BRACKET:
                // Opening curly brackets for an At-rule do not start a style
                // definition. We also reset the asperand flag here because the next
                // opening curly bracket could be indeed the start of a style
                // definition.
                if ($asperandStart === true) {
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        if ($inStyleDef === true) {
                            echo "\t\t* style definition closed *" . PHP_EOL;
                        }
                        // $asperandStart is known to be true in this branch.
                        echo "\t\t* at-rule definition closed *" . PHP_EOL;
                    }
                    $inStyleDef = false;
                    $asperandStart = false;
                }
                else {
                    $inStyleDef = true;
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        echo "\t\t* style definition opened *" . PHP_EOL;
                    }
                }
                break;
            case T_CLOSE_CURLY_BRACKET:
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    if ($inStyleDef === true) {
                        echo "\t\t* style definition closed *" . PHP_EOL;
                    }
                    if ($asperandStart === true) {
                        echo "\t\t* at-rule definition closed *" . PHP_EOL;
                    }
                }
                $inStyleDef = false;
                $asperandStart = false;
                break;
            case T_MINUS:
                // Minus signs are often used instead of spaces inside
                // class names, IDs and styles.
                if ($finalTokens[$stackPtr + 1]['code'] === T_STRING) {
                    if ($finalTokens[$stackPtr - 1]['code'] === T_STRING) {
                        // string-string: fold both the previous string and
                        // the minus into the next token.
                        $newContent = $finalTokens[$stackPtr - 1]['content'] . '-' . $finalTokens[$stackPtr + 1]['content'];
                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
                            echo "\t\t* token is a string joiner; ignoring this and previous token" . PHP_EOL;
                            $old = Util\Common::prepareForOutput($finalTokens[$stackPtr + 1]['content']);
                            $new = Util\Common::prepareForOutput($newContent);
                            echo "\t\t=> token " . ($stackPtr + 1) . " content changed from \"{$old}\" to \"{$new}\"" . PHP_EOL;
                        }
                        $finalTokens[$stackPtr + 1]['content'] = $newContent;
                        unset($finalTokens[$stackPtr]);
                        unset($finalTokens[$stackPtr - 1]);
                    }
                    else {
                        // -string: fold just the minus into the next token.
                        $newContent = '-' . $finalTokens[$stackPtr + 1]['content'];
                        $finalTokens[$stackPtr + 1]['content'] = $newContent;
                        unset($finalTokens[$stackPtr]);
                    }
                }
                else {
                    if ($finalTokens[$stackPtr + 1]['code'] === T_LNUMBER) {
                        // They can also be used to provide negative numbers.
                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
                            echo "\t\t* token is part of a negative number; adding content to next token and ignoring *" . PHP_EOL;
                            $content = Util\Common::prepareForOutput($finalTokens[$stackPtr + 1]['content']);
                            echo "\t\t=> token " . ($stackPtr + 1) . " content changed from \"{$content}\" to \"-{$content}\"" . PHP_EOL;
                        }
                        $finalTokens[$stackPtr + 1]['content'] = '-' . $finalTokens[$stackPtr + 1]['content'];
                        unset($finalTokens[$stackPtr]);
                    }
                }

                //end if
                break;
            case T_COLON:
                // Only interested in colons that are defining styles.
                if ($inStyleDef === false) {
                    break;
                }
                // Walk back to the previous non-empty token; that is the
                // style name.
                for ($x = $stackPtr - 1; $x >= 0; $x--) {
                    if (isset(Util\Tokens::$emptyTokens[$finalTokens[$x]['code']]) === false) {
                        break;
                    }
                }
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    $type = $finalTokens[$x]['type'];
                    echo "\t\t=> token {$x} changed from {$type} to T_STYLE" . PHP_EOL;
                }
                $finalTokens[$x]['type'] = 'T_STYLE';
                $finalTokens[$x]['code'] = T_STYLE;
                break;
            case T_STRING:
                if (strtolower($token['content']) === 'url') {
                    // Find the next content.
                    for ($x = $stackPtr + 1; $x < $numTokens; $x++) {
                        if (isset(Util\Tokens::$emptyTokens[$finalTokens[$x]['code']]) === false) {
                            break;
                        }
                    }
                    // Needs to be in the format "url(" for it to be a URL.
                    if ($finalTokens[$x]['code'] !== T_OPEN_PARENTHESIS) {
                        continue 2;
                    }
                    // Make sure the content isn't empty.
                    for ($y = $x + 1; $y < $numTokens; $y++) {
                        if (isset(Util\Tokens::$emptyTokens[$finalTokens[$y]['code']]) === false) {
                            break;
                        }
                    }
                    if ($finalTokens[$y]['code'] === T_CLOSE_PARENTHESIS) {
                        continue 2;
                    }
                    if (PHP_CODESNIFFER_VERBOSITY > 1) {
                        for ($i = $stackPtr + 1; $i <= $y; $i++) {
                            $type = $finalTokens[$i]['type'];
                            $content = Util\Common::prepareForOutput($finalTokens[$i]['content']);
                            echo "\tProcess token {$i}: {$type} => {$content}" . PHP_EOL;
                        }
                        echo "\t\t* token starts a URL *" . PHP_EOL;
                    }
                    // Join all the content together inside the url() statement.
                    $newContent = '';
                    for ($i = $x + 2; $i < $numTokens; $i++) {
                        if ($finalTokens[$i]['code'] === T_CLOSE_PARENTHESIS) {
                            break;
                        }
                        $newContent .= $finalTokens[$i]['content'];
                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
                            $content = Util\Common::prepareForOutput($finalTokens[$i]['content']);
                            echo "\t\t=> token {$i} added to URL string and ignored: {$content}" . PHP_EOL;
                        }
                        unset($finalTokens[$i]);
                    }
                    // Resume after the closing parenthesis; the tokens in
                    // between have been removed from the array.
                    $stackPtr = $i;
                    // If the content inside the "url()" is in double quotes
                    // there will only be one token and so we don't have to do
                    // anything except change its type. If it is not empty,
                    // we need to do some token merging.
                    $finalTokens[$x + 1]['type'] = 'T_URL';
                    $finalTokens[$x + 1]['code'] = T_URL;
                    if ($newContent !== '') {
                        $finalTokens[$x + 1]['content'] .= $newContent;
                        if (PHP_CODESNIFFER_VERBOSITY > 1) {
                            $content = Util\Common::prepareForOutput($finalTokens[$x + 1]['content']);
                            echo "\t\t=> token content changed to: {$content}" . PHP_EOL;
                        }
                    }
                }
                else {
                    // A string that starts with a minus and is directly
                    // followed by another string is joined the same way as
                    // a T_MINUS token.
                    if ($finalTokens[$stackPtr]['content'][0] === '-' && $finalTokens[$stackPtr + 1]['code'] === T_STRING) {
                        if (isset($finalTokens[$stackPtr - 1]) === true && $finalTokens[$stackPtr - 1]['code'] === T_STRING) {
                            $newContent = $finalTokens[$stackPtr - 1]['content'] . $finalTokens[$stackPtr]['content'] . $finalTokens[$stackPtr + 1]['content'];
                            if (PHP_CODESNIFFER_VERBOSITY > 1) {
                                echo "\t\t* token is a string joiner; ignoring this and previous token" . PHP_EOL;
                                $old = Util\Common::prepareForOutput($finalTokens[$stackPtr + 1]['content']);
                                $new = Util\Common::prepareForOutput($newContent);
                                echo "\t\t=> token " . ($stackPtr + 1) . " content changed from \"{$old}\" to \"{$new}\"" . PHP_EOL;
                            }
                            $finalTokens[$stackPtr + 1]['content'] = $newContent;
                            unset($finalTokens[$stackPtr]);
                            unset($finalTokens[$stackPtr - 1]);
                        }
                        else {
                            $newContent = $finalTokens[$stackPtr]['content'] . $finalTokens[$stackPtr + 1]['content'];
                            $finalTokens[$stackPtr + 1]['content'] = $newContent;
                            unset($finalTokens[$stackPtr]);
                        }
                    }
                }

                //end if
                break;
            case T_ASPERAND:
                $asperandStart = true;
                if (PHP_CODESNIFFER_VERBOSITY > 1) {
                    echo "\t\t* at-rule definition opened *" . PHP_EOL;
                }
                break;
            default:
                // Nothing special to be done with this token.
                break;
        }

        //end switch
    }

    //end for
    // Reset the array keys to avoid gaps.
    $finalTokens = array_values($finalTokens);
    $numTokens = count($finalTokens);
    // Blank out the content of the end tag.
    $finalTokens[$numTokens - 1]['content'] = '';
    if ($eolAdded === true) {
        // Strip off the extra EOL char we added for tokenizing.
        $finalTokens[$numTokens - 2]['content'] = substr($finalTokens[$numTokens - 2]['content'], 0, strlen($this->eolChar) * -1);
        if ($finalTokens[$numTokens - 2]['content'] === '') {
            unset($finalTokens[$numTokens - 2]);
            $finalTokens = array_values($finalTokens);
            $numTokens = count($finalTokens);
        }
    }
    if (PHP_CODESNIFFER_VERBOSITY > 1) {
        echo "\t*** END CSS TOKENIZING 2ND PASS ***" . PHP_EOL;
    }
    return $finalTokens;
}