Skip to main content
Drupal API
User account menu
  • Log in

Breadcrumb

  1. Drupal Core 11.1.x
  2. Tokenizer.php

function Tokenizer::decodeCharacterReference

Decode a character reference and return the string.

If $inAttribute is set to true, a bare & will be returned as-is.

Parameters

bool $inAttribute Set to true if the text is inside of an attribute value; false otherwise.

Return value

string

4 calls to Tokenizer::decodeCharacterReference()
Tokenizer::consumeData in vendor/masterminds/html5/src/HTML5/Parser/Tokenizer.php
Consume a character and make a move. HTML5 8.2.4.1.
Tokenizer::quotedAttributeValue in vendor/masterminds/html5/src/HTML5/Parser/Tokenizer.php
Get an attribute value string.
Tokenizer::rcdata in vendor/masterminds/html5/src/HTML5/Parser/Tokenizer.php
Read text in RCDATA mode.
Tokenizer::unquotedAttributeValue in vendor/masterminds/html5/src/HTML5/Parser/Tokenizer.php

File

vendor/masterminds/html5/src/HTML5/Parser/Tokenizer.php, line 1102

Class

Tokenizer
The HTML5 tokenizer.

Namespace

Masterminds\HTML5\Parser

Code

/**
 * Decode a character reference and return the string.
 *
 * The scanner is advanced to the character following the '&' before anything
 * is inspected. A decoded entity is returned only when a numeric
 * (&#DIGITS; or &#xHEX;) or named reference is found AND it is terminated by
 * ';'. In every other case a literal '&' is returned; most failure paths also
 * unconsume the characters that were read so the caller can tokenize them
 * under whatever rules apply in its own context.
 *
 * If $inAttribute is set to true, a bare & will be returned as-is: an '&='
 * sequence yields '&' without a parse error, and an unmatched, non-empty
 * entity name is not reported as a parse error.
 *
 * @param bool $inAttribute
 *   Set to true if the text is inside of an attribute value; false otherwise.
 *
 * @return string
 *   The decoded entity, or '&' when no complete character reference was found.
 */
protected function decodeCharacterReference($inAttribute = false) {
    // Next char after &.
    $tok = $this->scanner->next();
    // Remember where the candidate reference begins so that the named-entity
    // and missing-';' paths can rewind by the exact number of characters read.
    $start = $this->scanner->position();
    if (false === $tok) {
        return '&';
    }
    // These indicate not an entity. We return just
    // the &.
    if ("\t" === $tok || "\n" === $tok || "\f" === $tok || ' ' === $tok || '&' === $tok || '<' === $tok) {
        return '&';
    }
    // Numeric entity
    if ('#' === $tok) {
        $tok = $this->scanner->next();
        if (false === $tok) {
            $this->parseError('Expected &#DEC; &#HEX;, got EOF');
            $this->scanner->unconsume(1);
            return '&';
        }
        // Hexadecimal encoding.
        // X[0-9a-fA-F]+;
        // x[0-9a-fA-F]+;
        if ('x' === $tok || 'X' === $tok) {
            // Consume x
            $tok = $this->scanner->next();
            // Convert from hex code to char.
            $hex = $this->scanner->getHex();
            // empty() also reports true for the string '0', which would make
            // '&#x0;' look like it had no hex digits while '&#x00;' and the
            // decimal form '&#0;' are passed on to the lookup. Exclude '0'
            // explicitly; every other empty() result is still rejected.
            if (empty($hex) && '0' !== $hex) {
                $this->parseError('Expected &#xHEX;, got &#x%s', $tok);
                // We unconsume because we don't know what parser rules might
                // be in effect for the remaining chars. For example. '&#>'
                // might result in a specific parsing rule inside of tag
                // contexts, while not inside of pcdata context.
                $this->scanner->unconsume(2);
                return '&';
            }
            $entity = CharacterReference::lookupHex($hex);
        }
        else {
            // Convert from decimal to char.
            $numeric = $this->scanner->getNumeric();
            if (false === $numeric) {
                $this->parseError('Expected &#DIGITS;, got &#%s', $tok);
                $this->scanner->unconsume(2);
                return '&';
            }
            $entity = CharacterReference::lookupDecimal($numeric);
        }
    }
    elseif ('=' === $tok && $inAttribute) {
        // '&=' inside an attribute value is returned as a plain '&' with no
        // parse error.
        return '&';
    }
    else {
        // String entity.
        // Attempt to consume a string up to a ';'.
        // [a-zA-Z0-9]+;
        $cname = $this->scanner->getAsciiAlphaNum();
        $entity = CharacterReference::lookupName($cname);
        // When no entity is found provide the name of the unmatched string
        // and continue on as the & is not part of an entity. The & will
        // be converted to &amp; elsewhere.
        if (null === $entity) {
            // Inside an attribute only an empty name is reported; elsewhere
            // every unmatched name is a parse error.
            if (!$inAttribute || '' === $cname) {
                $this->parseError("No match in entity table for '%s'", $cname);
            }
            $this->scanner->unconsume($this->scanner->position() - $start);
            return '&';
        }
    }
    // The scanner has advanced the cursor for us.
    $tok = $this->scanner->current();
    // We have an entity. We're done here.
    if (';' === $tok) {
        $this->scanner->consume();
        return $entity;
    }
    // Failing to match ; means unconsume the entire string.
    $this->scanner->unconsume($this->scanner->position() - $start);
    $this->parseError('Expected &ENTITY;, got &ENTITY%s (no trailing ;) ', $tok);
    return '&';
}
RSS feed
Powered by Drupal