function Tokenizer::decodeCharacterReference
Decode a character reference and return the string.
If $inAttribute is set to true, a bare & will be returned as-is.
Parameters
bool $inAttribute: Set to true if the text is inside of an attribute value; false otherwise.
Return value
string
4 calls to Tokenizer::decodeCharacterReference()
- Tokenizer::consumeData in vendor/
masterminds/ html5/ src/ HTML5/ Parser/ Tokenizer.php - Consume a character and make a move. HTML5 8.2.4.1.
- Tokenizer::quotedAttributeValue in vendor/
masterminds/ html5/ src/ HTML5/ Parser/ Tokenizer.php - Get an attribute value string.
- Tokenizer::rcdata in vendor/
masterminds/ html5/ src/ HTML5/ Parser/ Tokenizer.php - Read text in RCDATA mode.
- Tokenizer::unquotedAttributeValue in vendor/
masterminds/ html5/ src/ HTML5/ Parser/ Tokenizer.php
File
-
vendor/
masterminds/ html5/ src/ HTML5/ Parser/ Tokenizer.php, line 1102
Class
- Tokenizer
- The HTML5 tokenizer.
Namespace
Masterminds\HTML5\Parser
Code
/**
 * Decode a character reference and return the string.
 *
 * The scanner is advanced past the '&' first. If what follows does not form
 * a complete reference terminated by ';', the consumed characters are given
 * back to the scanner and a literal '&' is returned instead.
 *
 * @param bool $inAttribute
 *   Set to true if the text is inside of an attribute value. In that mode a
 *   '=' directly after the '&' yields a bare '&', and a non-empty name that
 *   has no match in the entity table does not raise a parse error.
 *
 * @return string
 *   The value returned by the CharacterReference lookup, or '&' when no
 *   complete reference could be read.
 */
protected function decodeCharacterReference($inAttribute = false)
{
    // Next char after &.
    $tok = $this->scanner->next();
    // Remember where the reference body begins so the failure paths below
    // can rewind to this exact position.
    $start = $this->scanner->position();

    if (false === $tok) {
        return '&';
    }

    // These indicate not an entity. We return just the &.
    if ("\t" === $tok || "\n" === $tok || "\f" === $tok || ' ' === $tok || '&' === $tok || '<' === $tok) {
        return '&';
    }

    if ('#' === $tok) {
        // Numeric entity.
        $tok = $this->scanner->next();

        if (false === $tok) {
            $this->parseError('Expected &#DEC; &#HEX;, got EOF');
            $this->scanner->unconsume(1);

            return '&';
        }

        if ('x' === $tok || 'X' === $tok) {
            // Hexadecimal encoding: x[0-9a-fA-F]+; or X[0-9a-fA-F]+;
            // Consume the x.
            $tok = $this->scanner->next();

            // Convert from hex code to char.
            $hex = $this->scanner->getHex();

            // Compare explicitly instead of using empty(): empty('0') is
            // true, which made '&#x0;' look like it had no hex digits at all
            // even though '&#x00;' and the decimal '&#0;' are passed on to
            // the lookup.
            if (null === $hex || false === $hex || '' === $hex) {
                $this->parseError('Expected &#xHEX;, got &#x%s', $tok);
                // We unconsume because we don't know what parser rules might
                // be in effect for the remaining chars. For example, '&#>'
                // might result in a specific parsing rule inside of tag
                // contexts, while not inside of pcdata context.
                $this->scanner->unconsume(2);

                return '&';
            }

            $entity = CharacterReference::lookupHex($hex);
        } else {
            // Decimal encoding: convert from decimal to char.
            $numeric = $this->scanner->getNumeric();

            if (false === $numeric) {
                $this->parseError('Expected &#DIGITS;, got &#%s', $tok);
                // NOTE(review): only two next() calls precede this point,
                // versus three in the hex branch, yet both rewind by 2.
                // Confirm against the Scanner that this is intended.
                $this->scanner->unconsume(2);

                return '&';
            }

            $entity = CharacterReference::lookupDecimal($numeric);
        }
    } elseif ('=' === $tok && $inAttribute) {
        // '&=' inside an attribute value is not a reference.
        return '&';
    } else {
        // String entity.
        // Attempt to consume a string up to a ';'.
        // [a-zA-Z0-9]+;
        $cname = $this->scanner->getAsciiAlphaNum();
        $entity = CharacterReference::lookupName($cname);

        // When no entity is found, report the unmatched name (always outside
        // attributes; inside attributes only when the name is empty), rewind
        // to the start of the name and hand back the bare &.
        if (null === $entity) {
            if (!$inAttribute || '' === $cname) {
                $this->parseError("No match in entity table for '%s'", $cname);
            }
            $this->scanner->unconsume($this->scanner->position() - $start);

            return '&';
        }
    }

    // The scanner has advanced the cursor for us.
    $tok = $this->scanner->current();

    // We have an entity. We're done here.
    if (';' === $tok) {
        $this->scanner->consume();

        return $entity;
    }

    // Failing to match ; means unconsume the entire string.
    $this->scanner->unconsume($this->scanner->position() - $start);
    $this->parseError('Expected &ENTITY;, got &ENTITY%s (no trailing ;) ', $tok);

    return '&';
}