function Tokenizer::attribute
Parse attributes from inside of a tag.
Parameters
string[] $attributes:
Return value
bool
Throws
ParseError
1 call to Tokenizer::attribute()
- Tokenizer::tagName in vendor/masterminds/html5/src/HTML5/Parser/Tokenizer.php - Consume a tag name and body. See section 8.2.4.10.
File
- vendor/masterminds/html5/src/HTML5/Parser/Tokenizer.php, line 462
Class
- Tokenizer
- The HTML5 tokenizer.
Namespace
Masterminds\HTML5\Parser
Code
/**
 * Parse a single attribute from inside of a tag.
 *
 * Reads one attribute name and its value from the scanner. The pair is
 * stored in $attributes only when the name passes the validity checks
 * below; the value is consumed from the input either way.
 *
 * @param string[] $attributes
 *   Map of attribute name => value, passed by reference and extended in
 *   place.
 *
 * @return bool
 *   FALSE when the current token is '/', '>' or FALSE (presumably end of
 *   input -- confirm against the scanner), i.e. there is no attribute to
 *   read. TRUE after an attribute has been consumed, even if its name was
 *   rejected.
 *
 * @throws ParseError
 *   When a '<' is found where an attribute name was expected.
 */
protected function attribute(&$attributes) {
    $tok = $this->scanner->current();
    if ('/' === $tok || '>' === $tok || false === $tok) {
        return false;
    }

    if ('<' === $tok) {
        $this->parseError("Unexpected '<' inside of attributes list.");
        // Push the < back onto the stack.
        $this->scanner->unconsume();
        // Let the caller figure out how to handle this.
        throw new ParseError('Start tag inside of attribute.');
    }

    $name = strtolower($this->scanner->charsUntil("/>=\n\f\t "));

    if ('' === $name) {
        $tok = $this->scanner->current();
        $this->parseError('Expected an attribute name, got %s.', $tok);
        // Really, only '=' can be the char here. Everything else gets absorbed
        // under one rule or another.
        $name = $tok;
        $this->scanner->consume();
    }

    $isValidAttribute = true;
    // Attribute names can contain most Unicode characters for HTML5.
    // But method "DOMElement::setAttribute" is throwing exception
    // because of its own internal restriction so these have to be filtered.
    // see issue #23: https://github.com/Masterminds/html5-php/issues/23
    // and http://www.w3.org/TR/2011/WD-html5-20110525/syntax.html#syntax-attribute-name
    //
    // Every bound is written as a \xNN escape and the pattern is
    // single-quoted so PCRE, not PHP, interprets it. The last range must be
    // '{' (0x7B) through DEL (0x7F); written with raw bytes the DEL is
    // invisible, and if it is lost the class ends in "{-", which makes '-'
    // a forbidden character and rejects names such as "data-foo".
    if (preg_match('/[\x01-\x2C\/\x3B-\x40\x5B-\x5E\x60\x7B-\x7F]/u', $name)) {
        $this->parseError('Unexpected characters in attribute name: %s', $name);
        $isValidAttribute = false;
    }
    // A leading digit, '.' or '-' is also rejected, separately from the
    // check above which allows those characters elsewhere in the name.
    elseif (preg_match('/^[0-9.-]/u', $name)) {
        $this->parseError('Unexpected character at the begining of attribute name: %s', $name);
        $isValidAttribute = false;
    }

    // 8.1.2.3
    $this->scanner->whitespace();

    // The value is always read so the scanner moves past it, even when the
    // attribute itself is going to be discarded.
    $val = $this->attributeValue();
    if ($isValidAttribute) {
        $attributes[$name] = $val;
    }

    return true;
}