class HTML5
This class offers convenience methods for parsing and serializing HTML5. It is roughly designed to mirror the \DOMDocument native class.
Hierarchy
- class \Masterminds\HTML5
Expanded class hierarchy of HTML5
2 files declare their use of HTML5
- Crawler.php in vendor/
symfony/ dom-crawler/ Crawler.php - Html.php in core/
lib/ Drupal/ Component/ Utility/ Html.php
1 string reference to 'HTML5'
- Tokenizer::doctype in vendor/
masterminds/ html5/ src/ HTML5/ Parser/ Tokenizer.php - Parse a DOCTYPE.
File
-
vendor/
masterminds/ html5/ src/ HTML5.php, line 15
Namespace
MastermindsView source
class HTML5 {
/**
* Global options for the parser and serializer.
*
* @var array
*/
private $defaultOptions = array(
// Whether the serializer should aggressively encode all characters as entities.
'encode_entities' => false,
// Prevents the parser from automatically assigning the HTML5 namespace to the DOM document.
'disable_html_ns' => false,
);
protected $errors = array();
public function __construct(array $defaultOptions = array()) {
$this->defaultOptions = array_merge($this->defaultOptions, $defaultOptions);
}
/**
* Get the current default options.
*
* @return array
*/
public function getOptions() {
return $this->defaultOptions;
}
/**
* Load and parse an HTML file.
*
* This will apply the HTML5 parser, which is tolerant of many
* varieties of HTML, including XHTML 1, HTML 4, and well-formed HTML
* 3. Note that in these cases, not all of the old data will be
* preserved. For example, XHTML's XML declaration will be removed.
*
* The rules governing parsing are set out in the HTML 5 spec.
*
* @param string|resource $file The path to the file to parse. If this is a resource, it is
* assumed to be an open stream whose pointer is set to the first
* byte of input.
* @param array $options Configuration options when parsing the HTML.
*
* @return \DOMDocument A DOM document. These object type is defined by the libxml
* library, and should have been included with your version of PHP.
*/
public function load($file, array $options = array()) {
// Handle the case where file is a resource.
if (is_resource($file)) {
return $this->parse(stream_get_contents($file), $options);
}
return $this->parse(file_get_contents($file), $options);
}
/**
* Parse a HTML Document from a string.
*
* Take a string of HTML 5 (or earlier) and parse it into a
* DOMDocument.
*
* @param string $string A html5 document as a string.
* @param array $options Configuration options when parsing the HTML.
*
* @return \DOMDocument A DOM document. DOM is part of libxml, which is included with
* almost all distribtions of PHP.
*/
public function loadHTML($string, array $options = array()) {
return $this->parse($string, $options);
}
/**
* Convenience function to load an HTML file.
*
* This is here to provide backwards compatibility with the
* PHP DOM implementation. It simply calls load().
*
* @param string $file The path to the file to parse. If this is a resource, it is
* assumed to be an open stream whose pointer is set to the first
* byte of input.
* @param array $options Configuration options when parsing the HTML.
*
* @return \DOMDocument A DOM document. These object type is defined by the libxml
* library, and should have been included with your version of PHP.
*/
public function loadHTMLFile($file, array $options = array()) {
return $this->load($file, $options);
}
/**
* Parse a HTML fragment from a string.
*
* @param string $string the HTML5 fragment as a string
* @param array $options Configuration options when parsing the HTML
*
* @return \DOMDocumentFragment A DOM fragment. The DOM is part of libxml, which is included with
* almost all distributions of PHP.
*/
public function loadHTMLFragment($string, array $options = array()) {
return $this->parseFragment($string, $options);
}
/**
* Return all errors encountered into parsing phase.
*
* @return array
*/
public function getErrors() {
return $this->errors;
}
/**
* Return true it some errors were encountered into parsing phase.
*
* @return bool
*/
public function hasErrors() {
return count($this->errors) > 0;
}
/**
* Parse an input string.
*
* @param string $input
* @param array $options
*
* @return \DOMDocument
*/
public function parse($input, array $options = array()) {
$this->errors = array();
$options = array_merge($this->defaultOptions, $options);
$events = new DOMTreeBuilder(false, $options);
$scanner = new Scanner($input, !empty($options['encoding']) ? $options['encoding'] : 'UTF-8');
$parser = new Tokenizer($scanner, $events, !empty($options['xmlNamespaces']) ? Tokenizer::CONFORMANT_XML : Tokenizer::CONFORMANT_HTML);
$parser->parse();
$this->errors = $events->getErrors();
return $events->document();
}
/**
* Parse an input stream where the stream is a fragment.
*
* Lower-level loading function. This requires an input stream instead
* of a string, file, or resource.
*
* @param string $input The input data to parse in the form of a string.
* @param array $options An array of options.
*
* @return \DOMDocumentFragment
*/
public function parseFragment($input, array $options = array()) {
$options = array_merge($this->defaultOptions, $options);
$events = new DOMTreeBuilder(true, $options);
$scanner = new Scanner($input, !empty($options['encoding']) ? $options['encoding'] : 'UTF-8');
$parser = new Tokenizer($scanner, $events, !empty($options['xmlNamespaces']) ? Tokenizer::CONFORMANT_XML : Tokenizer::CONFORMANT_HTML);
$parser->parse();
$this->errors = $events->getErrors();
return $events->fragment();
}
/**
* Save a DOM into a given file as HTML5.
*
* @param mixed $dom The DOM to be serialized.
* @param string|resource $file The filename to be written or resource to write to.
* @param array $options Configuration options when serializing the DOM. These include:
* - encode_entities: Text written to the output is escaped by default and not all
* entities are encoded. If this is set to true all entities will be encoded.
* Defaults to false.
*/
public function save($dom, $file, $options = array()) {
$close = true;
if (is_resource($file)) {
$stream = $file;
$close = false;
}
else {
$stream = fopen($file, 'wb');
}
$options = array_merge($this->defaultOptions, $options);
$rules = new OutputRules($stream, $options);
$trav = new Traverser($dom, $stream, $rules, $options);
$trav->walk();
/*
* release the traverser to avoid cyclic references and allow PHP to free memory without waiting for gc_collect_cycles
*/
$rules->unsetTraverser();
if ($close) {
fclose($stream);
}
}
/**
* Convert a DOM into an HTML5 string.
*
* @param mixed $dom The DOM to be serialized.
* @param array $options Configuration options when serializing the DOM. These include:
* - encode_entities: Text written to the output is escaped by default and not all
* entities are encoded. If this is set to true all entities will be encoded.
* Defaults to false.
*
* @return string A HTML5 documented generated from the DOM.
*/
public function saveHTML($dom, $options = array()) {
$stream = fopen('php://temp', 'wb');
$this->save($dom, $stream, array_merge($this->defaultOptions, $options));
$html = stream_get_contents($stream, -1, 0);
fclose($stream);
return $html;
}
}
Members
Title Sort descending | Modifiers | Object type | Summary |
---|---|---|---|
HTML5::$defaultOptions | private | property | Global options for the parser and serializer. |
HTML5::$errors | protected | property | |
HTML5::getErrors | public | function | Return all errors encountered into parsing phase. |
HTML5::getOptions | public | function | Get the current default options. |
HTML5::hasErrors | public | function | Return true it some errors were encountered into parsing phase. |
HTML5::load | public | function | Load and parse an HTML file. |
HTML5::loadHTML | public | function | Parse a HTML Document from a string. |
HTML5::loadHTMLFile | public | function | Convenience function to load an HTML file. |
HTML5::loadHTMLFragment | public | function | Parse a HTML fragment from a string. |
HTML5::parse | public | function | Parse an input string. |
HTML5::parseFragment | public | function | Parse an input stream where the stream is a fragment. |
HTML5::save | public | function | Save a DOM into a given file as HTML5. |
HTML5::saveHTML | public | function | Convert a DOM into an HTML5 string. |
HTML5::__construct | public | function |