1: <?php
2: /**
3: * CakePHP(tm) : Rapid Development Framework (https://cakephp.org)
4: * Copyright (c) Cake Software Foundation, Inc. (https://cakefoundation.org)
5: *
6: * Licensed under The MIT License
7: * For full copyright and license information, please see the LICENSE.txt
8: * Redistributions of files must retain the above copyright notice.
9: *
10: * @copyright Copyright (c) Cake Software Foundation, Inc. (https://cakefoundation.org)
11: * @link https://cakephp.org CakePHP(tm) Project
12: * @since 0.10.3
13: * @license https://opensource.org/licenses/mit-license.php MIT License
14: */
15: namespace Cake\Utility;
16:
17: use Cake\Utility\Exception\XmlException;
18: use DOMDocument;
19: use DOMNode;
20: use DOMText;
21: use Exception;
22: use SimpleXMLElement;
23:
24: /**
25: * XML handling for CakePHP.
26: *
27: * The methods in these classes enable the datasources that use XML to work.
28: */
class Xml
{
    /**
     * Initialize SimpleXMLElement or DOMDocument from a given XML string, file path or array.
     *
     * ### Usage:
     *
     * Building XML from a string:
     *
     * ```
     * $xml = Xml::build('<example>text</example>');
     * ```
     *
     * Building XML from string (output DOMDocument):
     *
     * ```
     * $xml = Xml::build('<example>text</example>', ['return' => 'domdocument']);
     * ```
     *
     * Building XML from a file path:
     *
     * ```
     * $xml = Xml::build('/path/to/an/xml/file.xml');
     * ```
     *
     * Building XML from a remote URL (fetch the body first, then parse it):
     *
     * ```
     * use Cake\Http\Client;
     *
     * $http = new Client();
     * $response = $http->get('http://example.com/example.xml');
     * $xml = Xml::build($response->body());
     * ```
     *
     * Building from an array:
     *
     * ```
     * $value = [
     *     'tags' => [
     *         'tag' => [
     *             [
     *                 'id' => '1',
     *                 'name' => 'defect'
     *             ],
     *             [
     *                 'id' => '2',
     *                 'name' => 'enhancement'
     *             ]
     *         ]
     *     ]
     * ];
     * $xml = Xml::build($value);
     * ```
     *
     * When building XML from an array ensure that there is only one top level element.
     *
     * ### Options
     *
     * - `return` Can be 'simplexml' to return object of SimpleXMLElement or 'domdocument' to return DOMDocument.
     *   The value is matched case-insensitively.
     * - `loadEntities` Defaults to false. Set to true to enable loading of `<!ENTITY` definitions. This
     *   is disabled by default for security reasons.
     * - `readFile` Set to false to disable file reading. This is important to disable when
     *   putting user data into Xml::build(). If enabled local files will be read if they exist.
     *   Defaults to true for backwards compatibility reasons.
     * - `parseHuge` Enable the `LIBXML_PARSEHUGE` flag.
     *
     * If using array as input, you can pass `options` from Xml::fromArray.
     *
     * @param string|array $input XML string, a path to a file, or an array (objects exposing toArray() also work)
     * @param array $options The options to use
     * @return \SimpleXMLElement|\DOMDocument SimpleXMLElement or DOMDocument
     * @throws \Cake\Utility\Exception\XmlException When the input has an unsupported type or cannot be read/parsed.
     */
    public static function build($input, array $options = [])
    {
        $options += [
            'return' => 'simplexml',
            'loadEntities' => false,
            'readFile' => true,
            'parseHuge' => false,
        ];

        if (is_array($input) || is_object($input)) {
            return static::fromArray($input, $options);
        }

        // Validate the type before any string or filesystem function sees the value.
        if (!is_string($input)) {
            throw new XmlException('Invalid input.');
        }

        if (strpos($input, '<') !== false) {
            return static::_loadXml($input, $options);
        }

        if ($options['readFile'] && file_exists($input)) {
            $contents = file_get_contents($input);
            // file_get_contents() signals failure with false; never hand that to the parser.
            if ($contents === false) {
                throw new XmlException('XML cannot be read.');
            }

            return static::_loadXml($contents, $options);
        }

        throw new XmlException('XML cannot be read.');
    }

    /**
     * Parse the input data and create either a SimpleXmlElement object or a DOMDocument.
     *
     * Unlike a bare `DOMDocument::loadXML()` call, a parse failure is reported as an
     * exception for both return types.
     *
     * @param string $input The input to load.
     * @param array $options The options to use. See Xml::build()
     * @return \SimpleXMLElement|\DOMDocument
     * @throws \Cake\Utility\Exception\XmlException When the input is empty or not well-formed XML.
     */
    protected static function _loadXml($input, $options)
    {
        return static::_loadWithLibxml($options, function ($flags) use ($input, $options) {
            $return = strtolower($options['return']);
            if ($return === 'simplexml' || $return === 'simplexmlelement') {
                return new SimpleXMLElement($input, $flags | LIBXML_NOCDATA);
            }

            $dom = new DOMDocument();
            // loadXML() returns false (instead of throwing) on malformed input, and an empty
            // string triggers a warning/ValueError, so both cases are turned into exceptions here.
            if (in_array($input, ['', null, false], true) || $dom->loadXML($input, $flags) === false) {
                throw new Exception('String could not be parsed as XML');
            }

            return $dom;
        });
    }

    /**
     * Parse the input html string and create either a SimpleXmlElement object or a DOMDocument.
     *
     * @param string $input The input html string to load.
     * @param array $options The options to use. See Xml::build()
     * @return \SimpleXMLElement|\DOMDocument
     * @throws \Cake\Utility\Exception\XmlException When the input is empty or cannot be converted.
     */
    public static function loadHtml($input, $options = [])
    {
        $options += [
            'return' => 'simplexml',
            'loadEntities' => false,
        ];

        return static::_loadWithLibxml($options, function ($flags) use ($input, $options) {
            $dom = new DOMDocument();
            if (in_array($input, ['', null, false], true) || $dom->loadHTML($input, $flags) === false) {
                throw new Exception('String could not be parsed as HTML');
            }

            $return = strtolower($options['return']);
            if ($return !== 'simplexml' && $return !== 'simplexmlelement') {
                return $dom;
            }

            // simplexml_import_dom() does not throw on failure (e.g. a document without a
            // root element); surface that instead of returning a non-object.
            $xml = simplexml_import_dom($dom);
            if (!($xml instanceof SimpleXMLElement)) {
                throw new Exception('HTML document could not be converted to SimpleXMLElement');
            }

            return $xml;
        });
    }

    /**
     * Run a libxml based loader with the parser state prepared and restored afterwards.
     *
     * - Internal libxml error handling is enabled for the duration of the call and then
     *   reset to its previous setting.
     * - On libxml older than 2.9.0 the external entity loader is disabled unless
     *   `loadEntities` is set. Its *previous* state is restored afterwards, so an
     *   application-wide "disabled" setting is never silently re-enabled. On newer libxml
     *   versions the call is skipped; it is deprecated as of PHP 8.0.
     * - Any exception raised by the loader is wrapped in an XmlException.
     *
     * @param array $options The options to use. Reads `parseHuge` and `loadEntities`.
     * @param callable $loader Callback receiving the libxml flags and returning the parsed document.
     * @return \SimpleXMLElement|\DOMDocument Whatever the loader returns.
     * @throws \Cake\Utility\Exception\XmlException When the loader throws.
     */
    protected static function _loadWithLibxml($options, callable $loader)
    {
        $flags = 0;
        if (!empty($options['parseHuge'])) {
            $flags |= LIBXML_PARSEHUGE;
        }

        $internalErrors = libxml_use_internal_errors(true);
        $previousEntityLoader = null;
        if (empty($options['loadEntities'])
            && LIBXML_VERSION < 20900
            && function_exists('libxml_disable_entity_loader')
        ) {
            // The function returns the previous state, which is what must be restored.
            $previousEntityLoader = libxml_disable_entity_loader(true);
        }

        try {
            return $loader($flags);
        } catch (Exception $e) {
            throw new XmlException('Xml cannot be read. ' . $e->getMessage(), null, $e);
        } finally {
            if ($previousEntityLoader !== null) {
                libxml_disable_entity_loader($previousEntityLoader);
            }
            libxml_use_internal_errors($internalErrors);
        }
    }

    /**
     * Transform an array into a SimpleXMLElement
     *
     * ### Options
     *
     * - `format` Whether to create children ('tags') or attributes ('attributes').
     * - `pretty` Returns formatted Xml when set to `true`. Defaults to `false`
     * - `version` Version of XML document. Default is 1.0.
     * - `encoding` Encoding of XML document. If null it is removed from the XML header.
     *   Defaults to the application's internal encoding (`mb_internal_encoding()`).
     * - `return` Whether to return a SimpleXMLElement ('simplexml') or a DOMDocument ('domdocument').
     *   Default is SimpleXMLElement.
     *
     * Using the following data:
     *
     * ```
     * $value = [
     *     'root' => [
     *         'tag' => [
     *             'id' => 1,
     *             'value' => 'defect',
     *             '@' => 'description'
     *         ]
     *     ]
     * ];
     * ```
     *
     * Calling `Xml::fromArray($value, 'tags');` Will generate:
     *
     * `<root><tag><id>1</id><value>defect</value>description</tag></root>`
     *
     * And calling `Xml::fromArray($value, 'attributes');` Will generate:
     *
     * `<root><tag id="1" value="defect">description</tag></root>`
     *
     * @param array|\Cake\Collection\Collection $input Array with data or a collection instance
     *   (any object exposing a callable `toArray()` is accepted).
     * @param string|array $options The options to use or a string to use as format.
     * @return \SimpleXMLElement|\DOMDocument SimpleXMLElement or DOMDocument
     * @throws \Cake\Utility\Exception\XmlException When the input is not an array with exactly one string key.
     */
    public static function fromArray($input, $options = [])
    {
        if (is_object($input) && method_exists($input, 'toArray') && is_callable([$input, 'toArray'])) {
            $input = $input->toArray();
        }
        if (!is_array($input) || count($input) !== 1) {
            throw new XmlException('Invalid input.');
        }
        // key() reads the internal array pointer, which the caller may have advanced.
        reset($input);
        if (is_int(key($input))) {
            throw new XmlException('The key of input must be alphanumeric');
        }

        if (!is_array($options)) {
            $options = ['format' => (string)$options];
        }
        if (!array_key_exists('encoding', $options)) {
            $options['encoding'] = mb_internal_encoding();
        }
        $options += [
            'format' => 'tags',
            'version' => '1.0',
            'return' => 'simplexml',
            'pretty' => false,
        ];

        // A null encoding means "omit from the header"; DOMDocument expects a string for that.
        $dom = new DOMDocument($options['version'], (string)$options['encoding']);
        if ($options['pretty']) {
            $dom->formatOutput = true;
        }
        static::_fromArray($dom, $dom, $input, $options['format']);

        $return = strtolower($options['return']);
        if ($return === 'simplexml' || $return === 'simplexmlelement') {
            return new SimpleXMLElement($dom->saveXML());
        }

        return $dom;
    }

    /**
     * Recursive method to create children from array
     *
     * Scalar values become child elements (format 'tags') or attributes (format
     * 'attributes', or any key prefixed with `@`). Keys containing `xmlns:` become
     * namespace declarations. Array values become nested elements; arrays whose keys
     * are all numeric are treated as a list of repeated elements named after `$key`.
     *
     * @param \DOMDocument $dom Handler to DOMDocument
     * @param \DOMElement|\DOMDocument $node Node that receives the generated children/attributes.
     * @param array $data Array of data to append to the $node.
     * @param string $format Either 'attributes' or 'tags'. This determines where nested keys go.
     * @return void
     * @throws \Cake\Utility\Exception\XmlException When a key is not a non-empty string, or an
     *   `@` prefixed key holds an array or has no name after the prefix.
     */
    protected static function _fromArray($dom, $node, &$data, $format)
    {
        if (empty($data) || !is_array($data)) {
            return;
        }
        foreach ($data as $key => $value) {
            if (!is_string($key) || $key === '') {
                throw new XmlException('Invalid array');
            }
            if (is_object($value) && method_exists($value, 'toArray') && is_callable([$value, 'toArray'])) {
                $value = $value->toArray();
            }

            if (is_array($value)) {
                if ($key[0] === '@') {
                    throw new XmlException('Invalid array');
                }
                if (static::_hasOnlyNumericKeys($value)) {
                    // List: one element named $key per item.
                    foreach ($value as $item) {
                        $itemData = compact('dom', 'node', 'key', 'format');
                        $itemData['value'] = $item;
                        static::_createChild($itemData);
                    }
                } else {
                    // Struct: a single nested element.
                    static::_createChild(compact('dom', 'node', 'key', 'value', 'format'));
                }
                continue;
            }

            if (is_bool($value)) {
                $value = (int)$value;
            } elseif ($value === null) {
                $value = '';
            }

            if (strpos($key, 'xmlns:') !== false) {
                $node->setAttributeNS('http://www.w3.org/2000/xmlns/', $key, (string)$value);
                continue;
            }

            if ($key[0] !== '@' && $format === 'tags') {
                // Always go through a text node so special characters are escaped.
                // https://www.w3.org/TR/REC-xml/#syntax
                // https://bugs.php.net/bug.php?id=36795
                $child = $dom->createElement($key);
                $child->appendChild($dom->createTextNode((string)$value));
                $node->appendChild($child);
                continue;
            }

            $attributeName = $key[0] === '@' ? (string)substr($key, 1) : $key;
            if ($attributeName === '') {
                // A bare '@' key only makes sense next to child data, never as an attribute name.
                throw new XmlException('Invalid array');
            }
            $attribute = $dom->createAttribute($attributeName);
            $attribute->appendChild($dom->createTextNode((string)$value));
            $node->appendChild($attribute);
        }
    }

    /**
     * Tell whether a non-empty array is keyed exclusively by numeric keys.
     *
     * Each key is checked on its own, so key sets such as `['1e', 5]` are not
     * mistaken for a list merely because their concatenation looks numeric.
     *
     * @param array $value The array to inspect.
     * @return bool True when the array is non-empty and every key is numeric.
     */
    protected static function _hasOnlyNumericKeys(array $value)
    {
        if (empty($value)) {
            return false;
        }
        foreach (array_keys($value) as $key) {
            if (!is_numeric($key)) {
                return false;
            }
        }

        return true;
    }

    /**
     * Helper to _fromArray(). It will create children of arrays
     *
     * Recognised entries of `$data`: `dom` (the DOMDocument), `node` (parent node),
     * `key` (element name), `value` (scalar, array or object with `toArray()`), and
     * `format` ('tags' or 'attributes'). Inside an array value, `@` holds the element's
     * text and `xmlns:` its default namespace.
     *
     * @param array $data Array with information to create children
     * @return void
     */
    protected static function _createChild($data)
    {
        $data += [
            'dom' => null,
            'node' => null,
            'key' => null,
            'value' => null,
            'format' => null,
        ];

        $dom = $data['dom'];
        $node = $data['node'];
        $key = $data['key'];
        $format = $data['format'];
        $value = $data['value'];

        if (is_object($value) && method_exists($value, 'toArray') && is_callable([$value, 'toArray'])) {
            $value = $value->toArray();
        }

        $childNS = $childValue = null;
        if (is_array($value)) {
            // array_key_exists() so that a null entry is consumed here instead of leaking
            // into _fromArray(), where a bare '@' key cannot be turned into an attribute.
            if (array_key_exists('@', $value)) {
                if ($value['@'] !== null) {
                    $childValue = (string)$value['@'];
                }
                unset($value['@']);
            }
            if (array_key_exists('xmlns:', $value)) {
                $childNS = $value['xmlns:'];
                unset($value['xmlns:']);
            }
        } elseif (!empty($value) || $value === 0 || $value === 0.0 || $value === '0') {
            // Zero in any scalar form is real content, not "no value".
            $childValue = (string)$value;
        }

        $child = $dom->createElement($key);
        if ($childValue !== null) {
            $child->appendChild($dom->createTextNode($childValue));
        }
        if ($childNS) {
            $child->setAttribute('xmlns', $childNS);
        }

        static::_fromArray($dom, $child, $value, $format);
        $node->appendChild($child);
    }

    /**
     * Returns this XML structure as an array.
     *
     * Attributes are stored under `@name` keys, element text next to attributes or
     * children under `@`, and repeated sibling elements are collected into a list.
     *
     * @param \SimpleXMLElement|\DOMDocument|\DOMNode $obj SimpleXMLElement, DOMDocument or DOMNode instance
     * @return array Array representation of the XML structure.
     * @throws \Cake\Utility\Exception\XmlException When the input cannot be handled as a SimpleXMLElement.
     */
    public static function toArray($obj)
    {
        if ($obj instanceof DOMNode) {
            $obj = simplexml_import_dom($obj);
        }
        if (!($obj instanceof SimpleXMLElement)) {
            throw new XmlException('The input is not instance of SimpleXMLElement, DOMDocument or DOMNode.');
        }

        $result = [];
        // The empty prefix comes first so un-prefixed nodes are always visited.
        $namespaces = array_merge(['' => ''], $obj->getNamespaces(true));
        static::_toArray($obj, $result, '', array_keys($namespaces));

        return $result;
    }

    /**
     * Recursive method to toArray
     *
     * @param \SimpleXMLElement $xml SimpleXMLElement object
     * @param array $parentData Parent array with data; the converted element is added to it by reference.
     * @param string $ns Namespace prefix of the current element ('' for none)
     * @param string[] $namespaces List of namespace prefixes in XML
     * @return void
     */
    protected static function _toArray($xml, &$parentData, $ns, $namespaces)
    {
        $data = [];

        foreach ($namespaces as $namespace) {
            $prefix = empty($namespace) ? '' : $namespace . ':';

            foreach ($xml->attributes($namespace, true) as $key => $value) {
                $data['@' . $prefix . $key] = (string)$value;
            }

            foreach ($xml->children($namespace, true) as $child) {
                static::_toArray($child, $data, $namespace, $namespaces);
            }
        }

        $asString = trim((string)$xml);
        if (empty($data)) {
            // Leaf element without attributes: represent it by its text alone.
            $data = $asString;
        } elseif (strlen($asString) > 0) {
            $data['@'] = $asString;
        }

        $name = (empty($ns) ? '' : $ns . ':') . $xml->getName();
        if (!isset($parentData[$name])) {
            $parentData[$name] = $data;

            return;
        }

        // A second element with the same name: promote the existing entry to a list first.
        if (!is_array($parentData[$name]) || !isset($parentData[$name][0])) {
            $parentData[$name] = [$parentData[$name]];
        }
        $parentData[$name][] = $data;
    }
}
486: