1: <?php
2: /**
3: * CakePHP(tm) : Rapid Development Framework (https://cakephp.org)
4: * Copyright (c) Cake Software Foundation, Inc. (https://cakefoundation.org)
5: *
6: * Licensed under The MIT License
7: * For full copyright and license information, please see the LICENSE.txt
8: * Redistributions of files must retain the above copyright notice.
9: *
10: * @copyright Copyright (c) Cake Software Foundation, Inc. (https://cakefoundation.org)
11: * @link https://cakephp.org CakePHP(tm) Project
12: * @since 1.2.0
13: * @license https://opensource.org/licenses/mit-license.php MIT License
14: */
15: namespace Cake\Utility;
16:
17: use InvalidArgumentException;
18:
19: /**
20: * Text handling methods.
21: */
22: class Text
23: {
/**
 * Default transliterator.
 *
 * Has no initial value; it is assigned through setTransliterator().
 *
 * @var \Transliterator|null Transliterator instance.
 */
protected static $_defaultTransliterator;

/**
 * Default transliterator id string.
 *
 * @var string Transliterator identifier string.
 */
protected static $_defaultTransliteratorId = 'Any-Latin; Latin-ASCII; [\u0080-\u7fff] remove';

/**
 * HTML tags whose text content is not counted towards the length
 * when truncating HTML text.
 *
 * @var array
 */
protected static $_defaultHtmlNoCount = [
    'style',
    'script'
];
47:
/**
 * Generate a random (version 4) UUID.
 *
 * Warning: The result must not be used as a random seed for cryptographic operations;
 * use the openssl or mcrypt extensions for that.
 *
 * It is also unsuitable for identifiers with security implications, such as
 * 'unguessable' URL identifiers. Use `Security::randomBytes()` for those instead.
 *
 * @see https://www.ietf.org/rfc/rfc4122.txt
 * @return string RFC 4122 UUID
 * @copyright Matt Farina MIT License https://github.com/lootils/uuid/blob/master/LICENSE
 */
public static function uuid()
{
    // Use random_int() when it exists, otherwise fall back to mt_rand().
    $rng = function_exists('random_int') ? 'random_int' : 'mt_rand';

    // "time_low": 32 bits, drawn as two 16-bit words.
    $timeLowHigh = $rng(0, 65535);
    $timeLowLow = $rng(0, 65535);

    // "time_mid": 16 bits.
    $timeMid = $rng(0, 65535);

    // "time_hi_and_version": 12 random bits below the 0100 version nibble (version 4).
    $timeHiAndVersion = $rng(0, 4095) | 0x4000;

    // "clk_seq_hi_res" + "clk_seq_low": 14 random bits, the two most
    // significant bits are forced to 1 and 0 (variant DCE1.1).
    $clockSequence = $rng(0, 0x3fff) | 0x8000;

    // "node": 48 bits, drawn as three 16-bit words.
    $nodeHigh = $rng(0, 65535);
    $nodeMid = $rng(0, 65535);
    $nodeLow = $rng(0, 65535);

    return sprintf(
        '%04x%04x-%04x-%04x-%04x-%04x%04x%04x',
        $timeLowHigh,
        $timeLowLow,
        $timeMid,
        $timeHiAndVersion,
        $clockSequence,
        $nodeHigh,
        $nodeMid,
        $nodeLow
    );
}
84:
/**
 * Tokenizes a string using $separator, ignoring any instance of $separator that appears between
 * $leftBound and $rightBound.
 *
 * Every returned token is passed through trim(). Input for which empty() is true
 * (this includes the string '0') yields an empty array.
 *
 * @param string $data The data to tokenize.
 * @param string $separator The token to split the data on.
 * @param string $leftBound The left boundary to ignore separators in.
 * @param string $rightBound The right boundary to ignore separators in.
 * @return string[] Array of trimmed tokens found in $data, or an empty array if $data is empty.
 */
public static function tokenize($data, $separator = ',', $leftBound = '(', $rightBound = ')')
{
    if (empty($data)) {
        return [];
    }

    $depth = 0;
    $offset = 0;
    $buffer = '';
    $results = [];
    $length = mb_strlen($data);
    // Only used when both bounds are the same string: tracks whether we are inside a bounded section.
    $open = false;

    while ($offset <= $length) {
        // Find the nearest upcoming separator or boundary; -1 means none was found.
        $tmpOffset = -1;
        $offsets = [
            mb_strpos($data, $separator, $offset),
            mb_strpos($data, $leftBound, $offset),
            mb_strpos($data, $rightBound, $offset)
        ];
        for ($i = 0; $i < 3; $i++) {
            if ($offsets[$i] !== false && ($offsets[$i] < $tmpOffset || $tmpOffset == -1)) {
                $tmpOffset = $offsets[$i];
            }
        }
        if ($tmpOffset !== -1) {
            // Copy everything up to the marker, then decide what the marker means.
            $buffer .= mb_substr($data, $offset, $tmpOffset - $offset);
            // Only a single character is read here, so the comparisons below can
            // only succeed for one-character $separator / $leftBound / $rightBound.
            $char = mb_substr($data, $tmpOffset, 1);
            if (!$depth && $char === $separator) {
                // A separator outside of any bounded section ends the current token.
                $results[] = $buffer;
                $buffer = '';
            } else {
                $buffer .= $char;
            }
            if ($leftBound !== $rightBound) {
                // Distinct bounds: nesting depth goes up on the left bound and down on the right.
                if ($char === $leftBound) {
                    $depth++;
                }
                if ($char === $rightBound) {
                    $depth--;
                }
            } else {
                // Identical bounds (e.g. quotes): each occurrence toggles the open state.
                if ($char === $leftBound) {
                    if (!$open) {
                        $depth++;
                        $open = true;
                    } else {
                        $depth--;
                        $open = false;
                    }
                }
            }
            $tmpOffset += 1;
            $offset = $tmpOffset;
        } else {
            // No more markers: the remainder of the input completes the last token.
            $results[] = $buffer . mb_substr($data, $offset);
            $offset = $length + 1;
        }
    }
    if (empty($results) && !empty($buffer)) {
        $results[] = $buffer;
    }

    if (!empty($results)) {
        return array_map('trim', $results);
    }

    return [];
}
164:
/**
 * Replaces variable placeholders inside a $str with any given $data. Each key in the $data array
 * corresponds to a variable placeholder name in $str.
 * Example:
 * ```
 * Text::insert(':name is :age years old.', ['name' => 'Bob', 'age' => '65']);
 * ```
 * Returns: Bob is 65 years old.
 *
 * When $str contains `?` and the first key of $data is numeric, each `?` is replaced
 * positionally with the next value shifted off $data instead.
 *
 * Available $options are:
 *
 * - before: The character or string in front of the name of the variable placeholder (Defaults to `:`)
 * - after: The character or string after the name of the variable placeholder (Defaults to null)
 * - escape: The character or string used to escape the before character / string (Defaults to `\`)
 * - format: A regex to use for matching variable placeholders. Default is: `/(?<!\\)\:%s/`
 *   (Overwrites before, after, breaks escape / clean)
 * - clean: A boolean or array with instructions for Text::cleanInsert
 *
 * @param string $str A string containing variable placeholders
 * @param array $data A key => val array where each key stands for a placeholder variable name
 *   to be replaced with val. Non-array values are cast to an array.
 * @param array $options An array of options, see description above
 * @return string
 */
public static function insert($str, $data, array $options = [])
{
    $defaults = [
        'before' => ':', 'after' => null, 'escape' => '\\', 'format' => null, 'clean' => false
    ];
    $options += $defaults;
    $format = $options['format'];
    $data = (array)$data;
    if (empty($data)) {
        // Nothing to insert; optionally strip the unreplaced placeholders.
        return $options['clean'] ? static::cleanInsert($str, $options) : $str;
    }

    if (!isset($format)) {
        // Build the placeholder regex from escape/before/after. '%' in before/after is
        // doubled because the result is used as a sprintf() format further down.
        $format = sprintf(
            '/(?<!%s)%s%%s%s/',
            preg_quote($options['escape'], '/'),
            str_replace('%', '%%', preg_quote($options['before'], '/')),
            str_replace('%', '%%', preg_quote($options['after'], '/'))
        );
    }

    if (strpos($str, '?') !== false && is_numeric(key($data))) {
        // Positional mode: replace each '?' with the next value, continuing the
        // search after the inserted value.
        $offset = 0;
        while (($pos = strpos($str, '?', $offset)) !== false) {
            $val = array_shift($data);
            $offset = $pos + strlen($val);
            $str = substr_replace($str, $val, $pos, 1);
        }

        return $options['clean'] ? static::cleanInsert($str, $options) : $str;
    }

    // Named mode, two passes: placeholders are first swapped for the crc32 of their
    // key, and those hashes are then swapped for the real values.
    $dataKeys = array_keys($data);
    $hashKeys = array_map('crc32', $dataKeys);
    $tempData = array_combine($dataKeys, $hashKeys);
    // Keys are processed in reverse sorted order - presumably so that a key which
    // is a prefix of another key is handled after the longer one.
    krsort($tempData);

    foreach ($tempData as $key => $hashVal) {
        $key = sprintf($format, preg_quote($key, '/'));
        $str = preg_replace($key, $hashVal, $str);
    }
    $dataReplacements = array_combine($hashKeys, array_values($data));
    foreach ($dataReplacements as $tmpHash => $tmpValue) {
        // Array values cannot be rendered as text and are inserted as an empty string.
        $tmpValue = is_array($tmpValue) ? '' : $tmpValue;
        $str = str_replace($tmpHash, $tmpValue, $str);
    }

    if (!isset($options['format']) && isset($options['before'])) {
        // Turn escaped placeholder prefixes (escape + before) back into plain prefixes.
        $str = str_replace($options['escape'] . $options['before'], $options['before'], $str);
    }

    return $options['clean'] ? static::cleanInsert($str, $options) : $str;
}
242:
/**
 * Removes leftover placeholders from a string produced by Text::insert().
 *
 * The 'clean' key of $options selects how the cleanup is done: `true` or 'text'
 * strips the placeholders together with surrounding whitespace and "and"/"or" glue
 * words, 'html' first strips whole attributes whose quoted value consists only of
 * placeholders and then (unless 'andText' is false) applies the text cleanup too.
 * An array may be given to override 'word', 'gap', 'andText' and 'replacement'.
 *
 * @param string $str String to clean.
 * @param array $options Options list.
 * @return string
 * @see \Cake\Utility\Text::insert()
 */
public static function cleanInsert($str, array $options)
{
    $settings = $options['clean'];
    if (!$settings) {
        return $str;
    }

    // Normalise the boolean / string shorthands into the array form.
    if (!is_array($settings)) {
        $settings = ['method' => $settings === true ? 'text' : $options['clean']];
    }

    $method = $settings['method'];

    if ($method == 'html') {
        $settings += [
            'word' => '[\w,.]+',
            'andText' => true,
            'replacement' => '',
        ];
        $attributePattern = sprintf(
            '/[\s]*[a-z]+=(")(%s%s%s[\s]*)+\\1/i',
            preg_quote($options['before'], '/'),
            $settings['word'],
            preg_quote($options['after'], '/')
        );
        $str = preg_replace($attributePattern, $settings['replacement'], $str);

        if ($settings['andText']) {
            // Run the plain-text cleanup over whatever is left.
            $options['clean'] = ['method' => 'text'];
            $str = static::cleanInsert($str, $options);
        }

        return $str;
    }

    if ($method == 'text') {
        $settings += [
            'word' => '[\w,.]+',
            'gap' => '[\s]*(?:(?:and|or)[\s]*)?',
            'replacement' => '',
        ];

        // Matches a placeholder followed by a gap, or a gap followed by a placeholder.
        $placeholderPattern = sprintf(
            '/(%s%s%s%s|%s%s%s%s)/',
            preg_quote($options['before'], '/'),
            $settings['word'],
            preg_quote($options['after'], '/'),
            $settings['gap'],
            $settings['gap'],
            preg_quote($options['before'], '/'),
            $settings['word'],
            preg_quote($options['after'], '/')
        );

        return preg_replace($placeholderPattern, $settings['replacement'], $str);
    }

    return $str;
}
309:
/**
 * Wraps text to a given width, optionally breaking only between words and
 * optionally indenting the resulting lines.
 *
 * ### Options
 *
 * - `width` The width to wrap to. Defaults to 72.
 * - `wordWrap` Only wrap on words breaks (spaces) Defaults to true.
 * - `indent` String to indent with. Defaults to null.
 * - `indentAt` 0 based index to start indenting at. Defaults to 0.
 *
 * @param string $text The text to format.
 * @param array|int $options Array of options to use, or an integer to wrap the text to.
 * @return string Formatted text.
 */
public static function wrap($text, $options = [])
{
    // A bare number is shorthand for the width option.
    if (is_numeric($options)) {
        $options = ['width' => $options];
    }
    $options += ['width' => 72, 'wordWrap' => true, 'indent' => null, 'indentAt' => 0];

    $wrapped = $options['wordWrap']
        ? self::wordWrap($text, $options['width'], "\n")
        : trim(chunk_split($text, $options['width'] - 1, "\n"));

    if (empty($options['indent'])) {
        return $wrapped;
    }

    // Prefix every line from index `indentAt` onwards with the indent string.
    $lines = explode("\n", $wrapped);
    $total = count($lines);
    $index = $options['indentAt'];
    while ($index < $total) {
        $lines[$index] = $options['indent'] . $lines[$index];
        $index++;
    }

    return implode("\n", $lines);
}
345:
/**
 * Wraps a complete block of text to a specific width, can optionally wrap
 * at word breaks.
 *
 * The text is wrapped once with wrap(). If an indent is configured and the result
 * spans more than one line, the lines from `indentAt` onwards are stripped of their
 * indent, joined back together and wrapped again at `width` minus the indent length,
 * so that the indented lines still fit into the requested width.
 *
 * ### Options
 *
 * - `width` The width to wrap to. Defaults to 72.
 * - `wordWrap` Only wrap on words breaks (spaces) Defaults to true.
 * - `indent` String to indent with. Defaults to null.
 * - `indentAt` 0 based index to start indenting at. Defaults to 0.
 *
 * @param string $text The text to format.
 * @param array|int $options Array of options to use, or an integer to wrap the text to.
 * @return string Formatted text.
 */
public static function wrapBlock($text, $options = [])
{
    // A bare number is shorthand for the width option.
    if (is_numeric($options)) {
        $options = ['width' => $options];
    }
    $options += ['width' => 72, 'wordWrap' => true, 'indent' => null, 'indentAt' => 0];

    // NOTE(review): this condition can never be true - a value that is not empty()
    // cannot be identical to 0 - so this branch is currently unreachable.
    if (!empty($options['indentAt']) && $options['indentAt'] === 0) {
        $indentLength = !empty($options['indent']) ? strlen($options['indent']) : 0;
        $options['width'] -= $indentLength;

        return self::wrap($text, $options);
    }

    $wrapped = self::wrap($text, $options);

    if (!empty($options['indent'])) {
        $indentationLength = mb_strlen($options['indent']);
        $chunks = explode("\n", $wrapped);
        $count = count($chunks);
        if ($count < 2) {
            // A single line needs no re-wrapping.
            return $wrapped;
        }
        // Collect the indented lines (without their indent) into one string and
        // drop them from $chunks; the lines before `indentAt` are kept as they are.
        $toRewrap = '';
        for ($i = $options['indentAt']; $i < $count; $i++) {
            $toRewrap .= mb_substr($chunks[$i], $indentationLength) . ' ';
            unset($chunks[$i]);
        }
        // Re-wrap the collected text with the indent length taken off the width,
        // indenting every resulting line.
        $options['width'] -= $indentationLength;
        $options['indentAt'] = 0;
        $rewrapped = self::wrap($toRewrap, $options);
        $newChunks = explode("\n", $rewrapped);

        $chunks = array_merge($chunks, $newChunks);
        $wrapped = implode("\n", $chunks);
    }

    return $wrapped;
}
400:
/**
 * Unicode and newline aware version of wordwrap.
 *
 * The text is split on $break, every piece is wrapped on its own and the
 * pieces are joined with $break again, so existing breaks are preserved.
 *
 * @param string $text The text to format.
 * @param int $width The width to wrap to. Defaults to 72.
 * @param string $break The line is broken using the optional break parameter. Defaults to '\n'.
 * @param bool $cut If the cut is set to true, the string is always wrapped at the specified width.
 * @return string Formatted text.
 */
public static function wordWrap($text, $width = 72, $break = "\n", $cut = false)
{
    $wrappedParagraphs = [];
    foreach (explode($break, $text) as $index => $paragraph) {
        $wrappedParagraphs[$index] = static::_wordWrap($paragraph, $width, $break, $cut);
    }

    return implode($break, $wrappedParagraphs);
}
419:
/**
 * Unicode aware version of wordwrap as helper method.
 *
 * Works on a single paragraph: wordWrap() splits the text on $break before calling this.
 * Every produced line is passed through trim().
 *
 * @param string $text The text to format.
 * @param int $width The width to wrap to. Defaults to 72.
 * @param string $break The line is broken using the optional break parameter. Defaults to '\n'.
 * @param bool $cut If the cut is set to true, the string is always wrapped at the specified width.
 * @return string Formatted text.
 */
protected static function _wordWrap($text, $width = 72, $break = "\n", $cut = false)
{
    if ($cut) {
        // Hard wrap: slice off $width characters at a time, regardless of word boundaries.
        // NOTE(review): with $width <= 0 the slice is empty and $text never shrinks,
        // so this loop would not terminate.
        $parts = [];
        while (mb_strlen($text) > 0) {
            $part = mb_substr($text, 0, $width);
            $parts[] = trim($part);
            $text = trim(mb_substr($text, mb_strlen($part)));
        }

        return implode($break, $parts);
    }

    $parts = [];
    while (mb_strlen($text) > 0) {
        if ($width >= mb_strlen($text)) {
            // The remainder fits on one line.
            $parts[] = trim($text);
            break;
        }

        $part = mb_substr($text, 0, $width);
        $nextChar = mb_substr($text, $width, 1);
        if ($nextChar !== ' ') {
            // The cut would land inside a word: back up to the last space within the
            // line, or - if there is none - extend to the first space after it.
            $breakAt = mb_strrpos($part, ' ');
            if ($breakAt === false) {
                $breakAt = mb_strpos($text, ' ', $width);
            }
            if ($breakAt === false) {
                // No space left at all: the rest stays on a single (over-long) line.
                $parts[] = trim($text);
                break;
            }
            $part = mb_substr($text, 0, $breakAt);
        }

        $part = trim($part);
        $parts[] = $part;
        $text = trim(mb_substr($text, mb_strlen($part)));
    }

    return implode($break, $parts);
}
470:
/**
 * Highlights a given phrase in a text. You can specify any expression in highlighter that
 * may include the \1 expression to include the $phrase found.
 *
 * ### Options:
 *
 * - `format` The piece of HTML with that the phrase will be highlighted. When $phrase is an
 *   array this may also be an array, providing one format per phrase (matched by key).
 * - `html` If true, will ignore any HTML tags, ensuring that only the correct text is highlighted
 * - `regex` A custom regex rule that is used to match words, default is '|%s|iu'. The `%s`
 *   is replaced with the quoted phrase wrapped in a capturing group.
 * - `limit` A limit, optional, defaults to -1 (none)
 *
 * @param string $text Text to search the phrase in.
 * @param string|array $phrase The phrase or phrases that will be searched.
 * @param array $options An array of HTML attributes and options.
 * @return string The highlighted text
 * @link https://book.cakephp.org/3.0/en/core-libraries/text.html#highlighting-substrings
 */
public static function highlight($text, $phrase, array $options = [])
{
    if (empty($phrase)) {
        return $text;
    }

    $defaults = [
        'format' => '<span class="highlight">\1</span>',
        'html' => false,
        'regex' => '|%s|iu',
        'limit' => -1,
    ];
    $options += $defaults;

    // Options are read explicitly rather than via extract(): extract() would let
    // caller supplied keys such as 'text' or 'phrase' overwrite the arguments.
    $html = $options['html'];
    $format = $options['format'];
    $limit = $options['limit'];

    if (is_array($phrase)) {
        $replace = [];
        $with = [];

        foreach ($phrase as $key => $segment) {
            $segment = '(' . preg_quote($segment, '|') . ')';
            if ($html) {
                // The lookaheads reject matches that sit inside an HTML tag.
                $segment = "(?![^<]+>)$segment(?![^<]+>)";
            }

            $with[] = is_array($format) ? $format[$key] : $format;
            $replace[] = sprintf($options['regex'], $segment);
        }

        return preg_replace($replace, $with, $text, $limit);
    }

    $phrase = '(' . preg_quote($phrase, '|') . ')';
    if ($html) {
        // The lookaheads reject matches that sit inside an HTML tag.
        $phrase = "(?![^<]+>)$phrase(?![^<]+>)";
    }

    return preg_replace(sprintf($options['regex'], $phrase), $format, $text, $limit);
}
534:
/**
 * Removes all anchor tags (<a ...> and </a>) from the given text.
 *
 * The removal is repeated until a pass no longer replaces anything, so tags
 * that only appear after an earlier removal are stripped as well.
 *
 * *Warning* This method is not an robust solution in preventing XSS
 * or malicious HTML.
 *
 * @param string $text Text
 * @return string The text without links
 * @deprecated 3.2.12 This method will be removed in 4.0.0
 */
public static function stripLinks($text)
{
    deprecationWarning('This method will be removed in 4.0.0.');

    $removed = 0;
    while (true) {
        $text = preg_replace('#</?a([/\s][^>]*)?(>|$)#i', '', $text, -1, $removed);
        if (!$removed) {
            break;
        }
    }

    return $text;
}
554:
/**
 * Truncates text starting from the end.
 *
 * Cuts a string to the length of $length and replaces the first characters
 * with the ellipsis if the text is longer than length. Text that already fits
 * into $length is returned unchanged.
 *
 * ### Options:
 *
 * - `ellipsis` Will be used as beginning and prepended to the trimmed string
 * - `exact` If false, $text will not be cut mid-word
 *
 * @param string $text String to truncate.
 * @param int $length Length of returned string, including ellipsis.
 * @param array $options An array of options.
 * @return string Trimmed string.
 */
public static function tail($text, $length = 100, array $options = [])
{
    $options += [
        'ellipsis' => '...', 'exact' => true
    ];

    // Options are read explicitly rather than via extract(): extract() would let
    // caller supplied keys such as 'text' or 'length' overwrite the arguments.
    $ellipsis = $options['ellipsis'];
    $exact = $options['exact'];

    $textLength = mb_strlen($text);
    if ($textLength <= $length) {
        return $text;
    }

    // Keep the last ($length - ellipsis length) characters.
    $truncate = mb_substr($text, $textLength - $length + mb_strlen($ellipsis));
    if (!$exact) {
        // Drop the (possibly partial) leading word; nothing is kept if there is no space.
        $spacepos = mb_strpos($truncate, ' ');
        $truncate = $spacepos === false ? '' : trim(mb_substr($truncate, $spacepos));
    }

    return $ellipsis . $truncate;
}
596:
/**
 * Truncates text.
 *
 * Cuts a string to the length of $length and replaces the last characters
 * with the ellipsis if the text is longer than length. Text that already fits
 * into $length is returned unchanged.
 *
 * In `html` mode tags are not counted towards the length, the text content of the tags
 * listed in `static::$_defaultHtmlNoCount` is not counted either, and every tag that is
 * still open at the cut position is closed after the ellipsis.
 *
 * ### Options:
 *
 * - `ellipsis` Will be used as ending and appended to the trimmed string. Defaults to `...`,
 *   or to the single character U+2026 when `html` is set and the internal encoding is UTF-8.
 * - `exact` If false, $text will not be cut mid-word
 * - `html` If true, HTML tags would be handled correctly
 * - `trimWidth` If true, $text will be truncated with the width
 *
 * @param string $text String to truncate.
 * @param int $length Length of returned string, including ellipsis.
 * @param array $options An array of HTML attributes and options.
 * @return string Trimmed string.
 * @link https://book.cakephp.org/3.0/en/core-libraries/text.html#truncating-text
 */
public static function truncate($text, $length = 100, array $options = [])
{
    $default = [
        'ellipsis' => '...', 'exact' => true, 'html' => false, 'trimWidth' => false,
    ];
    if (!empty($options['html']) && strtolower(mb_internal_encoding()) === 'utf-8') {
        $default['ellipsis'] = "\xe2\x80\xa6";
    }
    $options += $default;

    $prefix = '';
    $suffix = $options['ellipsis'];

    if ($options['html']) {
        $ellipsisLength = self::_strlen(strip_tags($options['ellipsis']), $options);

        $truncateLength = 0;
        $totalLength = 0;
        $openTags = [];
        $truncate = '';

        // Each match is: [1] an optional tag, [2] its name, [3] the text following it.
        preg_match_all('/(<\/?([\w+]+)[^>]*>)?([^<>]*)/', $text, $tags, PREG_SET_ORDER);
        foreach ($tags as $tag) {
            $contentLength = 0;
            if (!in_array($tag[2], static::$_defaultHtmlNoCount, true)) {
                $contentLength = self::_strlen($tag[3], $options);
            }

            if ($truncate === '') {
                // Void elements never need a closing tag. The pattern is anchored so that
                // only exact tag names match; unanchored it also matched names that merely
                // contain one of the words (abbr, colgroup, iframe, frameset, ...), which
                // were then left unclosed in the truncated output.
                if (!preg_match('/^(?:img|br|input|hr|area|base|basefont|col|frame|isindex|link|meta|param)$/i', $tag[2])) {
                    if (preg_match('/<[\w]+[^>]*>/', $tag[0])) {
                        array_unshift($openTags, $tag[2]);
                    } elseif (preg_match('/<\/([\w]+)[^>]*>/', $tag[0], $closeTag)) {
                        $pos = array_search($closeTag[1], $openTags, true);
                        if ($pos !== false) {
                            array_splice($openTags, $pos, 1);
                        }
                    }
                }

                $prefix .= $tag[1];

                if ($totalLength + $contentLength + $ellipsisLength > $length) {
                    // This text segment crosses the limit: it is the one that gets cut below.
                    $truncate = $tag[3];
                    $truncateLength = $length - $totalLength;
                } else {
                    $prefix .= $tag[3];
                }
            }

            $totalLength += $contentLength;
            if ($totalLength > $length) {
                break;
            }
        }

        if ($totalLength <= $length) {
            return $text;
        }

        $text = $truncate;
        $length = $truncateLength;

        // Close whatever is still open, innermost tag first.
        foreach ($openTags as $tag) {
            $suffix .= '</' . $tag . '>';
        }
    } else {
        if (self::_strlen($text, $options) <= $length) {
            return $text;
        }
        $ellipsisLength = self::_strlen($options['ellipsis'], $options);
    }

    $result = self::_substr($text, 0, $length - $ellipsisLength, $options);

    if (!$options['exact']) {
        // Unless the cut falls exactly in front of a space, drop the partial last word.
        if (self::_substr($text, $length - $ellipsisLength, 1, $options) !== ' ') {
            $result = self::_removeLastWord($result);
        }

        // If result is empty, then we don't need to count ellipsis in the cut.
        if (!strlen($result)) {
            $result = self::_substr($text, 0, $length, $options);
        }
    }

    return $prefix . $result . $suffix;
}
704:
/**
 * Truncates text by display width.
 *
 * Delegates to truncate() with the `trimWidth` option forced to true; all
 * other options are passed through unchanged.
 *
 * @param string $text String to truncate.
 * @param int $length Length of returned string, including ellipsis.
 * @param array $options An array of HTML attributes and options.
 * @return string Trimmed string.
 * @see \Cake\Utility\Text::truncate()
 */
public static function truncateByWidth($text, $length = 100, array $options = [])
{
    $widthOptions = ['trimWidth' => true] + $options;

    return static::truncate($text, $length, $widthOptions);
}
718:
/**
 * Measures a string.
 *
 * ### Options:
 *
 * - `html` If true, HTML entities will be handled as decoded characters.
 * - `trimWidth` If true, the display width (mb_strwidth) is measured instead
 *   of the number of characters (mb_strlen).
 *
 * @param string $text The string being checked for length
 * @param array $options An array of options.
 * @return int
 */
protected static function _strlen($text, array $options)
{
    $measure = empty($options['trimWidth']) ? 'mb_strlen' : 'mb_strwidth';

    if (empty($options['html'])) {
        return $measure($text);
    }

    // Swap every entity for as many spaces as its decoded form measures, so
    // that the entity counts like the character(s) it stands for.
    $entityPattern = '/&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};/i';
    $padEntity = function ($match) use ($measure) {
        $decoded = html_entity_decode($match[0], ENT_HTML5 | ENT_QUOTES, 'UTF-8');

        return str_repeat(' ', $measure($decoded, 'UTF-8'));
    };

    return $measure(preg_replace_callback($entityPattern, $padEntity, $text));
}
756:
/**
 * Return part of a string.
 *
 * Negative $start and $length values are resolved relative to the end of the string.
 * In `html` mode the text is processed entity by entity: an entity that would have to
 * be cut in the middle is skipped instead of being split.
 *
 * ### Options:
 *
 * - `html` If true, HTML entities will be handled as decoded characters.
 * - `trimWidth` If true, will be truncated with specified width.
 *
 * @param string $text The input string.
 * @param int $start The position to begin extracting.
 * @param int|null $length The desired length; null means "up to the end of the string".
 * @param array $options An array of options.
 * @return string
 */
protected static function _substr($text, $start, $length, array $options)
{
    if (empty($options['trimWidth'])) {
        $substr = 'mb_substr';
    } else {
        $substr = 'mb_strimwidth';
    }

    // Positions are always counted in characters, even when lengths are measured in width.
    $maxPosition = self::_strlen($text, ['trimWidth' => false] + $options);
    if ($start < 0) {
        // A negative start counts from the end, clamped to the beginning.
        $start += $maxPosition;
        if ($start < 0) {
            $start = 0;
        }
    }
    if ($start >= $maxPosition) {
        return '';
    }

    if ($length === null) {
        $length = self::_strlen($text, $options);
    }

    if ($length < 0) {
        // A negative length leaves out that many units at the end: reduce the problem
        // to "everything from $start" and shorten the length accordingly.
        $text = self::_substr($text, $start, null, $options);
        $start = 0;
        $length += self::_strlen($text, $options);
    }

    if ($length <= 0) {
        return '';
    }

    if (empty($options['html'])) {
        return (string)$substr($text, $start, $length);
    }

    // Number of characters skipped so far while looking for $start.
    $totalOffset = 0;
    // Length of what has been collected into $result so far.
    $totalLength = 0;
    $result = '';

    // Split into alternating plain-text parts and entities (the entities are kept as parts).
    $pattern = '/(&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};)/i';
    $parts = preg_split($pattern, $text, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
    foreach ($parts as $part) {
        $offset = 0;

        if ($totalOffset < $start) {
            $len = self::_strlen($part, ['trimWidth' => false] + $options);
            if ($totalOffset + $len <= $start) {
                // The whole part lies before $start.
                $totalOffset += $len;
                continue;
            }

            // $start falls inside this part.
            $offset = $start - $totalOffset;
            $totalOffset = $start;
        }

        $len = self::_strlen($part, $options);
        if ($offset !== 0 || $totalLength + $len > $length) {
            // Only a slice of this part is wanted.
            if (strpos($part, '&') === 0 && preg_match($pattern, $part)
                && $part !== html_entity_decode($part, ENT_HTML5 | ENT_QUOTES, 'UTF-8')
            ) {
                // Entities cannot be passed substr.
                continue;
            }

            $part = $substr($part, $offset, $length - $totalLength);
            $len = self::_strlen($part, $options);
        }

        $result .= $part;
        $totalLength += $len;
        if ($totalLength >= $length) {
            break;
        }
    }

    return $result;
}
850:
/**
 * Removes the last word from the input text.
 *
 * The last word is everything from the last space onwards. It is only removed when
 * it contains no full-width characters; text without any space yields an empty string.
 *
 * @param string $text The input text
 * @return string
 */
protected static function _removeLastWord($text)
{
    $spacepos = mb_strrpos($text, ' ');
    if ($spacepos === false) {
        return '';
    }

    // The trailing segment, starting at the last space. This has to be the substring
    // itself: measuring the result of mb_strrpos() (a position) made the check below
    // succeed for every input.
    $lastWord = mb_substr($text, $spacepos);

    // Some languages are written without word separation.
    // We recognize a string as a word if it doesn't contain any full-width characters.
    if (mb_strwidth($lastWord) === mb_strlen($lastWord)) {
        $text = mb_substr($text, 0, $spacepos);
    }

    return $text;
}
875:
/**
 * Extracts the part of $text that surrounds the first (case-insensitive) occurrence of
 * $phrase, keeping up to $radius characters on each side of it.
 *
 * The ellipsis is added on every side where text was cut off. If $text or $phrase is
 * empty the text is truncated to twice the radius; if the phrase does not occur, the
 * first $radius characters followed by the ellipsis are returned.
 *
 * @param string $text String to search the phrase in
 * @param string $phrase Phrase that will be searched for
 * @param int $radius The amount of characters that will be returned on each side of the founded phrase
 * @param string $ellipsis Ending that will be appended
 * @return string Modified string
 * @link https://book.cakephp.org/3.0/en/core-libraries/text.html#extracting-an-excerpt
 */
public static function excerpt($text, $phrase, $radius = 100, $ellipsis = '...')
{
    if (empty($text) || empty($phrase)) {
        return static::truncate($text, $radius * 2, ['ellipsis' => $ellipsis]);
    }

    $matchAt = mb_stripos($text, $phrase);
    if ($matchAt === false) {
        return mb_substr($text, 0, $radius) . $ellipsis;
    }

    // Left edge: no leading ellipsis when the excerpt reaches the start of the text.
    $leading = $ellipsis;
    $from = $matchAt - $radius;
    if ($from <= 0) {
        $from = 0;
        $leading = '';
    }

    // Right edge: no trailing ellipsis when the excerpt reaches the end of the text.
    $trailing = $ellipsis;
    $textEnd = mb_strlen($text);
    $to = $matchAt + mb_strlen($phrase) + $radius;
    if ($to >= $textEnd) {
        $to = $textEnd;
        $trailing = '';
    }

    return $leading . mb_substr($text, $from, $to - $from) . $trailing;
}
920:
/**
 * Creates a comma separated list where the last two items are joined with 'and', forming natural language.
 *
 * A list with a single item returns that item unchanged; an empty list returns null.
 *
 * @param string[] $list The list to be joined.
 * @param string|null $and The word used to join the last and second last items together with. Defaults to 'and'.
 * @param string $separator The separator used to join all the other items together. Defaults to ', '.
 * @return string|null The glued together string, or null when $list is empty.
 * @link https://book.cakephp.org/3.0/en/core-libraries/text.html#converting-an-array-to-sentence-form
 */
public static function toList(array $list, $and = null, $separator = ', ')
{
    if ($and === null) {
        $and = __d('cake', 'and');
    }
    if (count($list) > 1) {
        // Everything but the last item is joined with the separator. The offset is an
        // explicit 0: passing null for it is deprecated as of PHP 8.1.
        return implode($separator, array_slice($list, 0, -1)) . ' ' . $and . ' ' . array_pop($list);
    }

    return array_pop($list);
}
941:
/**
 * Checks whether the string contains multibyte characters.
 *
 * The check is done per byte: the string is reported as multibyte as soon as
 * one of its bytes has a value greater than 128.
 *
 * @param string $string value to test
 * @return bool
 */
public static function isMultibyte($string)
{
    $index = strlen($string);

    // The scan direction is irrelevant for the result, so walk backwards.
    while ($index-- > 0) {
        if (ord($string[$index]) > 128) {
            return true;
        }
    }

    return false;
}
961:
/**
 * Converts a UTF-8 encoded string into the list of its code points.
 *
 * Single byte (ASCII) characters as well as two, three and four byte sequences are
 * decoded. The input is assumed to be well-formed UTF-8; an incomplete sequence at the
 * end of the string is dropped.
 *
 * @param string $string String to convert.
 * @return array Decimal code point of every character, in order.
 */
public static function utf8($string)
{
    $map = [];

    // Bytes collected for the multibyte sequence currently being read.
    $values = [];
    // Number of bytes the current sequence consists of.
    $find = 1;
    $length = strlen($string);

    for ($i = 0; $i < $length; $i++) {
        $value = ord($string[$i]);

        if ($value < 128) {
            $map[] = $value;
            continue;
        }

        if (empty($values)) {
            // The lead byte determines the length: 110xxxxx starts a two byte,
            // 1110xxxx a three byte and 11110xxx a four byte sequence.
            if ($value < 224) {
                $find = 2;
            } elseif ($value < 240) {
                $find = 3;
            } else {
                $find = 4;
            }
        }
        $values[] = $value;

        if (count($values) !== $find) {
            continue;
        }

        // Strip the marker bits of each byte and combine the payload bits.
        if ($find === 4) {
            $map[] = (($values[0] % 8) * 262144) + (($values[1] % 64) * 4096)
                + (($values[2] % 64) * 64) + ($values[3] % 64);
        } elseif ($find === 3) {
            $map[] = (($values[0] % 16) * 4096) + (($values[1] % 64) * 64) + ($values[2] % 64);
        } else {
            $map[] = (($values[0] % 32) * 64) + ($values[1] % 64);
        }
        $values = [];
        $find = 1;
    }

    return $map;
}
1002:
/**
 * Converts a list of code points into a UTF-8 encoded string.
 *
 * This is the inverse of utf8(). Code points below 128 become a single byte, larger
 * ones are encoded as two, three or four byte sequences.
 *
 * @param array $array Decimal code points.
 * @return string
 */
public static function ascii(array $array)
{
    $ascii = '';

    foreach ($array as $utf8) {
        if ($utf8 < 128) {
            $ascii .= chr($utf8);
        } elseif ($utf8 < 2048) {
            // Two bytes: 110xxxxx 10xxxxxx
            $ascii .= chr(192 + (($utf8 - ($utf8 % 64)) / 64));
            $ascii .= chr(128 + ($utf8 % 64));
        } elseif ($utf8 < 65536) {
            // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
            $ascii .= chr(224 + (($utf8 - ($utf8 % 4096)) / 4096));
            $ascii .= chr(128 + ((($utf8 % 4096) - ($utf8 % 64)) / 64));
            $ascii .= chr(128 + ($utf8 % 64));
        } else {
            // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx. Without this branch
            // code points above U+FFFF overflowed the lead byte of the three byte form.
            $ascii .= chr(240 + (($utf8 - ($utf8 % 262144)) / 262144));
            $ascii .= chr(128 + ((($utf8 % 262144) - ($utf8 % 4096)) / 4096));
            $ascii .= chr(128 + ((($utf8 % 4096) - ($utf8 % 64)) / 64));
            $ascii .= chr(128 + ($utf8 % 64));
        }
    }

    return $ascii;
}
1029:
/**
 * Converts filesize from human readable string to bytes
 *
 * Accepted are plain digit strings (or integers), digits followed by `B`, and numbers
 * followed by one of the units K, M, G, T, P or KB, MB, GB, TB, PB (case-insensitive),
 * where each unit is 1024 times the previous one.
 *
 * @param string|int $size Size in human readable string like '5MB', '5M', '500B', '50kb' etc.
 * @param mixed $default Value to be returned when invalid size was used, for example 'Unknown type'
 * @return mixed Number of bytes on success (an integer for plain and `B` sizes, a float
 *   for sizes with a K..P unit), `$default` on failure if not false
 * @throws \InvalidArgumentException On invalid Unit type.
 * @link https://book.cakephp.org/3.0/en/core-libraries/text.html#Cake\Utility\Text::parseFileSize
 */
public static function parseFileSize($size, $default = false)
{
    // ctype_digit() must be given a string: an integer between -128 and 255 would be
    // interpreted as the ASCII value of a single character instead of as a number.
    if (ctype_digit((string)$size)) {
        return (int)$size;
    }
    $size = strtoupper($size);

    // $l is the (negative) length of the unit suffix, $i the index of the unit.
    $l = -2;
    $i = array_search(substr($size, -2), ['KB', 'MB', 'GB', 'TB', 'PB'], true);
    if ($i === false) {
        $l = -1;
        $i = array_search(substr($size, -1), ['K', 'M', 'G', 'T', 'P'], true);
    }
    if ($i !== false) {
        $size = (float)substr($size, 0, $l);

        return $size * pow(1024, $i + 1);
    }

    // Plain bytes with an explicit 'B' suffix.
    if (substr($size, -1) === 'B' && ctype_digit(substr($size, 0, -1))) {
        $size = substr($size, 0, -1);

        return (int)$size;
    }

    if ($default !== false) {
        return $default;
    }
    throw new InvalidArgumentException('No unit type.');
}
1069:
/**
 * Returns the transliterator currently stored as the default.
 *
 * @return \Transliterator|null The stored `Transliterator` instance, or `null`
 *   when no transliterator has been set yet.
 * @since 3.7.0
 */
public static function getTransliterator()
{
    $current = static::$_defaultTransliterator;

    return $current;
}
1081:
/**
 * Stores the given transliterator as the default one.
 *
 * The stored transliterator identifier string is not modified by this method.
 *
 * @param \Transliterator $transliterator The `Transliterator` instance to use as default.
 * @return void
 * @since 3.7.0
 */
public static function setTransliterator(\Transliterator $transliterator)
{
    static::$_defaultTransliterator = $transliterator;
}
1093:
/**
 * Returns the transliterator identifier string currently stored as the default.
 *
 * @return string Transliterator identifier.
 */
public static function getTransliteratorId()
{
    $identifier = static::$_defaultTransliteratorId;

    return $identifier;
}
1103:
/**
 * Set default transliterator identifier string.
 *
 * A transliterator is created from the identifier and stored as the default
 * transliterator, then the identifier itself is stored. Nothing is changed
 * when the transliterator cannot be created.
 *
 * @param string $transliteratorId Transliterator identifier.
 * @return void
 * @throws \InvalidArgumentException When no transliterator can be created for the identifier.
 */
public static function setTransliteratorId($transliteratorId)
{
    $transliterator = transliterator_create($transliteratorId);

    // Fail with a descriptive error instead of passing a non-object on to
    // setTransliterator(), which only accepts \Transliterator instances.
    if (!$transliterator instanceof \Transliterator) {
        throw new InvalidArgumentException(
            sprintf('Unable to create transliterator for id: %s', $transliteratorId)
        );
    }

    static::setTransliterator($transliterator);
    static::$_defaultTransliteratorId = $transliteratorId;
}
1115:
/**
 * Transliterates the given string.
 *
 * When no transliterator is passed, the default transliterator instance is
 * used if one has been set, otherwise the default transliterator identifier.
 *
 * @param string $string String to transliterate.
 * @param \Transliterator|string|null $transliterator Either a Transliterator
 *   instance, or a transliterator identifier string. If `null`, the default
 *   transliterator (identifier) set via `setTransliteratorId()` or
 *   `setTransliterator()` will be used.
 * @return string The result of `transliterator_transliterate()`.
 * @see https://secure.php.net/manual/en/transliterator.transliterate.php
 */
public static function transliterate($string, $transliterator = null)
{
    if ($transliterator) {
        return transliterator_transliterate($transliterator, $string);
    }

    // Prefer the stored instance, fall back to the identifier string.
    $fallback = static::$_defaultTransliterator;
    if (!$fallback) {
        $fallback = static::$_defaultTransliteratorId;
    }

    return transliterator_transliterate($fallback, $string);
}
1135:
/**
 * Returns a string with all spaces converted to dashes (by default),
 * characters transliterated to ASCII characters, and non word characters removed.
 *
 * ### Options:
 *
 * - `replacement`: Replacement string. Default '-'.
 * - `transliteratorId`: A valid transliterator id string.
 *   If `null` (default) the transliterator (identifier) set via
 *   `setTransliteratorId()` or `setTransliterator()` will be used.
 *   If `false` no transliteration will be done, only non words will be removed.
 * - `preserve`: Specific non-word character to preserve. Default `null`.
 *   For e.g. this option can be set to '.' to generate clean file names.
 *
 * @param string $string the string you want to slug
 * @param array|string $options If string it will be used as replacement character
 *   or an array of options.
 * @return string
 * @see setTransliterator()
 * @see setTransliteratorId()
 */
public static function slug($string, $options = [])
{
    if (is_string($options)) {
        $options = ['replacement' => $options];
    }
    $options += [
        'replacement' => '-',
        'transliteratorId' => null,
        'preserve' => null
    ];

    if ($options['transliteratorId'] !== false) {
        $string = static::transliterate($string, $options['transliteratorId']);
    }

    // Negated character class: everything that is not whitespace, a letter,
    // a decimal digit or one of the preserved characters.
    $regex = '^\s\p{Ll}\p{Lm}\p{Lo}\p{Lt}\p{Lu}\p{Nd}';
    if ($options['preserve']) {
        $regex .= preg_quote($options['preserve'], '/');
    }
    $quotedReplacement = preg_quote($options['replacement'], '/');

    // Applied in order: unwanted characters become spaces, whitespace runs
    // collapse into the replacement, replacement characters at both ends are
    // trimmed. The trim pattern uses the `u` modifier so that a multibyte
    // replacement is matched as characters, not as individual bytes that
    // could also occur inside other multibyte characters.
    $map = [
        '/[' . $regex . ']/mu' => ' ',
        '/[\s]+/mu' => $options['replacement'],
        sprintf('/^[%s]+|[%s]+$/u', $quotedReplacement, $quotedReplacement) => '',
    ];

    return preg_replace(array_keys($map), $map, $string);
}
1186: }
1187: