File Utility/Text.php

   1: <?php
   2: /**
   3:  * CakePHP(tm) : Rapid Development Framework (https://cakephp.org)
   4:  * Copyright (c) Cake Software Foundation, Inc. (https://cakefoundation.org)
   5:  *
   6:  * Licensed under The MIT License
   7:  * For full copyright and license information, please see the LICENSE.txt
   8:  * Redistributions of files must retain the above copyright notice.
   9:  *
  10:  * @copyright     Copyright (c) Cake Software Foundation, Inc. (https://cakefoundation.org)
  11:  * @link          https://cakephp.org CakePHP(tm) Project
  12:  * @since         1.2.0
  13:  * @license       https://opensource.org/licenses/mit-license.php MIT License
  14:  */
  15: namespace Cake\Utility;
  16: 
  17: use InvalidArgumentException;
  18: 
  19: /**
  20:  * Text handling methods.
  21:  */
  22: class Text
  23: {
    /**
     * Default transliterator.
     *
     * Declared without an initial value, so it is null until something assigns it.
     *
     * @var \Transliterator|null Transliterator instance.
     */
    protected static $_defaultTransliterator;

    /**
     * Default transliterator id string.
     *
     * @var string $_defaultTransliteratorId Transliterator identifier string.
     */
    protected static $_defaultTransliteratorId = 'Any-Latin; Latin-ASCII; [\u0080-\u7fff] remove';

    /**
     * Default HTML tags whose text content is not counted when truncating text.
     *
     * truncate() looks each parsed tag name up in this list and, on a match,
     * leaves the length contributed by the text following that tag at zero.
     *
     * @var array
     */
    protected static $_defaultHtmlNoCount = [
        'style',
        'script'
    ];
  47: 
  48:     /**
  49:      * Generate a random UUID version 4
  50:      *
  51:      * Warning: This method should not be used as a random seed for any cryptographic operations.
  52:      * Instead you should use the openssl or mcrypt extensions.
  53:      *
  54:      * It should also not be used to create identifiers that have security implications, such as
  55:      * 'unguessable' URL identifiers. Instead you should use `Security::randomBytes()` for that.
  56:      *
  57:      * @see https://www.ietf.org/rfc/rfc4122.txt
  58:      * @return string RFC 4122 UUID
  59:      * @copyright Matt Farina MIT License https://github.com/lootils/uuid/blob/master/LICENSE
  60:      */
  61:     public static function uuid()
  62:     {
  63:         $random = function_exists('random_int') ? 'random_int' : 'mt_rand';
  64: 
  65:         return sprintf(
  66:             '%04x%04x-%04x-%04x-%04x-%04x%04x%04x',
  67:             // 32 bits for "time_low"
  68:             $random(0, 65535),
  69:             $random(0, 65535),
  70:             // 16 bits for "time_mid"
  71:             $random(0, 65535),
  72:             // 12 bits before the 0100 of (version) 4 for "time_hi_and_version"
  73:             $random(0, 4095) | 0x4000,
  74:             // 16 bits, 8 bits for "clk_seq_hi_res",
  75:             // 8 bits for "clk_seq_low",
  76:             // two most significant bits holds zero and one for variant DCE1.1
  77:             $random(0, 0x3fff) | 0x8000,
  78:             // 48 bits for "node"
  79:             $random(0, 65535),
  80:             $random(0, 65535),
  81:             $random(0, 65535)
  82:         );
  83:     }
  84: 
  85:     /**
  86:      * Tokenizes a string using $separator, ignoring any instance of $separator that appears between
  87:      * $leftBound and $rightBound.
  88:      *
  89:      * @param string $data The data to tokenize.
  90:      * @param string $separator The token to split the data on.
  91:      * @param string $leftBound The left boundary to ignore separators in.
  92:      * @param string $rightBound The right boundary to ignore separators in.
  93:      * @return string|string[] Array of tokens in $data or original input if empty.
  94:      */
  95:     public static function tokenize($data, $separator = ',', $leftBound = '(', $rightBound = ')')
  96:     {
  97:         if (empty($data)) {
  98:             return [];
  99:         }
 100: 
 101:         $depth = 0;
 102:         $offset = 0;
 103:         $buffer = '';
 104:         $results = [];
 105:         $length = mb_strlen($data);
 106:         $open = false;
 107: 
 108:         while ($offset <= $length) {
 109:             $tmpOffset = -1;
 110:             $offsets = [
 111:                 mb_strpos($data, $separator, $offset),
 112:                 mb_strpos($data, $leftBound, $offset),
 113:                 mb_strpos($data, $rightBound, $offset)
 114:             ];
 115:             for ($i = 0; $i < 3; $i++) {
 116:                 if ($offsets[$i] !== false && ($offsets[$i] < $tmpOffset || $tmpOffset == -1)) {
 117:                     $tmpOffset = $offsets[$i];
 118:                 }
 119:             }
 120:             if ($tmpOffset !== -1) {
 121:                 $buffer .= mb_substr($data, $offset, $tmpOffset - $offset);
 122:                 $char = mb_substr($data, $tmpOffset, 1);
 123:                 if (!$depth && $char === $separator) {
 124:                     $results[] = $buffer;
 125:                     $buffer = '';
 126:                 } else {
 127:                     $buffer .= $char;
 128:                 }
 129:                 if ($leftBound !== $rightBound) {
 130:                     if ($char === $leftBound) {
 131:                         $depth++;
 132:                     }
 133:                     if ($char === $rightBound) {
 134:                         $depth--;
 135:                     }
 136:                 } else {
 137:                     if ($char === $leftBound) {
 138:                         if (!$open) {
 139:                             $depth++;
 140:                             $open = true;
 141:                         } else {
 142:                             $depth--;
 143:                             $open = false;
 144:                         }
 145:                     }
 146:                 }
 147:                 $tmpOffset += 1;
 148:                 $offset = $tmpOffset;
 149:             } else {
 150:                 $results[] = $buffer . mb_substr($data, $offset);
 151:                 $offset = $length + 1;
 152:             }
 153:         }
 154:         if (empty($results) && !empty($buffer)) {
 155:             $results[] = $buffer;
 156:         }
 157: 
 158:         if (!empty($results)) {
 159:             return array_map('trim', $results);
 160:         }
 161: 
 162:         return [];
 163:     }
 164: 
 165:     /**
 166:      * Replaces variable placeholders inside a $str with any given $data. Each key in the $data array
 167:      * corresponds to a variable placeholder name in $str.
 168:      * Example:
 169:      * ```
 170:      * Text::insert(':name is :age years old.', ['name' => 'Bob', 'age' => '65']);
 171:      * ```
 172:      * Returns: Bob is 65 years old.
 173:      *
 174:      * Available $options are:
 175:      *
 176:      * - before: The character or string in front of the name of the variable placeholder (Defaults to `:`)
 177:      * - after: The character or string after the name of the variable placeholder (Defaults to null)
 178:      * - escape: The character or string used to escape the before character / string (Defaults to `\`)
 179:      * - format: A regex to use for matching variable placeholders. Default is: `/(?<!\\)\:%s/`
 180:      *   (Overwrites before, after, breaks escape / clean)
 181:      * - clean: A boolean or array with instructions for Text::cleanInsert
 182:      *
 183:      * @param string $str A string containing variable placeholders
 184:      * @param array $data A key => val array where each key stands for a placeholder variable name
 185:      *     to be replaced with val
 186:      * @param array $options An array of options, see description above
 187:      * @return string
 188:      */
 189:     public static function insert($str, $data, array $options = [])
 190:     {
 191:         $defaults = [
 192:             'before' => ':', 'after' => null, 'escape' => '\\', 'format' => null, 'clean' => false
 193:         ];
 194:         $options += $defaults;
 195:         $format = $options['format'];
 196:         $data = (array)$data;
 197:         if (empty($data)) {
 198:             return $options['clean'] ? static::cleanInsert($str, $options) : $str;
 199:         }
 200: 
 201:         if (!isset($format)) {
 202:             $format = sprintf(
 203:                 '/(?<!%s)%s%%s%s/',
 204:                 preg_quote($options['escape'], '/'),
 205:                 str_replace('%', '%%', preg_quote($options['before'], '/')),
 206:                 str_replace('%', '%%', preg_quote($options['after'], '/'))
 207:             );
 208:         }
 209: 
 210:         if (strpos($str, '?') !== false && is_numeric(key($data))) {
 211:             $offset = 0;
 212:             while (($pos = strpos($str, '?', $offset)) !== false) {
 213:                 $val = array_shift($data);
 214:                 $offset = $pos + strlen($val);
 215:                 $str = substr_replace($str, $val, $pos, 1);
 216:             }
 217: 
 218:             return $options['clean'] ? static::cleanInsert($str, $options) : $str;
 219:         }
 220: 
 221:         $dataKeys = array_keys($data);
 222:         $hashKeys = array_map('crc32', $dataKeys);
 223:         $tempData = array_combine($dataKeys, $hashKeys);
 224:         krsort($tempData);
 225: 
 226:         foreach ($tempData as $key => $hashVal) {
 227:             $key = sprintf($format, preg_quote($key, '/'));
 228:             $str = preg_replace($key, $hashVal, $str);
 229:         }
 230:         $dataReplacements = array_combine($hashKeys, array_values($data));
 231:         foreach ($dataReplacements as $tmpHash => $tmpValue) {
 232:             $tmpValue = is_array($tmpValue) ? '' : $tmpValue;
 233:             $str = str_replace($tmpHash, $tmpValue, $str);
 234:         }
 235: 
 236:         if (!isset($options['format']) && isset($options['before'])) {
 237:             $str = str_replace($options['escape'] . $options['before'], $options['before'], $str);
 238:         }
 239: 
 240:         return $options['clean'] ? static::cleanInsert($str, $options) : $str;
 241:     }
 242: 
 243:     /**
 244:      * Cleans up a Text::insert() formatted string with given $options depending on the 'clean' key in
 245:      * $options. The default method used is text but html is also available. The goal of this function
 246:      * is to replace all whitespace and unneeded markup around placeholders that did not get replaced
 247:      * by Text::insert().
 248:      *
 249:      * @param string $str String to clean.
 250:      * @param array $options Options list.
 251:      * @return string
 252:      * @see \Cake\Utility\Text::insert()
 253:      */
 254:     public static function cleanInsert($str, array $options)
 255:     {
 256:         $clean = $options['clean'];
 257:         if (!$clean) {
 258:             return $str;
 259:         }
 260:         if ($clean === true) {
 261:             $clean = ['method' => 'text'];
 262:         }
 263:         if (!is_array($clean)) {
 264:             $clean = ['method' => $options['clean']];
 265:         }
 266:         switch ($clean['method']) {
 267:             case 'html':
 268:                 $clean += [
 269:                     'word' => '[\w,.]+',
 270:                     'andText' => true,
 271:                     'replacement' => '',
 272:                 ];
 273:                 $kleenex = sprintf(
 274:                     '/[\s]*[a-z]+=(")(%s%s%s[\s]*)+\\1/i',
 275:                     preg_quote($options['before'], '/'),
 276:                     $clean['word'],
 277:                     preg_quote($options['after'], '/')
 278:                 );
 279:                 $str = preg_replace($kleenex, $clean['replacement'], $str);
 280:                 if ($clean['andText']) {
 281:                     $options['clean'] = ['method' => 'text'];
 282:                     $str = static::cleanInsert($str, $options);
 283:                 }
 284:                 break;
 285:             case 'text':
 286:                 $clean += [
 287:                     'word' => '[\w,.]+',
 288:                     'gap' => '[\s]*(?:(?:and|or)[\s]*)?',
 289:                     'replacement' => '',
 290:                 ];
 291: 
 292:                 $kleenex = sprintf(
 293:                     '/(%s%s%s%s|%s%s%s%s)/',
 294:                     preg_quote($options['before'], '/'),
 295:                     $clean['word'],
 296:                     preg_quote($options['after'], '/'),
 297:                     $clean['gap'],
 298:                     $clean['gap'],
 299:                     preg_quote($options['before'], '/'),
 300:                     $clean['word'],
 301:                     preg_quote($options['after'], '/')
 302:                 );
 303:                 $str = preg_replace($kleenex, $clean['replacement'], $str);
 304:                 break;
 305:         }
 306: 
 307:         return $str;
 308:     }
 309: 
 310:     /**
 311:      * Wraps text to a specific width, can optionally wrap at word breaks.
 312:      *
 313:      * ### Options
 314:      *
 315:      * - `width` The width to wrap to. Defaults to 72.
 316:      * - `wordWrap` Only wrap on words breaks (spaces) Defaults to true.
 317:      * - `indent` String to indent with. Defaults to null.
 318:      * - `indentAt` 0 based index to start indenting at. Defaults to 0.
 319:      *
 320:      * @param string $text The text to format.
 321:      * @param array|int $options Array of options to use, or an integer to wrap the text to.
 322:      * @return string Formatted text.
 323:      */
 324:     public static function wrap($text, $options = [])
 325:     {
 326:         if (is_numeric($options)) {
 327:             $options = ['width' => $options];
 328:         }
 329:         $options += ['width' => 72, 'wordWrap' => true, 'indent' => null, 'indentAt' => 0];
 330:         if ($options['wordWrap']) {
 331:             $wrapped = self::wordWrap($text, $options['width'], "\n");
 332:         } else {
 333:             $wrapped = trim(chunk_split($text, $options['width'] - 1, "\n"));
 334:         }
 335:         if (!empty($options['indent'])) {
 336:             $chunks = explode("\n", $wrapped);
 337:             for ($i = $options['indentAt'], $len = count($chunks); $i < $len; $i++) {
 338:                 $chunks[$i] = $options['indent'] . $chunks[$i];
 339:             }
 340:             $wrapped = implode("\n", $chunks);
 341:         }
 342: 
 343:         return $wrapped;
 344:     }
 345: 
 346:     /**
 347:      * Wraps a complete block of text to a specific width, can optionally wrap
 348:      * at word breaks.
 349:      *
 350:      * ### Options
 351:      *
 352:      * - `width` The width to wrap to. Defaults to 72.
 353:      * - `wordWrap` Only wrap on words breaks (spaces) Defaults to true.
 354:      * - `indent` String to indent with. Defaults to null.
 355:      * - `indentAt` 0 based index to start indenting at. Defaults to 0.
 356:      *
 357:      * @param string $text The text to format.
 358:      * @param array|int $options Array of options to use, or an integer to wrap the text to.
 359:      * @return string Formatted text.
 360:      */
 361:     public static function wrapBlock($text, $options = [])
 362:     {
 363:         if (is_numeric($options)) {
 364:             $options = ['width' => $options];
 365:         }
 366:         $options += ['width' => 72, 'wordWrap' => true, 'indent' => null, 'indentAt' => 0];
 367: 
 368:         if (!empty($options['indentAt']) && $options['indentAt'] === 0) {
 369:             $indentLength = !empty($options['indent']) ? strlen($options['indent']) : 0;
 370:             $options['width'] -= $indentLength;
 371: 
 372:             return self::wrap($text, $options);
 373:         }
 374: 
 375:         $wrapped = self::wrap($text, $options);
 376: 
 377:         if (!empty($options['indent'])) {
 378:             $indentationLength = mb_strlen($options['indent']);
 379:             $chunks = explode("\n", $wrapped);
 380:             $count = count($chunks);
 381:             if ($count < 2) {
 382:                 return $wrapped;
 383:             }
 384:             $toRewrap = '';
 385:             for ($i = $options['indentAt']; $i < $count; $i++) {
 386:                 $toRewrap .= mb_substr($chunks[$i], $indentationLength) . ' ';
 387:                 unset($chunks[$i]);
 388:             }
 389:             $options['width'] -= $indentationLength;
 390:             $options['indentAt'] = 0;
 391:             $rewrapped = self::wrap($toRewrap, $options);
 392:             $newChunks = explode("\n", $rewrapped);
 393: 
 394:             $chunks = array_merge($chunks, $newChunks);
 395:             $wrapped = implode("\n", $chunks);
 396:         }
 397: 
 398:         return $wrapped;
 399:     }
 400: 
 401:     /**
 402:      * Unicode and newline aware version of wordwrap.
 403:      *
 404:      * @param string $text The text to format.
 405:      * @param int $width The width to wrap to. Defaults to 72.
 406:      * @param string $break The line is broken using the optional break parameter. Defaults to '\n'.
 407:      * @param bool $cut If the cut is set to true, the string is always wrapped at the specified width.
 408:      * @return string Formatted text.
 409:      */
 410:     public static function wordWrap($text, $width = 72, $break = "\n", $cut = false)
 411:     {
 412:         $paragraphs = explode($break, $text);
 413:         foreach ($paragraphs as &$paragraph) {
 414:             $paragraph = static::_wordWrap($paragraph, $width, $break, $cut);
 415:         }
 416: 
 417:         return implode($break, $paragraphs);
 418:     }
 419: 
 420:     /**
 421:      * Unicode aware version of wordwrap as helper method.
 422:      *
 423:      * @param string $text The text to format.
 424:      * @param int $width The width to wrap to. Defaults to 72.
 425:      * @param string $break The line is broken using the optional break parameter. Defaults to '\n'.
 426:      * @param bool $cut If the cut is set to true, the string is always wrapped at the specified width.
 427:      * @return string Formatted text.
 428:      */
 429:     protected static function _wordWrap($text, $width = 72, $break = "\n", $cut = false)
 430:     {
 431:         if ($cut) {
 432:             $parts = [];
 433:             while (mb_strlen($text) > 0) {
 434:                 $part = mb_substr($text, 0, $width);
 435:                 $parts[] = trim($part);
 436:                 $text = trim(mb_substr($text, mb_strlen($part)));
 437:             }
 438: 
 439:             return implode($break, $parts);
 440:         }
 441: 
 442:         $parts = [];
 443:         while (mb_strlen($text) > 0) {
 444:             if ($width >= mb_strlen($text)) {
 445:                 $parts[] = trim($text);
 446:                 break;
 447:             }
 448: 
 449:             $part = mb_substr($text, 0, $width);
 450:             $nextChar = mb_substr($text, $width, 1);
 451:             if ($nextChar !== ' ') {
 452:                 $breakAt = mb_strrpos($part, ' ');
 453:                 if ($breakAt === false) {
 454:                     $breakAt = mb_strpos($text, ' ', $width);
 455:                 }
 456:                 if ($breakAt === false) {
 457:                     $parts[] = trim($text);
 458:                     break;
 459:                 }
 460:                 $part = mb_substr($text, 0, $breakAt);
 461:             }
 462: 
 463:             $part = trim($part);
 464:             $parts[] = $part;
 465:             $text = trim(mb_substr($text, mb_strlen($part)));
 466:         }
 467: 
 468:         return implode($break, $parts);
 469:     }
 470: 
 471:     /**
 472:      * Highlights a given phrase in a text. You can specify any expression in highlighter that
 473:      * may include the \1 expression to include the $phrase found.
 474:      *
 475:      * ### Options:
 476:      *
 477:      * - `format` The piece of HTML with that the phrase will be highlighted
 478:      * - `html` If true, will ignore any HTML tags, ensuring that only the correct text is highlighted
 479:      * - `regex` A custom regex rule that is used to match words, default is '|$tag|iu'
 480:      * - `limit` A limit, optional, defaults to -1 (none)
 481:      *
 482:      * @param string $text Text to search the phrase in.
 483:      * @param string|array $phrase The phrase or phrases that will be searched.
 484:      * @param array $options An array of HTML attributes and options.
 485:      * @return string The highlighted text
 486:      * @link https://book.cakephp.org/3.0/en/core-libraries/text.html#highlighting-substrings
 487:      */
 488:     public static function highlight($text, $phrase, array $options = [])
 489:     {
 490:         if (empty($phrase)) {
 491:             return $text;
 492:         }
 493: 
 494:         $defaults = [
 495:             'format' => '<span class="highlight">\1</span>',
 496:             'html' => false,
 497:             'regex' => '|%s|iu',
 498:             'limit' => -1,
 499:         ];
 500:         $options += $defaults;
 501: 
 502:         $html = $format = $limit = null;
 503:         /**
 504:          * @var bool $html
 505:          * @var string|array $format
 506:          * @var int $limit
 507:          */
 508:         extract($options);
 509: 
 510:         if (is_array($phrase)) {
 511:             $replace = [];
 512:             $with = [];
 513: 
 514:             foreach ($phrase as $key => $segment) {
 515:                 $segment = '(' . preg_quote($segment, '|') . ')';
 516:                 if ($html) {
 517:                     $segment = "(?![^<]+>)$segment(?![^<]+>)";
 518:                 }
 519: 
 520:                 $with[] = is_array($format) ? $format[$key] : $format;
 521:                 $replace[] = sprintf($options['regex'], $segment);
 522:             }
 523: 
 524:             return preg_replace($replace, $with, $text, $limit);
 525:         }
 526: 
 527:         $phrase = '(' . preg_quote($phrase, '|') . ')';
 528:         if ($html) {
 529:             $phrase = "(?![^<]+>)$phrase(?![^<]+>)";
 530:         }
 531: 
 532:         return preg_replace(sprintf($options['regex'], $phrase), $format, $text, $limit);
 533:     }
 534: 
 535:     /**
 536:      * Strips given text of all links (<a href=....).
 537:      *
 538:      * *Warning* This method is not an robust solution in preventing XSS
 539:      * or malicious HTML.
 540:      *
 541:      * @param string $text Text
 542:      * @return string The text without links
 543:      * @deprecated 3.2.12 This method will be removed in 4.0.0
 544:      */
 545:     public static function stripLinks($text)
 546:     {
 547:         deprecationWarning('This method will be removed in 4.0.0.');
 548:         do {
 549:             $text = preg_replace('#</?a([/\s][^>]*)?(>|$)#i', '', $text, -1, $count);
 550:         } while ($count);
 551: 
 552:         return $text;
 553:     }
 554: 
 555:     /**
 556:      * Truncates text starting from the end.
 557:      *
 558:      * Cuts a string to the length of $length and replaces the first characters
 559:      * with the ellipsis if the text is longer than length.
 560:      *
 561:      * ### Options:
 562:      *
 563:      * - `ellipsis` Will be used as beginning and prepended to the trimmed string
 564:      * - `exact` If false, $text will not be cut mid-word
 565:      *
 566:      * @param string $text String to truncate.
 567:      * @param int $length Length of returned string, including ellipsis.
 568:      * @param array $options An array of options.
 569:      * @return string Trimmed string.
 570:      */
 571:     public static function tail($text, $length = 100, array $options = [])
 572:     {
 573:         $default = [
 574:             'ellipsis' => '...', 'exact' => true
 575:         ];
 576:         $options += $default;
 577:         $exact = $ellipsis = null;
 578:         /**
 579:          * @var string $ellipsis
 580:          * @var bool $exact
 581:          */
 582:         extract($options);
 583: 
 584:         if (mb_strlen($text) <= $length) {
 585:             return $text;
 586:         }
 587: 
 588:         $truncate = mb_substr($text, mb_strlen($text) - $length + mb_strlen($ellipsis));
 589:         if (!$exact) {
 590:             $spacepos = mb_strpos($truncate, ' ');
 591:             $truncate = $spacepos === false ? '' : trim(mb_substr($truncate, $spacepos));
 592:         }
 593: 
 594:         return $ellipsis . $truncate;
 595:     }
 596: 
 597:     /**
 598:      * Truncates text.
 599:      *
 600:      * Cuts a string to the length of $length and replaces the last characters
 601:      * with the ellipsis if the text is longer than length.
 602:      *
 603:      * ### Options:
 604:      *
 605:      * - `ellipsis` Will be used as ending and appended to the trimmed string
 606:      * - `exact` If false, $text will not be cut mid-word
 607:      * - `html` If true, HTML tags would be handled correctly
 608:      * - `trimWidth` If true, $text will be truncated with the width
 609:      *
 610:      * @param string $text String to truncate.
 611:      * @param int $length Length of returned string, including ellipsis.
 612:      * @param array $options An array of HTML attributes and options.
 613:      * @return string Trimmed string.
 614:      * @link https://book.cakephp.org/3.0/en/core-libraries/text.html#truncating-text
 615:      */
 616:     public static function truncate($text, $length = 100, array $options = [])
 617:     {
 618:         $default = [
 619:             'ellipsis' => '...', 'exact' => true, 'html' => false, 'trimWidth' => false,
 620:         ];
 621:         if (!empty($options['html']) && strtolower(mb_internal_encoding()) === 'utf-8') {
 622:             $default['ellipsis'] = "\xe2\x80\xa6";
 623:         }
 624:         $options += $default;
 625: 
 626:         $prefix = '';
 627:         $suffix = $options['ellipsis'];
 628: 
 629:         if ($options['html']) {
 630:             $ellipsisLength = self::_strlen(strip_tags($options['ellipsis']), $options);
 631: 
 632:             $truncateLength = 0;
 633:             $totalLength = 0;
 634:             $openTags = [];
 635:             $truncate = '';
 636: 
 637:             preg_match_all('/(<\/?([\w+]+)[^>]*>)?([^<>]*)/', $text, $tags, PREG_SET_ORDER);
 638:             foreach ($tags as $tag) {
 639:                 $contentLength = 0;
 640:                 if (!in_array($tag[2], static::$_defaultHtmlNoCount, true)) {
 641:                     $contentLength = self::_strlen($tag[3], $options);
 642:                 }
 643: 
 644:                 if ($truncate === '') {
 645:                     if (!preg_match('/img|br|input|hr|area|base|basefont|col|frame|isindex|link|meta|param/i', $tag[2])) {
 646:                         if (preg_match('/<[\w]+[^>]*>/', $tag[0])) {
 647:                             array_unshift($openTags, $tag[2]);
 648:                         } elseif (preg_match('/<\/([\w]+)[^>]*>/', $tag[0], $closeTag)) {
 649:                             $pos = array_search($closeTag[1], $openTags, true);
 650:                             if ($pos !== false) {
 651:                                 array_splice($openTags, $pos, 1);
 652:                             }
 653:                         }
 654:                     }
 655: 
 656:                     $prefix .= $tag[1];
 657: 
 658:                     if ($totalLength + $contentLength + $ellipsisLength > $length) {
 659:                         $truncate = $tag[3];
 660:                         $truncateLength = $length - $totalLength;
 661:                     } else {
 662:                         $prefix .= $tag[3];
 663:                     }
 664:                 }
 665: 
 666:                 $totalLength += $contentLength;
 667:                 if ($totalLength > $length) {
 668:                     break;
 669:                 }
 670:             }
 671: 
 672:             if ($totalLength <= $length) {
 673:                 return $text;
 674:             }
 675: 
 676:             $text = $truncate;
 677:             $length = $truncateLength;
 678: 
 679:             foreach ($openTags as $tag) {
 680:                 $suffix .= '</' . $tag . '>';
 681:             }
 682:         } else {
 683:             if (self::_strlen($text, $options) <= $length) {
 684:                 return $text;
 685:             }
 686:             $ellipsisLength = self::_strlen($options['ellipsis'], $options);
 687:         }
 688: 
 689:         $result = self::_substr($text, 0, $length - $ellipsisLength, $options);
 690: 
 691:         if (!$options['exact']) {
 692:             if (self::_substr($text, $length - $ellipsisLength, 1, $options) !== ' ') {
 693:                 $result = self::_removeLastWord($result);
 694:             }
 695: 
 696:             // If result is empty, then we don't need to count ellipsis in the cut.
 697:             if (!strlen($result)) {
 698:                 $result = self::_substr($text, 0, $length, $options);
 699:             }
 700:         }
 701: 
 702:         return $prefix . $result . $suffix;
 703:     }
 704: 
 705:     /**
 706:      * Truncate text with specified width.
 707:      *
 708:      * @param string $text String to truncate.
 709:      * @param int $length Length of returned string, including ellipsis.
 710:      * @param array $options An array of HTML attributes and options.
 711:      * @return string Trimmed string.
 712:      * @see \Cake\Utility\Text::truncate()
 713:      */
 714:     public static function truncateByWidth($text, $length = 100, array $options = [])
 715:     {
 716:         return static::truncate($text, $length, ['trimWidth' => true] + $options);
 717:     }
 718: 
 719:     /**
 720:      * Get string length.
 721:      *
 722:      * ### Options:
 723:      *
 724:      * - `html` If true, HTML entities will be handled as decoded characters.
 725:      * - `trimWidth` If true, the width will return.
 726:      *
 727:      * @param string $text The string being checked for length
 728:      * @param array $options An array of options.
 729:      * @return int
 730:      */
 731:     protected static function _strlen($text, array $options)
 732:     {
 733:         if (empty($options['trimWidth'])) {
 734:             $strlen = 'mb_strlen';
 735:         } else {
 736:             $strlen = 'mb_strwidth';
 737:         }
 738: 
 739:         if (empty($options['html'])) {
 740:             return $strlen($text);
 741:         }
 742: 
 743:         $pattern = '/&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};/i';
 744:         $replace = preg_replace_callback(
 745:             $pattern,
 746:             function ($match) use ($strlen) {
 747:                 $utf8 = html_entity_decode($match[0], ENT_HTML5 | ENT_QUOTES, 'UTF-8');
 748: 
 749:                 return str_repeat(' ', $strlen($utf8, 'UTF-8'));
 750:             },
 751:             $text
 752:         );
 753: 
 754:         return $strlen($replace);
 755:     }
 756: 
    /**
     * Return part of a string.
     *
     * Depending on the options the slice is taken with `mb_substr()` (length in
     * characters) or `mb_strimwidth()` (length in display width). With `html`
     * enabled the text is walked entity by entity so that an entity is either
     * kept whole or left out, never cut in the middle.
     *
     * ### Options:
     *
     * - `html` If true, HTML entities will be handled as decoded characters.
     * - `trimWidth` If true, will be truncated with specified width.
     *
     * @param string $text The input string.
     * @param int $start The position to begin extracting. A negative value is
     *   counted from the end of the string.
     * @param int|null $length The desired length. `null` takes everything up to
     *   the end; a negative value leaves that much off the end.
     * @param array $options An array of options.
     * @return string
     */
    protected static function _substr($text, $start, $length, array $options)
    {
        if (empty($options['trimWidth'])) {
            $substr = 'mb_substr';
        } else {
            $substr = 'mb_strimwidth';
        }

        // Positions are measured with `trimWidth` forced off (the left operand
        // of the array union wins), i.e. never in display width.
        $maxPosition = self::_strlen($text, ['trimWidth' => false] + $options);
        if ($start < 0) {
            // Negative start: count from the end, clamped to the beginning.
            $start += $maxPosition;
            if ($start < 0) {
                $start = 0;
            }
        }
        if ($start >= $maxPosition) {
            return '';
        }

        if ($length === null) {
            $length = self::_strlen($text, $options);
        }

        if ($length < 0) {
            // Negative length: take the tail starting at $start, then turn the
            // negative length into "measure of that tail minus |length|".
            $text = self::_substr($text, $start, null, $options);
            $start = 0;
            $length += self::_strlen($text, $options);
        }

        if ($length <= 0) {
            return '';
        }

        if (empty($options['html'])) {
            return (string)$substr($text, $start, $length);
        }

        // $totalOffset: how much of the text before $start has been consumed.
        // $totalLength: measured length of what has been appended to $result.
        $totalOffset = 0;
        $totalLength = 0;
        $result = '';

        // Split into alternating plain-text and entity parts; the capture group
        // keeps the entities in the list and empty parts are dropped.
        $pattern = '/(&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};)/i';
        $parts = preg_split($pattern, $text, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
        foreach ($parts as $part) {
            $offset = 0;

            if ($totalOffset < $start) {
                $len = self::_strlen($part, ['trimWidth' => false] + $options);
                if ($totalOffset + $len <= $start) {
                    // The whole part lies before the start position: skip it.
                    $totalOffset += $len;
                    continue;
                }

                // The start position falls inside this part.
                $offset = $start - $totalOffset;
                $totalOffset = $start;
            }

            $len = self::_strlen($part, $options);
            if ($offset !== 0 || $totalLength + $len > $length) {
                // The part has to be cut, either at its front or at its end.
                if (strpos($part, '&') === 0 && preg_match($pattern, $part)
                    && $part !== html_entity_decode($part, ENT_HTML5 | ENT_QUOTES, 'UTF-8')
                ) {
                    // An entity that really decodes cannot be cut with substr;
                    // it is left out entirely and the walk continues.
                    continue;
                }

                $part = $substr($part, $offset, $length - $totalLength);
                $len = self::_strlen($part, $options);
            }

            $result .= $part;
            $totalLength += $len;
            if ($totalLength >= $length) {
                break;
            }
        }

        return $result;
    }
 850: 
 851:     /**
 852:      * Removes the last word from the input text.
 853:      *
 854:      * @param string $text The input text
 855:      * @return string
 856:      */
 857:     protected static function _removeLastWord($text)
 858:     {
 859:         $spacepos = mb_strrpos($text, ' ');
 860: 
 861:         if ($spacepos !== false) {
 862:             $lastWord = mb_strrpos($text, $spacepos);
 863: 
 864:             // Some languages are written without word separation.
 865:             // We recognize a string as a word if it doesn't contain any full-width characters.
 866:             if (mb_strwidth($lastWord) === mb_strlen($lastWord)) {
 867:                 $text = mb_substr($text, 0, $spacepos);
 868:             }
 869: 
 870:             return $text;
 871:         }
 872: 
 873:         return '';
 874:     }
 875: 
 876:     /**
 877:      * Extracts an excerpt from the text surrounding the phrase with a number of characters on each side
 878:      * determined by radius.
 879:      *
 880:      * @param string $text String to search the phrase in
 881:      * @param string $phrase Phrase that will be searched for
 882:      * @param int $radius The amount of characters that will be returned on each side of the founded phrase
 883:      * @param string $ellipsis Ending that will be appended
 884:      * @return string Modified string
 885:      * @link https://book.cakephp.org/3.0/en/core-libraries/text.html#extracting-an-excerpt
 886:      */
 887:     public static function excerpt($text, $phrase, $radius = 100, $ellipsis = '...')
 888:     {
 889:         if (empty($text) || empty($phrase)) {
 890:             return static::truncate($text, $radius * 2, ['ellipsis' => $ellipsis]);
 891:         }
 892: 
 893:         $append = $prepend = $ellipsis;
 894: 
 895:         $phraseLen = mb_strlen($phrase);
 896:         $textLen = mb_strlen($text);
 897: 
 898:         $pos = mb_stripos($text, $phrase);
 899:         if ($pos === false) {
 900:             return mb_substr($text, 0, $radius) . $ellipsis;
 901:         }
 902: 
 903:         $startPos = $pos - $radius;
 904:         if ($startPos <= 0) {
 905:             $startPos = 0;
 906:             $prepend = '';
 907:         }
 908: 
 909:         $endPos = $pos + $phraseLen + $radius;
 910:         if ($endPos >= $textLen) {
 911:             $endPos = $textLen;
 912:             $append = '';
 913:         }
 914: 
 915:         $excerpt = mb_substr($text, $startPos, $endPos - $startPos);
 916:         $excerpt = $prepend . $excerpt . $append;
 917: 
 918:         return $excerpt;
 919:     }
 920: 
 921:     /**
 922:      * Creates a comma separated list where the last two items are joined with 'and', forming natural language.
 923:      *
 924:      * @param string[] $list The list to be joined.
 925:      * @param string|null $and The word used to join the last and second last items together with. Defaults to 'and'.
 926:      * @param string $separator The separator used to join all the other items together. Defaults to ', '.
 927:      * @return string The glued together string.
 928:      * @link https://book.cakephp.org/3.0/en/core-libraries/text.html#converting-an-array-to-sentence-form
 929:      */
 930:     public static function toList(array $list, $and = null, $separator = ', ')
 931:     {
 932:         if ($and === null) {
 933:             $and = __d('cake', 'and');
 934:         }
 935:         if (count($list) > 1) {
 936:             return implode($separator, array_slice($list, null, -1)) . ' ' . $and . ' ' . array_pop($list);
 937:         }
 938: 
 939:         return array_pop($list);
 940:     }
 941: 
 942:     /**
 943:      * Check if the string contain multibyte characters
 944:      *
 945:      * @param string $string value to test
 946:      * @return bool
 947:      */
 948:     public static function isMultibyte($string)
 949:     {
 950:         $length = strlen($string);
 951: 
 952:         for ($i = 0; $i < $length; $i++) {
 953:             $value = ord($string[$i]);
 954:             if ($value > 128) {
 955:                 return true;
 956:             }
 957:         }
 958: 
 959:         return false;
 960:     }
 961: 
 962:     /**
 963:      * Converts a multibyte character string
 964:      * to the decimal value of the character
 965:      *
 966:      * @param string $string String to convert.
 967:      * @return array
 968:      */
 969:     public static function utf8($string)
 970:     {
 971:         $map = [];
 972: 
 973:         $values = [];
 974:         $find = 1;
 975:         $length = strlen($string);
 976: 
 977:         for ($i = 0; $i < $length; $i++) {
 978:             $value = ord($string[$i]);
 979: 
 980:             if ($value < 128) {
 981:                 $map[] = $value;
 982:             } else {
 983:                 if (empty($values)) {
 984:                     $find = ($value < 224) ? 2 : 3;
 985:                 }
 986:                 $values[] = $value;
 987: 
 988:                 if (count($values) === $find) {
 989:                     if ($find == 3) {
 990:                         $map[] = (($values[0] % 16) * 4096) + (($values[1] % 64) * 64) + ($values[2] % 64);
 991:                     } else {
 992:                         $map[] = (($values[0] % 32) * 64) + ($values[1] % 64);
 993:                     }
 994:                     $values = [];
 995:                     $find = 1;
 996:                 }
 997:             }
 998:         }
 999: 
1000:         return $map;
1001:     }
1002: 
1003:     /**
1004:      * Converts the decimal value of a multibyte character string
1005:      * to a string
1006:      *
1007:      * @param array $array Array
1008:      * @return string
1009:      */
1010:     public static function ascii(array $array)
1011:     {
1012:         $ascii = '';
1013: 
1014:         foreach ($array as $utf8) {
1015:             if ($utf8 < 128) {
1016:                 $ascii .= chr($utf8);
1017:             } elseif ($utf8 < 2048) {
1018:                 $ascii .= chr(192 + (($utf8 - ($utf8 % 64)) / 64));
1019:                 $ascii .= chr(128 + ($utf8 % 64));
1020:             } else {
1021:                 $ascii .= chr(224 + (($utf8 - ($utf8 % 4096)) / 4096));
1022:                 $ascii .= chr(128 + ((($utf8 % 4096) - ($utf8 % 64)) / 64));
1023:                 $ascii .= chr(128 + ($utf8 % 64));
1024:             }
1025:         }
1026: 
1027:         return $ascii;
1028:     }
1029: 
1030:     /**
1031:      * Converts filesize from human readable string to bytes
1032:      *
1033:      * @param string $size Size in human readable string like '5MB', '5M', '500B', '50kb' etc.
1034:      * @param mixed $default Value to be returned when invalid size was used, for example 'Unknown type'
1035:      * @return mixed Number of bytes as integer on success, `$default` on failure if not false
1036:      * @throws \InvalidArgumentException On invalid Unit type.
1037:      * @link https://book.cakephp.org/3.0/en/core-libraries/text.html#Cake\Utility\Text::parseFileSize
1038:      */
1039:     public static function parseFileSize($size, $default = false)
1040:     {
1041:         if (ctype_digit($size)) {
1042:             return (int)$size;
1043:         }
1044:         $size = strtoupper($size);
1045: 
1046:         $l = -2;
1047:         $i = array_search(substr($size, -2), ['KB', 'MB', 'GB', 'TB', 'PB'], true);
1048:         if ($i === false) {
1049:             $l = -1;
1050:             $i = array_search(substr($size, -1), ['K', 'M', 'G', 'T', 'P'], true);
1051:         }
1052:         if ($i !== false) {
1053:             $size = (float)substr($size, 0, $l);
1054: 
1055:             return $size * pow(1024, $i + 1);
1056:         }
1057: 
1058:         if (substr($size, -1) === 'B' && ctype_digit(substr($size, 0, -1))) {
1059:             $size = substr($size, 0, -1);
1060: 
1061:             return (int)$size;
1062:         }
1063: 
1064:         if ($default !== false) {
1065:             return $default;
1066:         }
1067:         throw new InvalidArgumentException('No unit type.');
1068:     }
1069: 
    /**
     * Get the default transliterator.
     *
     * Returns the value currently held in the static `$_defaultTransliterator`
     * property as is; no instance is created on demand.
     *
     * @return \Transliterator|null Either a Transliterator instance, or `null`
     *   in case no transliterator has been set yet.
     * @since 3.7.0
     */
    public static function getTransliterator()
    {
        return static::$_defaultTransliterator;
    }
1081: 
    /**
     * Set the default transliterator.
     *
     * Only the stored instance is replaced; the default transliterator
     * identifier string is not touched by this method.
     *
     * @param \Transliterator $transliterator A `Transliterator` instance.
     * @return void
     * @since 3.7.0
     */
    public static function setTransliterator(\Transliterator $transliterator)
    {
        static::$_defaultTransliterator = $transliterator;
    }
1093: 
    /**
     * Get default transliterator identifier string.
     *
     * Returns the value currently held in the static
     * `$_defaultTransliteratorId` property.
     *
     * @return string Transliterator identifier.
     */
    public static function getTransliteratorId()
    {
        return static::$_defaultTransliteratorId;
    }
1103: 
1104:     /**
1105:      * Set default transliterator identifier string.
1106:      *
1107:      * @param string $transliteratorId Transliterator identifier.
1108:      * @return void
1109:      */
1110:     public static function setTransliteratorId($transliteratorId)
1111:     {
1112:         static::setTransliterator(transliterator_create($transliteratorId));
1113:         static::$_defaultTransliteratorId = $transliteratorId;
1114:     }
1115: 
1116:     /**
1117:      * Transliterate string.
1118:      *
1119:      * @param string $string String to transliterate.
1120:      * @param \Transliterator|string|null $transliterator Either a Transliterator
1121:      *   instance, or a transliterator identifier string. If `null`, the default
1122:      *   transliterator (identifier) set via `setTransliteratorId()` or
1123:      *   `setTransliterator()` will be used.
1124:      * @return string
1125:      * @see https://secure.php.net/manual/en/transliterator.transliterate.php
1126:      */
1127:     public static function transliterate($string, $transliterator = null)
1128:     {
1129:         if (!$transliterator) {
1130:             $transliterator = static::$_defaultTransliterator ?: static::$_defaultTransliteratorId;
1131:         }
1132: 
1133:         return transliterator_transliterate($transliterator, $string);
1134:     }
1135: 
1136:     /**
1137:      * Returns a string with all spaces converted to dashes (by default),
1138:      * characters transliterated to ASCII characters, and non word characters removed.
1139:      *
1140:      * ### Options:
1141:      *
1142:      * - `replacement`: Replacement string. Default '-'.
1143:      * - `transliteratorId`: A valid transliterator id string.
1144:      *   If `null` (default) the transliterator (identifier) set via
1145:      *   `setTransliteratorId()` or `setTransliterator()` will be used.
1146:      *   If `false` no transliteration will be done, only non words will be removed.
1147:      * - `preserve`: Specific non-word character to preserve. Default `null`.
1148:      *   For e.g. this option can be set to '.' to generate clean file names.
1149:      *
1150:      * @param string $string the string you want to slug
1151:      * @param array $options If string it will be use as replacement character
1152:      *   or an array of options.
1153:      * @return string
1154:      * @see setTransliterator()
1155:      * @see setTransliteratorId()
1156:      */
1157:     public static function slug($string, $options = [])
1158:     {
1159:         if (is_string($options)) {
1160:             $options = ['replacement' => $options];
1161:         }
1162:         $options += [
1163:             'replacement' => '-',
1164:             'transliteratorId' => null,
1165:             'preserve' => null
1166:         ];
1167: 
1168:         if ($options['transliteratorId'] !== false) {
1169:             $string = static::transliterate($string, $options['transliteratorId']);
1170:         }
1171: 
1172:         $regex = '^\s\p{Ll}\p{Lm}\p{Lo}\p{Lt}\p{Lu}\p{Nd}';
1173:         if ($options['preserve']) {
1174:             $regex .= preg_quote($options['preserve'], '/');
1175:         }
1176:         $quotedReplacement = preg_quote($options['replacement'], '/');
1177:         $map = [
1178:             '/[' . $regex . ']/mu' => ' ',
1179:             '/[\s]+/mu' => $options['replacement'],
1180:             sprintf('/^[%s]+|[%s]+$/', $quotedReplacement, $quotedReplacement) => '',
1181:         ];
1182:         $string = preg_replace(array_keys($map), $map, $string);
1183: 
1184:         return $string;
1185:     }
1186: }
1187:
C CakePHP 3.8 Red Velvet API

Version:

Namespaces

Classes

Traits