function Idn::utf8Decode
Takes a UTF-8 encoded string and converts it into a series of integer code points. Any invalid byte sequences will be replaced by a U+FFFD replacement code point.
Parameters
string $input: The UTF-8 encoded string to decode.
Return value
array<int, int>
See also
https://encoding.spec.whatwg.org/#utf-8-decoder
3 calls to Idn::utf8Decode()
- Idn::mapCodePoints in vendor/symfony/polyfill-intl-idn/Idn.php
- Idn::punycodeEncode in vendor/symfony/polyfill-intl-idn/Idn.php
- Idn::validateLabel in vendor/symfony/polyfill-intl-idn/Idn.php
File
- vendor/symfony/polyfill-intl-idn/Idn.php, line 799
Class
- Idn
- @internal
Namespace
Symfony\Polyfill\Intl\Idn
Code
/**
 * Decodes a UTF-8 byte string into a list of Unicode code points.
 *
 * The input is scanned one byte at a time. Every ill-formed sequence,
 * including one cut short by the end of the input, contributes a single
 * U+FFFD replacement code point to the result; the byte that broke a
 * multi-byte sequence is then examined again as a potential lead byte.
 *
 * @param string $input
 *
 * @return array<int, int>
 *
 * @see https://encoding.spec.whatwg.org/#utf-8-decoder
 */
private static function utf8Decode($input)
{
    $decoded = [];
    $expected = 0;    // Continuation bytes required by the sequence in progress.
    $consumed = 0;    // Continuation bytes accepted so far for that sequence.
    $accumulator = 0; // Code point value assembled so far.
    $minNext = 0x80;  // Inclusive range allowed for the next continuation byte.
    $maxNext = 0xbf;
    $size = \strlen($input);
    $pos = 0;

    while ($pos < $size) {
        $octet = \ord($input[$pos]);

        if (0 !== $expected) {
            // A multi-byte sequence is in progress.
            if ($octet >= $minNext && $octet <= $maxNext) {
                // Only the first continuation byte can have a narrowed range.
                $minNext = 0x80;
                $maxNext = 0xbf;
                $accumulator = ($accumulator << 6) | ($octet & 0x3f);
                ++$consumed;

                if ($consumed === $expected) {
                    $decoded[] = $accumulator;
                    $accumulator = 0;
                    $expected = 0;
                    $consumed = 0;
                }

                ++$pos;
            } else {
                // Broken sequence: emit the replacement and leave the cursor
                // in place so this byte is re-read as a lead byte.
                $decoded[] = 0xfffd;
                $accumulator = 0;
                $expected = 0;
                $consumed = 0;
                $minNext = 0x80;
                $maxNext = 0xbf;
            }

            continue;
        }

        // No sequence in progress: this byte is consumed whatever it is.
        ++$pos;

        if ($octet <= 0x7f) {
            // Single-byte (ASCII) code point.
            $decoded[] = $octet;
            continue;
        }

        if ($octet < 0xc2 || $octet > 0xf4) {
            // Stray continuation byte or a byte that can never start a sequence.
            $decoded[] = 0xfffd;
            continue;
        }

        if ($octet <= 0xdf) {
            // Two-byte sequence.
            $expected = 1;
            $accumulator = $octet & 0x1f;
        } elseif ($octet <= 0xef) {
            // Three-byte sequence; 0xe0 must not be overlong and 0xed must
            // not encode a surrogate.
            $minNext = 0xe0 === $octet ? 0xa0 : 0x80;
            $maxNext = 0xed === $octet ? 0x9f : 0xbf;
            $expected = 2;
            $accumulator = $octet & 0xf;
        } else {
            // Four-byte sequence; 0xf0 must not be overlong and 0xf4 must
            // not exceed U+10FFFF.
            $minNext = 0xf0 === $octet ? 0x90 : 0x80;
            $maxNext = 0xf4 === $octet ? 0x8f : 0xbf;
            $expected = 3;
            $accumulator = $octet & 0x7;
        }
    }

    // The input ended in the middle of a sequence.
    if (0 !== $expected) {
        $decoded[] = 0xfffd;
    }

    return $decoded;
}