class EmailLexer
@extends AbstractLexer<int, string>
Hierarchy
- class \Doctrine\Common\Lexer\AbstractLexer
- class \Egulias\EmailValidator\EmailLexer extends \Doctrine\Common\Lexer\AbstractLexer
Expanded class hierarchy of EmailLexer
18 files declare their use of EmailLexer
- Comment.php in vendor/egulias/email-validator/src/Parser/Comment.php
- CommentStrategy.php in vendor/egulias/email-validator/src/Parser/CommentStrategy/CommentStrategy.php
- DNSCheckValidation.php in vendor/egulias/email-validator/src/Validation/DNSCheckValidation.php
- DomainComment.php in vendor/egulias/email-validator/src/Parser/CommentStrategy/DomainComment.php
- DomainLiteral.php in vendor/egulias/email-validator/src/Parser/DomainLiteral.php
File
- vendor/egulias/email-validator/src/EmailLexer.php, line 9
Namespace
Egulias\EmailValidator
View source
/**
 * Lexer that splits an e-mail address string into typed tokens.
 *
 * Builds on AbstractLexer: this class supplies the regular-expression
 * fragments used for scanning, maps every scanned value to an integer token
 * type, tracks the current and previous token, and can optionally record the
 * values of the tokens it walks over.
 */
class EmailLexer extends AbstractLexer
{
    // Token type identifiers. Most single-character types reuse the ASCII
    // code of the character they stand for.
    public const S_EMPTY = -1;
    public const C_NUL = 0;
    public const S_HTAB = 9;
    public const S_LF = 10;
    public const S_CR = 13;
    public const S_SP = 32;
    public const EXCLAMATION = 33;
    public const S_DQUOTE = 34;
    public const NUMBER_SIGN = 35;
    public const DOLLAR = 36;
    public const PERCENTAGE = 37;
    public const AMPERSAND = 38;
    public const S_SQUOTE = 39;
    public const S_OPENPARENTHESIS = 40;
    public const S_CLOSEPARENTHESIS = 41;
    public const ASTERISK = 42;
    public const S_PLUS = 43;
    public const S_COMMA = 44;
    public const S_HYPHEN = 45;
    public const S_DOT = 46;
    public const S_SLASH = 47;
    public const S_COLON = 58;
    public const S_SEMICOLON = 59;
    public const S_LOWERTHAN = 60;
    public const S_EQUAL = 61;
    public const S_GREATERTHAN = 62;
    public const QUESTIONMARK = 63;
    public const S_AT = 64;
    public const S_OPENBRACKET = 91;
    public const S_BACKSLASH = 92;
    public const S_CLOSEBRACKET = 93;
    public const CARET = 94;
    public const S_UNDERSCORE = 95;
    public const S_BACKTICK = 96;
    public const S_OPENCURLYBRACES = 123;
    public const S_PIPE = 124;
    public const S_CLOSECURLYBRACES = 125;
    public const S_TILDE = 126;
    public const C_DEL = 127;
    public const INVERT_QUESTIONMARK = 168;
    public const INVERT_EXCLAMATION = 173;

    // Types with no single-character equivalent.
    public const GENERIC = 300;
    public const S_IPV6TAG = 301;
    public const INVALID = 302;
    public const CRLF = 1310;
    public const S_DOUBLECOLON = 5858;

    public const ASCII_INVALID_FROM = 127;
    public const ASCII_INVALID_TO = 199;

    /**
     * Lookup table from a scanned token value to its token type constant.
     *
     * getType() returns the mapped constant for any value found here.
     *
     * @link http://tools.ietf.org/html/rfc5322#section-3.2.3
     *
     * @var array
     */
    protected $charValue = [
        '{' => self::S_OPENCURLYBRACES,
        '}' => self::S_CLOSECURLYBRACES,
        '(' => self::S_OPENPARENTHESIS,
        ')' => self::S_CLOSEPARENTHESIS,
        '<' => self::S_LOWERTHAN,
        '>' => self::S_GREATERTHAN,
        '[' => self::S_OPENBRACKET,
        ']' => self::S_CLOSEBRACKET,
        ':' => self::S_COLON,
        ';' => self::S_SEMICOLON,
        '@' => self::S_AT,
        '\\' => self::S_BACKSLASH,
        '/' => self::S_SLASH,
        ',' => self::S_COMMA,
        '.' => self::S_DOT,
        "'" => self::S_SQUOTE,
        "`" => self::S_BACKTICK,
        '"' => self::S_DQUOTE,
        '-' => self::S_HYPHEN,
        '::' => self::S_DOUBLECOLON,
        ' ' => self::S_SP,
        "\t" => self::S_HTAB,
        "\r" => self::S_CR,
        "\n" => self::S_LF,
        "\r\n" => self::CRLF,
        'IPv6' => self::S_IPV6TAG,
        '' => self::S_EMPTY,
        '\\0' => self::C_NUL,
        '*' => self::ASTERISK,
        '!' => self::EXCLAMATION,
        '&' => self::AMPERSAND,
        '^' => self::CARET,
        '$' => self::DOLLAR,
        '%' => self::PERCENTAGE,
        '~' => self::S_TILDE,
        '|' => self::S_PIPE,
        '_' => self::S_UNDERSCORE,
        '=' => self::S_EQUAL,
        '+' => self::S_PLUS,
        '¿' => self::INVERT_QUESTIONMARK,
        '?' => self::QUESTIONMARK,
        '#' => self::NUMBER_SIGN,
        '¡' => self::INVERT_EXCLAMATION,
    ];

    // A value is flagged by isInvalidChar() when this pattern does NOT match it.
    public const INVALID_CHARS_REGEX = "/[^\\p{S}\\p{C}\\p{Cc}]+/iu";

    // Pattern consulted by isUTF8Invalid().
    public const VALID_UTF8_REGEX = '/\\p{Cc}+/u';

    // Regex fragments handed to the parent lexer via getCatchablePatterns().
    public const CATCHABLE_PATTERNS = [
        '[a-zA-Z]+[46]?', //ASCII and domain literal
        '[^\\x00-\\x7F]', //UTF-8
        '[0-9]+',
        '\\r\\n',
        '::',
        '\\s+?',
        '.',
    ];

    // Regex fragments handed to the parent lexer via getNonCatchablePatterns().
    public const NON_CATCHABLE_PATTERNS = [
        '[\\xA0-\\xff]+',
    ];

    // Regex modifiers handed to the parent lexer via getModifiers().
    public const MODIFIERS = 'iu';

    /**
     * Set to true once getType() classifies a value as self::INVALID.
     *
     * @var bool
     */
    protected $hasInvalidTokens = false;

    /**
     * Token that was current before the most recent moveNext() call.
     *
     * @var Token<int, string>
     */
    protected Token $previous;

    /**
     * The last matched/seen token.
     *
     * @var Token<int, string>
     */
    public Token $current;

    /**
     * Shared placeholder token (empty value, type S_EMPTY, position 0) used
     * wherever no real token is available.
     *
     * @var Token<int, string>
     */
    private Token $nullToken;

    /**
     * Concatenated values of the tokens seen while recording was switched on.
     *
     * @var string
     */
    private $accumulator = '';

    /**
     * Whether moveNext() should append token values to the accumulator.
     *
     * @var bool
     */
    private $hasToRecord = false;

    /**
     * Creates the placeholder token and points current/previous at it.
     */
    public function __construct()
    {
        /** @var Token<int, string> $placeholder */
        $placeholder = new Token('', self::S_EMPTY, 0);

        $this->nullToken = $placeholder;
        $this->previous = $placeholder;
        $this->current = $placeholder;
        $this->lookahead = null;
    }

    /**
     * Resets the lexer.
     *
     * Clears the invalid-token flag, delegates to the parent reset and then
     * points current/previous back at the placeholder token.
     */
    public function reset(): void
    {
        $this->hasInvalidTokens = false;

        parent::reset();

        $this->previous = $this->nullToken;
        $this->current = $this->nullToken;
    }

    /**
     * Checks that a token of the given type can still be reached.
     *
     * The search runs on a clone, so this lexer's own position is untouched.
     *
     * @param int $type
     * @throws \UnexpectedValueException when the clone has no lookahead left
     *                                   after skipping
     * @return bool always true when no exception is thrown
     *
     * @psalm-suppress InvalidScalarArgument
     */
    public function find($type): bool
    {
        $probe = clone $this;
        $probe->skipUntil($type);

        if ($probe->lookahead) {
            return true;
        }

        throw new \UnexpectedValueException($type . ' not found');
    }

    /**
     * Advances to the next token, maintaining previous/current and, while
     * recording, the accumulator.
     *
     * @return bool the result of the parent moveNext() call
     */
    public function moveNext(): bool
    {
        // While recording, the current value is appended here as well when
        // previous is still the placeholder token.
        if ($this->hasToRecord && $this->previous === $this->nullToken) {
            $this->accumulator .= $this->current->value;
        }

        $this->previous = $this->current;

        // Make sure the parent never sees a null lookahead.
        $this->lookahead ??= $this->nullToken;

        $advanced = parent::moveNext();

        $this->current = $this->token ?? $this->nullToken;

        if ($this->hasToRecord) {
            $this->accumulator .= $this->current->value;
        }

        return $advanced;
    }

    /**
     * Retrieve token type. Also processes the token value if necessary.
     *
     * Classification order: known value in $charValue, NUL byte, invalid
     * character (which also raises the invalid-token flag), otherwise GENERIC.
     *
     * @param string $value
     * @throws \InvalidArgumentException
     * @return int
     */
    protected function getType(&$value): int
    {
        // Values not detected as UTF-8 are converted from Windows-1252 first.
        $normalized = mb_detect_encoding($value, 'auto', true) === 'UTF-8'
            ? $value
            : mb_convert_encoding($value, 'UTF-8', 'Windows-1252');

        if ($this->isValid($normalized)) {
            return $this->charValue[$normalized];
        }

        if ($this->isNullType($normalized)) {
            return self::C_NUL;
        }

        if (!$this->isInvalidChar($normalized)) {
            return self::GENERIC;
        }

        $this->hasInvalidTokens = true;

        return self::INVALID;
    }

    /**
     * Tells whether the value has an entry in the $charValue lookup table.
     */
    protected function isValid(string $value): bool
    {
        return isset($this->charValue[$value]);
    }

    /**
     * Tells whether the value is exactly one NUL byte.
     */
    protected function isNullType(string $value): bool
    {
        return $value === "\x00";
    }

    /**
     * Tells whether INVALID_CHARS_REGEX fails to match the value.
     */
    protected function isInvalidChar(string $value): bool
    {
        $matched = preg_match(self::INVALID_CHARS_REGEX, $value);

        return !$matched;
    }

    /**
     * Runs VALID_UTF8_REGEX against the value and reports whether
     * preg_match() returned anything other than false.
     *
     * NOTE(review): preg_match() yields 0 or 1 on success and false only on
     * failure, so this is true whether or not the pattern matched - confirm
     * that this is the intended meaning of "invalid".
     */
    protected function isUTF8Invalid(string $value): bool
    {
        $outcome = preg_match(self::VALID_UTF8_REGEX, $value);

        return $outcome !== false;
    }

    /**
     * Reports whether any value has been classified as INVALID since the
     * last reset.
     */
    public function hasInvalidTokens(): bool
    {
        return $this->hasInvalidTokens;
    }

    /**
     * Returns the token that was current before the latest moveNext().
     *
     * @return Token<int, string>
     */
    public function getPrevious(): Token
    {
        return $this->previous;
    }

    /**
     * Lexical catchable patterns.
     *
     * @return string[]
     */
    protected function getCatchablePatterns(): array
    {
        return self::CATCHABLE_PATTERNS;
    }

    /**
     * Lexical non-catchable patterns.
     *
     * @return string[]
     */
    protected function getNonCatchablePatterns(): array
    {
        return self::NON_CATCHABLE_PATTERNS;
    }

    /**
     * Regex modifiers.
     */
    protected function getModifiers(): string
    {
        return self::MODIFIERS;
    }

    /**
     * Returns everything recorded so far.
     */
    public function getAccumulatedValues(): string
    {
        return $this->accumulator;
    }

    /**
     * Switches recording of token values on.
     */
    public function startRecording(): void
    {
        $this->hasToRecord = true;
    }

    /**
     * Switches recording of token values off; recorded text is kept.
     */
    public function stopRecording(): void
    {
        $this->hasToRecord = false;
    }

    /**
     * Discards everything recorded so far.
     */
    public function clearRecorded(): void
    {
        $this->accumulator = '';
    }
}
Members
Title Sort descending | Modifiers | Object type | Summary | Overridden Title |
---|---|---|---|---|
AbstractLexer::$input | private | property | Lexer original input string. | |
AbstractLexer::$lookahead | public | property | The next token in the input. | |
AbstractLexer::$peek | private | property | Current peek of current lexer position. | |
AbstractLexer::$position | private | property | Current lexer position in input string. | |
AbstractLexer::$regex | private | property | Composed regex for input parsing. | |
AbstractLexer::$token | public | property | The last matched/seen token. | |
AbstractLexer::$tokens | private | property | Array of scanned tokens. | |
AbstractLexer::getInputUntilPosition | public | function | Retrieve the original lexer's input until a given position. | |
AbstractLexer::getLiteral | public | function | Gets the literal for a given token. | |
AbstractLexer::glimpse | public | function | Peeks at the next token, returns it and immediately resets the peek. | |
AbstractLexer::isA | public | function | Checks if given value is identical to the given token. | |
AbstractLexer::isNextToken | public | function | Checks whether a given token matches the current lookahead. | |
AbstractLexer::isNextTokenAny | public | function | Checks whether any of the given tokens matches the current lookahead. | |
AbstractLexer::peek | public | function | Moves the lookahead token forward. | |
AbstractLexer::resetPeek | public | function | Resets the peek pointer to 0. | |
AbstractLexer::resetPosition | public | function | Resets the lexer position on the input to the given position. | |
AbstractLexer::scan | protected | function | Scans the input string for tokens. | |
AbstractLexer::setInput | public | function | Sets the input data to be tokenized. | |
AbstractLexer::skipUntil | public | function | Tells the lexer to skip input tokens until it sees a token with the given value. | |
EmailLexer::$accumulator | private | property | @var string | |
EmailLexer::$charValue | protected | property | US-ASCII visible characters not valid for atext (@link http://tools.ietf.org/html/rfc5322#section-3.2.3) | |
EmailLexer::$current | public | property | The last matched/seen token. | |
EmailLexer::$hasInvalidTokens | protected | property | @var bool | |
EmailLexer::$hasToRecord | private | property | @var bool | |
EmailLexer::$nullToken | private | property | ||
EmailLexer::$previous | protected | property | ||
EmailLexer::AMPERSAND | public | constant | ||
EmailLexer::ASCII_INVALID_FROM | public | constant | ||
EmailLexer::ASCII_INVALID_TO | public | constant | ||
EmailLexer::ASTERISK | public | constant | ||
EmailLexer::CARET | public | constant | ||
EmailLexer::CATCHABLE_PATTERNS | public | constant | ||
EmailLexer::clearRecorded | public | function | ||
EmailLexer::CRLF | public | constant | ||
EmailLexer::C_DEL | public | constant | ||
EmailLexer::C_NUL | public | constant | ||
EmailLexer::DOLLAR | public | constant | ||
EmailLexer::EXCLAMATION | public | constant | ||
EmailLexer::find | public | function | @psalm-suppress InvalidScalarArgument | |
EmailLexer::GENERIC | public | constant | ||
EmailLexer::getAccumulatedValues | public | function | ||
EmailLexer::getCatchablePatterns | protected | function | Lexical catchable patterns. | Overrides AbstractLexer::getCatchablePatterns |
EmailLexer::getModifiers | protected | function | Regex modifiers | Overrides AbstractLexer::getModifiers |
EmailLexer::getNonCatchablePatterns | protected | function | Lexical non-catchable patterns. | Overrides AbstractLexer::getNonCatchablePatterns |
EmailLexer::getPrevious | public | function | getPrevious | |
EmailLexer::getType | protected | function | Retrieve token type. Also processes the token value if necessary. | Overrides AbstractLexer::getType |
EmailLexer::hasInvalidTokens | public | function | ||
EmailLexer::INVALID | public | constant | ||
EmailLexer::INVALID_CHARS_REGEX | public | constant | ||
EmailLexer::INVERT_EXCLAMATION | public | constant | ||
EmailLexer::INVERT_QUESTIONMARK | public | constant | ||
EmailLexer::isInvalidChar | protected | function | ||
EmailLexer::isNullType | protected | function | ||
EmailLexer::isUTF8Invalid | protected | function | ||
EmailLexer::isValid | protected | function | ||
EmailLexer::MODIFIERS | public | constant | ||
EmailLexer::moveNext | public | function | moveNext | Overrides AbstractLexer::moveNext |
EmailLexer::NON_CATCHABLE_PATTERNS | public | constant | ||
EmailLexer::NUMBER_SIGN | public | constant | ||
EmailLexer::PERCENTAGE | public | constant | ||
EmailLexer::QUESTIONMARK | public | constant | ||
EmailLexer::reset | public | function | Resets the lexer. | Overrides AbstractLexer::reset |
EmailLexer::startRecording | public | function | ||
EmailLexer::stopRecording | public | function | ||
EmailLexer::S_AT | public | constant | ||
EmailLexer::S_BACKSLASH | public | constant | ||
EmailLexer::S_BACKTICK | public | constant | ||
EmailLexer::S_CLOSEBRACKET | public | constant | ||
EmailLexer::S_CLOSECURLYBRACES | public | constant | ||
EmailLexer::S_CLOSEPARENTHESIS | public | constant | ||
EmailLexer::S_COLON | public | constant | ||
EmailLexer::S_COMMA | public | constant | ||
EmailLexer::S_CR | public | constant | ||
EmailLexer::S_DOT | public | constant | ||
EmailLexer::S_DOUBLECOLON | public | constant | ||
EmailLexer::S_DQUOTE | public | constant | ||
EmailLexer::S_EMPTY | public | constant | ||
EmailLexer::S_EQUAL | public | constant | ||
EmailLexer::S_GREATERTHAN | public | constant | ||
EmailLexer::S_HTAB | public | constant | ||
EmailLexer::S_HYPHEN | public | constant | ||
EmailLexer::S_IPV6TAG | public | constant | ||
EmailLexer::S_LF | public | constant | ||
EmailLexer::S_LOWERTHAN | public | constant | ||
EmailLexer::S_OPENBRACKET | public | constant | ||
EmailLexer::S_OPENCURLYBRACES | public | constant | ||
EmailLexer::S_OPENPARENTHESIS | public | constant | ||
EmailLexer::S_PIPE | public | constant | ||
EmailLexer::S_PLUS | public | constant | ||
EmailLexer::S_SEMICOLON | public | constant | ||
EmailLexer::S_SLASH | public | constant | ||
EmailLexer::S_SP | public | constant | ||
EmailLexer::S_SQUOTE | public | constant | ||
EmailLexer::S_TILDE | public | constant | ||
EmailLexer::S_UNDERSCORE | public | constant | ||
EmailLexer::VALID_UTF8_REGEX | public | constant | ||
EmailLexer::__construct | public | function |