class EmailLexer

@extends AbstractLexer<int, string>

Hierarchy

class \Doctrine\Common\Lexer\AbstractLexer
- class \Egulias\EmailValidator\EmailLexer extends \Doctrine\Common\Lexer\AbstractLexer

Expanded class hierarchy of EmailLexer

18 files declare their use of EmailLexer

Comment.php in vendor/egulias/email-validator/src/Parser/Comment.php
CommentStrategy.php in vendor/egulias/email-validator/src/Parser/CommentStrategy/CommentStrategy.php
DNSCheckValidation.php in vendor/egulias/email-validator/src/Validation/DNSCheckValidation.php
DomainComment.php in vendor/egulias/email-validator/src/Parser/CommentStrategy/DomainComment.php
DomainLiteral.php in vendor/egulias/email-validator/src/Parser/DomainLiteral.php

... See full list

File

vendor/egulias/email-validator/src/EmailLexer.php, line 9

Namespace

Egulias\EmailValidator

View source

class EmailLexer extends AbstractLexer
{
    // Type of the placeholder ("null") token that carries an empty value.
    public const S_EMPTY = -1;

    // Single-character token types; each value is the character's ASCII code.
    public const C_NUL = 0;
    public const S_HTAB = 9;
    public const S_LF = 10;
    public const S_CR = 13;
    public const S_SP = 32;
    public const EXCLAMATION = 33;
    public const S_DQUOTE = 34;
    public const NUMBER_SIGN = 35;
    public const DOLLAR = 36;
    public const PERCENTAGE = 37;
    public const AMPERSAND = 38;
    public const S_SQUOTE = 39;
    public const S_OPENPARENTHESIS = 40;
    public const S_CLOSEPARENTHESIS = 41;
    public const ASTERISK = 42;
    public const S_PLUS = 43;
    public const S_COMMA = 44;
    public const S_HYPHEN = 45;
    public const S_DOT = 46;
    public const S_SLASH = 47;
    public const S_COLON = 58;
    public const S_SEMICOLON = 59;
    public const S_LOWERTHAN = 60;
    public const S_EQUAL = 61;
    public const S_GREATERTHAN = 62;
    public const QUESTIONMARK = 63;
    public const S_AT = 64;
    public const S_OPENBRACKET = 91;
    public const S_BACKSLASH = 92;
    public const S_CLOSEBRACKET = 93;
    public const CARET = 94;
    public const S_UNDERSCORE = 95;
    public const S_BACKTICK = 96;
    public const S_OPENCURLYBRACES = 123;
    public const S_PIPE = 124;
    public const S_CLOSECURLYBRACES = 125;
    public const S_TILDE = 126;
    public const C_DEL = 127;

    // Types assigned to the inverted punctuation marks '¿' and '¡' (see $charValue).
    public const INVERT_QUESTIONMARK = 168;
    public const INVERT_EXCLAMATION = 173;

    // Types that do not stand for one specific character.
    public const GENERIC = 300;
    public const S_IPV6TAG = 301;
    public const INVALID = 302;

    // Types of the two-character tokens "\r\n" and "::" (see $charValue).
    public const CRLF = 1310;
    public const S_DOUBLECOLON = 5858;

    // Numeric bounds; not referenced anywhere inside this class.
    public const ASCII_INVALID_FROM = 127;
    public const ASCII_INVALID_TO = 199;

    /**
     * Lookup table from the literal text of a token to its type constant.
     *
     * getType() consults this table (through isValid()) before falling back
     * to the NUL / INVALID / GENERIC classification.
     *
     * @see http://tools.ietf.org/html/rfc5322#section-3.2.3
     *
     * @var array<string, int>
     */
    protected $charValue = [
        '{' => self::S_OPENCURLYBRACES,
        '}' => self::S_CLOSECURLYBRACES,
        '(' => self::S_OPENPARENTHESIS,
        ')' => self::S_CLOSEPARENTHESIS,
        '<' => self::S_LOWERTHAN,
        '>' => self::S_GREATERTHAN,
        '[' => self::S_OPENBRACKET,
        ']' => self::S_CLOSEBRACKET,
        ':' => self::S_COLON,
        ';' => self::S_SEMICOLON,
        '@' => self::S_AT,
        '\\' => self::S_BACKSLASH,
        '/' => self::S_SLASH,
        ',' => self::S_COMMA,
        '.' => self::S_DOT,
        "'" => self::S_SQUOTE,
        "`" => self::S_BACKTICK,
        '"' => self::S_DQUOTE,
        '-' => self::S_HYPHEN,
        '::' => self::S_DOUBLECOLON,
        ' ' => self::S_SP,
        "\t" => self::S_HTAB,
        "\r" => self::S_CR,
        "\n" => self::S_LF,
        "\r\n" => self::CRLF,
        'IPv6' => self::S_IPV6TAG,
        '' => self::S_EMPTY,
        '\\0' => self::C_NUL,
        '*' => self::ASTERISK,
        '!' => self::EXCLAMATION,
        '&' => self::AMPERSAND,
        '^' => self::CARET,
        '$' => self::DOLLAR,
        '%' => self::PERCENTAGE,
        '~' => self::S_TILDE,
        '|' => self::S_PIPE,
        '_' => self::S_UNDERSCORE,
        '=' => self::S_EQUAL,
        '+' => self::S_PLUS,
        '¿' => self::INVERT_QUESTIONMARK,
        '?' => self::QUESTIONMARK,
        '#' => self::NUMBER_SIGN,
        '¡' => self::INVERT_EXCLAMATION,
    ];

    // Matches a run of characters outside the Unicode Symbol (S) and Other (C) categories.
    public const INVALID_CHARS_REGEX = "/[^\\p{S}\\p{C}\\p{Cc}]+/iu";

    // Matches a run of Unicode control characters (Cc).
    public const VALID_UTF8_REGEX = '/\\p{Cc}+/u';

    // Patterns handed to the parent lexer by getCatchablePatterns(); the order is significant.
    public const CATCHABLE_PATTERNS = [
        '[a-zA-Z]+[46]?', // ASCII and domain literal
        '[^\\x00-\\x7F]', // UTF-8
        '[0-9]+',
        '\\r\\n',
        '::',
        '\\s+?',
        '.',
    ];

    // Patterns handed to the parent lexer by getNonCatchablePatterns().
    public const NON_CATCHABLE_PATTERNS = [
        '[\\xA0-\\xff]+',
    ];

    // Regex modifiers handed to the parent lexer by getModifiers().
    public const MODIFIERS = 'iu';

    /**
     * Becomes true once getType() has classified a token as INVALID;
     * cleared again by reset().
     *
     * @var bool
     */
    protected $hasInvalidTokens = false;

    /**
     * The token that was current before the most recent moveNext() call.
     *
     * @var Token<int, string>
     */
    protected Token $previous;

    /**
     * The last matched/seen token.
     *
     * @var Token<int, string>
     */
    public Token $current;

    /**
     * Placeholder token (empty value, type S_EMPTY) used wherever no real
     * token is available.
     *
     * @var Token<int, string>
     */
    private Token $nullToken;

    /**
     * Concatenated values of the tokens seen while recording was enabled.
     *
     * @var string
     */
    private $accumulator = '';

    /**
     * Whether moveNext() should append token values to the accumulator.
     *
     * @var bool
     */
    private $hasToRecord = false;

    /**
     * Creates the placeholder token and points both the current and the
     * previous token at it; the lookahead starts out as null.
     */
    public function __construct()
    {
        /** @var Token<int, string> $sentinel */
        $sentinel = new Token('', self::S_EMPTY, 0);

        $this->nullToken = $sentinel;
        $this->previous = $sentinel;
        $this->current = $sentinel;
        $this->lookahead = null;
    }

    /**
     * Resets the lexer: clears the invalid-token flag, resets the parent
     * state and points current/previous back at the placeholder token.
     *
     * The recording flag and the accumulated text are left as they are.
     */
    public function reset(): void
    {
        $this->hasInvalidTokens = false;

        parent::reset();

        $this->previous = $this->nullToken;
        $this->current = $this->nullToken;
    }

    /**
     * Asserts that a token of the given type can still be reached.
     *
     * The search runs on a clone, so the position of this lexer is not moved.
     *
     * @param int $type token type to look for
     * @throws \UnexpectedValueException when the clone has no lookahead left after skipping
     * @return bool always true when no exception is thrown
     *
     * @psalm-suppress InvalidScalarArgument
     */
    public function find($type): bool
    {
        $probe = clone $this;
        $probe->skipUntil($type);

        if ($probe->lookahead) {
            return true;
        }

        throw new \UnexpectedValueException($type . ' not found');
    }

    /**
     * Advances to the next token while keeping $previous, $current and the
     * recording accumulator up to date.
     *
     * @return bool the result of the parent's moveNext()
     */
    public function moveNext(): bool
    {
        $recording = $this->hasToRecord;

        // While $previous is still the placeholder, the current token's value
        // is recorded before advancing as well as after.
        if ($recording && $this->previous === $this->nullToken) {
            $this->accumulator .= $this->current->value;
        }

        $this->previous = $this->current;

        // Give the parent a token object instead of null as lookahead.
        if ($this->lookahead === null) {
            $this->lookahead = $this->nullToken;
        }

        $advanced = parent::moveNext();

        $this->current = $this->token ?? $this->nullToken;

        if ($this->hasToRecord) {
            $this->accumulator .= $this->current->value;
        }

        return $advanced;
    }

    /**
     * Retrieve token type.
     *
     * The value is taken by reference (as the parent signature requires) but
     * is only read here. A value not detected as UTF-8 is converted from
     * Windows-1252 before classification.
     *
     * @param string $value
     * @throws \InvalidArgumentException
     * @return int an entry of $charValue, or C_NUL, INVALID or GENERIC
     */
    protected function getType(&$value): int
    {
        $encoded = mb_detect_encoding($value, 'auto', true) === 'UTF-8'
            ? $value
            : mb_convert_encoding($value, 'UTF-8', 'Windows-1252');

        if ($this->isValid($encoded)) {
            return $this->charValue[$encoded];
        }

        if ($this->isNullType($encoded)) {
            return self::C_NUL;
        }

        if (!$this->isInvalidChar($encoded)) {
            return self::GENERIC;
        }

        // Remember that an invalid token was seen; exposed via hasInvalidTokens().
        $this->hasInvalidTokens = true;

        return self::INVALID;
    }

    /**
     * Tells whether the value has an entry in the $charValue table.
     */
    protected function isValid(string $value): bool
    {
        $known = isset($this->charValue[$value]);

        return $known;
    }

    /**
     * Tells whether the value is exactly one NUL byte.
     */
    protected function isNullType(string $value): bool
    {
        return "\x00" === $value;
    }

    /**
     * Tells whether INVALID_CHARS_REGEX fails to match the value, i.e. the
     * value holds no character outside the classes excluded by that regex
     * (or preg_match() itself failed).
     */
    protected function isInvalidChar(string $value): bool
    {
        $matched = preg_match(self::INVALID_CHARS_REGEX, $value);

        return $matched !== 1;
    }

    /**
     * Returns true whenever preg_match() with VALID_UTF8_REGEX completes
     * without error, regardless of whether the pattern matched; returns
     * false only when preg_match() itself returns false.
     *
     * NOTE(review): the method name suggests the opposite polarity - confirm
     * the intent before relying on this result.
     */
    protected function isUTF8Invalid(string $value): bool
    {
        $outcome = preg_match(self::VALID_UTF8_REGEX, $value);

        return false !== $outcome;
    }

    /**
     * Reports whether getType() has flagged an INVALID token since the last reset().
     */
    public function hasInvalidTokens(): bool
    {
        return $this->hasInvalidTokens;
    }

    /**
     * Returns the token that was current before the last moveNext() call.
     *
     * @return Token<int, string>
     */
    public function getPrevious(): Token
    {
        return $this->previous;
    }

    /**
     * Lexical catchable patterns.
     *
     * @return string[]
     */
    protected function getCatchablePatterns(): array
    {
        return self::CATCHABLE_PATTERNS;
    }

    /**
     * Lexical non-catchable patterns.
     *
     * @return string[]
     */
    protected function getNonCatchablePatterns(): array
    {
        return self::NON_CATCHABLE_PATTERNS;
    }

    /**
     * Regex modifiers used by the parent lexer.
     */
    protected function getModifiers(): string
    {
        return self::MODIFIERS;
    }

    /**
     * Returns the text accumulated so far while recording was enabled.
     */
    public function getAccumulatedValues(): string
    {
        return $this->accumulator;
    }

    /**
     * Makes moveNext() start appending token values to the accumulator.
     */
    public function startRecording(): void
    {
        $this->hasToRecord = true;
    }

    /**
     * Makes moveNext() stop appending token values; the accumulated text is kept.
     */
    public function stopRecording(): void
    {
        $this->hasToRecord = false;
    }

    /**
     * Empties the accumulator without changing the recording flag.
     */
    public function clearRecorded(): void
    {
        $this->accumulator = '';
    }
}

Members

Title Sort descending	Modifiers	Object type	Summary	Overridden Title
AbstractLexer::$input	private	property	Lexer original input string.
AbstractLexer::$lookahead	public	property	The next token in the input.
AbstractLexer::$peek	private	property	Current peek of current lexer position.
AbstractLexer::$position	private	property	Current lexer position in input string.
AbstractLexer::$regex	private	property	Composed regex for input parsing.
AbstractLexer::$token	public	property	The last matched/seen token.
AbstractLexer::$tokens	private	property	Array of scanned tokens.
AbstractLexer::getInputUntilPosition	public	function	Retrieve the original lexer's input until a given position.
AbstractLexer::getLiteral	public	function	Gets the literal for a given token.
AbstractLexer::glimpse	public	function	Peeks at the next token, returns it and immediately resets the peek.
AbstractLexer::isA	public	function	Checks if given value is identical to the given token.
AbstractLexer::isNextToken	public	function	Checks whether a given token matches the current lookahead.
AbstractLexer::isNextTokenAny	public	function	Checks whether any of the given tokens matches the current lookahead.
AbstractLexer::peek	public	function	Moves the lookahead token forward.
AbstractLexer::resetPeek	public	function	Resets the peek pointer to 0.
AbstractLexer::resetPosition	public	function	Resets the lexer position on the input to the given position.
AbstractLexer::scan	protected	function	Scans the input string for tokens.
AbstractLexer::setInput	public	function	Sets the input data to be tokenized.
AbstractLexer::skipUntil	public	function	Tells the lexer to skip input tokens until it sees a token with the given value.
EmailLexer::$accumulator	private	property	@var string
EmailLexer::$charValue	protected	property	US-ASCII visible characters not valid for atext (@link http://tools.ietf.org/html/rfc5322#section-3.2.3)
EmailLexer::$current	public	property	The last matched/seen token.
EmailLexer::$hasInvalidTokens	protected	property	@var bool
EmailLexer::$hasToRecord	private	property	@var bool
EmailLexer::$nullToken	private	property
EmailLexer::$previous	protected	property
EmailLexer::AMPERSAND	public	constant
EmailLexer::ASCII_INVALID_FROM	public	constant
EmailLexer::ASCII_INVALID_TO	public	constant
EmailLexer::ASTERISK	public	constant
EmailLexer::CARET	public	constant
EmailLexer::CATCHABLE_PATTERNS	public	constant
EmailLexer::clearRecorded	public	function
EmailLexer::CRLF	public	constant
EmailLexer::C_DEL	public	constant
EmailLexer::C_NUL	public	constant
EmailLexer::DOLLAR	public	constant
EmailLexer::EXCLAMATION	public	constant
EmailLexer::find	public	function	@psalm-suppress InvalidScalarArgument
EmailLexer::GENERIC	public	constant
EmailLexer::getAccumulatedValues	public	function
EmailLexer::getCatchablePatterns	protected	function	Lexical catchable patterns.	Overrides AbstractLexer::getCatchablePatterns
EmailLexer::getModifiers	protected	function	Regex modifiers	Overrides AbstractLexer::getModifiers
EmailLexer::getNonCatchablePatterns	protected	function	Lexical non-catchable patterns.	Overrides AbstractLexer::getNonCatchablePatterns
EmailLexer::getPrevious	public	function	getPrevious
EmailLexer::getType	protected	function	Retrieve token type. Also processes the token value if necessary.	Overrides AbstractLexer::getType
EmailLexer::hasInvalidTokens	public	function
EmailLexer::INVALID	public	constant
EmailLexer::INVALID_CHARS_REGEX	public	constant
EmailLexer::INVERT_EXCLAMATION	public	constant
EmailLexer::INVERT_QUESTIONMARK	public	constant
EmailLexer::isInvalidChar	protected	function
EmailLexer::isNullType	protected	function
EmailLexer::isUTF8Invalid	protected	function
EmailLexer::isValid	protected	function
EmailLexer::MODIFIERS	public	constant
EmailLexer::moveNext	public	function	moveNext	Overrides AbstractLexer::moveNext
EmailLexer::NON_CATCHABLE_PATTERNS	public	constant
EmailLexer::NUMBER_SIGN	public	constant
EmailLexer::PERCENTAGE	public	constant
EmailLexer::QUESTIONMARK	public	constant
EmailLexer::reset	public	function	Resets the lexer.	Overrides AbstractLexer::reset
EmailLexer::startRecording	public	function
EmailLexer::stopRecording	public	function
EmailLexer::S_AT	public	constant
EmailLexer::S_BACKSLASH	public	constant
EmailLexer::S_BACKTICK	public	constant
EmailLexer::S_CLOSEBRACKET	public	constant
EmailLexer::S_CLOSECURLYBRACES	public	constant
EmailLexer::S_CLOSEPARENTHESIS	public	constant
EmailLexer::S_COLON	public	constant
EmailLexer::S_COMMA	public	constant
EmailLexer::S_CR	public	constant
EmailLexer::S_DOT	public	constant
EmailLexer::S_DOUBLECOLON	public	constant
EmailLexer::S_DQUOTE	public	constant
EmailLexer::S_EMPTY	public	constant
EmailLexer::S_EQUAL	public	constant
EmailLexer::S_GREATERTHAN	public	constant
EmailLexer::S_HTAB	public	constant
EmailLexer::S_HYPHEN	public	constant
EmailLexer::S_IPV6TAG	public	constant
EmailLexer::S_LF	public	constant
EmailLexer::S_LOWERTHAN	public	constant
EmailLexer::S_OPENBRACKET	public	constant
EmailLexer::S_OPENCURLYBRACES	public	constant
EmailLexer::S_OPENPARENTHESIS	public	constant
EmailLexer::S_PIPE	public	constant
EmailLexer::S_PLUS	public	constant
EmailLexer::S_SEMICOLON	public	constant
EmailLexer::S_SLASH	public	constant
EmailLexer::S_SP	public	constant
EmailLexer::S_SQUOTE	public	constant
EmailLexer::S_TILDE	public	constant
EmailLexer::S_UNDERSCORE	public	constant
EmailLexer::VALID_UTF8_REGEX	public	constant
EmailLexer::__construct	public	function