class LSM
Longest Sequence Matcher. Utility class used by the scanner to consume the longest sequence of characters, given a set of allowed character sequences.
@author Marco Marchiò <marco.mm89@gmail.com>
Hierarchy
- class \Peast\Syntax\LSM
Expanded class hierarchy of LSM
File
-
vendor/
mck89/ peast/ lib/ Peast/ Syntax/ LSM.php, line 18
Namespace
Peast\Syntax
View source
class LSM {
    /**
     * Internal sequences map. Sequences are grouped by their first
     * character; every group is an array with two keys:
     * - "maxLen": length of the longest sequence in the group
     * - "map": list of the sequences in the group
     *
     * @var array
     */
    protected $map = array();

    /**
     * Encoding handle flag. When true, the first character and the length
     * of a sequence are computed on the array returned by
     * Utils::stringToUTF8Array instead of on the raw string.
     *
     * @var bool
     */
    protected $handleEncoding = false;

    /**
     * Class constructor
     *
     * @param array $sequences      Allowed characters sequences
     * @param bool  $handleEncoding True to handle encoding when matching
     */
    public function __construct($sequences, $handleEncoding = false) {
        $this->handleEncoding = $handleEncoding;
        foreach ($sequences as $s) {
            $this->add($s);
        }
    }

    /**
     * Adds a sequence
     *
     * @param string $sequence Sequence to add
     *
     * @return $this
     */
    public function add($sequence) {
        list($first, $len) = $this->getFirstAndLength($sequence);
        if (!isset($this->map[$first])) {
            $this->map[$first] = array(
                "maxLen" => $len,
                "map" => array(
                    $sequence,
                ),
            );
        }
        else {
            $this->map[$first]["map"][] = $sequence;
            $this->map[$first]["maxLen"] = max($this->map[$first]["maxLen"], $len);
        }
        return $this;
    }

    /**
     * Removes a sequence
     *
     * @param string $sequence Sequence to remove
     *
     * @return $this
     */
    public function remove($sequence) {
        list($first, $len) = $this->getFirstAndLength($sequence);
        if (isset($this->map[$first])) {
            // array_diff preserves keys, reindex to keep a plain list
            $this->map[$first]["map"] = array_values(
                array_diff($this->map[$first]["map"], array($sequence))
            );
            if (!count($this->map[$first]["map"])) {
                unset($this->map[$first]);
            }
            elseif ($this->map[$first]["maxLen"] === $len) {
                // The removed sequence may have been the longest one of its
                // group: recalculate the max length starting from zero so
                // that it can actually decrease, and measure every sequence
                // with the same unit used by add()
                $maxLen = 0;
                foreach ($this->map[$first]["map"] as $m) {
                    $info = $this->getFirstAndLength($m);
                    $maxLen = max($maxLen, $info[1]);
                }
                $this->map[$first]["maxLen"] = $maxLen;
            }
        }
        return $this;
    }

    /**
     * Executes the match. It returns an array where the first element is the
     * number of consumed characters and the second element is the match. If
     * no match is found it returns null.
     *
     * @param Scanner $scanner Scanner instance
     * @param int     $index   Current index
     * @param string  $char    Current character
     *
     * @return array|null
     */
    public function match($scanner, $index, $char) {
        $consumed = 1;
        $bestMatch = null;
        if (isset($this->map[$char])) {
            //If the character is present in the map and it has a max length of
            //1, match immediately
            if ($this->map[$char]["maxLen"] === 1) {
                $bestMatch = array(
                    $consumed,
                    $char,
                );
            }
            else {
                //Otherwise consume a number of characters equal to the max
                //length and find the longest match
                $buffer = $char;
                $map = $this->map[$char]["map"];
                $maxLen = $this->map[$char]["maxLen"];
                do {
                    //Strict comparison: a loose one would treat numeric
                    //looking strings such as "10" and "1e1" as equal
                    if (in_array($buffer, $map, true)) {
                        $bestMatch = array(
                            $consumed,
                            $buffer,
                        );
                    }
                    $nextChar = $scanner->charAt($index + $consumed);
                    //A null character stops the lookahead
                    if ($nextChar === null) {
                        break;
                    }
                    $buffer .= $nextChar;
                    $consumed++;
                } while ($consumed <= $maxLen);
            }
        }
        return $bestMatch;
    }

    /**
     * Returns the first character and the length of the given sequence,
     * computed according to the encoding handle flag.
     *
     * @param string $sequence Sequence to inspect
     *
     * @return array Array where the first element is the first character
     *               and the second element is the length
     */
    protected function getFirstAndLength($sequence) {
        if ($this->handleEncoding) {
            $s = Utils::stringToUTF8Array($sequence);
            return array($s[0], count($s));
        }
        return array($sequence[0], strlen($sequence));
    }
}
Members
Title Sort descending | Modifiers | Object type | Summary |
---|---|---|---|
LSM::$handleEncoding | protected | property | Encoding handle flag |
LSM::$map | protected | property | Internal sequences map |
LSM::add | public | function | Adds a sequence |
LSM::match | public | function | Executes the match. It returns an array where the first element is the number of consumed characters and the second element is the match. If no match is found it returns null. |
LSM::remove | public | function | Removes a sequence |
LSM::__construct | public | function | Class constructor |