1: <?php
/**
 * Mode-based lexer that splits a raw string into a list of named tokens.
 *
 * Every mode owns one QRegex holding the patterns that are valid while that
 * mode is active. A stack of mode names tracks the active mode (the one
 * returned by PeekLast()): a token registered through addEntryPattern()
 * pushes a new mode, a token registered through addExitPattern() pops the
 * current one.
 */
class QLexer extends QBaseClass {
    /** @var array Mode name => QRegex holding the patterns of that mode. */
    private $objRegexMode = array();

    /** @var array Not referenced by any method of this class. */
    private $objModeHandlers = array();

    /** @var array Token name => mode to enter, or "__exit" to leave the current mode. */
    private $objTokenModeMap = array();

    /** @var array Not referenced by any method of this class; Tokenize() builds a local list. */
    private $objTokens = array();

    /** @var QStack Stack of mode names; PeekLast() yields the active mode. */
    private $objModeStack;

    /** Token name given to text that no pattern of the active mode matched. */
    const UNMATCHED = "__UNMATCHED__";

    /** Name of the mode used when the caller does not specify one. */
    const DefaultMode = "default_mode";

    /**
     * Creates a lexer whose mode stack initially holds only the start mode.
     *
     * @param string $strStartMode Mode that is active when tokenizing begins.
     */
    public function __construct($strStartMode = QLexer::DefaultMode) {
        $this->objModeStack = new QStack();
        $this->objModeStack->Push($strStartMode);
    }

    /**
     * Registers a pattern that yields a plain token while $strMode is active.
     *
     * The QRegex for the mode is created on first use.
     *
     * @param string $strPattern   Pattern handed to QRegex::addPattern().
     * @param string $strTokenName Token name reported when the pattern matches.
     * @param string $strMode      Mode in which the pattern is active.
     */
    public function addPattern($strPattern, $strTokenName, $strMode = QLexer::DefaultMode) {
        if (!isset($this->objRegexMode[$strMode])) {
            $this->objRegexMode[$strMode] = new QRegex();
        }
        $this->objRegexMode[$strMode]->addPattern($strPattern, $strTokenName);
    }

    /**
     * Registers a pattern that, when matched in $strMode, switches the lexer
     * into $strNewMode. The text tokenized inside the new mode becomes the
     * nested 'raw' value of the emitted token (see Tokenize()).
     *
     * $strMode deliberately has no default value: a default placed in front of
     * the required $strNewMode can never be used by a caller and triggers a
     * deprecation notice on PHP 8.0 and later.
     *
     * The mode switch is keyed by token name only, so it applies to that token
     * name in whatever mode it is matched.
     *
     * @param string $strPattern   Pattern handed to QRegex::addPattern().
     * @param string $strTokenName Token name reported when the pattern matches.
     * @param string $strMode      Mode in which the pattern is active.
     * @param string $strNewMode   Mode pushed onto the mode stack on a match.
     */
    public function addEntryPattern($strPattern, $strTokenName, $strMode, $strNewMode) {
        $this->addPattern($strPattern, $strTokenName, $strMode);
        $this->objTokenModeMap[$strTokenName] = $strNewMode;
    }

    /**
     * Registers a pattern that, when matched in $strMode, leaves the current
     * mode by popping the mode stack.
     *
     * @param string $strPattern   Pattern handed to QRegex::addPattern().
     * @param string $strTokenName Token name reported when the pattern matches.
     * @param string $strMode      Mode in which the pattern is active.
     * @param string $strNewMode   Unused; leaving a mode always returns to the
     *                             mode below it on the stack. Kept so existing
     *                             callers keep working.
     */
    public function addExitPattern($strPattern, $strTokenName, $strMode, $strNewMode = QLexer::DefaultMode) {
        $this->addPattern($strPattern, $strTokenName, $strMode);
        // "__exit" is the marker Tokenize() checks for to pop the mode stack.
        $this->objTokenModeMap[$strTokenName] = "__exit";
    }

    /**
     * Splits $strRaw into tokens according to the registered patterns.
     *
     * Each token is array("token" => name, 'raw' => value). For plain and exit
     * tokens 'raw' is the matched text; for entry tokens it is the nested token
     * list produced by tokenizing in the entered mode. Text between matches is
     * emitted under the QLexer::UNMATCHED token name.
     *
     * @param string $strRaw Input text, passed by reference and consumed as
     *                       tokens are produced. When a nested mode ends
     *                       without its exit pattern, the text the parent mode
     *                       can match is left in (or put back into) $strRaw.
     * @return array List of tokens in input order.
     */
    public function Tokenize(&$strRaw) {
        $objTokens = array();

        while (is_array($objParsed = $this->Reduce($strRaw))) {
            list($strUnmatched, $strMatched, $strToken) = $objParsed;

            if ($strUnmatched != "") {
                $objTokens[] = array("token" => QLexer::UNMATCHED, 'raw' => $strUnmatched);
            }

            if (!array_key_exists($strToken, $this->objTokenModeMap)) {
                // Plain token: no mode change.
                $objTokens[] = array("token" => $strToken, 'raw' => $strMatched);
                continue;
            }

            $strTargetMode = $this->objTokenModeMap[$strToken];

            // Strict comparison: a loosely compared integer mode such as 0
            // would equal "__exit" on PHP < 8 and be mistaken for an exit.
            if ($strTargetMode === "__exit") {
                $this->objModeStack->Pop();
                $objTokens[] = array("token" => $strToken, 'raw' => $strMatched);
                return $objTokens;
            }

            // Entry token: tokenize the following text in the new mode and
            // store the nested token list as this token's raw value.
            $this->objModeStack->Push($strTargetMode);
            $objTokens[] = array("token" => $strToken, 'raw' => $this->Tokenize($strRaw));
        }

        // Reduce() returned TRUE: the active mode has patterns but none of
        // them matched (or the input is empty). Whatever is left is unmatched.
        // NOTE(review): this also emits an UNMATCHED token with an empty raw
        // value when the input was fully consumed - confirm callers expect it.
        if ($objParsed) {
            $objTokens[] = array("token" => QLexer::UNMATCHED, 'raw' => $strRaw);
        }

        // The mode ended without its exit pattern; fall back to the parent
        // mode, but never pop the last remaining mode.
        if ($this->objModeStack->Size() > 1) {
            $this->objModeStack->Pop();
        }

        // If the (now active) mode can match inside the leftover text, the
        // trailing unmatched token must stop where that match begins, and the
        // matched text is handed back to the caller through $strRaw.
        if (is_array($objParsed = $this->Reduce($strRaw))) {
            list($strUnmatched, $strMatched, $strToken) = $objParsed;
            if ($strMatched != "") {
                $objLastToken = array_pop($objTokens);
                if (!is_array($objLastToken)) {
                    // No trailing token was pushed (the ended mode had no
                    // patterns); without this the token would lack its
                    // "token" key.
                    $objLastToken = array("token" => QLexer::UNMATCHED);
                }
                $objLastToken["raw"] = $strUnmatched;
                $objTokens[] = $objLastToken;
                $strRaw = $strMatched . $strRaw;
            }
        }

        return $objTokens;
    }

    /**
     * Tries to match the next token of the active mode in $strRaw.
     *
     * @param string $strRaw Remaining input, passed by reference. On a match
     *                       it is shortened to the text following the match.
     * @return array|bool FALSE when the active mode has no QRegex registered;
     *                    TRUE when the input is empty or nothing matched;
     *                    otherwise array(text before the match, matched text,
     *                    token name).
     */
    private function Reduce(&$strRaw) {
        $strMode = $this->objModeStack->PeekLast();

        if (!isset($this->objRegexMode[$strMode])) {
            return FALSE;
        }

        if ($strRaw === "") {
            return TRUE;
        }

        $strToken = $this->objRegexMode[$strMode]->match($strRaw, $strMatch);
        if (!$strToken) {
            return TRUE;
        }

        // NOTE(review): the match is located as the first occurrence of the
        // matched text; presumably that is where QRegex matched - verify
        // against QRegex::match().
        $intMatchPosition = strpos($strRaw, $strMatch);
        $strUnparsed = substr($strRaw, 0, $intMatchPosition);
        $strRaw = substr($strRaw, $intMatchPosition + strlen($strMatch));

        return array($strUnparsed, $strMatch, $strToken);
    }
}