Ticket #6465: parser.js

File parser.js, 6.6 KB (added by sebastian, on 05/14/2018 at 07:03:24 PM)
Line 
1"use strict";
2
3const NODE_YIELD = 1;
4const NODE_MATCH_SYMBOL = 2;
5const NODE_MATCH_STRING = 4;
6const NODE_MATCH_STRING_TRAILING = NODE_MATCH_STRING | 1;
7const NODE_MATCH_STRING_ANYWHERE = NODE_MATCH_STRING | 2;
8
9const SYMBOL_END = 1;
10const SYMBOL_DELIMITER = 2;
11const SYMBOL_DOMAIN = 3;
12
13let tree = [];
14
15function getPrefixLength(s1, s2)
16{
17  let length = Math.min(s1.length, s2.length);
18  for (let i = 0; i < length; i++)
19  {
20    if (s1[i] != s2[i])
21      return i;
22  }
23  return length;
24}
25
26function injectNode(nodeType, nodeValue, nodes)
27{
28  for (let i = 0; i < nodes.length; i++)
29  {
30    let [thisNodeType, thisNodeValue, thisChildNodes] = nodes[i];
31    if (nodeType == thisNodeType)
32    {
33      if (nodeValue == thisNodeValue)
34        return thisChildNodes;
35
36      if (nodeType & NODE_MATCH_STRING)
37      {
38        let prefixLength = getPrefixLength(nodeValue, thisNodeValue);
39        if (prefixLength > 0)
40        {
41          if (prefixLength == thisNodeValue.length)
42            return injectNode(NODE_MATCH_STRING_TRAILING,
43                              nodeValue.slice(prefixLength),
44                              thisChildNodes);
45
46          let childNodes = [[NODE_MATCH_STRING_TRAILING,
47                             thisNodeValue.slice(prefixLength),
48                             thisChildNodes]];
49          nodes[i] = [thisNodeType,
50                      thisNodeValue.slice(0, prefixLength),
51                      childNodes];
52
53          if (prefixLength == nodeValue.length)
54            return childNodes;
55
56          let newChildNodes = [];
57          childNodes.push([NODE_MATCH_STRING_TRAILING,
58                           nodeValue.slice(prefixLength),
59                           newChildNodes]);
60          return newChildNodes;
61        }
62      }
63    }
64  }
65
66  let childNodes = [];
67  nodes.push([nodeType, nodeValue, childNodes]);
68  return childNodes;
69}
70
71function parse(pattern, id)
72{
73  let offset = 0;
74  let matchMode = NODE_MATCH_STRING_ANYWHERE;
75  let nodes = tree;
76
77  if (pattern.startsWith("|"))
78  {
79    offset++;
80    matchMode = NODE_MATCH_STRING_TRAILING;
81
82    if (pattern.startsWith("||"))
83    {
84      offset++;
85      nodes = injectNode(NODE_MATCH_SYMBOL, SYMBOL_DOMAIN, nodes);
86    }
87  }
88
89  while (true)
90  {
91    let asteriskIndex = pattern.indexOf("*", offset);
92    let caretIndex = pattern.indexOf("^", offset);
93
94    if (asteriskIndex != -1 && (caretIndex == -1 || asteriskIndex < caretIndex))
95    {
96      if (asteriskIndex > offset)
97        nodes = injectNode(matchMode, pattern.slice(offset, asteriskIndex), nodes);
98
99      matchMode = NODE_MATCH_STRING_ANYWHERE;
100      offset = asteriskIndex + 1;
101
102      continue;
103    }
104
105    if (caretIndex != -1 && (asteriskIndex == -1 || caretIndex < asteriskIndex))
106    {
107      if (caretIndex > offset ||
108          nodes == tree && matchMode == NODE_MATCH_STRING_ANYWHERE)
109        nodes = injectNode(matchMode, pattern.slice(offset, caretIndex), nodes);
110
111      nodes = injectNode(NODE_MATCH_SYMBOL, SYMBOL_DELIMITER, nodes);
112      matchMode = NODE_MATCH_STRING_TRAILING;
113      offset = caretIndex + 1;
114
115      continue;
116    }
117
118    break;
119  }
120
121  let impliedEnd = pattern.endsWith("|") && pattern.length > offset;
122  let end = pattern.length - impliedEnd;
123
124  if (end > offset)
125  {
126    nodes = injectNode(matchMode, pattern.slice(offset, end), nodes);
127    matchMode = NODE_MATCH_STRING_TRAILING;
128  }
129
130  if (impliedEnd && matchMode == NODE_MATCH_STRING_TRAILING)
131    nodes = injectNode(NODE_MATCH_SYMBOL, SYMBOL_END, nodes);
132
133  nodes.push([NODE_YIELD, id, null]);
134}
135
136function match(s)
137{
138  let results = new Set();
139  let queue = [[tree, 0]];
140
141  while (queue.length > 0)
142  {
143    let [nodes, offset] = queue.shift();
144
145    for (let [nodeType, nodeValue, childNodes] of nodes)
146    {
147      if (nodeType == NODE_YIELD)
148      {
149        results.add(nodeValue);
150      }
151      else if (nodeType == NODE_MATCH_STRING_TRAILING)
152      {
153        if (s.startsWith(nodeValue, offset))
154          queue.push([childNodes, offset + nodeValue.length]);
155      }
156      else if (nodeType == NODE_MATCH_STRING_ANYWHERE)
157      {
158        let index = s.indexOf(nodeValue, offset);
159        if (index != -1)
160        {
161          do
162          {
163            queue.push([childNodes, index + nodeValue.length]);
164            index = s.indexOf(nodeValue, index + 1);
165          }
166          while (index != -1 && index < s.length)
167        }
168      }
169      else if (nodeType == NODE_MATCH_SYMBOL)
170      {
171        if ((nodeValue == SYMBOL_END ||
172             nodeValue == SYMBOL_DELIMITER) && offset >= s.length)
173        {
174          queue.push([childNodes, offset]);
175        }
176        else if (nodeValue == SYMBOL_DELIMITER)
177        {
178          let charCode = s.charCodeAt(offset);
179          if (charCode <= 0x24 ||
180              charCode >= 0x26 && charCode <= 0x2c ||
181              charCode == 0x2f ||
182              charCode >= 0x3a && charCode <= 0x40 ||
183              charCode >= 0x5b && charCode <= 0x5e ||
184              charCode == 0x60 ||
185              charCode >= 0x7b && charCode <= 0x7f)
186            queue.push([childNodes, offset + 1]);
187        }
188        else if (nodeValue == SYMBOL_DOMAIN)
189        {
190          let index = s.indexOf(":");
191          if (index >= offset)
192          {
193            while (s.startsWith("/", ++index))
194              ;
195
196            while (true)
197            {
198              queue.push([childNodes, index]);
199
200              let dotIndex = s.indexOf(".", index);
201              if (dotIndex == -1 || s.lastIndexOf("/", index) >= dotIndex)
202                break;
203
204              index = dotIndex + 1;
205            }
206          }
207        }
208      }
209    }
210  }
211
212  return results;
213}
214
215var readline = require("readline");
216var performance = require("perf_hooks").performance;
217var rl = readline.createInterface({
218  input: process.stdin,
219  output: process.stdout,
220  terminal: false
221});
222
223let counter = 0;
224//let filters = {};
225rl.on("line", line => {
226  line = line.trim();
227  if (line != "" && !line.startsWith("!") && !line.startsWith("["))
228  {
229    let optIndex = line.indexOf("$");
230    if (optIndex != -1)
231      line = line.slice(0, optIndex);
232    if (!line.startsWith("/") || !line.endsWith("/")) {
233      parse(line, ++counter);
234//      filters[counter] = line;
235    }
236  }
237});
238
239performance.mark("C");
240rl.on("close", () => {
241  performance.mark("D");
242  performance.mark("B");
243  performance.measure("Y", "C", "D");
244  console.log(performance.getEntriesByName("Y")[0].duration);
245  console.log(process.memoryUsage());
246//  console.log(JSON.stringify(tree, null, 2));
247//  console.log(filters);
248  performance.mark("A");
249  match("http://www.example.com/adbanner");
250  performance.mark("B");
251  performance.measure("X", "A", "B");
252  console.log(performance.getEntriesByName("X")[0].duration);
253});