1 /++
2   A module containing parsing code utilities
3 
4   Copyright: © 2017 Szabo Bogdan
5   License: Subject to the terms of the MIT license, as written in the included LICENSE.txt file.
6   Authors: Szabo Bogdan
7 +/
8 module trial.discovery.code;
9 
import std.algorithm;
import std.string;
import std.range;
import std.file;
import std.stdio;
import std.conv;
16 
17 version (Have_libdparse)
18 {
19   public import dparse.ast;
20   public import dparse.lexer;
21   public import dparse.parser;
22 }
23 
/++
  Get the module name of a DLang source file.

  The file is read line by line. The first line that starts with the `module`
  keyword followed by a space or a tab is treated as the module declaration,
  and the first word found before the `;` is returned.

  Params:
    fileName = path of the source file

  Returns: the module name, or an empty string when the path does not exist,
    when it is a directory, or when no module declaration line is found
+/
string getModuleName(string fileName)
{
  if (!exists(fileName) || isDir(fileName))
  {
    return "";
  }

  enum keyword = "module";
  auto file = File(fileName);

  foreach (line; file.byLine())
  {
    if (!line.startsWith(keyword))
    {
      continue;
    }

    auto declaration = line[keyword.length .. $];

    // `module` must be a whole word: a bare `module` line or an identifier
    // like `moduleCounter` is not a module declaration
    if (declaration.length == 0 || (declaration[0] != ' ' && declaration[0] != '\t'))
    {
      continue;
    }

    // everything after the first `;` (e.g. a trailing comment) is ignored
    auto terminator = declaration.indexOf(';');
    auto words = (terminator == -1 ? declaration : declaration[0 .. terminator]).split;

    if (words.length == 0)
    {
      continue;
    }

    // byLine reuses its buffer, so the name has to be copied before returning
    return words[0].idup;
  }

  return "";
}
48 
49 version (Have_libdparse)
50 {
51   const(Token)[] stringToDTokens(string data)
52   {
53     try {
54       ubyte[] fileBytes = cast(ubyte[]) data;
55 
56       StringCache cache = StringCache(StringCache.defaultBucketCount);
57 
58       LexerConfig config;
59       config.stringBehavior = StringBehavior.source;
60       config.fileName = "";
61       config.commentBehavior = CommentBehavior.intern;
62 
63       auto lexer = DLexer(fileBytes, config, &cache);
64       const(Token)[] tokens = lexer.array;
65 
66       return tokens.map!(token => const Token(token.type, token.text.idup, token.line, token.column, token.index)).array;
67     } catch(Throwable t) {
68       t.writeln;
69       return [];
70     }
71   }
72 
73   ///
74   struct DLangAttribute
75   {
76     const(Token)[] tokens;
77 
78     inout
79     {
80       string identifier()
81       {
82         string result;
83 
84         foreach (token; tokens)
85         {
86           if (str(token.type) == "(")
87           {
88             break;
89           }
90 
91           result ~= token.text;
92         }
93 
94         return result;
95       }
96 
97       string value()
98       {
99         bool after;
100         string result;
101 
102         foreach (token; tokens)
103         {
104           if (after)
105           {
106             result ~= token.text.strip('"').strip('`').strip('\'');
107           }
108 
109           if (str(token.type) == "(")
110           {
111             after = true;
112           }
113         }
114 
115         return result;
116       }
117 
118       auto line()
119       {
120         return tokens[0].line;
121       }
122     }
123   }
124 
125   struct DLangFunction
126   {
127     const(DLangAttribute)[] attributes;
128     const(Token)[] tokens;
129 
130     string name()
131     {
132       auto result = TokenIterator(tokens).readUntilType("(").replace("\n", " ")
133         .replace("\r", " ").replace("\t", " ").split(" ");
134 
135       std.algorithm.reverse(result);
136 
137       return result[0];
138     }
139 
140     bool hasAttribute(string name)
141     {
142       return !attributes.filter!(a => a.identifier == name).empty;
143     }
144 
145     auto getAttribute(string name)
146     {
147       return attributes.filter!(a => a.identifier == name).front;
148     }
149 
150     string testName()
151     {
152       foreach (attribute; attributes)
153       {
154         if (attribute.identifier == "")
155         {
156           return attribute.value;
157         }
158       }
159 
160       return name.camelToSentence;
161     }
162 
163     size_t line()
164     {
165       return TokenIterator(tokens).skipUntilType("(").currentToken.line;
166     }
167   }
168 
169   struct DLangClass
170   {
171     const(Token)[] tokens;
172 
173     /// returns the class name
174     string name()
175     {
176       auto iterator = TokenIterator(tokens);
177       auto name = iterator.readUntilType("{");
178 
179       import std.stdio;
180 
181       if (name.indexOf(":") != -1)
182       {
183         name = name.split(":")[0];
184       }
185 
186       return name.strip;
187     }
188 
189     DLangFunction[] functions()
190     {
191       int paranthesisCount;
192 
193       auto iterator = TokenIterator(tokens);
194       iterator.skipUntilType("{");
195 
196       const(Token)[] currentTokens;
197       DLangFunction[] discoveredFunctions;
198       DLangAttribute[] attributes;
199       bool readingFunction;
200       int functionLevel = 1;
201 
202       foreach (token; iterator)
203       {
204         string type = token.type.str;
205         currentTokens ~= token;
206 
207         if (type == "@")
208         {
209           attributes ~= iterator.readAttribute;
210         }
211 
212         if (type == "{")
213         {
214           paranthesisCount++;
215         }
216 
217         if (type == "}")
218         {
219           paranthesisCount--;
220 
221           if (paranthesisCount == functionLevel)
222           {
223             discoveredFunctions ~= DLangFunction(attributes, currentTokens);
224           }
225         }
226 
227         readingFunction = paranthesisCount > functionLevel;
228 
229         if (type == "}" || (!readingFunction && type == ";"))
230         {
231           currentTokens = [];
232           attributes = [];
233         }
234       }
235 
236       return discoveredFunctions;
237     }
238   }
239 
240   /// An iterator that helps to deal with DLang tokens
241   struct TokenIterator
242   {
243     private
244     {
245       const(Token)[] tokens;
246       size_t index;
247     }
248 
249     ///
250     int opApply(int delegate(const(Token)) dg)
251     {
252       int result = 0;
253 
254       while (index < tokens.length)
255       {
256         result = dg(tokens[index]);
257         index++;
258         if (result)
259         {
260           break;
261         }
262       }
263 
264       return result;
265     }
266 
267     ///
268     ref auto skipWsAndComments()
269     {
270       while (index < tokens.length)
271       {
272         auto type = str(tokens[index].type);
273         if (type != "comment" && type != "whitespace")
274         {
275           break;
276         }
277 
278         index++;
279       }
280 
281       return this;
282     }
283 
284     ///
285     auto currentToken()
286     {
287       return tokens[index];
288     }
289 
290     /// Skip until a token with a certain text is reached
291     ref auto skipUntil(string text)
292     {
293       while (index < tokens.length)
294       {
295         if (tokens[index].text == text)
296         {
297           break;
298         }
299 
300         index++;
301       }
302 
303       return this;
304     }
305 
306     ref auto skipNextBlock()
307     {
308       readNextBlock();
309       return this;
310     }
311 
312     auto readNextBlock()
313     {
314       const(Token)[] blockTokens = [];
315 
316       bool readingBlock;
317       int paranthesisCount;
318 
319       while (index < tokens.length)
320       {
321         auto type = str(tokens[index].type);
322 
323         if (type == "{")
324         {
325           paranthesisCount++;
326           readingBlock = true;
327         }
328 
329         if (type == "}")
330         {
331           paranthesisCount--;
332         }
333 
334         blockTokens ~= tokens[index];
335         index++;
336         if (readingBlock && paranthesisCount == 0)
337         {
338           break;
339         }
340       }
341 
342       return blockTokens;
343     }
344 
345     /// Skip until a token with a certain type is reached
346     ref auto skipUntilType(string type)
347     {
348       while (index < tokens.length)
349       {
350         if (str(tokens[index].type) == type)
351         {
352           break;
353         }
354 
355         index++;
356       }
357 
358       return this;
359     }
360 
361     /// Skip one token
362     ref auto skipOne()
363     {
364       index++;
365 
366       return this;
367     }
368 
369     /// Concatenate all the tokens until the first token of a certain type
370     /// that will be ignored
371     string readUntilType(string type)
372     {
373       string result;
374 
375       while (index < tokens.length)
376       {
377         if (str(tokens[index].type) == type)
378         {
379           break;
380         }
381 
382         result ~= tokens[index].text == "" ? str(tokens[index].type) : tokens[index].text;
383         index++;
384       }
385 
386       return result;
387     }
388 
389     /// Returns a Dlang class. You must call this method after the
390     /// class token was read.
391     DLangClass readClass()
392     {
393       const(Token)[] classTokens = [];
394 
395       bool readingClass;
396       int paranthesisCount;
397 
398       while (index < tokens.length)
399       {
400         auto type = str(tokens[index].type);
401 
402         if (type == "{")
403         {
404           paranthesisCount++;
405           readingClass = true;
406         }
407 
408         if (type == "}")
409         {
410           paranthesisCount--;
411         }
412         classTokens ~= tokens[index];
413         index++;
414         if (readingClass && paranthesisCount == 0)
415         {
416           break;
417         }
418       }
419 
420       return DLangClass(classTokens);
421     }
422 
423     /// Returns a Dlang attribute. You must call this method after the
424     /// @ token was read.
425     DLangAttribute readAttribute()
426     {
427       const(Token)[] attributeTokens = [];
428 
429       int paranthesisCount;
430       bool readingParams;
431       bool foundWs;
432 
433       while (index < tokens.length)
434       {
435         auto type = str(tokens[index].type);
436 
437         if (type == "whitespace" && paranthesisCount == 0 && !readingParams)
438         {
439           foundWs = true;
440         }
441 
442         if (foundWs && type == ".")
443         {
444           foundWs = false;
445         }
446 
447         if (foundWs && type != "(")
448         {
449           break;
450         }
451 
452         if (type == "(")
453         {
454           paranthesisCount++;
455           readingParams = true;
456           foundWs = false;
457         }
458 
459         if (type == ")")
460         {
461           paranthesisCount--;
462         }
463 
464         attributeTokens ~= tokens[index];
465 
466         if (readingParams && paranthesisCount == 0)
467         {
468           break;
469         }
470 
471         index++;
472       }
473 
474       return DLangAttribute(attributeTokens);
475     }
476   }
477 }
478 
/++
  Converts a string from camel notation to a readable sentence.

  Every character that is equal to its upper case form (upper case letters,
  but also digits, `_` and other characters without a lower case form) starts
  a new word: it is lowered and separated from the previous text with a space.
  No space is added in front of the first character. The result is passed
  through `capitalize`.

  Params:
    name = the camel case text

  Returns: the sentence, e.g. `"thisIsATest"` becomes `"This is a test"`
+/
string camelToSentence(const string name) pure
{
  string sentence;

  // iterate by code point, so multi-byte UTF-8 sequences are kept intact
  foreach (dchar ch; name)
  {
    if (ch.toUpper != ch)
    {
      sentence ~= ch;
      continue;
    }

    // a new word starts here; the first word needs no separator
    if (sentence.length > 0)
    {
      sentence ~= ' ';
    }

    sentence ~= ch.toLower;
  }

  return sentence.capitalize;
}