1 /*
2  * Copyright 2018 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #ifndef textParser_DEFINED
9 #define textParser_DEFINED
10 
11 #include <functional>
12 
13 #include "bookmaker.h"
14 
15 class BmhParser;
16 class Definition;
17 
18 class TextParser : public NonAssignable {
TextParser()19     TextParser() {}  // only for ParserCommon, TextParserSave
20     friend class ParserCommon;
21     friend class TextParserSave;
22 public:
// Virtual so that derived parsers can be destroyed through a TextParser pointer.
virtual ~TextParser() = default;
24 
// Builds a parser over the characters in [start, end). The line pointer and the
// read cursor both begin at start; lineCount seeds fLineCount.
TextParser(string fileName, const char* start, const char* end, int lineCount)
    : fFileName(fileName), fStart(start), fLine(start), fChar(start), fEnd(end)
    , fLineCount(lineCount) {}
34 
35     TextParser(const Definition* );
36 
anyOf(const char * str)37     const char* anyOf(const char* str) const {
38         const char* ptr = fChar;
39         while (ptr < fEnd) {
40             if (strchr(str, ptr[0])) {
41                 return ptr;
42             }
43             ++ptr;
44         }
45         return nullptr;
46     }
47 
anyOf(const char * wordStart,const char * wordList[],size_t wordListCount)48     const char* anyOf(const char* wordStart, const char* wordList[], size_t wordListCount) const {
49         const char** wordPtr = wordList;
50         const char** wordEnd = wordPtr + wordListCount;
51         const size_t matchLen = fChar - wordStart;
52         while (wordPtr < wordEnd) {
53             const char* word = *wordPtr++;
54             if (strlen(word) == matchLen && !strncmp(wordStart, word, matchLen)) {
55                 return word;
56             }
57         }
58         return nullptr;
59     }
60 
61     // words must be alpha only
anyWord(const vector<string> & wordList,int spaces)62     string anyWord(const vector<string>& wordList, int spaces) const {
63         const char* matchStart = fChar;
64         do {
65             int count = spaces;
66             while (matchStart < fEnd && !isalpha(matchStart[0])) {
67                 ++matchStart;
68             }
69             const char* matchEnd = matchStart;
70             const char* nextWord = nullptr;
71             while (matchEnd < fEnd) {
72                 if (isalpha(matchEnd[0])) {
73                 } else if (' ' == matchEnd[0] && --count >= 0) {
74                     if (!nextWord) {
75                         nextWord = matchEnd;
76                     }
77                 } else {
78                     break;
79                 }
80                 ++matchEnd;
81             }
82             size_t matchLen = matchEnd - matchStart;
83             for (auto word : wordList) {
84                 if (word.length() != matchLen) {
85                     continue;
86                 }
87                 for (unsigned index = 0; index < matchLen; ++index) {
88                     if (tolower(matchStart[index]) != word[index]) {
89                         goto nextWord;
90                     }
91                 }
92                 return word;
93         nextWord: ;
94             }
95             matchStart = nextWord ? nextWord : matchEnd;
96         } while (matchStart < fEnd);
97         return "";
98     }
99 
back(const char * pattern)100     bool back(const char* pattern) {
101         size_t len = strlen(pattern);
102         const char* start = fChar - len;
103         if (start <= fStart) {
104             return false;
105         }
106         if (strncmp(start, pattern, len)) {
107             return false;
108         }
109         fChar = start;
110         return true;
111     }
112 
backup(const char * pattern)113     char backup(const char* pattern) const {
114         size_t len = strlen(pattern);
115         const char* start = fChar - len;
116         if (start <= fStart) {
117             return '\0';
118         }
119         if (strncmp(start, pattern, len)) {
120             return '\0';
121         }
122         return start[-1];
123     }
124 
backupWord()125     void backupWord() {
126         while (fChar > fStart && isalpha(fChar[-1])) {
127             --fChar;
128         }
129     }
130 
contains(const char * match,const char * lineEnd,const char ** loc)131     bool contains(const char* match, const char* lineEnd, const char** loc) const {
132         const char* result = this->strnstr(match, lineEnd);
133         if (loc) {
134             *loc = result;
135         }
136         return result;
137     }
138 
containsWord(const char * match,const char * lineEnd,const char ** loc)139     bool containsWord(const char* match, const char* lineEnd, const char** loc) {
140         size_t len = strlen(match);
141         do {
142             const char* result = this->strnstr(match, lineEnd);
143             if (!result) {
144                 return false;
145             }
146             if ((result > fStart && isalnum(result[-1])) || (result + len < fEnd
147                     && isalnum(result[len]))) {
148                 fChar = result + len;
149                 continue;
150             }
151             if (loc) {
152                 *loc = result;
153             }
154             return true;
155         } while (true);
156     }
157 
// either \n\n or \n# will stop parsing a typedef
doubleLF()159     const char* doubleLF() const {
160         const char* ptr = fChar - 1;
161         const char* doubleStart = nullptr;
162         while (++ptr < fEnd) {
163             if (!doubleStart) {
164                 if ('\n' == ptr[0]) {
165                     doubleStart = ptr;
166                 }
167                 continue;
168             }
169             if ('\n' == ptr[0] || '#' == ptr[0]) {
170                 return doubleStart;
171             }
172             if (' ' < ptr[0]) {
173                 doubleStart = nullptr;
174             }
175         }
176         return nullptr;
177     }
178 
endsWith(const char * match)179     bool endsWith(const char* match) {
180         int matchLen = strlen(match);
181         if (matchLen > fChar - fLine) {
182             return false;
183         }
184         return !strncmp(fChar - matchLen, match, matchLen);
185     }
186 
eof()187     bool eof() const { return fChar >= fEnd; }
188 
lineEnd()189     const char* lineEnd() const {
190         const char* ptr = fChar;
191         do {
192             if (ptr >= fEnd) {
193                 return ptr;
194             }
195             char test = *ptr++;
196             if (test == '\n' || test == '\0') {
197                 break;
198             }
199         } while (true);
200         return ptr;
201     }
202 
lineLength()203     ptrdiff_t lineLength() const {
204         return this->lineEnd() - fLine;
205     }
206 
207     bool match(TextParser* );
208 
next()209     char next() {
210         SkASSERT(fChar < fEnd);
211         char result = *fChar++;
212         if ('\n' == result) {
213             ++fLineCount;
214             fLine = fChar;
215         }
216         return result;
217     }
218 
peek()219     char peek() const { SkASSERT(fChar < fEnd); return *fChar; }
220 
restorePlace(const TextParser & save)221     void restorePlace(const TextParser& save) {
222         fChar = save.fChar;
223         fLine = save.fLine;
224         fLineCount = save.fLineCount;
225     }
226 
savePlace(TextParser * save)227     void savePlace(TextParser* save) {
228         save->fChar = fChar;
229         save->fLine = fLine;
230         save->fLineCount = fLineCount;
231     }
232 
233     void reportError(const char* errorStr) const;
234     static string ReportFilename(string file);
235     void reportWarning(const char* errorStr) const;
236 
reportError(const char * errorStr)237     template <typename T> T reportError(const char* errorStr) const {
238         this->reportError(errorStr);
239         return T();
240     }
241 
sentenceEnd(const char * check)242     bool sentenceEnd(const char* check) const {
243         while (check > fStart) {
244             --check;
245             if (' ' < check[0] && '.' != check[0]) {
246                 return false;
247             }
248             if ('.' == check[0]) {
249                 return ' ' >= check[1];
250             }
251             if ('\n' == check[0] && '\n' == check[1]) {
252                 return true;
253             }
254         }
255         return true;
256     }
257 
258     void setForErrorReporting(const Definition* , const char* );
259 
skipToBalancedEndBracket(char startB,char endB)260     bool skipToBalancedEndBracket(char startB, char endB) {
261         SkASSERT(fChar < fEnd);
262         SkASSERT(startB == fChar[0]);
263         int startCount = 0;
264         do {
265             char test = this->next();
266             startCount += startB == test;
267             startCount -= endB  == test;
268         } while (startCount && fChar < fEnd);
269         return !startCount;
270     }
271 
272     bool skipToEndBracket(char endBracket, const char* end = nullptr) {
273         if (nullptr == end) {
274             end = fEnd;
275         }
276         while (fChar[0] != endBracket) {
277             if (fChar >= end) {
278                 return false;
279             }
280             (void) this->next();
281         }
282         return true;
283     }
284 
skipToEndBracket(const char * endBracket)285     bool skipToEndBracket(const char* endBracket) {
286         size_t len = strlen(endBracket);
287         while (strncmp(fChar, endBracket, len)) {
288             if (fChar >= fEnd) {
289                 return false;
290             }
291             (void) this->next();
292         }
293         return true;
294     }
295 
skipLine()296     bool skipLine() {
297         return skipToEndBracket('\n');
298     }
299 
skipTo(const char * skip)300     void skipTo(const char* skip) {
301        while (fChar < skip) {
302            this->next();
303        }
304     }
305 
skipToAlpha()306     void skipToAlpha() {
307         while (fChar < fEnd && !isalpha(fChar[0])) {
308             fChar++;
309         }
310     }
311 
312     // returns true if saw close brace
skipToAlphaNum()313     bool skipToAlphaNum() {
314         bool sawCloseBrace = false;
315         while (fChar < fEnd && !isalnum(fChar[0])) {
316             sawCloseBrace |= '}' == *fChar++;
317         }
318         return sawCloseBrace;
319     }
320 
skipExact(const char * pattern)321     bool skipExact(const char* pattern) {
322         if (!this->startsWith(pattern)) {
323             return false;
324         }
325         this->skipName(pattern);
326         return true;
327     }
328 
329     // differs from skipToNonAlphaNum in that a.b isn't considered a full name,
330     // since a.b can't be found as a named definition
skipFullName()331     void skipFullName() {
332         do {
333             char last = '\0';
334             while (fChar < fEnd && (isalnum(fChar[0])
335                     || '_' == fChar[0]  /* || '-' == fChar[0] */
336                     || (':' == fChar[0] && fChar + 1 < fEnd && ':' == fChar[1]))) {
337                 if (':' == fChar[0] && fChar + 1 < fEnd && ':' == fChar[1]) {
338                     fChar++;
339                 }
340                 last = fChar[0];
341                 fChar++;
342             }
343             if (fChar + 1 >= fEnd || '/' != fChar[0] || !isalpha(last) || !isalpha(fChar[1])) {
344                 break;  // stop unless pattern is xxx/xxx as in I/O
345             }
346             fChar++; // skip slash
347         } while (true);
348     }
349 
skipToLineBalance(char open,char close)350     int skipToLineBalance(char open, char close) {
351         int match = 0;
352         while (!this->eof() && '\n' != fChar[0]) {
353             match += open == this->peek();
354             match -= close == this->next();
355         }
356         return match;
357     }
358 
skipToLineStart()359     bool skipToLineStart() {
360         if (!this->skipLine()) {
361             return false;
362         }
363         if (!this->eof()) {
364             return this->skipWhiteSpace();
365         }
366         return true;
367     }
368 
skipToLineStart(int * indent,bool * sawReturn)369     void skipToLineStart(int* indent, bool* sawReturn) {
370         SkAssertResult(this->skipLine());
371         this->skipWhiteSpace(indent, sawReturn);
372     }
373 
skipLower()374     void skipLower() {
375         while (fChar < fEnd && (islower(fChar[0]) || '_' == fChar[0])) {
376             fChar++;
377         }
378     }
379 
skipToNonAlphaNum()380     void skipToNonAlphaNum() {
381         while (fChar < fEnd && (isalnum(fChar[0]) || '_' == fChar[0])) {
382             fChar++;
383         }
384     }
385 
skipToNonName()386     void skipToNonName() {
387         while (fChar < fEnd && (isalnum(fChar[0])
388                 || '_' == fChar[0] || '-' == fChar[0]
389                 || (':' == fChar[0] && fChar + 1 < fEnd && ':' == fChar[1])
390                 || ('.' == fChar[0] && fChar + 1 < fEnd && isalpha(fChar[1])))) {
391             if (':' == fChar[0] && fChar +1 < fEnd && ':' == fChar[1]) {
392                 fChar++;
393             }
394             fChar++;
395         }
396     }
397 
skipPhraseName()398     void skipPhraseName() {
399         while (fChar < fEnd && (islower(fChar[0]) || '_' == fChar[0])) {
400             fChar++;
401         }
402     }
403 
skipToSpace()404     void skipToSpace() {
405         while (fChar < fEnd && ' ' != fChar[0]) {
406             fChar++;
407         }
408     }
409 
skipToWhiteSpace()410     void skipToWhiteSpace() {
411         while (fChar < fEnd && ' ' < fChar[0]) {
412             fChar++;
413         }
414     }
415 
skipName(const char * word)416     bool skipName(const char* word) {
417         size_t len = strlen(word);
418         if (len <= (size_t) (fEnd - fChar) && !strncmp(word, fChar, len)) {
419             for (size_t i = 0; i < len; ++i) {
420                 this->next();
421             }
422         }
423         return this->eof() || ' ' >= fChar[0];
424     }
425 
skipSpace()426     bool skipSpace() {
427         while (' ' == this->peek()) {
428             (void) this->next();
429             if (fChar >= fEnd) {
430                 return false;
431             }
432         }
433         return true;
434     }
435 
skipWord(const char * word)436     bool skipWord(const char* word) {
437         if (!this->skipWhiteSpace()) {
438             return false;
439         }
440         const char* save = fChar;
441         if (!this->skipName(word)) {
442             fChar = save;
443             return false;
444         }
445         if (!this->skipWhiteSpace()) {
446             return false;
447         }
448         return true;
449     }
450 
skipWhiteSpace()451     bool skipWhiteSpace() {
452         while (' ' >= this->peek()) {
453             (void) this->next();
454             if (fChar >= fEnd) {
455                 return false;
456             }
457         }
458         return true;
459     }
460 
skipWhiteSpace(int * indent,bool * skippedReturn)461     bool skipWhiteSpace(int* indent, bool* skippedReturn) {
462         while (' ' >= this->peek()) {
463             *indent = *skippedReturn ? *indent + 1 : 1;
464             if ('\n' == this->peek()) {
465                 *skippedReturn |= true;
466                 *indent = 0;
467             }
468             (void) this->next();
469             if (fChar >= fEnd) {
470                 return false;
471             }
472         }
473         return true;
474     }
475 
startsWith(const char * str)476     bool startsWith(const char* str) const {
477         size_t len = strlen(str);
478         ptrdiff_t lineLen = fEnd - fChar;
479         return len <= (size_t) lineLen && 0 == strncmp(str, fChar, len);
480     }
481 
482     // ignores minor white space differences
startsWith(const char * str,size_t oLen)483     bool startsWith(const char* str, size_t oLen) const {
484         size_t tIndex = 0;
485         size_t tLen = fEnd - fChar;
486         size_t oIndex = 0;
487         while (oIndex < oLen && tIndex < tLen) {
488             bool tSpace = ' ' >= fChar[tIndex];
489             bool oSpace = ' ' >= str[oIndex];
490             if (tSpace != oSpace) {
491                 break;
492             }
493             if (tSpace) {
494                 do {
495                     ++tIndex;
496                 } while (tIndex < tLen && ' ' >= fChar[tIndex]);
497                 do {
498                     ++oIndex;
499                 } while (oIndex < oLen && ' ' >= str[oIndex]);
500                 continue;
501             }
502             if (fChar[tIndex] != str[oIndex]) {
503                 break;
504             }
505             ++tIndex;
506             ++oIndex;
507         }
508         return oIndex >= oLen;
509     }
510 
strnchr(char ch,const char * end)511     const char* strnchr(char ch, const char* end) const {
512         const char* ptr = fChar;
513         while (ptr < end) {
514             if (ptr[0] == ch) {
515                 return ptr;
516             }
517             ++ptr;
518         }
519         return nullptr;
520     }
521 
strnstr(const char * match,const char * end)522     const char* strnstr(const char *match, const char* end) const {
523         size_t matchLen = strlen(match);
524         SkASSERT(matchLen > 0);
525         ptrdiff_t len = end - fChar;
526         SkASSERT(len >= 0);
527         if ((size_t) len < matchLen ) {
528             return nullptr;
529         }
530         size_t count = len - matchLen;
531         for (size_t index = 0; index <= count; index++) {
532             if (0 == strncmp(&fChar[index], match, matchLen)) {
533                 return &fChar[index];
534             }
535         }
536         return nullptr;
537     }
538 
trimmedBracketEnd(const char bracket)539     const char* trimmedBracketEnd(const char bracket) const {
540         int max = (int) (this->lineLength());
541         int index = 0;
542         while (index < max && bracket != fChar[index]) {
543             ++index;
544         }
545         SkASSERT(index < max);
546         while (index > 0 && ' ' >= fChar[index - 1]) {
547             --index;
548         }
549         return fChar + index;
550     }
551 
trimmedBracketEnd(string bracket)552     const char* trimmedBracketEnd(string bracket) const {
553         size_t max = (size_t) (this->lineLength());
554         string line(fChar, max);
555         size_t index = line.find(bracket);
556         SkASSERT(index < max);
557         while (index > 0 && ' ' >= fChar[index - 1]) {
558             --index;
559         }
560         return fChar + index;
561     }
562 
trimmedBracketNoEnd(const char bracket)563     const char* trimmedBracketNoEnd(const char bracket) const {
564         int max = (int) (fEnd - fChar);
565         int index = 0;
566         while (index < max && bracket != fChar[index]) {
567             ++index;
568         }
569         SkASSERT(index < max);
570         while (index > 0 && ' ' >= fChar[index - 1]) {
571             --index;
572         }
573         return fChar + index;
574     }
575 
trimmedLineEnd()576     const char* trimmedLineEnd() const {
577         const char* result = this->lineEnd();
578         while (result > fChar && ' ' >= result[-1]) {
579             --result;
580         }
581         return result;
582     }
583 
trimEnd()584     void trimEnd() {
585         while (fEnd > fStart && ' ' >= fEnd[-1]) {
586             --fEnd;
587         }
588     }
589 
590     // FIXME: nothing else in TextParser knows from C++ --
591     // there could be a class between TextParser and ParserCommon
592     virtual string typedefName();
593 
wordEnd()594     const char* wordEnd() const {
595         const char* end = fChar;
596         while (isalnum(end[0]) || '_' == end[0] || '-' == end[0]) {
597             ++end;
598         }
599         return end;
600     }
601 
602     string fFileName;
603     const char* fStart;
604     const char* fLine;
605     const char* fChar;
606     const char* fEnd;
607     size_t fLineCount;
608 };
609 
610 class TextParserSave {
611 public:
TextParserSave(TextParser * parser)612     TextParserSave(TextParser* parser) {
613         fParser = parser;
614         fSave.fFileName = parser->fFileName;
615         fSave.fStart = parser->fStart;
616         fSave.fLine = parser->fLine;
617         fSave.fChar = parser->fChar;
618         fSave.fEnd = parser->fEnd;
619         fSave.fLineCount = parser->fLineCount;
620     }
621 
restore()622     void restore() const {
623         fParser->fFileName = fSave.fFileName;
624         fParser->fStart = fSave.fStart;
625         fParser->fLine = fSave.fLine;
626         fParser->fChar = fSave.fChar;
627         fParser->fEnd = fSave.fEnd;
628         fParser->fLineCount = fSave.fLineCount;
629     }
630 
631 private:
632     TextParser* fParser;
633     TextParser fSave;
634 };
635 
// Returns true if s contains at least one character greater than ' '.
static inline bool has_nonwhitespace(string s) {
    for (const char& ch : s) {
        if (' ' < ch) {
            return true;
        }
    }
    return false;
}
646 
// Removes trailing white space (as classified by std::isspace) from s in place.
static inline void trim_end(string &s) {
    // A lambda replaces std::not1(std::ptr_fun(...)), which newer C++
    // standards removed. The cast to unsigned char keeps std::isspace defined
    // for bytes with the high bit set.
    s.erase(std::find_if(s.rbegin(), s.rend(),
            [](char c) { return !std::isspace(static_cast<unsigned char>(c)); }).base(),
            s.end());
}
651 
// Removes trailing ' ' characters (and only those) from s in place.
static inline void trim_end_spaces(string &s) {
    // find_last_not_of returns npos when s is empty or all spaces; npos + 1
    // wraps to 0, so the whole string is erased in that case.
    s.erase(s.find_last_not_of(' ') + 1);
}
657 
// Removes leading white space (as classified by std::isspace) from s in place.
static inline void trim_start(string &s) {
    // A lambda replaces std::not1(std::ptr_fun(...)), which newer C++
    // standards removed. The cast to unsigned char keeps std::isspace defined
    // for bytes with the high bit set.
    s.erase(s.begin(), std::find_if(s.begin(), s.end(),
            [](char c) { return !std::isspace(static_cast<unsigned char>(c)); }));
}
662 
trim_start_end(string & s)663 static inline void trim_start_end(string& s) {
664     trim_start(s);
665     trim_end(s);
666 }
667 
// Returns a copy of s in which every run of characters <= ' ' has been
// replaced by a single ' '. Leading and trailing runs are collapsed too, not
// removed. An empty input yields an empty result.
static inline string trim_inline_spaces(string s) {
    bool lastSpace = false;
    string trimmed;
    // Range-for instead of walking from &s.front() to &s.back(): those
    // accessors must not be called on an empty string.
    for (const char c : s) {
        if (' ' >= c) {
            if (!lastSpace) {
                trimmed += ' ';
            }
            lastSpace = true;
            continue;
        }
        lastSpace = false;
        trimmed += c;
    }
    return trimmed;
}
685 
686 class EscapeParser : public TextParser {
687 public:
EscapeParser(const char * start,const char * end)688     EscapeParser(const char* start, const char* end) :
689             TextParser("", start, end, 0) {
690         const char* reader = fStart;
691         fStorage = new char[end - start];
692         char* writer = fStorage;
693         while (reader < fEnd) {
694             char ch = *reader++;
695             if (ch != '\\') {
696                 *writer++ = ch;
697             } else {
698                 char ctrl = *reader++;
699                 if (ctrl == 'u') {
700                     unsigned unicode = 0;
701                     for (int i = 0; i < 4; ++i) {
702                         unicode <<= 4;
703                         SkASSERT((reader[0] >= '0' && reader[0] <= '9') ||
704                             (reader[0] >= 'A' && reader[0] <= 'F') ||
705                             (reader[0] >= 'a' && reader[0] <= 'f'));
706                         int nibble = *reader++ - '0';
707                         if (nibble > 9) {
708                             nibble = (nibble & ~('a' - 'A')) - 'A' + '9' + 1;
709                         }
710                         unicode |= nibble;
711                     }
712                     SkASSERT(unicode < 256);
713                     *writer++ = (unsigned char) unicode;
714                 } else {
715                     SkASSERT(ctrl == 'n');
716                     *writer++ = '\n';
717                 }
718             }
719         }
720         fStart = fLine = fChar = fStorage;
721         fEnd = writer;
722     }
723 
~EscapeParser()724     ~EscapeParser() override {
725         delete fStorage;
726     }
727 private:
728     char* fStorage;
729 };
730 
731 // some methods cannot be trivially parsed; look for class-name / ~ / operator
732 class MethodParser : public TextParser {
733 public:
MethodParser(string className,string fileName,const char * start,const char * end,int lineCount)734     MethodParser(string className, string fileName,
735             const char* start, const char* end, int lineCount)
736         : TextParser(fileName, start, end, lineCount)
737         , fClassName(className) {
738         size_t doubleColons = className.find_last_of("::");
739         if (string::npos != doubleColons) {
740             fLocalName = className.substr(doubleColons + 1);
741             SkASSERT(fLocalName.length() > 0);
742         }
743     }
744 
~MethodParser()745     ~MethodParser() override {}
746 
localName()747     string localName() const {
748         return fLocalName;
749     }
750 
setLocalName(string name)751     void setLocalName(string name) {
752         if (name == fClassName) {
753             fLocalName = "";
754         } else {
755             fLocalName = name;
756         }
757     }
758 
759     // returns true if close brace was skipped
skipToMethodStart()760     int skipToMethodStart() {
761         if (!fClassName.length()) {
762             return this->skipToAlphaNum();
763         }
764         int braceCount = 0;
765         while (!this->eof() && !isalnum(this->peek()) && '~' != this->peek()) {
766             braceCount += '{' == this->peek();
767             braceCount -= '}' == this->peek();
768             this->next();
769         }
770         return braceCount;
771     }
772 
773     void skipToMethodEnd(Resolvable resolvable);
774 
wordEndsWith(const char * str)775     bool wordEndsWith(const char* str) const {
776         const char* space = this->strnchr(' ', fEnd);
777         if (!space) {
778             return false;
779         }
780         size_t len = strlen(str);
781         if (space < fChar + len) {
782             return false;
783         }
784         return !strncmp(str, space - len, len);
785     }
786 
787 private:
788     string fClassName;
789     string fLocalName;
790     typedef TextParser INHERITED;
791 };
792 
793 #endif
794