1 /*
2  * Copyright 2017 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include "bmhParser.h"
9 
10 #include "SkCommandLineFlags.h"
11 #include "SkOSFile.h"
12 #include "SkOSPath.h"
13 
14 /*
15 things to do
16 if cap word is beginning of sentence, add it to table as lower-case
17    word must have only a single initial capital
18 
19 if word is camel cased, look for :: matches on suffix
20 
21 when function crosses lines, whole thing isn't seen as a 'word' e.g., search for largeArc in path
22 
23 words in external not seen
24 
25 look for x-bit but allow x bits
26 
27 don't treat 'pos' or 'glyphs' as spell-checkable as in 'RunBuffer.pos' or 'RunBuffer.glyphs'
28  */
29 
// Bookkeeping kept for each distinct word the spell checker records.
struct CheckEntry {
    std::string fFile;  // file name current when the entry was (re)recorded
    int fLine;          // line number stored alongside fFile
    int fCount;         // number of times the word has been recorded
    bool fOverride;     // copy of the checker's override flag; report() skips entries with it set
};
36 
37 class SpellCheck : public ParserCommon {
38 public:
SpellCheck(const BmhParser & bmh)39     SpellCheck(const BmhParser& bmh) : ParserCommon()
40         , fBmhParser(bmh) {
41         this->reset();
42     }
43     bool check(const char* match);
44     void report(SkCommandLineFlags::StringArray report);
45 private:
46     enum class TableState {
47         kNone,
48         kRow,
49         kColumn,
50     };
51 
52     enum class PrintCheck {
53         kWordsOnly,
54         kAllowNumbers,
55     };
56 
57     bool check(Definition* );
58     bool checkable(MarkType markType);
59     void childCheck(Definition* def, const char* start);
60     void leafCheck(const char* start, const char* end);
parseFromFile(const char * path)61     bool parseFromFile(const char* path) override { return true; }
62     void printCheck(string str, PrintCheck);
63 
reset()64     void reset() override {
65         INHERITED::resetCommon();
66         fMethod = nullptr;
67         fRoot = nullptr;
68         fInCode = false;
69         fInConst = false;
70         fInFormula = false;
71         fInDescription = false;
72         fInStdOut = false;
73         fOverride = false;
74     }
75 
76     void wordCheck(string str);
77     void wordCheck(ptrdiff_t len, const char* ch);
78 
79     unordered_map<string, CheckEntry> fCode;
80     unordered_map<string, CheckEntry> fColons;
81     unordered_map<string, CheckEntry> fDigits;
82     unordered_map<string, CheckEntry> fDots;
83     unordered_map<string, CheckEntry> fParens;  // also hold destructors, operators
84     unordered_map<string, CheckEntry> fUnderscores;
85     unordered_map<string, CheckEntry> fWords;
86     const BmhParser& fBmhParser;
87     Definition* fMethod;
88     RootDefinition* fRoot;
89     int fLocalLine;
90     bool fInCode;
91     bool fInConst;
92     bool fInDescription;
93     bool fInFormula;
94     bool fInStdOut;
95     bool fOverride;
96     typedef ParserCommon INHERITED;
97 };
98 
/* This doesn't perform a traditional spell or grammar check, although
   maybe it should. Instead it looks for words used uncommonly and lower
   case words that match capitalized words that are not sentence starters.
   It also looks for articles preceding capitalized words and their
   modifiers to try to maintain a consistent voice.
   Maybe also look for passive verbs (e.g. 'is') and suggest active ones?
 */
spellCheck(const char * match,SkCommandLineFlags::StringArray report) const106 void BmhParser::spellCheck(const char* match, SkCommandLineFlags::StringArray report) const {
107     SpellCheck checker(*this);
108     checker.check(match);
109     checker.report(report);
110 }
111 
spellStatus(const char * statusFile,SkCommandLineFlags::StringArray report) const112 void BmhParser::spellStatus(const char* statusFile, SkCommandLineFlags::StringArray report) const {
113     SpellCheck checker(*this);
114     StatusIter iter(statusFile, ".bmh", StatusFilter::kInProgress);
115     string file;
116     iter.next(&file, nullptr);
117     string match = iter.baseDir();
118     checker.check(match.c_str());
119     checker.report(report);
120 }
121 
check(const char * match)122 bool SpellCheck::check(const char* match) {
123     for (const auto& topic : fBmhParser.fTopicMap) {
124         Definition* topicDef = topic.second;
125         if (topicDef->fParent) {
126             continue;
127         }
128         if (!topicDef->isRoot()) {
129             return this->reportError<bool>("expected root topic");
130         }
131         fRoot = topicDef->asRoot();
132         if (string::npos == fRoot->fFileName.rfind(match)) {
133             continue;
134         }
135         fOverride = string::npos != fRoot->fFileName.rfind("undocumented.bmh")
136                 || string::npos != fRoot->fFileName.rfind("markup.bmh")
137                 || string::npos != fRoot->fFileName.rfind("usingBookmaker.bmh");
138         this->check(topicDef);
139     }
140     return true;
141 }
142 
// Returns true if every character of str is a lower case letter; an empty
// string yields true.
static bool all_lower(const std::string& str) {
    for (const char c : str) {
        // <cctype> classifiers are undefined for negative values, so widen
        // through unsigned char first.
        if (!islower(static_cast<unsigned char>(c))) {
            return false;
        }
    }
    return true;
}
151 
check(Definition * def)152 bool SpellCheck::check(Definition* def) {
153     fFileName = def->fFileName;
154     fLineCount = def->fLineCount;
155     string printable = def->printableName();
156     const char* textStart = def->fContentStart;
157     switch (def->fMarkType) {
158         case MarkType::kAlias:
159             break;
160         case MarkType::kAnchor:
161             break;
162         case MarkType::kBug:
163             break;
164         case MarkType::kClass:
165             this->wordCheck(def->fName);
166             break;
167         case MarkType::kCode:
168             fInCode = true;
169             break;
170         case MarkType::kColumn:
171             break;
172         case MarkType::kComment:
173             break;
174         case MarkType::kConst: {
175             fInConst = true;
176             this->wordCheck(def->fName);
177             const char* lineEnd = strchr(textStart, '\n');
178             this->wordCheck(lineEnd - textStart, textStart);
179             textStart = lineEnd;
180         } break;
181         case MarkType::kDefine:
182             break;
183         case MarkType::kDescription:
184             fInDescription = true;
185             break;
186         case MarkType::kDetails:
187             break;
188         case MarkType::kDuration:
189             break;
190         case MarkType::kEnum:
191         case MarkType::kEnumClass:
192             this->wordCheck(def->fName);
193             break;
194         case MarkType::kExample:
195             break;
196         case MarkType::kExternal:
197             break;
198         case MarkType::kFile:
199             break;
200         case MarkType::kFilter:
201             break;
202         case MarkType::kFormula:
203             fInFormula = true;
204             break;
205         case MarkType::kFunction:
206             break;
207         case MarkType::kHeight:
208             break;
209         case MarkType::kIllustration:
210             break;
211         case MarkType::kImage:
212             break;
213         case MarkType::kIn:
214             break;
215         case MarkType::kLegend:
216             break;
217         case MarkType::kLine:
218             break;
219         case MarkType::kLink:
220             break;
221         case MarkType::kList:
222             break;
223         case MarkType::kLiteral:
224             break;
225         case MarkType::kMarkChar:
226             break;
227         case MarkType::kMember:
228             break;
229         case MarkType::kMethod: {
230             string method_name = def->methodName();
231             if (all_lower(method_name)) {
232                 method_name += "()";
233             }
234             if (!def->isClone() && Definition::MethodType::kOperator != def->fMethodType) {
235                 this->wordCheck(method_name);
236             }
237             fMethod = def;
238             } break;
239         case MarkType::kNoExample:
240             break;
241         case MarkType::kNoJustify:
242             break;
243         case MarkType::kOutdent:
244             break;
245         case MarkType::kParam: {
246             TextParser paramParser(def->fFileName, def->fStart, def->fContentStart,
247                     def->fLineCount);
248             paramParser.skipWhiteSpace();
249             SkASSERT(paramParser.startsWith("#Param"));
250             paramParser.next(); // skip hash
251             paramParser.skipToNonName(); // skip Param
252             paramParser.skipSpace();
253             const char* paramName = paramParser.fChar;
254             paramParser.skipToSpace();
255             fInCode = true;
256             this->wordCheck(paramParser.fChar - paramName, paramName);
257             fInCode = false;
258         } break;
259         case MarkType::kPhraseDef:
260             break;
261         case MarkType::kPhraseParam:
262             break;
263         case MarkType::kPhraseRef:
264             break;
265         case MarkType::kPlatform:
266             break;
267         case MarkType::kPopulate:
268             break;
269         case MarkType::kReturn:
270             break;
271         case MarkType::kRow:
272             break;
273         case MarkType::kSeeAlso:
274             break;
275         case MarkType::kSet:
276             break;
277         case MarkType::kStdOut: {
278             fInStdOut = true;
279             TextParser code(def);
280             code.skipSpace();
281             while (!code.eof()) {
282                 const char* end = code.trimmedLineEnd();
283                 this->wordCheck(end - code.fChar, code.fChar);
284                 code.skipToLineStart();
285             }
286             fInStdOut = false;
287             } break;
288         case MarkType::kStruct:
289             fRoot = def->asRoot();
290             this->wordCheck(def->fName);
291             break;
292         case MarkType::kSubstitute:
293             break;
294         case MarkType::kSubtopic:
295             // TODO: add a tag that allows subtopic labels in illustrations to skip spellcheck?
296             if (string::npos == fFileName.find("illustrations.bmh")) {
297                 this->printCheck(printable, PrintCheck::kAllowNumbers);
298             }
299             break;
300         case MarkType::kTable:
301             break;
302         case MarkType::kTemplate:
303             break;
304         case MarkType::kText:
305             break;
306         case MarkType::kToDo:
307             break;
308         case MarkType::kTopic:
309             this->printCheck(printable, PrintCheck::kWordsOnly);
310             break;
311         case MarkType::kTypedef:
312             break;
313         case MarkType::kUnion:
314             break;
315         case MarkType::kVolatile:
316             break;
317         case MarkType::kWidth:
318             break;
319         default:
320             SkASSERT(0); // handle everything
321             break;
322     }
323     this->childCheck(def, textStart);
324     switch (def->fMarkType) {  // post child work, at least for tables
325         case MarkType::kCode:
326             fInCode = false;
327             break;
328         case MarkType::kColumn:
329             break;
330         case MarkType::kDescription:
331             fInDescription = false;
332             break;
333         case MarkType::kEnum:
334         case MarkType::kEnumClass:
335             break;
336         case MarkType::kExample:
337             break;
338         case MarkType::kFormula:
339             fInFormula = false;
340             break;
341         case MarkType::kLegend:
342             break;
343         case MarkType::kMethod:
344             fMethod = nullptr;
345             break;
346         case MarkType::kConst:
347             fInConst = false;
348         case MarkType::kParam:
349             break;
350         case MarkType::kReturn:
351         case MarkType::kSeeAlso:
352             break;
353         case MarkType::kRow:
354             break;
355         case MarkType::kStruct:
356             fRoot = fRoot->rootParent();
357             break;
358         case MarkType::kTable:
359             break;
360         default:
361             break;
362     }
363     return true;
364 }
365 
checkable(MarkType markType)366 bool SpellCheck::checkable(MarkType markType) {
367     return Resolvable::kYes == fBmhParser.kMarkProps[(int) markType].fResolve;
368 }
369 
childCheck(Definition * def,const char * start)370 void SpellCheck::childCheck(Definition* def, const char* start) {
371     const char* end;
372     fLineCount = def->fLineCount;
373     if (def->isRoot()) {
374         fRoot = def->asRoot();
375     }
376     for (auto& child : def->fChildren) {
377         end = child->fStart;
378         if (this->checkable(def->fMarkType)) {
379             this->leafCheck(start, end);
380         }
381         this->check(child);
382         start = child->fTerminator;
383     }
384     if (this->checkable(def->fMarkType)) {
385         end = def->fContentEnd;
386         this->leafCheck(start, end);
387     }
388 }
389 
leafCheck(const char * start,const char * end)390 void SpellCheck::leafCheck(const char* start, const char* end) {
391     const char* chPtr = start;
392     int inAngles = 0;
393     int inParens = 0;
394     bool inQuotes = false;
395     bool allLower = true;
396     char prePriorCh = 0;
397     char priorCh = 0;
398     char lastCh = 0;
399     const char* wordStart = nullptr;
400     const char* wordEnd = nullptr;
401     const char* possibleEnd = nullptr;
402     fLocalLine = 0;
403     do {
404         if (wordStart && wordEnd) {
405             if (!allLower || (!inQuotes && '\"' != lastCh && !inParens
406                     && ')' != lastCh && !inAngles && '>' != lastCh)) {
407                 string word(wordStart, (possibleEnd ? possibleEnd : wordEnd) - wordStart);
408                 if ("e" != word || !isdigit(prePriorCh) || ('+' != lastCh &&
409                         '-' != lastCh && !isdigit(lastCh))) {
410                     this->wordCheck(word);
411                 }
412             }
413             wordStart = nullptr;
414         }
415         if (chPtr == end) {
416             break;
417         }
418         switch (*chPtr) {
419             case '>':
420                 if (isalpha(lastCh)) {
421                     --inAngles;
422                     SkASSERT(inAngles >= 0);
423                 }
424                 wordEnd = chPtr;
425                 break;
426             case '(':
427                 ++inParens;
428                 possibleEnd = chPtr;
429                 break;
430             case ')':
431                 --inParens;
432                 if ('(' == lastCh) {
433                     wordEnd = chPtr + 1;
434                 } else {
435                     wordEnd = chPtr;
436                 }
437                 SkASSERT(inParens >= 0 || fInStdOut);
438                 break;
439             case '\"':
440                 inQuotes = !inQuotes;
441                 wordEnd = chPtr;
442                 SkASSERT(inQuotes == !wordStart);
443                 break;
444             case 'A': case 'B': case 'C': case 'D': case 'E':
445             case 'F': case 'G': case 'H': case 'I': case 'J':
446             case 'K': case 'L': case 'M': case 'N': case 'O':
447             case 'P': case 'Q': case 'R': case 'S': case 'T':
448             case 'U': case 'V': case 'W': case 'X': case 'Y':
449             case 'Z':
450                 allLower = false;
451             case 'a': case 'b': case 'c': case 'd': case 'e':
452             case 'f': case 'g': case 'h': case 'i': case 'j':
453             case 'k': case 'l': case 'm': case 'n': case 'o':
454             case 'p': case 'q': case 'r': case 's': case 't':
455             case 'u': case 'v': case 'w': case 'x': case 'y':
456             case 'z':
457                 if (!wordStart) {
458                     wordStart = chPtr;
459                     wordEnd = nullptr;
460                     possibleEnd = nullptr;
461                     allLower = 'a' <= *chPtr;
462                     if ('<' == lastCh || ('<' == priorCh && '/' == lastCh)) {
463                         ++inAngles;
464                     }
465                 }
466                 break;
467             case '0': case '1': case '2': case '3': case '4':
468             case '5': case '6': case '7': case '8': case '9':
469             case '_':
470                 allLower = false;
471             case '-':  // note that dash doesn't clear allLower
472                 break;
473             case '!':
474                 if (!inQuotes) {
475                     wordEnd = chPtr;
476                 }
477                 break;
478             case '\n':
479                 ++fLocalLine;
480                 // fall through
481             default:
482                 wordEnd = chPtr;
483                 break;
484         }
485         prePriorCh = priorCh;
486         priorCh = lastCh;
487         lastCh = *chPtr;
488     } while (++chPtr <= end);
489 }
490 
printCheck(string str,PrintCheck allowed)491 void SpellCheck::printCheck(string str, PrintCheck allowed) {
492     string word;
493     for (std::stringstream stream(str); stream >> word; ) {
494         if (PrintCheck::kAllowNumbers == allowed && (std::isdigit(word.back()) || 'x' == word.back())) {
495             // allow ###x for RGB_888x
496             if ((size_t) std::count_if(word.begin(), word.end() - 1,
497                     [](unsigned char c){ return std::isdigit(c); } ) == word.length() - 1) {
498                 continue;
499             }
500         }
501         wordCheck(word);
502     }
503 }
504 
stringCompare(const std::pair<string,CheckEntry> & i,const std::pair<string,CheckEntry> & j)505 static bool stringCompare(const std::pair<string, CheckEntry>& i, const std::pair<string, CheckEntry>& j) {
506     return i.first.compare(j.first) < 0;
507 }
508 
report(SkCommandLineFlags::StringArray report)509 void SpellCheck::report(SkCommandLineFlags::StringArray report) {
510     vector<std::pair<string, CheckEntry>> elems(fWords.begin(), fWords.end());
511     std::sort(elems.begin(), elems.end(), stringCompare);
512     if (report.contains("once")) {
513         for (auto iter : elems) {
514             if (iter.second.fOverride) {
515                 continue;
516             }
517             if (iter.second.fCount == 1) {
518                 string fullName = this->ReportFilename(iter.second.fFile);
519                 SkDebugf("%s(%d): %s\n", fullName.c_str(), iter.second.fLine,
520                         iter.first.c_str());
521             }
522         }
523         SkDebugf("\n");
524         return;
525     }
526     if (report.contains("all")) {
527         int column = 0;
528         char lastInitial = 'a';
529         int count = 0;
530         for (auto iter : elems) {
531             if (iter.second.fOverride) {
532                 continue;
533             }
534             string check = iter.first.c_str();
535             bool allLower = true;
536             for (auto c : check) {
537                 if (isupper(c)) {
538                     allLower = false;
539                     break;
540                 }
541             }
542             if (!allLower) {
543                 continue;
544             }
545             if (column + check.length() > 100 || check[0] != lastInitial) {
546                 SkDebugf("\n");
547                 column = 0;
548             }
549             if (check[0] != lastInitial) {
550                 SkDebugf("\n");
551                 lastInitial = check[0];
552             }
553             SkDebugf("%s ", check.c_str());
554             column += check.length();
555             ++count;
556         }
557         SkDebugf("\n\ncount = %d\n", count);
558         return;
559     }
560     int index = 0;
561     const char* mispelled = report[0];
562     for (auto iter : elems) {
563         if (iter.second.fOverride) {
564             continue;
565         }
566         string check = iter.first.c_str();
567         while (check.compare(mispelled) > 0) {
568             SkDebugf("%s not found\n", mispelled);
569             if (report.count() == ++index) {
570                 break;
571             }
572         }
573         if (report.count() == index) {
574             break;
575         }
576         if (check.compare(mispelled) == 0) {
577             string fullName = this->ReportFilename(iter.second.fFile);
578             SkDebugf("%s(%d): %s\n", fullName.c_str(), iter.second.fLine,
579                     iter.first.c_str());
580             if (report.count() == ++index) {
581                 break;
582             }
583         }
584     }
585 }
586 
wordCheck(string str)587 void SpellCheck::wordCheck(string str) {
588     if ("nullptr" == str) {
589         return;  // doesn't seem worth it, treating nullptr as a word in need of correction
590     }
591     bool hasColon = false;
592     bool hasDot = false;
593     bool hasParen = false;
594     bool hasUnderscore = false;
595     bool sawDash = false;
596     bool sawDigit = false;
597     bool sawSpecial = false;
598     SkASSERT(str.length() > 0);
599     SkASSERT(isalpha(str[0]) || '~' == str[0]);
600     for (char ch : str) {
601         if (isalpha(ch) || '-' == ch) {
602             sawDash |= '-' == ch;
603             continue;
604         }
605         bool isColon = ':' == ch;
606         hasColon |= isColon;
607         bool isDot = '.' == ch;
608         hasDot |= isDot;
609         bool isParen = '(' == ch || ')' == ch || '~' == ch || '=' == ch || '!' == ch ||
610                 '[' == ch || ']' == ch;
611         hasParen |= isParen;
612         bool isUnderscore = '_' == ch;
613         hasUnderscore |= isUnderscore;
614         if (isColon || isDot || isUnderscore || isParen) {
615             continue;
616         }
617         if (isdigit(ch)) {
618             sawDigit = true;
619             continue;
620         }
621         if ('&' == ch || ',' == ch || ' ' == ch) {
622             sawSpecial = true;
623             continue;
624         }
625         SkASSERT(0);
626     }
627     if (sawSpecial && !hasParen) {
628         SkASSERT(0);
629     }
630     bool inCode = fInCode;
631     if (hasUnderscore && isupper(str[0]) && ('S' != str[0] || 'K' != str[1])
632             && !hasColon && !hasDot && !hasParen && !fInStdOut && !inCode && !fInConst
633             && !sawDigit && !sawSpecial && !sawDash) {
634         std::istringstream ss(str);
635         string token;
636         while (std::getline(ss, token, '_')) {
637             if (token.length()) {
638                 this->wordCheck(token);
639             }
640         }
641         return;
642     }
643     if (!hasColon && !hasDot && !hasParen && !hasUnderscore
644             && !fInStdOut && !inCode && !fInConst && !sawDigit
645             && islower(str[0]) && isupper(str[1])) {
646         inCode = true;
647     }
648     bool methodParam = false;
649     if (fMethod) {
650         for (auto child : fMethod->fChildren) {
651             if (MarkType::kParam == child->fMarkType && str == child->fName) {
652                 methodParam = true;
653                 break;
654             }
655         }
656     }
657     auto& mappy = hasColon ? fColons :
658                   hasDot ? fDots :
659                   hasParen ? fParens :
660                   hasUnderscore ? fUnderscores :
661                   fInStdOut || fInFormula || inCode || fInConst || methodParam ? fCode :
662                   sawDigit ? fDigits : fWords;
663     auto iter = mappy.find(str);
664     if (mappy.end() != iter) {
665         if (iter->second.fOverride && !fOverride) {
666             iter->second.fFile = fFileName;
667             iter->second.fLine = fLineCount + fLocalLine;
668             iter->second.fOverride = false;
669         }
670         iter->second.fCount += 1;
671     } else {
672         CheckEntry* entry = &mappy[str];
673         entry->fFile = fFileName;
674         entry->fLine = fLineCount + fLocalLine;
675         entry->fCount = 1;
676         entry->fOverride = fOverride;
677     }
678 }
679 
wordCheck(ptrdiff_t len,const char * ch)680 void SpellCheck::wordCheck(ptrdiff_t len, const char* ch) {
681     leafCheck(ch, ch + len);
682 }
683