1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
// FileCheck does a line-by-line check of a file that validates whether it
11 // contains the expected content.  This is useful for regression tests etc.
12 //
13 // This program exits with an error status of 2 on error, exit status of 0 if
14 // the file matched the expected contents, and exit status of 1 if it did not
15 // contain the expected contents.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringMap.h"
22 #include "llvm/ADT/StringSet.h"
23 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/PrettyStackTrace.h"
26 #include "llvm/Support/Regex.h"
27 #include "llvm/Support/Signals.h"
28 #include "llvm/Support/SourceMgr.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <algorithm>
31 #include <cctype>
32 #include <map>
33 #include <string>
34 #include <system_error>
35 #include <vector>
36 using namespace llvm;
37 
38 static cl::opt<std::string>
39 CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
40 
41 static cl::opt<std::string>
42 InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
43               cl::init("-"), cl::value_desc("filename"));
44 
45 static cl::list<std::string>
46 CheckPrefixes("check-prefix",
47               cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
48 static cl::alias CheckPrefixesAlias(
49     "check-prefixes", cl::aliasopt(CheckPrefixes), cl::CommaSeparated,
50     cl::NotHidden,
51     cl::desc(
52         "Alias for -check-prefix permitting multiple comma separated values"));
53 
54 static cl::opt<bool>
55 NoCanonicalizeWhiteSpace("strict-whitespace",
56               cl::desc("Do not treat all horizontal whitespace as equivalent"));
57 
58 static cl::list<std::string> ImplicitCheckNot(
59     "implicit-check-not",
60     cl::desc("Add an implicit negative check with this pattern to every\n"
61              "positive check. This can be used to ensure that no instances of\n"
62              "this pattern occur which are not matched by a positive pattern"),
63     cl::value_desc("pattern"));
64 
65 static cl::opt<bool> AllowEmptyInput(
66     "allow-empty", cl::init(false),
67     cl::desc("Allow the input file to be empty. This is useful when making\n"
68              "checks that some error message does not occur, for example."));
69 
70 static cl::opt<bool> MatchFullLines(
71     "match-full-lines", cl::init(false),
72     cl::desc("Require all positive matches to cover an entire input line.\n"
73              "Allows leading and trailing whitespace if --strict-whitespace\n"
74              "is not also passed."));
75 
76 typedef cl::list<std::string>::const_iterator prefix_iterator;
77 
78 //===----------------------------------------------------------------------===//
79 // Pattern Handling Code.
80 //===----------------------------------------------------------------------===//
81 
namespace Check {
  /// CheckType - The kind of directive attached to a check prefix.
  enum CheckType {
    /// No (valid) directive.
    CheckNone = 0,
    /// "PREFIX:".
    CheckPlain,
    /// "PREFIX-NEXT:".
    CheckNext,
    /// "PREFIX-SAME:".
    CheckSame,
    /// "PREFIX-NOT:".
    CheckNot,
    /// "PREFIX-DAG:".
    CheckDAG,
    /// "PREFIX-LABEL:".
    CheckLabel,

    /// CheckEOF - When set, this pattern only matches the end of file. This is
    /// used for trailing CHECK-NOTs.
    CheckEOF,

    /// CheckBadNot - Found -NOT combined with another CHECK suffix.
    CheckBadNot
  };
}
99 
100 class Pattern {
101   SMLoc PatternLoc;
102 
103   Check::CheckType CheckTy;
104 
105   /// FixedStr - If non-empty, this pattern is a fixed string match with the
106   /// specified fixed string.
107   StringRef FixedStr;
108 
109   /// RegEx - If non-empty, this is a regex pattern.
110   std::string RegExStr;
111 
112   /// \brief Contains the number of line this pattern is in.
113   unsigned LineNumber;
114 
115   /// VariableUses - Entries in this vector map to uses of a variable in the
116   /// pattern, e.g. "foo[[bar]]baz".  In this case, the RegExStr will contain
117   /// "foobaz" and we'll get an entry in this vector that tells us to insert the
118   /// value of bar at offset 3.
119   std::vector<std::pair<StringRef, unsigned> > VariableUses;
120 
121   /// VariableDefs - Maps definitions of variables to their parenthesized
122   /// capture numbers.
123   /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1.
124   std::map<StringRef, unsigned> VariableDefs;
125 
126 public:
127 
Pattern(Check::CheckType Ty)128   Pattern(Check::CheckType Ty)
129     : CheckTy(Ty) { }
130 
131   /// getLoc - Return the location in source code.
getLoc() const132   SMLoc getLoc() const { return PatternLoc; }
133 
134   /// ParsePattern - Parse the given string into the Pattern. Prefix provides
135   /// which prefix is being matched, SM provides the SourceMgr used for error
136   /// reports, and LineNumber is the line number in the input file from which
137   /// the pattern string was read.  Returns true in case of an error, false
138   /// otherwise.
139   bool ParsePattern(StringRef PatternStr,
140                     StringRef Prefix,
141                     SourceMgr &SM,
142                     unsigned LineNumber);
143 
144   /// Match - Match the pattern string against the input buffer Buffer.  This
145   /// returns the position that is matched or npos if there is no match.  If
146   /// there is a match, the size of the matched string is returned in MatchLen.
147   ///
148   /// The VariableTable StringMap provides the current values of filecheck
149   /// variables and is updated if this match defines new values.
150   size_t Match(StringRef Buffer, size_t &MatchLen,
151                StringMap<StringRef> &VariableTable) const;
152 
153   /// PrintFailureInfo - Print additional information about a failure to match
154   /// involving this pattern.
155   void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
156                         const StringMap<StringRef> &VariableTable) const;
157 
hasVariable() const158   bool hasVariable() const { return !(VariableUses.empty() &&
159                                       VariableDefs.empty()); }
160 
getCheckTy() const161   Check::CheckType getCheckTy() const { return CheckTy; }
162 
163 private:
164   bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
165   void AddBackrefToRegEx(unsigned BackrefNum);
166 
167   /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of
168   /// matching this pattern at the start of \arg Buffer; a distance of zero
169   /// should correspond to a perfect match.
170   unsigned ComputeMatchDistance(StringRef Buffer,
171                                const StringMap<StringRef> &VariableTable) const;
172 
173   /// \brief Evaluates expression and stores the result to \p Value.
174   /// \return true on success. false when the expression has invalid syntax.
175   bool EvaluateExpression(StringRef Expr, std::string &Value) const;
176 
177   /// \brief Finds the closing sequence of a regex variable usage or
178   /// definition. Str has to point in the beginning of the definition
179   /// (right after the opening sequence).
180   /// \return offset of the closing sequence within Str, or npos if it was not
181   /// found.
182   size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
183 };
184 
185 
ParsePattern(StringRef PatternStr,StringRef Prefix,SourceMgr & SM,unsigned LineNumber)186 bool Pattern::ParsePattern(StringRef PatternStr,
187                            StringRef Prefix,
188                            SourceMgr &SM,
189                            unsigned LineNumber) {
190   bool MatchFullLinesHere = MatchFullLines && CheckTy != Check::CheckNot;
191 
192   this->LineNumber = LineNumber;
193   PatternLoc = SMLoc::getFromPointer(PatternStr.data());
194 
195   // Ignore trailing whitespace.
196   while (!PatternStr.empty() &&
197          (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
198     PatternStr = PatternStr.substr(0, PatternStr.size()-1);
199 
200   // Check that there is something on the line.
201   if (PatternStr.empty()) {
202     SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
203                     "found empty check string with prefix '" +
204                     Prefix + ":'");
205     return true;
206   }
207 
208   // Check to see if this is a fixed string, or if it has regex pieces.
209   if (!MatchFullLinesHere &&
210       (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos &&
211                                  PatternStr.find("[[") == StringRef::npos))) {
212     FixedStr = PatternStr;
213     return false;
214   }
215 
216   if (MatchFullLinesHere) {
217     RegExStr += '^';
218     if (!NoCanonicalizeWhiteSpace)
219       RegExStr += " *";
220   }
221 
222   // Paren value #0 is for the fully matched string.  Any new parenthesized
223   // values add from there.
224   unsigned CurParen = 1;
225 
226   // Otherwise, there is at least one regex piece.  Build up the regex pattern
227   // by escaping scary characters in fixed strings, building up one big regex.
228   while (!PatternStr.empty()) {
229     // RegEx matches.
230     if (PatternStr.startswith("{{")) {
231       // This is the start of a regex match.  Scan for the }}.
232       size_t End = PatternStr.find("}}");
233       if (End == StringRef::npos) {
234         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
235                         SourceMgr::DK_Error,
236                         "found start of regex string with no end '}}'");
237         return true;
238       }
239 
240       // Enclose {{}} patterns in parens just like [[]] even though we're not
241       // capturing the result for any purpose.  This is required in case the
242       // expression contains an alternation like: CHECK:  abc{{x|z}}def.  We
243       // want this to turn into: "abc(x|z)def" not "abcx|zdef".
244       RegExStr += '(';
245       ++CurParen;
246 
247       if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM))
248         return true;
249       RegExStr += ')';
250 
251       PatternStr = PatternStr.substr(End+2);
252       continue;
253     }
254 
255     // Named RegEx matches.  These are of two forms: [[foo:.*]] which matches .*
256     // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
257     // second form is [[foo]] which is a reference to foo.  The variable name
258     // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
259     // it.  This is to catch some common errors.
260     if (PatternStr.startswith("[[")) {
261       // Find the closing bracket pair ending the match.  End is going to be an
262       // offset relative to the beginning of the match string.
263       size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
264 
265       if (End == StringRef::npos) {
266         SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
267                         SourceMgr::DK_Error,
268                         "invalid named regex reference, no ]] found");
269         return true;
270       }
271 
272       StringRef MatchStr = PatternStr.substr(2, End);
273       PatternStr = PatternStr.substr(End+4);
274 
275       // Get the regex name (e.g. "foo").
276       size_t NameEnd = MatchStr.find(':');
277       StringRef Name = MatchStr.substr(0, NameEnd);
278 
279       if (Name.empty()) {
280         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
281                         "invalid name in named regex: empty name");
282         return true;
283       }
284 
285       // Verify that the name/expression is well formed. FileCheck currently
286       // supports @LINE, @LINE+number, @LINE-number expressions. The check here
287       // is relaxed, more strict check is performed in \c EvaluateExpression.
288       bool IsExpression = false;
289       for (unsigned i = 0, e = Name.size(); i != e; ++i) {
290         if (i == 0 && Name[i] == '@') {
291           if (NameEnd != StringRef::npos) {
292             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
293                             SourceMgr::DK_Error,
294                             "invalid name in named regex definition");
295             return true;
296           }
297           IsExpression = true;
298           continue;
299         }
300         if (Name[i] != '_' && !isalnum(Name[i]) &&
301             (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
302           SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i),
303                           SourceMgr::DK_Error, "invalid name in named regex");
304           return true;
305         }
306       }
307 
308       // Name can't start with a digit.
309       if (isdigit(static_cast<unsigned char>(Name[0]))) {
310         SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
311                         "invalid name in named regex");
312         return true;
313       }
314 
315       // Handle [[foo]].
316       if (NameEnd == StringRef::npos) {
317         // Handle variables that were defined earlier on the same line by
318         // emitting a backreference.
319         if (VariableDefs.find(Name) != VariableDefs.end()) {
320           unsigned VarParenNum = VariableDefs[Name];
321           if (VarParenNum < 1 || VarParenNum > 9) {
322             SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
323                             SourceMgr::DK_Error,
324                             "Can't back-reference more than 9 variables");
325             return true;
326           }
327           AddBackrefToRegEx(VarParenNum);
328         } else {
329           VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
330         }
331         continue;
332       }
333 
334       // Handle [[foo:.*]].
335       VariableDefs[Name] = CurParen;
336       RegExStr += '(';
337       ++CurParen;
338 
339       if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM))
340         return true;
341 
342       RegExStr += ')';
343     }
344 
345     // Handle fixed string matches.
346     // Find the end, which is the start of the next regex.
347     size_t FixedMatchEnd = PatternStr.find("{{");
348     FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
349     RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
350     PatternStr = PatternStr.substr(FixedMatchEnd);
351   }
352 
353   if (MatchFullLinesHere) {
354     if (!NoCanonicalizeWhiteSpace)
355       RegExStr += " *";
356     RegExStr += '$';
357   }
358 
359   return false;
360 }
361 
AddRegExToRegEx(StringRef RS,unsigned & CurParen,SourceMgr & SM)362 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen,
363                               SourceMgr &SM) {
364   Regex R(RS);
365   std::string Error;
366   if (!R.isValid(Error)) {
367     SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
368                     "invalid regex: " + Error);
369     return true;
370   }
371 
372   RegExStr += RS.str();
373   CurParen += R.getNumMatches();
374   return false;
375 }
376 
AddBackrefToRegEx(unsigned BackrefNum)377 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
378   assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
379   std::string Backref = std::string("\\") +
380                         std::string(1, '0' + BackrefNum);
381   RegExStr += Backref;
382 }
383 
EvaluateExpression(StringRef Expr,std::string & Value) const384 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
385   // The only supported expression is @LINE([\+-]\d+)?
386   if (!Expr.startswith("@LINE"))
387     return false;
388   Expr = Expr.substr(StringRef("@LINE").size());
389   int Offset = 0;
390   if (!Expr.empty()) {
391     if (Expr[0] == '+')
392       Expr = Expr.substr(1);
393     else if (Expr[0] != '-')
394       return false;
395     if (Expr.getAsInteger(10, Offset))
396       return false;
397   }
398   Value = llvm::itostr(LineNumber + Offset);
399   return true;
400 }
401 
402 /// Match - Match the pattern string against the input buffer Buffer.  This
403 /// returns the position that is matched or npos if there is no match.  If
404 /// there is a match, the size of the matched string is returned in MatchLen.
Match(StringRef Buffer,size_t & MatchLen,StringMap<StringRef> & VariableTable) const405 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
406                       StringMap<StringRef> &VariableTable) const {
407   // If this is the EOF pattern, match it immediately.
408   if (CheckTy == Check::CheckEOF) {
409     MatchLen = 0;
410     return Buffer.size();
411   }
412 
413   // If this is a fixed string pattern, just match it now.
414   if (!FixedStr.empty()) {
415     MatchLen = FixedStr.size();
416     return Buffer.find(FixedStr);
417   }
418 
419   // Regex match.
420 
421   // If there are variable uses, we need to create a temporary string with the
422   // actual value.
423   StringRef RegExToMatch = RegExStr;
424   std::string TmpStr;
425   if (!VariableUses.empty()) {
426     TmpStr = RegExStr;
427 
428     unsigned InsertOffset = 0;
429     for (const auto &VariableUse : VariableUses) {
430       std::string Value;
431 
432       if (VariableUse.first[0] == '@') {
433         if (!EvaluateExpression(VariableUse.first, Value))
434           return StringRef::npos;
435       } else {
436         StringMap<StringRef>::iterator it =
437             VariableTable.find(VariableUse.first);
438         // If the variable is undefined, return an error.
439         if (it == VariableTable.end())
440           return StringRef::npos;
441 
442         // Look up the value and escape it so that we can put it into the regex.
443         Value += Regex::escape(it->second);
444       }
445 
446       // Plop it into the regex at the adjusted offset.
447       TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset,
448                     Value.begin(), Value.end());
449       InsertOffset += Value.size();
450     }
451 
452     // Match the newly constructed regex.
453     RegExToMatch = TmpStr;
454   }
455 
456 
457   SmallVector<StringRef, 4> MatchInfo;
458   if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
459     return StringRef::npos;
460 
461   // Successful regex match.
462   assert(!MatchInfo.empty() && "Didn't get any match");
463   StringRef FullMatch = MatchInfo[0];
464 
465   // If this defines any variables, remember their values.
466   for (const auto &VariableDef : VariableDefs) {
467     assert(VariableDef.second < MatchInfo.size() && "Internal paren error");
468     VariableTable[VariableDef.first] = MatchInfo[VariableDef.second];
469   }
470 
471   MatchLen = FullMatch.size();
472   return FullMatch.data()-Buffer.data();
473 }
474 
ComputeMatchDistance(StringRef Buffer,const StringMap<StringRef> & VariableTable) const475 unsigned Pattern::ComputeMatchDistance(StringRef Buffer,
476                               const StringMap<StringRef> &VariableTable) const {
477   // Just compute the number of matching characters. For regular expressions, we
478   // just compare against the regex itself and hope for the best.
479   //
480   // FIXME: One easy improvement here is have the regex lib generate a single
481   // example regular expression which matches, and use that as the example
482   // string.
483   StringRef ExampleString(FixedStr);
484   if (ExampleString.empty())
485     ExampleString = RegExStr;
486 
487   // Only compare up to the first line in the buffer, or the string size.
488   StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
489   BufferPrefix = BufferPrefix.split('\n').first;
490   return BufferPrefix.edit_distance(ExampleString);
491 }
492 
PrintFailureInfo(const SourceMgr & SM,StringRef Buffer,const StringMap<StringRef> & VariableTable) const493 void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
494                                const StringMap<StringRef> &VariableTable) const{
495   // If this was a regular expression using variables, print the current
496   // variable values.
497   if (!VariableUses.empty()) {
498     for (const auto &VariableUse : VariableUses) {
499       SmallString<256> Msg;
500       raw_svector_ostream OS(Msg);
501       StringRef Var = VariableUse.first;
502       if (Var[0] == '@') {
503         std::string Value;
504         if (EvaluateExpression(Var, Value)) {
505           OS << "with expression \"";
506           OS.write_escaped(Var) << "\" equal to \"";
507           OS.write_escaped(Value) << "\"";
508         } else {
509           OS << "uses incorrect expression \"";
510           OS.write_escaped(Var) << "\"";
511         }
512       } else {
513         StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
514 
515         // Check for undefined variable references.
516         if (it == VariableTable.end()) {
517           OS << "uses undefined variable \"";
518           OS.write_escaped(Var) << "\"";
519         } else {
520           OS << "with variable \"";
521           OS.write_escaped(Var) << "\" equal to \"";
522           OS.write_escaped(it->second) << "\"";
523         }
524       }
525 
526       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
527                       OS.str());
528     }
529   }
530 
531   // Attempt to find the closest/best fuzzy match.  Usually an error happens
532   // because some string in the output didn't exactly match. In these cases, we
533   // would like to show the user a best guess at what "should have" matched, to
534   // save them having to actually check the input manually.
535   size_t NumLinesForward = 0;
536   size_t Best = StringRef::npos;
537   double BestQuality = 0;
538 
539   // Use an arbitrary 4k limit on how far we will search.
540   for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
541     if (Buffer[i] == '\n')
542       ++NumLinesForward;
543 
544     // Patterns have leading whitespace stripped, so skip whitespace when
545     // looking for something which looks like a pattern.
546     if (Buffer[i] == ' ' || Buffer[i] == '\t')
547       continue;
548 
549     // Compute the "quality" of this match as an arbitrary combination of the
550     // match distance and the number of lines skipped to get to this match.
551     unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
552     double Quality = Distance + (NumLinesForward / 100.);
553 
554     if (Quality < BestQuality || Best == StringRef::npos) {
555       Best = i;
556       BestQuality = Quality;
557     }
558   }
559 
560   // Print the "possible intended match here" line if we found something
561   // reasonable and not equal to what we showed in the "scanning from here"
562   // line.
563   if (Best && Best != StringRef::npos && BestQuality < 50) {
564       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
565                       SourceMgr::DK_Note, "possible intended match here");
566 
567     // FIXME: If we wanted to be really friendly we would show why the match
568     // failed, as it can be hard to spot simple one character differences.
569   }
570 }
571 
FindRegexVarEnd(StringRef Str,SourceMgr & SM)572 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
573   // Offset keeps track of the current offset within the input Str
574   size_t Offset = 0;
575   // [...] Nesting depth
576   size_t BracketDepth = 0;
577 
578   while (!Str.empty()) {
579     if (Str.startswith("]]") && BracketDepth == 0)
580       return Offset;
581     if (Str[0] == '\\') {
582       // Backslash escapes the next char within regexes, so skip them both.
583       Str = Str.substr(2);
584       Offset += 2;
585     } else {
586       switch (Str[0]) {
587         default:
588           break;
589         case '[':
590           BracketDepth++;
591           break;
592         case ']':
593           if (BracketDepth == 0) {
594             SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
595                             SourceMgr::DK_Error,
596                             "missing closing \"]\" for regex variable");
597             exit(1);
598           }
599           BracketDepth--;
600           break;
601       }
602       Str = Str.substr(1);
603       Offset++;
604     }
605   }
606 
607   return StringRef::npos;
608 }
609 
610 
611 //===----------------------------------------------------------------------===//
612 // Check Strings.
613 //===----------------------------------------------------------------------===//
614 
615 /// CheckString - This is a check that we found in the input file.
616 struct CheckString {
617   /// Pat - The pattern to match.
618   Pattern Pat;
619 
620   /// Prefix - Which prefix name this check matched.
621   StringRef Prefix;
622 
623   /// Loc - The location in the match file that the check string was specified.
624   SMLoc Loc;
625 
626   /// CheckTy - Specify what kind of check this is. e.g. CHECK-NEXT: directive,
627   /// as opposed to a CHECK: directive.
628   //  Check::CheckType CheckTy;
629 
630   /// DagNotStrings - These are all of the strings that are disallowed from
631   /// occurring between this match string and the previous one (or start of
632   /// file).
633   std::vector<Pattern> DagNotStrings;
634 
CheckStringCheckString635   CheckString(const Pattern &P, StringRef S, SMLoc L)
636       : Pat(P), Prefix(S), Loc(L) {}
637 
638   /// Check - Match check string and its "not strings" and/or "dag strings".
639   size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
640                size_t &MatchLen, StringMap<StringRef> &VariableTable) const;
641 
642   /// CheckNext - Verify there is a single line in the given buffer.
643   bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
644 
645   /// CheckSame - Verify there is no newline in the given buffer.
646   bool CheckSame(const SourceMgr &SM, StringRef Buffer) const;
647 
648   /// CheckNot - Verify there's no "not strings" in the given buffer.
649   bool CheckNot(const SourceMgr &SM, StringRef Buffer,
650                 const std::vector<const Pattern *> &NotStrings,
651                 StringMap<StringRef> &VariableTable) const;
652 
653   /// CheckDag - Match "dag strings" and their mixed "not strings".
654   size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
655                   std::vector<const Pattern *> &NotStrings,
656                   StringMap<StringRef> &VariableTable) const;
657 };
658 
659 /// Canonicalize whitespaces in the input file. Line endings are replaced
660 /// with UNIX-style '\n'.
661 ///
662 /// \param PreserveHorizontal Don't squash consecutive horizontal whitespace
663 /// characters to a single space.
664 static std::unique_ptr<MemoryBuffer>
CanonicalizeInputFile(std::unique_ptr<MemoryBuffer> MB,bool PreserveHorizontal)665 CanonicalizeInputFile(std::unique_ptr<MemoryBuffer> MB,
666                       bool PreserveHorizontal) {
667   SmallString<128> NewFile;
668   NewFile.reserve(MB->getBufferSize());
669 
670   for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd();
671        Ptr != End; ++Ptr) {
672     // Eliminate trailing dosish \r.
673     if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
674       continue;
675     }
676 
677     // If current char is not a horizontal whitespace or if horizontal
678     // whitespace canonicalization is disabled, dump it to output as is.
679     if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) {
680       NewFile.push_back(*Ptr);
681       continue;
682     }
683 
684     // Otherwise, add one space and advance over neighboring space.
685     NewFile.push_back(' ');
686     while (Ptr+1 != End &&
687            (Ptr[1] == ' ' || Ptr[1] == '\t'))
688       ++Ptr;
689   }
690 
691   return std::unique_ptr<MemoryBuffer>(
692       MemoryBuffer::getMemBufferCopy(NewFile.str(), MB->getBufferIdentifier()));
693 }
694 
/// IsPartOfWord - Return true if \p c is a word character: alphanumeric
/// (according to isalnum), '-' or '_'.
static bool IsPartOfWord(char c) {
  // The <cctype> classifiers are only defined for values representable as
  // unsigned char (or EOF); a plain char may be negative, so convert first.
  return isalnum(static_cast<unsigned char>(c)) || c == '-' || c == '_';
}
698 
699 // Get the size of the prefix extension.
CheckTypeSize(Check::CheckType Ty)700 static size_t CheckTypeSize(Check::CheckType Ty) {
701   switch (Ty) {
702   case Check::CheckNone:
703   case Check::CheckBadNot:
704     return 0;
705 
706   case Check::CheckPlain:
707     return sizeof(":") - 1;
708 
709   case Check::CheckNext:
710     return sizeof("-NEXT:") - 1;
711 
712   case Check::CheckSame:
713     return sizeof("-SAME:") - 1;
714 
715   case Check::CheckNot:
716     return sizeof("-NOT:") - 1;
717 
718   case Check::CheckDAG:
719     return sizeof("-DAG:") - 1;
720 
721   case Check::CheckLabel:
722     return sizeof("-LABEL:") - 1;
723 
724   case Check::CheckEOF:
725     llvm_unreachable("Should not be using EOF size");
726   }
727 
728   llvm_unreachable("Bad check type");
729 }
730 
FindCheckType(StringRef Buffer,StringRef Prefix)731 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
732   char NextChar = Buffer[Prefix.size()];
733 
734   // Verify that the : is present after the prefix.
735   if (NextChar == ':')
736     return Check::CheckPlain;
737 
738   if (NextChar != '-')
739     return Check::CheckNone;
740 
741   StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
742   if (Rest.startswith("NEXT:"))
743     return Check::CheckNext;
744 
745   if (Rest.startswith("SAME:"))
746     return Check::CheckSame;
747 
748   if (Rest.startswith("NOT:"))
749     return Check::CheckNot;
750 
751   if (Rest.startswith("DAG:"))
752     return Check::CheckDAG;
753 
754   if (Rest.startswith("LABEL:"))
755     return Check::CheckLabel;
756 
757   // You can't combine -NOT with another suffix.
758   if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") ||
759       Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") ||
760       Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:"))
761     return Check::CheckBadNot;
762 
763   return Check::CheckNone;
764 }
765 
766 // From the given position, find the next character after the word.
SkipWord(StringRef Str,size_t Loc)767 static size_t SkipWord(StringRef Str, size_t Loc) {
768   while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
769     ++Loc;
770   return Loc;
771 }
772 
773 // Try to find the first match in buffer for any prefix. If a valid match is
774 // found, return that prefix and set its type and location.  If there are almost
775 // matches (e.g. the actual prefix string is found, but is not an actual check
776 // string), but no valid match, return an empty string and set the position to
777 // resume searching from. If no partial matches are found, return an empty
778 // string and the location will be StringRef::npos. If one prefix is a substring
779 // of another, the maximal match should be found. e.g. if "A" and "AA" are
780 // prefixes then AA-CHECK: should match the second one.
FindFirstCandidateMatch(StringRef & Buffer,Check::CheckType & CheckTy,size_t & CheckLoc)781 static StringRef FindFirstCandidateMatch(StringRef &Buffer,
782                                          Check::CheckType &CheckTy,
783                                          size_t &CheckLoc) {
784   StringRef FirstPrefix;
785   size_t FirstLoc = StringRef::npos;
786   size_t SearchLoc = StringRef::npos;
787   Check::CheckType FirstTy = Check::CheckNone;
788 
789   CheckTy = Check::CheckNone;
790   CheckLoc = StringRef::npos;
791 
792   for (StringRef Prefix : CheckPrefixes) {
793     size_t PrefixLoc = Buffer.find(Prefix);
794 
795     if (PrefixLoc == StringRef::npos)
796       continue;
797 
798     // Track where we are searching for invalid prefixes that look almost right.
799     // We need to only advance to the first partial match on the next attempt
800     // since a partial match could be a substring of a later, valid prefix.
801     // Need to skip to the end of the word, otherwise we could end up
802     // matching a prefix in a substring later.
803     if (PrefixLoc < SearchLoc)
804       SearchLoc = SkipWord(Buffer, PrefixLoc);
805 
806     // We only want to find the first match to avoid skipping some.
807     if (PrefixLoc > FirstLoc)
808       continue;
809     // If one matching check-prefix is a prefix of another, choose the
810     // longer one.
811     if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size())
812       continue;
813 
814     StringRef Rest = Buffer.drop_front(PrefixLoc);
815     // Make sure we have actually found the prefix, and not a word containing
816     // it. This should also prevent matching the wrong prefix when one is a
817     // substring of another.
818     if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1]))
819       FirstTy = Check::CheckNone;
820     else
821       FirstTy = FindCheckType(Rest, Prefix);
822 
823     FirstLoc = PrefixLoc;
824     FirstPrefix = Prefix;
825   }
826 
827   // If the first prefix is invalid, we should continue the search after it.
828   if (FirstTy == Check::CheckNone) {
829     CheckLoc = SearchLoc;
830     return "";
831   }
832 
833   CheckTy = FirstTy;
834   CheckLoc = FirstLoc;
835   return FirstPrefix;
836 }
837 
FindFirstMatchingPrefix(StringRef & Buffer,unsigned & LineNumber,Check::CheckType & CheckTy,size_t & CheckLoc)838 static StringRef FindFirstMatchingPrefix(StringRef &Buffer,
839                                          unsigned &LineNumber,
840                                          Check::CheckType &CheckTy,
841                                          size_t &CheckLoc) {
842   while (!Buffer.empty()) {
843     StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc);
844     // If we found a real match, we are done.
845     if (!Prefix.empty()) {
846       LineNumber += Buffer.substr(0, CheckLoc).count('\n');
847       return Prefix;
848     }
849 
850     // We didn't find any almost matches either, we are also done.
851     if (CheckLoc == StringRef::npos)
852       return StringRef();
853 
854     LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n');
855 
856     // Advance to the last possible match we found and try again.
857     Buffer = Buffer.drop_front(CheckLoc + 1);
858   }
859 
860   return StringRef();
861 }
862 
863 /// ReadCheckFile - Read the check file, which specifies the sequence of
864 /// expected strings.  The strings are added to the CheckStrings vector.
865 /// Returns true in case of an error, false otherwise.
ReadCheckFile(SourceMgr & SM,std::vector<CheckString> & CheckStrings)866 static bool ReadCheckFile(SourceMgr &SM,
867                           std::vector<CheckString> &CheckStrings) {
868   ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
869       MemoryBuffer::getFileOrSTDIN(CheckFilename);
870   if (std::error_code EC = FileOrErr.getError()) {
871     errs() << "Could not open check file '" << CheckFilename
872            << "': " << EC.message() << '\n';
873     return true;
874   }
875 
876   // If we want to canonicalize whitespace, strip excess whitespace from the
877   // buffer containing the CHECK lines. Remove DOS style line endings.
878   std::unique_ptr<MemoryBuffer> F = CanonicalizeInputFile(
879       std::move(FileOrErr.get()), NoCanonicalizeWhiteSpace);
880 
881   // Find all instances of CheckPrefix followed by : in the file.
882   StringRef Buffer = F->getBuffer();
883 
884   SM.AddNewSourceBuffer(std::move(F), SMLoc());
885 
886   std::vector<Pattern> ImplicitNegativeChecks;
887   for (const auto &PatternString : ImplicitCheckNot) {
888     // Create a buffer with fake command line content in order to display the
889     // command line option responsible for the specific implicit CHECK-NOT.
890     std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str();
891     std::string Suffix = "'";
892     std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy(
893         Prefix + PatternString + Suffix, "command line");
894 
895     StringRef PatternInBuffer =
896         CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
897     SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
898 
899     ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot));
900     ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
901                                                "IMPLICIT-CHECK", SM, 0);
902   }
903 
904 
905   std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
906 
907   // LineNumber keeps track of the line on which CheckPrefix instances are
908   // found.
909   unsigned LineNumber = 1;
910 
911   while (1) {
912     Check::CheckType CheckTy;
913     size_t PrefixLoc;
914 
915     // See if a prefix occurs in the memory buffer.
916     StringRef UsedPrefix = FindFirstMatchingPrefix(Buffer,
917                                                    LineNumber,
918                                                    CheckTy,
919                                                    PrefixLoc);
920     if (UsedPrefix.empty())
921       break;
922 
923     Buffer = Buffer.drop_front(PrefixLoc);
924 
925     // Location to use for error messages.
926     const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1);
927 
928     // PrefixLoc is to the start of the prefix. Skip to the end.
929     Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy));
930 
931     // Complain about useful-looking but unsupported suffixes.
932     if (CheckTy == Check::CheckBadNot) {
933       SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()),
934                       SourceMgr::DK_Error,
935                       "unsupported -NOT combo on prefix '" + UsedPrefix + "'");
936       return true;
937     }
938 
939     // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
940     // leading and trailing whitespace.
941     Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
942 
943     // Scan ahead to the end of line.
944     size_t EOL = Buffer.find_first_of("\n\r");
945 
946     // Remember the location of the start of the pattern, for diagnostics.
947     SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
948 
949     // Parse the pattern.
950     Pattern P(CheckTy);
951     if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber))
952       return true;
953 
954     // Verify that CHECK-LABEL lines do not define or use variables
955     if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
956       SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
957                       SourceMgr::DK_Error,
958                       "found '" + UsedPrefix + "-LABEL:'"
959                       " with variable definition or use");
960       return true;
961     }
962 
963     Buffer = Buffer.substr(EOL);
964 
965     // Verify that CHECK-NEXT lines have at least one CHECK line before them.
966     if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame) &&
967         CheckStrings.empty()) {
968       StringRef Type = CheckTy == Check::CheckNext ? "NEXT" : "SAME";
969       SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
970                       SourceMgr::DK_Error,
971                       "found '" + UsedPrefix + "-" + Type + "' without previous '"
972                       + UsedPrefix + ": line");
973       return true;
974     }
975 
976     // Handle CHECK-DAG/-NOT.
977     if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
978       DagNotMatches.push_back(P);
979       continue;
980     }
981 
982     // Okay, add the string we captured to the output vector and move on.
983     CheckStrings.emplace_back(P, UsedPrefix, PatternLoc);
984     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
985     DagNotMatches = ImplicitNegativeChecks;
986   }
987 
988   // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
989   // prefix as a filler for the error message.
990   if (!DagNotMatches.empty()) {
991     CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(),
992                               SMLoc::getFromPointer(Buffer.data()));
993     std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
994   }
995 
996   if (CheckStrings.empty()) {
997     errs() << "error: no check strings found with prefix"
998            << (CheckPrefixes.size() > 1 ? "es " : " ");
999     prefix_iterator I = CheckPrefixes.begin();
1000     prefix_iterator E = CheckPrefixes.end();
1001     if (I != E) {
1002       errs() << "\'" << *I << ":'";
1003       ++I;
1004     }
1005     for (; I != E; ++I)
1006       errs() << ", \'" << *I << ":'";
1007 
1008     errs() << '\n';
1009     return true;
1010   }
1011 
1012   return false;
1013 }
1014 
PrintCheckFailed(const SourceMgr & SM,SMLoc Loc,const Pattern & Pat,StringRef Buffer,StringMap<StringRef> & VariableTable)1015 static void PrintCheckFailed(const SourceMgr &SM, SMLoc Loc,
1016                              const Pattern &Pat, StringRef Buffer,
1017                              StringMap<StringRef> &VariableTable) {
1018   // Otherwise, we have an error, emit an error message.
1019   SM.PrintMessage(Loc, SourceMgr::DK_Error,
1020                   "expected string not found in input");
1021 
1022   // Print the "scanning from here" line.  If the current position is at the
1023   // end of a line, advance to the start of the next line.
1024   Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
1025 
1026   SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1027                   "scanning from here");
1028 
1029   // Allow the pattern to print additional information if desired.
1030   Pat.PrintFailureInfo(SM, Buffer, VariableTable);
1031 }
1032 
PrintCheckFailed(const SourceMgr & SM,const CheckString & CheckStr,StringRef Buffer,StringMap<StringRef> & VariableTable)1033 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
1034                              StringRef Buffer,
1035                              StringMap<StringRef> &VariableTable) {
1036   PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable);
1037 }
1038 
1039 /// CountNumNewlinesBetween - Count the number of newlines in the specified
1040 /// range.
CountNumNewlinesBetween(StringRef Range,const char * & FirstNewLine)1041 static unsigned CountNumNewlinesBetween(StringRef Range,
1042                                         const char *&FirstNewLine) {
1043   unsigned NumNewLines = 0;
1044   while (1) {
1045     // Scan for newline.
1046     Range = Range.substr(Range.find_first_of("\n\r"));
1047     if (Range.empty()) return NumNewLines;
1048 
1049     ++NumNewLines;
1050 
1051     // Handle \n\r and \r\n as a single newline.
1052     if (Range.size() > 1 &&
1053         (Range[1] == '\n' || Range[1] == '\r') &&
1054         (Range[0] != Range[1]))
1055       Range = Range.substr(1);
1056     Range = Range.substr(1);
1057 
1058     if (NumNewLines == 1)
1059       FirstNewLine = Range.begin();
1060   }
1061 }
1062 
Check(const SourceMgr & SM,StringRef Buffer,bool IsLabelScanMode,size_t & MatchLen,StringMap<StringRef> & VariableTable) const1063 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
1064                           bool IsLabelScanMode, size_t &MatchLen,
1065                           StringMap<StringRef> &VariableTable) const {
1066   size_t LastPos = 0;
1067   std::vector<const Pattern *> NotStrings;
1068 
1069   // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
1070   // bounds; we have not processed variable definitions within the bounded block
1071   // yet so cannot handle any final CHECK-DAG yet; this is handled when going
1072   // over the block again (including the last CHECK-LABEL) in normal mode.
1073   if (!IsLabelScanMode) {
1074     // Match "dag strings" (with mixed "not strings" if any).
1075     LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
1076     if (LastPos == StringRef::npos)
1077       return StringRef::npos;
1078   }
1079 
1080   // Match itself from the last position after matching CHECK-DAG.
1081   StringRef MatchBuffer = Buffer.substr(LastPos);
1082   size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1083   if (MatchPos == StringRef::npos) {
1084     PrintCheckFailed(SM, *this, MatchBuffer, VariableTable);
1085     return StringRef::npos;
1086   }
1087 
1088   // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
1089   // or CHECK-NOT
1090   if (!IsLabelScanMode) {
1091     StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1092 
1093     // If this check is a "CHECK-NEXT", verify that the previous match was on
1094     // the previous line (i.e. that there is one newline between them).
1095     if (CheckNext(SM, SkippedRegion))
1096       return StringRef::npos;
1097 
1098     // If this check is a "CHECK-SAME", verify that the previous match was on
1099     // the same line (i.e. that there is no newline between them).
1100     if (CheckSame(SM, SkippedRegion))
1101       return StringRef::npos;
1102 
1103     // If this match had "not strings", verify that they don't exist in the
1104     // skipped region.
1105     if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1106       return StringRef::npos;
1107   }
1108 
1109   return LastPos + MatchPos;
1110 }
1111 
CheckNext(const SourceMgr & SM,StringRef Buffer) const1112 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
1113   if (Pat.getCheckTy() != Check::CheckNext)
1114     return false;
1115 
1116   // Count the number of newlines between the previous match and this one.
1117   assert(Buffer.data() !=
1118          SM.getMemoryBuffer(
1119            SM.FindBufferContainingLoc(
1120              SMLoc::getFromPointer(Buffer.data())))->getBufferStart() &&
1121          "CHECK-NEXT can't be the first check in a file");
1122 
1123   const char *FirstNewLine = nullptr;
1124   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1125 
1126   if (NumNewLines == 0) {
1127     SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
1128                     "-NEXT: is on the same line as previous match");
1129     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1130                     SourceMgr::DK_Note, "'next' match was here");
1131     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1132                     "previous match ended here");
1133     return true;
1134   }
1135 
1136   if (NumNewLines != 1) {
1137     SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
1138                     "-NEXT: is not on the line after the previous match");
1139     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1140                     SourceMgr::DK_Note, "'next' match was here");
1141     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1142                     "previous match ended here");
1143     SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
1144                     "non-matching line after previous match is here");
1145     return true;
1146   }
1147 
1148   return false;
1149 }
1150 
CheckSame(const SourceMgr & SM,StringRef Buffer) const1151 bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
1152   if (Pat.getCheckTy() != Check::CheckSame)
1153     return false;
1154 
1155   // Count the number of newlines between the previous match and this one.
1156   assert(Buffer.data() !=
1157              SM.getMemoryBuffer(SM.FindBufferContainingLoc(
1158                                     SMLoc::getFromPointer(Buffer.data())))
1159                  ->getBufferStart() &&
1160          "CHECK-SAME can't be the first check in a file");
1161 
1162   const char *FirstNewLine = nullptr;
1163   unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1164 
1165   if (NumNewLines != 0) {
1166     SM.PrintMessage(Loc, SourceMgr::DK_Error,
1167                     Prefix +
1168                         "-SAME: is not on the same line as the previous match");
1169     SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1170                     "'next' match was here");
1171     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1172                     "previous match ended here");
1173     return true;
1174   }
1175 
1176   return false;
1177 }
1178 
CheckNot(const SourceMgr & SM,StringRef Buffer,const std::vector<const Pattern * > & NotStrings,StringMap<StringRef> & VariableTable) const1179 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
1180                            const std::vector<const Pattern *> &NotStrings,
1181                            StringMap<StringRef> &VariableTable) const {
1182   for (const Pattern *Pat : NotStrings) {
1183     assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
1184 
1185     size_t MatchLen = 0;
1186     size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
1187 
1188     if (Pos == StringRef::npos) continue;
1189 
1190     SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()+Pos),
1191                     SourceMgr::DK_Error,
1192                     Prefix + "-NOT: string occurred!");
1193     SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note,
1194                     Prefix + "-NOT: pattern specified here");
1195     return true;
1196   }
1197 
1198   return false;
1199 }
1200 
CheckDag(const SourceMgr & SM,StringRef Buffer,std::vector<const Pattern * > & NotStrings,StringMap<StringRef> & VariableTable) const1201 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
1202                              std::vector<const Pattern *> &NotStrings,
1203                              StringMap<StringRef> &VariableTable) const {
1204   if (DagNotStrings.empty())
1205     return 0;
1206 
1207   size_t LastPos = 0;
1208   size_t StartPos = LastPos;
1209 
1210   for (const Pattern &Pat : DagNotStrings) {
1211     assert((Pat.getCheckTy() == Check::CheckDAG ||
1212             Pat.getCheckTy() == Check::CheckNot) &&
1213            "Invalid CHECK-DAG or CHECK-NOT!");
1214 
1215     if (Pat.getCheckTy() == Check::CheckNot) {
1216       NotStrings.push_back(&Pat);
1217       continue;
1218     }
1219 
1220     assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
1221 
1222     size_t MatchLen = 0, MatchPos;
1223 
1224     // CHECK-DAG always matches from the start.
1225     StringRef MatchBuffer = Buffer.substr(StartPos);
1226     MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1227     // With a group of CHECK-DAGs, a single mismatching means the match on
1228     // that group of CHECK-DAGs fails immediately.
1229     if (MatchPos == StringRef::npos) {
1230       PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable);
1231       return StringRef::npos;
1232     }
1233     // Re-calc it as the offset relative to the start of the original string.
1234     MatchPos += StartPos;
1235 
1236     if (!NotStrings.empty()) {
1237       if (MatchPos < LastPos) {
1238         // Reordered?
1239         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos),
1240                         SourceMgr::DK_Error,
1241                         Prefix + "-DAG: found a match of CHECK-DAG"
1242                         " reordering across a CHECK-NOT");
1243         SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos),
1244                         SourceMgr::DK_Note,
1245                         Prefix + "-DAG: the farthest match of CHECK-DAG"
1246                         " is found here");
1247         SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note,
1248                         Prefix + "-NOT: the crossed pattern specified"
1249                         " here");
1250         SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note,
1251                         Prefix + "-DAG: the reordered pattern specified"
1252                         " here");
1253         return StringRef::npos;
1254       }
1255       // All subsequent CHECK-DAGs should be matched from the farthest
1256       // position of all precedent CHECK-DAGs (including this one.)
1257       StartPos = LastPos;
1258       // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
1259       // CHECK-DAG, verify that there's no 'not' strings occurred in that
1260       // region.
1261       StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1262       if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1263         return StringRef::npos;
1264       // Clear "not strings".
1265       NotStrings.clear();
1266     }
1267 
1268     // Update the last position with CHECK-DAG matches.
1269     LastPos = std::max(MatchPos + MatchLen, LastPos);
1270   }
1271 
1272   return LastPos;
1273 }
1274 
1275 // A check prefix must contain only alphanumeric, hyphens and underscores.
ValidateCheckPrefix(StringRef CheckPrefix)1276 static bool ValidateCheckPrefix(StringRef CheckPrefix) {
1277   Regex Validator("^[a-zA-Z0-9_-]*$");
1278   return Validator.match(CheckPrefix);
1279 }
1280 
ValidateCheckPrefixes()1281 static bool ValidateCheckPrefixes() {
1282   StringSet<> PrefixSet;
1283 
1284   for (StringRef Prefix : CheckPrefixes) {
1285     // Reject empty prefixes.
1286     if (Prefix == "")
1287       return false;
1288 
1289     if (!PrefixSet.insert(Prefix).second)
1290       return false;
1291 
1292     if (!ValidateCheckPrefix(Prefix))
1293       return false;
1294   }
1295 
1296   return true;
1297 }
1298 
1299 // I don't think there's a way to specify an initial value for cl::list,
1300 // so if nothing was specified, add the default
AddCheckPrefixIfNeeded()1301 static void AddCheckPrefixIfNeeded() {
1302   if (CheckPrefixes.empty())
1303     CheckPrefixes.push_back("CHECK");
1304 }
1305 
DumpCommandLine(int argc,char ** argv)1306 static void DumpCommandLine(int argc, char **argv) {
1307   errs() << "FileCheck command line: ";
1308   for (int I = 0; I < argc; I++)
1309     errs() << " " << argv[I];
1310   errs() << "\n";
1311 }
1312 
main(int argc,char ** argv)1313 int main(int argc, char **argv) {
1314   sys::PrintStackTraceOnErrorSignal(argv[0]);
1315   PrettyStackTraceProgram X(argc, argv);
1316   cl::ParseCommandLineOptions(argc, argv);
1317 
1318   if (!ValidateCheckPrefixes()) {
1319     errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
1320               "start with a letter and contain only alphanumeric characters, "
1321               "hyphens and underscores\n";
1322     return 2;
1323   }
1324 
1325   AddCheckPrefixIfNeeded();
1326 
1327   SourceMgr SM;
1328 
1329   // Read the expected strings from the check file.
1330   std::vector<CheckString> CheckStrings;
1331   if (ReadCheckFile(SM, CheckStrings))
1332     return 2;
1333 
1334   // Open the file to check and add it to SourceMgr.
1335   ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
1336       MemoryBuffer::getFileOrSTDIN(InputFilename);
1337   if (std::error_code EC = FileOrErr.getError()) {
1338     errs() << "Could not open input file '" << InputFilename
1339            << "': " << EC.message() << '\n';
1340     return 2;
1341   }
1342   std::unique_ptr<MemoryBuffer> &File = FileOrErr.get();
1343 
1344   if (File->getBufferSize() == 0 && !AllowEmptyInput) {
1345     errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
1346     DumpCommandLine(argc, argv);
1347     return 2;
1348   }
1349 
1350   // Remove duplicate spaces in the input file if requested.
1351   // Remove DOS style line endings.
1352   std::unique_ptr<MemoryBuffer> F =
1353       CanonicalizeInputFile(std::move(File), NoCanonicalizeWhiteSpace);
1354 
1355   // Check that we have all of the expected strings, in order, in the input
1356   // file.
1357   StringRef Buffer = F->getBuffer();
1358 
1359   SM.AddNewSourceBuffer(std::move(F), SMLoc());
1360 
1361   /// VariableTable - This holds all the current filecheck variables.
1362   StringMap<StringRef> VariableTable;
1363 
1364   bool hasError = false;
1365 
1366   unsigned i = 0, j = 0, e = CheckStrings.size();
1367 
1368   while (true) {
1369     StringRef CheckRegion;
1370     if (j == e) {
1371       CheckRegion = Buffer;
1372     } else {
1373       const CheckString &CheckLabelStr = CheckStrings[j];
1374       if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) {
1375         ++j;
1376         continue;
1377       }
1378 
1379       // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
1380       size_t MatchLabelLen = 0;
1381       size_t MatchLabelPos = CheckLabelStr.Check(SM, Buffer, true,
1382                                                  MatchLabelLen, VariableTable);
1383       if (MatchLabelPos == StringRef::npos) {
1384         hasError = true;
1385         break;
1386       }
1387 
1388       CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
1389       Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
1390       ++j;
1391     }
1392 
1393     for ( ; i != j; ++i) {
1394       const CheckString &CheckStr = CheckStrings[i];
1395 
1396       // Check each string within the scanned region, including a second check
1397       // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
1398       size_t MatchLen = 0;
1399       size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen,
1400                                        VariableTable);
1401 
1402       if (MatchPos == StringRef::npos) {
1403         hasError = true;
1404         i = j;
1405         break;
1406       }
1407 
1408       CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
1409     }
1410 
1411     if (j == e)
1412       break;
1413   }
1414 
1415   return hasError ? 1 : 0;
1416 }
1417