1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // FileCheck does a line-by line check of a file that validates whether it
11 // contains the expected content. This is useful for regression tests etc.
12 //
13 // This program exits with an error status of 2 on error, exit status of 0 if
14 // the file matched the expected contents, and exit status of 1 if it did not
15 // contain the expected contents.
16 //
17 //===----------------------------------------------------------------------===//
18
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringMap.h"
22 #include "llvm/ADT/StringSet.h"
23 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/PrettyStackTrace.h"
26 #include "llvm/Support/Regex.h"
27 #include "llvm/Support/Signals.h"
28 #include "llvm/Support/SourceMgr.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <algorithm>
31 #include <cctype>
32 #include <map>
33 #include <string>
34 #include <system_error>
35 #include <vector>
36 using namespace llvm;
37
38 static cl::opt<std::string>
39 CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
40
41 static cl::opt<std::string>
42 InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
43 cl::init("-"), cl::value_desc("filename"));
44
45 static cl::list<std::string>
46 CheckPrefixes("check-prefix",
47 cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
48
49 static cl::opt<bool>
50 NoCanonicalizeWhiteSpace("strict-whitespace",
51 cl::desc("Do not treat all horizontal whitespace as equivalent"));
52
53 static cl::list<std::string> ImplicitCheckNot(
54 "implicit-check-not",
55 cl::desc("Add an implicit negative check with this pattern to every\n"
56 "positive check. This can be used to ensure that no instances of\n"
57 "this pattern occur which are not matched by a positive pattern"),
58 cl::value_desc("pattern"));
59
60 static cl::opt<bool> AllowEmptyInput(
61 "allow-empty", cl::init(false),
62 cl::desc("Allow the input file to be empty. This is useful when making\n"
63 "checks that some error message does not occur, for example."));
64
65 typedef cl::list<std::string>::const_iterator prefix_iterator;
66
67 //===----------------------------------------------------------------------===//
68 // Pattern Handling Code.
69 //===----------------------------------------------------------------------===//
70
71 namespace Check {
72 enum CheckType {
73 CheckNone = 0,
74 CheckPlain,
75 CheckNext,
76 CheckSame,
77 CheckNot,
78 CheckDAG,
79 CheckLabel,
80
81 /// MatchEOF - When set, this pattern only matches the end of file. This is
82 /// used for trailing CHECK-NOTs.
83 CheckEOF
84 };
85 }
86
87 class Pattern {
88 SMLoc PatternLoc;
89
90 Check::CheckType CheckTy;
91
92 /// FixedStr - If non-empty, this pattern is a fixed string match with the
93 /// specified fixed string.
94 StringRef FixedStr;
95
96 /// RegEx - If non-empty, this is a regex pattern.
97 std::string RegExStr;
98
99 /// \brief Contains the number of line this pattern is in.
100 unsigned LineNumber;
101
102 /// VariableUses - Entries in this vector map to uses of a variable in the
103 /// pattern, e.g. "foo[[bar]]baz". In this case, the RegExStr will contain
104 /// "foobaz" and we'll get an entry in this vector that tells us to insert the
105 /// value of bar at offset 3.
106 std::vector<std::pair<StringRef, unsigned> > VariableUses;
107
108 /// VariableDefs - Maps definitions of variables to their parenthesized
109 /// capture numbers.
110 /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1.
111 std::map<StringRef, unsigned> VariableDefs;
112
113 public:
114
Pattern(Check::CheckType Ty)115 Pattern(Check::CheckType Ty)
116 : CheckTy(Ty) { }
117
118 /// getLoc - Return the location in source code.
getLoc() const119 SMLoc getLoc() const { return PatternLoc; }
120
121 /// ParsePattern - Parse the given string into the Pattern. Prefix provides
122 /// which prefix is being matched, SM provides the SourceMgr used for error
123 /// reports, and LineNumber is the line number in the input file from which
124 /// the pattern string was read. Returns true in case of an error, false
125 /// otherwise.
126 bool ParsePattern(StringRef PatternStr,
127 StringRef Prefix,
128 SourceMgr &SM,
129 unsigned LineNumber);
130
131 /// Match - Match the pattern string against the input buffer Buffer. This
132 /// returns the position that is matched or npos if there is no match. If
133 /// there is a match, the size of the matched string is returned in MatchLen.
134 ///
135 /// The VariableTable StringMap provides the current values of filecheck
136 /// variables and is updated if this match defines new values.
137 size_t Match(StringRef Buffer, size_t &MatchLen,
138 StringMap<StringRef> &VariableTable) const;
139
140 /// PrintFailureInfo - Print additional information about a failure to match
141 /// involving this pattern.
142 void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
143 const StringMap<StringRef> &VariableTable) const;
144
hasVariable() const145 bool hasVariable() const { return !(VariableUses.empty() &&
146 VariableDefs.empty()); }
147
getCheckTy() const148 Check::CheckType getCheckTy() const { return CheckTy; }
149
150 private:
151 bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
152 void AddBackrefToRegEx(unsigned BackrefNum);
153
154 /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of
155 /// matching this pattern at the start of \arg Buffer; a distance of zero
156 /// should correspond to a perfect match.
157 unsigned ComputeMatchDistance(StringRef Buffer,
158 const StringMap<StringRef> &VariableTable) const;
159
160 /// \brief Evaluates expression and stores the result to \p Value.
161 /// \return true on success. false when the expression has invalid syntax.
162 bool EvaluateExpression(StringRef Expr, std::string &Value) const;
163
164 /// \brief Finds the closing sequence of a regex variable usage or
165 /// definition. Str has to point in the beginning of the definition
166 /// (right after the opening sequence).
167 /// \return offset of the closing sequence within Str, or npos if it was not
168 /// found.
169 size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
170 };
171
172
ParsePattern(StringRef PatternStr,StringRef Prefix,SourceMgr & SM,unsigned LineNumber)173 bool Pattern::ParsePattern(StringRef PatternStr,
174 StringRef Prefix,
175 SourceMgr &SM,
176 unsigned LineNumber) {
177 this->LineNumber = LineNumber;
178 PatternLoc = SMLoc::getFromPointer(PatternStr.data());
179
180 // Ignore trailing whitespace.
181 while (!PatternStr.empty() &&
182 (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
183 PatternStr = PatternStr.substr(0, PatternStr.size()-1);
184
185 // Check that there is something on the line.
186 if (PatternStr.empty()) {
187 SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
188 "found empty check string with prefix '" +
189 Prefix + ":'");
190 return true;
191 }
192
193 // Check to see if this is a fixed string, or if it has regex pieces.
194 if (PatternStr.size() < 2 ||
195 (PatternStr.find("{{") == StringRef::npos &&
196 PatternStr.find("[[") == StringRef::npos)) {
197 FixedStr = PatternStr;
198 return false;
199 }
200
201 // Paren value #0 is for the fully matched string. Any new parenthesized
202 // values add from there.
203 unsigned CurParen = 1;
204
205 // Otherwise, there is at least one regex piece. Build up the regex pattern
206 // by escaping scary characters in fixed strings, building up one big regex.
207 while (!PatternStr.empty()) {
208 // RegEx matches.
209 if (PatternStr.startswith("{{")) {
210 // This is the start of a regex match. Scan for the }}.
211 size_t End = PatternStr.find("}}");
212 if (End == StringRef::npos) {
213 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
214 SourceMgr::DK_Error,
215 "found start of regex string with no end '}}'");
216 return true;
217 }
218
219 // Enclose {{}} patterns in parens just like [[]] even though we're not
220 // capturing the result for any purpose. This is required in case the
221 // expression contains an alternation like: CHECK: abc{{x|z}}def. We
222 // want this to turn into: "abc(x|z)def" not "abcx|zdef".
223 RegExStr += '(';
224 ++CurParen;
225
226 if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM))
227 return true;
228 RegExStr += ')';
229
230 PatternStr = PatternStr.substr(End+2);
231 continue;
232 }
233
234 // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .*
235 // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
236 // second form is [[foo]] which is a reference to foo. The variable name
237 // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
238 // it. This is to catch some common errors.
239 if (PatternStr.startswith("[[")) {
240 // Find the closing bracket pair ending the match. End is going to be an
241 // offset relative to the beginning of the match string.
242 size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
243
244 if (End == StringRef::npos) {
245 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
246 SourceMgr::DK_Error,
247 "invalid named regex reference, no ]] found");
248 return true;
249 }
250
251 StringRef MatchStr = PatternStr.substr(2, End);
252 PatternStr = PatternStr.substr(End+4);
253
254 // Get the regex name (e.g. "foo").
255 size_t NameEnd = MatchStr.find(':');
256 StringRef Name = MatchStr.substr(0, NameEnd);
257
258 if (Name.empty()) {
259 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
260 "invalid name in named regex: empty name");
261 return true;
262 }
263
264 // Verify that the name/expression is well formed. FileCheck currently
265 // supports @LINE, @LINE+number, @LINE-number expressions. The check here
266 // is relaxed, more strict check is performed in \c EvaluateExpression.
267 bool IsExpression = false;
268 for (unsigned i = 0, e = Name.size(); i != e; ++i) {
269 if (i == 0 && Name[i] == '@') {
270 if (NameEnd != StringRef::npos) {
271 SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
272 SourceMgr::DK_Error,
273 "invalid name in named regex definition");
274 return true;
275 }
276 IsExpression = true;
277 continue;
278 }
279 if (Name[i] != '_' && !isalnum(Name[i]) &&
280 (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
281 SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i),
282 SourceMgr::DK_Error, "invalid name in named regex");
283 return true;
284 }
285 }
286
287 // Name can't start with a digit.
288 if (isdigit(static_cast<unsigned char>(Name[0]))) {
289 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
290 "invalid name in named regex");
291 return true;
292 }
293
294 // Handle [[foo]].
295 if (NameEnd == StringRef::npos) {
296 // Handle variables that were defined earlier on the same line by
297 // emitting a backreference.
298 if (VariableDefs.find(Name) != VariableDefs.end()) {
299 unsigned VarParenNum = VariableDefs[Name];
300 if (VarParenNum < 1 || VarParenNum > 9) {
301 SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
302 SourceMgr::DK_Error,
303 "Can't back-reference more than 9 variables");
304 return true;
305 }
306 AddBackrefToRegEx(VarParenNum);
307 } else {
308 VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
309 }
310 continue;
311 }
312
313 // Handle [[foo:.*]].
314 VariableDefs[Name] = CurParen;
315 RegExStr += '(';
316 ++CurParen;
317
318 if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM))
319 return true;
320
321 RegExStr += ')';
322 }
323
324 // Handle fixed string matches.
325 // Find the end, which is the start of the next regex.
326 size_t FixedMatchEnd = PatternStr.find("{{");
327 FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
328 RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
329 PatternStr = PatternStr.substr(FixedMatchEnd);
330 }
331
332 return false;
333 }
334
AddRegExToRegEx(StringRef RS,unsigned & CurParen,SourceMgr & SM)335 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen,
336 SourceMgr &SM) {
337 Regex R(RS);
338 std::string Error;
339 if (!R.isValid(Error)) {
340 SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
341 "invalid regex: " + Error);
342 return true;
343 }
344
345 RegExStr += RS.str();
346 CurParen += R.getNumMatches();
347 return false;
348 }
349
AddBackrefToRegEx(unsigned BackrefNum)350 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
351 assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
352 std::string Backref = std::string("\\") +
353 std::string(1, '0' + BackrefNum);
354 RegExStr += Backref;
355 }
356
EvaluateExpression(StringRef Expr,std::string & Value) const357 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
358 // The only supported expression is @LINE([\+-]\d+)?
359 if (!Expr.startswith("@LINE"))
360 return false;
361 Expr = Expr.substr(StringRef("@LINE").size());
362 int Offset = 0;
363 if (!Expr.empty()) {
364 if (Expr[0] == '+')
365 Expr = Expr.substr(1);
366 else if (Expr[0] != '-')
367 return false;
368 if (Expr.getAsInteger(10, Offset))
369 return false;
370 }
371 Value = llvm::itostr(LineNumber + Offset);
372 return true;
373 }
374
375 /// Match - Match the pattern string against the input buffer Buffer. This
376 /// returns the position that is matched or npos if there is no match. If
377 /// there is a match, the size of the matched string is returned in MatchLen.
Match(StringRef Buffer,size_t & MatchLen,StringMap<StringRef> & VariableTable) const378 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
379 StringMap<StringRef> &VariableTable) const {
380 // If this is the EOF pattern, match it immediately.
381 if (CheckTy == Check::CheckEOF) {
382 MatchLen = 0;
383 return Buffer.size();
384 }
385
386 // If this is a fixed string pattern, just match it now.
387 if (!FixedStr.empty()) {
388 MatchLen = FixedStr.size();
389 return Buffer.find(FixedStr);
390 }
391
392 // Regex match.
393
394 // If there are variable uses, we need to create a temporary string with the
395 // actual value.
396 StringRef RegExToMatch = RegExStr;
397 std::string TmpStr;
398 if (!VariableUses.empty()) {
399 TmpStr = RegExStr;
400
401 unsigned InsertOffset = 0;
402 for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
403 std::string Value;
404
405 if (VariableUses[i].first[0] == '@') {
406 if (!EvaluateExpression(VariableUses[i].first, Value))
407 return StringRef::npos;
408 } else {
409 StringMap<StringRef>::iterator it =
410 VariableTable.find(VariableUses[i].first);
411 // If the variable is undefined, return an error.
412 if (it == VariableTable.end())
413 return StringRef::npos;
414
415 // Look up the value and escape it so that we can put it into the regex.
416 Value += Regex::escape(it->second);
417 }
418
419 // Plop it into the regex at the adjusted offset.
420 TmpStr.insert(TmpStr.begin()+VariableUses[i].second+InsertOffset,
421 Value.begin(), Value.end());
422 InsertOffset += Value.size();
423 }
424
425 // Match the newly constructed regex.
426 RegExToMatch = TmpStr;
427 }
428
429
430 SmallVector<StringRef, 4> MatchInfo;
431 if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
432 return StringRef::npos;
433
434 // Successful regex match.
435 assert(!MatchInfo.empty() && "Didn't get any match");
436 StringRef FullMatch = MatchInfo[0];
437
438 // If this defines any variables, remember their values.
439 for (std::map<StringRef, unsigned>::const_iterator I = VariableDefs.begin(),
440 E = VariableDefs.end();
441 I != E; ++I) {
442 assert(I->second < MatchInfo.size() && "Internal paren error");
443 VariableTable[I->first] = MatchInfo[I->second];
444 }
445
446 MatchLen = FullMatch.size();
447 return FullMatch.data()-Buffer.data();
448 }
449
ComputeMatchDistance(StringRef Buffer,const StringMap<StringRef> & VariableTable) const450 unsigned Pattern::ComputeMatchDistance(StringRef Buffer,
451 const StringMap<StringRef> &VariableTable) const {
452 // Just compute the number of matching characters. For regular expressions, we
453 // just compare against the regex itself and hope for the best.
454 //
455 // FIXME: One easy improvement here is have the regex lib generate a single
456 // example regular expression which matches, and use that as the example
457 // string.
458 StringRef ExampleString(FixedStr);
459 if (ExampleString.empty())
460 ExampleString = RegExStr;
461
462 // Only compare up to the first line in the buffer, or the string size.
463 StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
464 BufferPrefix = BufferPrefix.split('\n').first;
465 return BufferPrefix.edit_distance(ExampleString);
466 }
467
PrintFailureInfo(const SourceMgr & SM,StringRef Buffer,const StringMap<StringRef> & VariableTable) const468 void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
469 const StringMap<StringRef> &VariableTable) const{
470 // If this was a regular expression using variables, print the current
471 // variable values.
472 if (!VariableUses.empty()) {
473 for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) {
474 SmallString<256> Msg;
475 raw_svector_ostream OS(Msg);
476 StringRef Var = VariableUses[i].first;
477 if (Var[0] == '@') {
478 std::string Value;
479 if (EvaluateExpression(Var, Value)) {
480 OS << "with expression \"";
481 OS.write_escaped(Var) << "\" equal to \"";
482 OS.write_escaped(Value) << "\"";
483 } else {
484 OS << "uses incorrect expression \"";
485 OS.write_escaped(Var) << "\"";
486 }
487 } else {
488 StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
489
490 // Check for undefined variable references.
491 if (it == VariableTable.end()) {
492 OS << "uses undefined variable \"";
493 OS.write_escaped(Var) << "\"";
494 } else {
495 OS << "with variable \"";
496 OS.write_escaped(Var) << "\" equal to \"";
497 OS.write_escaped(it->second) << "\"";
498 }
499 }
500
501 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
502 OS.str());
503 }
504 }
505
506 // Attempt to find the closest/best fuzzy match. Usually an error happens
507 // because some string in the output didn't exactly match. In these cases, we
508 // would like to show the user a best guess at what "should have" matched, to
509 // save them having to actually check the input manually.
510 size_t NumLinesForward = 0;
511 size_t Best = StringRef::npos;
512 double BestQuality = 0;
513
514 // Use an arbitrary 4k limit on how far we will search.
515 for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
516 if (Buffer[i] == '\n')
517 ++NumLinesForward;
518
519 // Patterns have leading whitespace stripped, so skip whitespace when
520 // looking for something which looks like a pattern.
521 if (Buffer[i] == ' ' || Buffer[i] == '\t')
522 continue;
523
524 // Compute the "quality" of this match as an arbitrary combination of the
525 // match distance and the number of lines skipped to get to this match.
526 unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
527 double Quality = Distance + (NumLinesForward / 100.);
528
529 if (Quality < BestQuality || Best == StringRef::npos) {
530 Best = i;
531 BestQuality = Quality;
532 }
533 }
534
535 // Print the "possible intended match here" line if we found something
536 // reasonable and not equal to what we showed in the "scanning from here"
537 // line.
538 if (Best && Best != StringRef::npos && BestQuality < 50) {
539 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
540 SourceMgr::DK_Note, "possible intended match here");
541
542 // FIXME: If we wanted to be really friendly we would show why the match
543 // failed, as it can be hard to spot simple one character differences.
544 }
545 }
546
FindRegexVarEnd(StringRef Str,SourceMgr & SM)547 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
548 // Offset keeps track of the current offset within the input Str
549 size_t Offset = 0;
550 // [...] Nesting depth
551 size_t BracketDepth = 0;
552
553 while (!Str.empty()) {
554 if (Str.startswith("]]") && BracketDepth == 0)
555 return Offset;
556 if (Str[0] == '\\') {
557 // Backslash escapes the next char within regexes, so skip them both.
558 Str = Str.substr(2);
559 Offset += 2;
560 } else {
561 switch (Str[0]) {
562 default:
563 break;
564 case '[':
565 BracketDepth++;
566 break;
567 case ']':
568 if (BracketDepth == 0) {
569 SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
570 SourceMgr::DK_Error,
571 "missing closing \"]\" for regex variable");
572 exit(1);
573 }
574 BracketDepth--;
575 break;
576 }
577 Str = Str.substr(1);
578 Offset++;
579 }
580 }
581
582 return StringRef::npos;
583 }
584
585
586 //===----------------------------------------------------------------------===//
587 // Check Strings.
588 //===----------------------------------------------------------------------===//
589
590 /// CheckString - This is a check that we found in the input file.
591 struct CheckString {
592 /// Pat - The pattern to match.
593 Pattern Pat;
594
595 /// Prefix - Which prefix name this check matched.
596 StringRef Prefix;
597
598 /// Loc - The location in the match file that the check string was specified.
599 SMLoc Loc;
600
601 /// CheckTy - Specify what kind of check this is. e.g. CHECK-NEXT: directive,
602 /// as opposed to a CHECK: directive.
603 Check::CheckType CheckTy;
604
605 /// DagNotStrings - These are all of the strings that are disallowed from
606 /// occurring between this match string and the previous one (or start of
607 /// file).
608 std::vector<Pattern> DagNotStrings;
609
610
CheckStringCheckString611 CheckString(const Pattern &P,
612 StringRef S,
613 SMLoc L,
614 Check::CheckType Ty)
615 : Pat(P), Prefix(S), Loc(L), CheckTy(Ty) {}
616
617 /// Check - Match check string and its "not strings" and/or "dag strings".
618 size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
619 size_t &MatchLen, StringMap<StringRef> &VariableTable) const;
620
621 /// CheckNext - Verify there is a single line in the given buffer.
622 bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
623
624 /// CheckSame - Verify there is no newline in the given buffer.
625 bool CheckSame(const SourceMgr &SM, StringRef Buffer) const;
626
627 /// CheckNot - Verify there's no "not strings" in the given buffer.
628 bool CheckNot(const SourceMgr &SM, StringRef Buffer,
629 const std::vector<const Pattern *> &NotStrings,
630 StringMap<StringRef> &VariableTable) const;
631
632 /// CheckDag - Match "dag strings" and their mixed "not strings".
633 size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
634 std::vector<const Pattern *> &NotStrings,
635 StringMap<StringRef> &VariableTable) const;
636 };
637
638 /// Canonicalize whitespaces in the input file. Line endings are replaced
639 /// with UNIX-style '\n'.
640 ///
641 /// \param PreserveHorizontal Don't squash consecutive horizontal whitespace
642 /// characters to a single space.
643 static std::unique_ptr<MemoryBuffer>
CanonicalizeInputFile(std::unique_ptr<MemoryBuffer> MB,bool PreserveHorizontal)644 CanonicalizeInputFile(std::unique_ptr<MemoryBuffer> MB,
645 bool PreserveHorizontal) {
646 SmallString<128> NewFile;
647 NewFile.reserve(MB->getBufferSize());
648
649 for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd();
650 Ptr != End; ++Ptr) {
651 // Eliminate trailing dosish \r.
652 if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
653 continue;
654 }
655
656 // If current char is not a horizontal whitespace or if horizontal
657 // whitespace canonicalization is disabled, dump it to output as is.
658 if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) {
659 NewFile.push_back(*Ptr);
660 continue;
661 }
662
663 // Otherwise, add one space and advance over neighboring space.
664 NewFile.push_back(' ');
665 while (Ptr+1 != End &&
666 (Ptr[1] == ' ' || Ptr[1] == '\t'))
667 ++Ptr;
668 }
669
670 return std::unique_ptr<MemoryBuffer>(
671 MemoryBuffer::getMemBufferCopy(NewFile.str(), MB->getBufferIdentifier()));
672 }
673
IsPartOfWord(char c)674 static bool IsPartOfWord(char c) {
675 return (isalnum(c) || c == '-' || c == '_');
676 }
677
678 // Get the size of the prefix extension.
CheckTypeSize(Check::CheckType Ty)679 static size_t CheckTypeSize(Check::CheckType Ty) {
680 switch (Ty) {
681 case Check::CheckNone:
682 return 0;
683
684 case Check::CheckPlain:
685 return sizeof(":") - 1;
686
687 case Check::CheckNext:
688 return sizeof("-NEXT:") - 1;
689
690 case Check::CheckSame:
691 return sizeof("-SAME:") - 1;
692
693 case Check::CheckNot:
694 return sizeof("-NOT:") - 1;
695
696 case Check::CheckDAG:
697 return sizeof("-DAG:") - 1;
698
699 case Check::CheckLabel:
700 return sizeof("-LABEL:") - 1;
701
702 case Check::CheckEOF:
703 llvm_unreachable("Should not be using EOF size");
704 }
705
706 llvm_unreachable("Bad check type");
707 }
708
FindCheckType(StringRef Buffer,StringRef Prefix)709 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
710 char NextChar = Buffer[Prefix.size()];
711
712 // Verify that the : is present after the prefix.
713 if (NextChar == ':')
714 return Check::CheckPlain;
715
716 if (NextChar != '-')
717 return Check::CheckNone;
718
719 StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
720 if (Rest.startswith("NEXT:"))
721 return Check::CheckNext;
722
723 if (Rest.startswith("SAME:"))
724 return Check::CheckSame;
725
726 if (Rest.startswith("NOT:"))
727 return Check::CheckNot;
728
729 if (Rest.startswith("DAG:"))
730 return Check::CheckDAG;
731
732 if (Rest.startswith("LABEL:"))
733 return Check::CheckLabel;
734
735 return Check::CheckNone;
736 }
737
738 // From the given position, find the next character after the word.
SkipWord(StringRef Str,size_t Loc)739 static size_t SkipWord(StringRef Str, size_t Loc) {
740 while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
741 ++Loc;
742 return Loc;
743 }
744
745 // Try to find the first match in buffer for any prefix. If a valid match is
746 // found, return that prefix and set its type and location. If there are almost
747 // matches (e.g. the actual prefix string is found, but is not an actual check
748 // string), but no valid match, return an empty string and set the position to
749 // resume searching from. If no partial matches are found, return an empty
750 // string and the location will be StringRef::npos. If one prefix is a substring
751 // of another, the maximal match should be found. e.g. if "A" and "AA" are
752 // prefixes then AA-CHECK: should match the second one.
FindFirstCandidateMatch(StringRef & Buffer,Check::CheckType & CheckTy,size_t & CheckLoc)753 static StringRef FindFirstCandidateMatch(StringRef &Buffer,
754 Check::CheckType &CheckTy,
755 size_t &CheckLoc) {
756 StringRef FirstPrefix;
757 size_t FirstLoc = StringRef::npos;
758 size_t SearchLoc = StringRef::npos;
759 Check::CheckType FirstTy = Check::CheckNone;
760
761 CheckTy = Check::CheckNone;
762 CheckLoc = StringRef::npos;
763
764 for (prefix_iterator I = CheckPrefixes.begin(), E = CheckPrefixes.end();
765 I != E; ++I) {
766 StringRef Prefix(*I);
767 size_t PrefixLoc = Buffer.find(Prefix);
768
769 if (PrefixLoc == StringRef::npos)
770 continue;
771
772 // Track where we are searching for invalid prefixes that look almost right.
773 // We need to only advance to the first partial match on the next attempt
774 // since a partial match could be a substring of a later, valid prefix.
775 // Need to skip to the end of the word, otherwise we could end up
776 // matching a prefix in a substring later.
777 if (PrefixLoc < SearchLoc)
778 SearchLoc = SkipWord(Buffer, PrefixLoc);
779
780 // We only want to find the first match to avoid skipping some.
781 if (PrefixLoc > FirstLoc)
782 continue;
783 // If one matching check-prefix is a prefix of another, choose the
784 // longer one.
785 if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size())
786 continue;
787
788 StringRef Rest = Buffer.drop_front(PrefixLoc);
789 // Make sure we have actually found the prefix, and not a word containing
790 // it. This should also prevent matching the wrong prefix when one is a
791 // substring of another.
792 if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1]))
793 FirstTy = Check::CheckNone;
794 else
795 FirstTy = FindCheckType(Rest, Prefix);
796
797 FirstLoc = PrefixLoc;
798 FirstPrefix = Prefix;
799 }
800
801 // If the first prefix is invalid, we should continue the search after it.
802 if (FirstTy == Check::CheckNone) {
803 CheckLoc = SearchLoc;
804 return "";
805 }
806
807 CheckTy = FirstTy;
808 CheckLoc = FirstLoc;
809 return FirstPrefix;
810 }
811
FindFirstMatchingPrefix(StringRef & Buffer,unsigned & LineNumber,Check::CheckType & CheckTy,size_t & CheckLoc)812 static StringRef FindFirstMatchingPrefix(StringRef &Buffer,
813 unsigned &LineNumber,
814 Check::CheckType &CheckTy,
815 size_t &CheckLoc) {
816 while (!Buffer.empty()) {
817 StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc);
818 // If we found a real match, we are done.
819 if (!Prefix.empty()) {
820 LineNumber += Buffer.substr(0, CheckLoc).count('\n');
821 return Prefix;
822 }
823
824 // We didn't find any almost matches either, we are also done.
825 if (CheckLoc == StringRef::npos)
826 return StringRef();
827
828 LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n');
829
830 // Advance to the last possible match we found and try again.
831 Buffer = Buffer.drop_front(CheckLoc + 1);
832 }
833
834 return StringRef();
835 }
836
837 /// ReadCheckFile - Read the check file, which specifies the sequence of
838 /// expected strings. The strings are added to the CheckStrings vector.
839 /// Returns true in case of an error, false otherwise.
ReadCheckFile(SourceMgr & SM,std::vector<CheckString> & CheckStrings)840 static bool ReadCheckFile(SourceMgr &SM,
841 std::vector<CheckString> &CheckStrings) {
842 ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
843 MemoryBuffer::getFileOrSTDIN(CheckFilename);
844 if (std::error_code EC = FileOrErr.getError()) {
845 errs() << "Could not open check file '" << CheckFilename
846 << "': " << EC.message() << '\n';
847 return true;
848 }
849
850 // If we want to canonicalize whitespace, strip excess whitespace from the
851 // buffer containing the CHECK lines. Remove DOS style line endings.
852 std::unique_ptr<MemoryBuffer> F = CanonicalizeInputFile(
853 std::move(FileOrErr.get()), NoCanonicalizeWhiteSpace);
854
855 // Find all instances of CheckPrefix followed by : in the file.
856 StringRef Buffer = F->getBuffer();
857
858 SM.AddNewSourceBuffer(std::move(F), SMLoc());
859
860 std::vector<Pattern> ImplicitNegativeChecks;
861 for (const auto &PatternString : ImplicitCheckNot) {
862 // Create a buffer with fake command line content in order to display the
863 // command line option responsible for the specific implicit CHECK-NOT.
864 std::string Prefix = std::string("-") + ImplicitCheckNot.ArgStr + "='";
865 std::string Suffix = "'";
866 std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy(
867 Prefix + PatternString + Suffix, "command line");
868
869 StringRef PatternInBuffer =
870 CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
871 SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
872
873 ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot));
874 ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
875 "IMPLICIT-CHECK", SM, 0);
876 }
877
878
879 std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
880
881 // LineNumber keeps track of the line on which CheckPrefix instances are
882 // found.
883 unsigned LineNumber = 1;
884
885 while (1) {
886 Check::CheckType CheckTy;
887 size_t PrefixLoc;
888
889 // See if a prefix occurs in the memory buffer.
890 StringRef UsedPrefix = FindFirstMatchingPrefix(Buffer,
891 LineNumber,
892 CheckTy,
893 PrefixLoc);
894 if (UsedPrefix.empty())
895 break;
896
897 Buffer = Buffer.drop_front(PrefixLoc);
898
899 // Location to use for error messages.
900 const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1);
901
902 // PrefixLoc is to the start of the prefix. Skip to the end.
903 Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy));
904
905 // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
906 // leading and trailing whitespace.
907 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
908
909 // Scan ahead to the end of line.
910 size_t EOL = Buffer.find_first_of("\n\r");
911
912 // Remember the location of the start of the pattern, for diagnostics.
913 SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
914
915 // Parse the pattern.
916 Pattern P(CheckTy);
917 if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber))
918 return true;
919
920 // Verify that CHECK-LABEL lines do not define or use variables
921 if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
922 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
923 SourceMgr::DK_Error,
924 "found '" + UsedPrefix + "-LABEL:'"
925 " with variable definition or use");
926 return true;
927 }
928
929 Buffer = Buffer.substr(EOL);
930
931 // Verify that CHECK-NEXT lines have at least one CHECK line before them.
932 if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame) &&
933 CheckStrings.empty()) {
934 StringRef Type = CheckTy == Check::CheckNext ? "NEXT" : "SAME";
935 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
936 SourceMgr::DK_Error,
937 "found '" + UsedPrefix + "-" + Type + "' without previous '"
938 + UsedPrefix + ": line");
939 return true;
940 }
941
942 // Handle CHECK-DAG/-NOT.
943 if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
944 DagNotMatches.push_back(P);
945 continue;
946 }
947
948 // Okay, add the string we captured to the output vector and move on.
949 CheckStrings.push_back(CheckString(P,
950 UsedPrefix,
951 PatternLoc,
952 CheckTy));
953 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
954 DagNotMatches = ImplicitNegativeChecks;
955 }
956
957 // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
958 // prefix as a filler for the error message.
959 if (!DagNotMatches.empty()) {
960 CheckStrings.push_back(CheckString(Pattern(Check::CheckEOF),
961 CheckPrefixes[0],
962 SMLoc::getFromPointer(Buffer.data()),
963 Check::CheckEOF));
964 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
965 }
966
967 if (CheckStrings.empty()) {
968 errs() << "error: no check strings found with prefix"
969 << (CheckPrefixes.size() > 1 ? "es " : " ");
970 for (size_t I = 0, N = CheckPrefixes.size(); I != N; ++I) {
971 StringRef Prefix(CheckPrefixes[I]);
972 errs() << '\'' << Prefix << ":'";
973 if (I != N - 1)
974 errs() << ", ";
975 }
976
977 errs() << '\n';
978 return true;
979 }
980
981 return false;
982 }
983
PrintCheckFailed(const SourceMgr & SM,const SMLoc & Loc,const Pattern & Pat,StringRef Buffer,StringMap<StringRef> & VariableTable)984 static void PrintCheckFailed(const SourceMgr &SM, const SMLoc &Loc,
985 const Pattern &Pat, StringRef Buffer,
986 StringMap<StringRef> &VariableTable) {
987 // Otherwise, we have an error, emit an error message.
988 SM.PrintMessage(Loc, SourceMgr::DK_Error,
989 "expected string not found in input");
990
991 // Print the "scanning from here" line. If the current position is at the
992 // end of a line, advance to the start of the next line.
993 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
994
995 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
996 "scanning from here");
997
998 // Allow the pattern to print additional information if desired.
999 Pat.PrintFailureInfo(SM, Buffer, VariableTable);
1000 }
1001
PrintCheckFailed(const SourceMgr & SM,const CheckString & CheckStr,StringRef Buffer,StringMap<StringRef> & VariableTable)1002 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
1003 StringRef Buffer,
1004 StringMap<StringRef> &VariableTable) {
1005 PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable);
1006 }
1007
1008 /// CountNumNewlinesBetween - Count the number of newlines in the specified
1009 /// range.
CountNumNewlinesBetween(StringRef Range,const char * & FirstNewLine)1010 static unsigned CountNumNewlinesBetween(StringRef Range,
1011 const char *&FirstNewLine) {
1012 unsigned NumNewLines = 0;
1013 while (1) {
1014 // Scan for newline.
1015 Range = Range.substr(Range.find_first_of("\n\r"));
1016 if (Range.empty()) return NumNewLines;
1017
1018 ++NumNewLines;
1019
1020 // Handle \n\r and \r\n as a single newline.
1021 if (Range.size() > 1 &&
1022 (Range[1] == '\n' || Range[1] == '\r') &&
1023 (Range[0] != Range[1]))
1024 Range = Range.substr(1);
1025 Range = Range.substr(1);
1026
1027 if (NumNewLines == 1)
1028 FirstNewLine = Range.begin();
1029 }
1030 }
1031
Check(const SourceMgr & SM,StringRef Buffer,bool IsLabelScanMode,size_t & MatchLen,StringMap<StringRef> & VariableTable) const1032 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
1033 bool IsLabelScanMode, size_t &MatchLen,
1034 StringMap<StringRef> &VariableTable) const {
1035 size_t LastPos = 0;
1036 std::vector<const Pattern *> NotStrings;
1037
1038 // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
1039 // bounds; we have not processed variable definitions within the bounded block
1040 // yet so cannot handle any final CHECK-DAG yet; this is handled when going
1041 // over the block again (including the last CHECK-LABEL) in normal mode.
1042 if (!IsLabelScanMode) {
1043 // Match "dag strings" (with mixed "not strings" if any).
1044 LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
1045 if (LastPos == StringRef::npos)
1046 return StringRef::npos;
1047 }
1048
1049 // Match itself from the last position after matching CHECK-DAG.
1050 StringRef MatchBuffer = Buffer.substr(LastPos);
1051 size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1052 if (MatchPos == StringRef::npos) {
1053 PrintCheckFailed(SM, *this, MatchBuffer, VariableTable);
1054 return StringRef::npos;
1055 }
1056
1057 // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
1058 // or CHECK-NOT
1059 if (!IsLabelScanMode) {
1060 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1061
1062 // If this check is a "CHECK-NEXT", verify that the previous match was on
1063 // the previous line (i.e. that there is one newline between them).
1064 if (CheckNext(SM, SkippedRegion))
1065 return StringRef::npos;
1066
1067 // If this check is a "CHECK-SAME", verify that the previous match was on
1068 // the same line (i.e. that there is no newline between them).
1069 if (CheckSame(SM, SkippedRegion))
1070 return StringRef::npos;
1071
1072 // If this match had "not strings", verify that they don't exist in the
1073 // skipped region.
1074 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1075 return StringRef::npos;
1076 }
1077
1078 return LastPos + MatchPos;
1079 }
1080
CheckNext(const SourceMgr & SM,StringRef Buffer) const1081 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
1082 if (CheckTy != Check::CheckNext)
1083 return false;
1084
1085 // Count the number of newlines between the previous match and this one.
1086 assert(Buffer.data() !=
1087 SM.getMemoryBuffer(
1088 SM.FindBufferContainingLoc(
1089 SMLoc::getFromPointer(Buffer.data())))->getBufferStart() &&
1090 "CHECK-NEXT can't be the first check in a file");
1091
1092 const char *FirstNewLine = nullptr;
1093 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1094
1095 if (NumNewLines == 0) {
1096 SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
1097 "-NEXT: is on the same line as previous match");
1098 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1099 SourceMgr::DK_Note, "'next' match was here");
1100 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1101 "previous match ended here");
1102 return true;
1103 }
1104
1105 if (NumNewLines != 1) {
1106 SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
1107 "-NEXT: is not on the line after the previous match");
1108 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1109 SourceMgr::DK_Note, "'next' match was here");
1110 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1111 "previous match ended here");
1112 SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
1113 "non-matching line after previous match is here");
1114 return true;
1115 }
1116
1117 return false;
1118 }
1119
CheckSame(const SourceMgr & SM,StringRef Buffer) const1120 bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
1121 if (CheckTy != Check::CheckSame)
1122 return false;
1123
1124 // Count the number of newlines between the previous match and this one.
1125 assert(Buffer.data() !=
1126 SM.getMemoryBuffer(SM.FindBufferContainingLoc(
1127 SMLoc::getFromPointer(Buffer.data())))
1128 ->getBufferStart() &&
1129 "CHECK-SAME can't be the first check in a file");
1130
1131 const char *FirstNewLine = nullptr;
1132 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1133
1134 if (NumNewLines != 0) {
1135 SM.PrintMessage(Loc, SourceMgr::DK_Error,
1136 Prefix +
1137 "-SAME: is not on the same line as the previous match");
1138 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1139 "'next' match was here");
1140 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1141 "previous match ended here");
1142 return true;
1143 }
1144
1145 return false;
1146 }
1147
CheckNot(const SourceMgr & SM,StringRef Buffer,const std::vector<const Pattern * > & NotStrings,StringMap<StringRef> & VariableTable) const1148 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
1149 const std::vector<const Pattern *> &NotStrings,
1150 StringMap<StringRef> &VariableTable) const {
1151 for (unsigned ChunkNo = 0, e = NotStrings.size();
1152 ChunkNo != e; ++ChunkNo) {
1153 const Pattern *Pat = NotStrings[ChunkNo];
1154 assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
1155
1156 size_t MatchLen = 0;
1157 size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
1158
1159 if (Pos == StringRef::npos) continue;
1160
1161 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()+Pos),
1162 SourceMgr::DK_Error,
1163 Prefix + "-NOT: string occurred!");
1164 SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note,
1165 Prefix + "-NOT: pattern specified here");
1166 return true;
1167 }
1168
1169 return false;
1170 }
1171
CheckDag(const SourceMgr & SM,StringRef Buffer,std::vector<const Pattern * > & NotStrings,StringMap<StringRef> & VariableTable) const1172 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
1173 std::vector<const Pattern *> &NotStrings,
1174 StringMap<StringRef> &VariableTable) const {
1175 if (DagNotStrings.empty())
1176 return 0;
1177
1178 size_t LastPos = 0;
1179 size_t StartPos = LastPos;
1180
1181 for (unsigned ChunkNo = 0, e = DagNotStrings.size();
1182 ChunkNo != e; ++ChunkNo) {
1183 const Pattern &Pat = DagNotStrings[ChunkNo];
1184
1185 assert((Pat.getCheckTy() == Check::CheckDAG ||
1186 Pat.getCheckTy() == Check::CheckNot) &&
1187 "Invalid CHECK-DAG or CHECK-NOT!");
1188
1189 if (Pat.getCheckTy() == Check::CheckNot) {
1190 NotStrings.push_back(&Pat);
1191 continue;
1192 }
1193
1194 assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
1195
1196 size_t MatchLen = 0, MatchPos;
1197
1198 // CHECK-DAG always matches from the start.
1199 StringRef MatchBuffer = Buffer.substr(StartPos);
1200 MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1201 // With a group of CHECK-DAGs, a single mismatching means the match on
1202 // that group of CHECK-DAGs fails immediately.
1203 if (MatchPos == StringRef::npos) {
1204 PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable);
1205 return StringRef::npos;
1206 }
1207 // Re-calc it as the offset relative to the start of the original string.
1208 MatchPos += StartPos;
1209
1210 if (!NotStrings.empty()) {
1211 if (MatchPos < LastPos) {
1212 // Reordered?
1213 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos),
1214 SourceMgr::DK_Error,
1215 Prefix + "-DAG: found a match of CHECK-DAG"
1216 " reordering across a CHECK-NOT");
1217 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos),
1218 SourceMgr::DK_Note,
1219 Prefix + "-DAG: the farthest match of CHECK-DAG"
1220 " is found here");
1221 SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note,
1222 Prefix + "-NOT: the crossed pattern specified"
1223 " here");
1224 SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note,
1225 Prefix + "-DAG: the reordered pattern specified"
1226 " here");
1227 return StringRef::npos;
1228 }
1229 // All subsequent CHECK-DAGs should be matched from the farthest
1230 // position of all precedent CHECK-DAGs (including this one.)
1231 StartPos = LastPos;
1232 // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
1233 // CHECK-DAG, verify that there's no 'not' strings occurred in that
1234 // region.
1235 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1236 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1237 return StringRef::npos;
1238 // Clear "not strings".
1239 NotStrings.clear();
1240 }
1241
1242 // Update the last position with CHECK-DAG matches.
1243 LastPos = std::max(MatchPos + MatchLen, LastPos);
1244 }
1245
1246 return LastPos;
1247 }
1248
1249 // A check prefix must contain only alphanumeric, hyphens and underscores.
ValidateCheckPrefix(StringRef CheckPrefix)1250 static bool ValidateCheckPrefix(StringRef CheckPrefix) {
1251 Regex Validator("^[a-zA-Z0-9_-]*$");
1252 return Validator.match(CheckPrefix);
1253 }
1254
ValidateCheckPrefixes()1255 static bool ValidateCheckPrefixes() {
1256 StringSet<> PrefixSet;
1257
1258 for (prefix_iterator I = CheckPrefixes.begin(), E = CheckPrefixes.end();
1259 I != E; ++I) {
1260 StringRef Prefix(*I);
1261
1262 // Reject empty prefixes.
1263 if (Prefix == "")
1264 return false;
1265
1266 if (!PrefixSet.insert(Prefix).second)
1267 return false;
1268
1269 if (!ValidateCheckPrefix(Prefix))
1270 return false;
1271 }
1272
1273 return true;
1274 }
1275
1276 // I don't think there's a way to specify an initial value for cl::list,
1277 // so if nothing was specified, add the default
AddCheckPrefixIfNeeded()1278 static void AddCheckPrefixIfNeeded() {
1279 if (CheckPrefixes.empty())
1280 CheckPrefixes.push_back("CHECK");
1281 }
1282
main(int argc,char ** argv)1283 int main(int argc, char **argv) {
1284 sys::PrintStackTraceOnErrorSignal();
1285 PrettyStackTraceProgram X(argc, argv);
1286 cl::ParseCommandLineOptions(argc, argv);
1287
1288 if (!ValidateCheckPrefixes()) {
1289 errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
1290 "start with a letter and contain only alphanumeric characters, "
1291 "hyphens and underscores\n";
1292 return 2;
1293 }
1294
1295 AddCheckPrefixIfNeeded();
1296
1297 SourceMgr SM;
1298
1299 // Read the expected strings from the check file.
1300 std::vector<CheckString> CheckStrings;
1301 if (ReadCheckFile(SM, CheckStrings))
1302 return 2;
1303
1304 // Open the file to check and add it to SourceMgr.
1305 ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
1306 MemoryBuffer::getFileOrSTDIN(InputFilename);
1307 if (std::error_code EC = FileOrErr.getError()) {
1308 errs() << "Could not open input file '" << InputFilename
1309 << "': " << EC.message() << '\n';
1310 return 2;
1311 }
1312 std::unique_ptr<MemoryBuffer> &File = FileOrErr.get();
1313
1314 if (File->getBufferSize() == 0 && !AllowEmptyInput) {
1315 errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
1316 return 2;
1317 }
1318
1319 // Remove duplicate spaces in the input file if requested.
1320 // Remove DOS style line endings.
1321 std::unique_ptr<MemoryBuffer> F =
1322 CanonicalizeInputFile(std::move(File), NoCanonicalizeWhiteSpace);
1323
1324 // Check that we have all of the expected strings, in order, in the input
1325 // file.
1326 StringRef Buffer = F->getBuffer();
1327
1328 SM.AddNewSourceBuffer(std::move(F), SMLoc());
1329
1330 /// VariableTable - This holds all the current filecheck variables.
1331 StringMap<StringRef> VariableTable;
1332
1333 bool hasError = false;
1334
1335 unsigned i = 0, j = 0, e = CheckStrings.size();
1336
1337 while (true) {
1338 StringRef CheckRegion;
1339 if (j == e) {
1340 CheckRegion = Buffer;
1341 } else {
1342 const CheckString &CheckLabelStr = CheckStrings[j];
1343 if (CheckLabelStr.CheckTy != Check::CheckLabel) {
1344 ++j;
1345 continue;
1346 }
1347
1348 // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
1349 size_t MatchLabelLen = 0;
1350 size_t MatchLabelPos = CheckLabelStr.Check(SM, Buffer, true,
1351 MatchLabelLen, VariableTable);
1352 if (MatchLabelPos == StringRef::npos) {
1353 hasError = true;
1354 break;
1355 }
1356
1357 CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
1358 Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
1359 ++j;
1360 }
1361
1362 for ( ; i != j; ++i) {
1363 const CheckString &CheckStr = CheckStrings[i];
1364
1365 // Check each string within the scanned region, including a second check
1366 // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
1367 size_t MatchLen = 0;
1368 size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen,
1369 VariableTable);
1370
1371 if (MatchPos == StringRef::npos) {
1372 hasError = true;
1373 i = j;
1374 break;
1375 }
1376
1377 CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
1378 }
1379
1380 if (j == e)
1381 break;
1382 }
1383
1384 return hasError ? 1 : 0;
1385 }
1386