1 //===- FileCheck.cpp - Check that File's Contents match what is expected --===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
// FileCheck does a line-by-line check of a file that validates whether it
11 // contains the expected content. This is useful for regression tests etc.
12 //
13 // This program exits with an error status of 2 on error, exit status of 0 if
14 // the file matched the expected contents, and exit status of 1 if it did not
15 // contain the expected contents.
16 //
17 //===----------------------------------------------------------------------===//
18
19 #include "llvm/ADT/SmallString.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/ADT/StringMap.h"
22 #include "llvm/ADT/StringSet.h"
23 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Support/MemoryBuffer.h"
25 #include "llvm/Support/PrettyStackTrace.h"
26 #include "llvm/Support/Regex.h"
27 #include "llvm/Support/Signals.h"
28 #include "llvm/Support/SourceMgr.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <algorithm>
31 #include <cctype>
32 #include <map>
33 #include <string>
34 #include <system_error>
35 #include <vector>
36 using namespace llvm;
37
38 static cl::opt<std::string>
39 CheckFilename(cl::Positional, cl::desc("<check-file>"), cl::Required);
40
41 static cl::opt<std::string>
42 InputFilename("input-file", cl::desc("File to check (defaults to stdin)"),
43 cl::init("-"), cl::value_desc("filename"));
44
45 static cl::list<std::string>
46 CheckPrefixes("check-prefix",
47 cl::desc("Prefix to use from check file (defaults to 'CHECK')"));
48 static cl::alias CheckPrefixesAlias(
49 "check-prefixes", cl::aliasopt(CheckPrefixes), cl::CommaSeparated,
50 cl::NotHidden,
51 cl::desc(
52 "Alias for -check-prefix permitting multiple comma separated values"));
53
54 static cl::opt<bool>
55 NoCanonicalizeWhiteSpace("strict-whitespace",
56 cl::desc("Do not treat all horizontal whitespace as equivalent"));
57
58 static cl::list<std::string> ImplicitCheckNot(
59 "implicit-check-not",
60 cl::desc("Add an implicit negative check with this pattern to every\n"
61 "positive check. This can be used to ensure that no instances of\n"
62 "this pattern occur which are not matched by a positive pattern"),
63 cl::value_desc("pattern"));
64
65 static cl::opt<bool> AllowEmptyInput(
66 "allow-empty", cl::init(false),
67 cl::desc("Allow the input file to be empty. This is useful when making\n"
68 "checks that some error message does not occur, for example."));
69
70 static cl::opt<bool> MatchFullLines(
71 "match-full-lines", cl::init(false),
72 cl::desc("Require all positive matches to cover an entire input line.\n"
73 "Allows leading and trailing whitespace if --strict-whitespace\n"
74 "is not also passed."));
75
76 typedef cl::list<std::string>::const_iterator prefix_iterator;
77
78 //===----------------------------------------------------------------------===//
79 // Pattern Handling Code.
80 //===----------------------------------------------------------------------===//
81
namespace Check {
/// The kinds of check directive a pattern can belong to. Enumerators keep
/// their implicit sequential values starting at zero.
enum CheckType {
  /// No check directive.
  CheckNone = 0,
  /// Directive with no suffix ("PREFIX:").
  CheckPlain,
  /// "PREFIX-NEXT:" directive.
  CheckNext,
  /// "PREFIX-SAME:" directive.
  CheckSame,
  /// "PREFIX-NOT:" directive.
  CheckNot,
  /// "PREFIX-DAG:" directive.
  CheckDAG,
  /// "PREFIX-LABEL:" directive.
  CheckLabel,

  /// CheckEOF - When set, this pattern only matches the end of file. This is
  /// used for trailing CHECK-NOTs.
  CheckEOF,

  /// CheckBadNot - Found -NOT combined with another CHECK suffix.
  CheckBadNot
};
} // end namespace Check
99
100 class Pattern {
101 SMLoc PatternLoc;
102
103 Check::CheckType CheckTy;
104
105 /// FixedStr - If non-empty, this pattern is a fixed string match with the
106 /// specified fixed string.
107 StringRef FixedStr;
108
109 /// RegEx - If non-empty, this is a regex pattern.
110 std::string RegExStr;
111
112 /// \brief Contains the number of line this pattern is in.
113 unsigned LineNumber;
114
115 /// VariableUses - Entries in this vector map to uses of a variable in the
116 /// pattern, e.g. "foo[[bar]]baz". In this case, the RegExStr will contain
117 /// "foobaz" and we'll get an entry in this vector that tells us to insert the
118 /// value of bar at offset 3.
119 std::vector<std::pair<StringRef, unsigned> > VariableUses;
120
121 /// VariableDefs - Maps definitions of variables to their parenthesized
122 /// capture numbers.
123 /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1.
124 std::map<StringRef, unsigned> VariableDefs;
125
126 public:
127
Pattern(Check::CheckType Ty)128 Pattern(Check::CheckType Ty)
129 : CheckTy(Ty) { }
130
131 /// getLoc - Return the location in source code.
getLoc() const132 SMLoc getLoc() const { return PatternLoc; }
133
134 /// ParsePattern - Parse the given string into the Pattern. Prefix provides
135 /// which prefix is being matched, SM provides the SourceMgr used for error
136 /// reports, and LineNumber is the line number in the input file from which
137 /// the pattern string was read. Returns true in case of an error, false
138 /// otherwise.
139 bool ParsePattern(StringRef PatternStr,
140 StringRef Prefix,
141 SourceMgr &SM,
142 unsigned LineNumber);
143
144 /// Match - Match the pattern string against the input buffer Buffer. This
145 /// returns the position that is matched or npos if there is no match. If
146 /// there is a match, the size of the matched string is returned in MatchLen.
147 ///
148 /// The VariableTable StringMap provides the current values of filecheck
149 /// variables and is updated if this match defines new values.
150 size_t Match(StringRef Buffer, size_t &MatchLen,
151 StringMap<StringRef> &VariableTable) const;
152
153 /// PrintFailureInfo - Print additional information about a failure to match
154 /// involving this pattern.
155 void PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
156 const StringMap<StringRef> &VariableTable) const;
157
hasVariable() const158 bool hasVariable() const { return !(VariableUses.empty() &&
159 VariableDefs.empty()); }
160
getCheckTy() const161 Check::CheckType getCheckTy() const { return CheckTy; }
162
163 private:
164 bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
165 void AddBackrefToRegEx(unsigned BackrefNum);
166
167 /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of
168 /// matching this pattern at the start of \arg Buffer; a distance of zero
169 /// should correspond to a perfect match.
170 unsigned ComputeMatchDistance(StringRef Buffer,
171 const StringMap<StringRef> &VariableTable) const;
172
173 /// \brief Evaluates expression and stores the result to \p Value.
174 /// \return true on success. false when the expression has invalid syntax.
175 bool EvaluateExpression(StringRef Expr, std::string &Value) const;
176
177 /// \brief Finds the closing sequence of a regex variable usage or
178 /// definition. Str has to point in the beginning of the definition
179 /// (right after the opening sequence).
180 /// \return offset of the closing sequence within Str, or npos if it was not
181 /// found.
182 size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
183 };
184
185
ParsePattern(StringRef PatternStr,StringRef Prefix,SourceMgr & SM,unsigned LineNumber)186 bool Pattern::ParsePattern(StringRef PatternStr,
187 StringRef Prefix,
188 SourceMgr &SM,
189 unsigned LineNumber) {
190 bool MatchFullLinesHere = MatchFullLines && CheckTy != Check::CheckNot;
191
192 this->LineNumber = LineNumber;
193 PatternLoc = SMLoc::getFromPointer(PatternStr.data());
194
195 // Ignore trailing whitespace.
196 while (!PatternStr.empty() &&
197 (PatternStr.back() == ' ' || PatternStr.back() == '\t'))
198 PatternStr = PatternStr.substr(0, PatternStr.size()-1);
199
200 // Check that there is something on the line.
201 if (PatternStr.empty()) {
202 SM.PrintMessage(PatternLoc, SourceMgr::DK_Error,
203 "found empty check string with prefix '" +
204 Prefix + ":'");
205 return true;
206 }
207
208 // Check to see if this is a fixed string, or if it has regex pieces.
209 if (!MatchFullLinesHere &&
210 (PatternStr.size() < 2 || (PatternStr.find("{{") == StringRef::npos &&
211 PatternStr.find("[[") == StringRef::npos))) {
212 FixedStr = PatternStr;
213 return false;
214 }
215
216 if (MatchFullLinesHere) {
217 RegExStr += '^';
218 if (!NoCanonicalizeWhiteSpace)
219 RegExStr += " *";
220 }
221
222 // Paren value #0 is for the fully matched string. Any new parenthesized
223 // values add from there.
224 unsigned CurParen = 1;
225
226 // Otherwise, there is at least one regex piece. Build up the regex pattern
227 // by escaping scary characters in fixed strings, building up one big regex.
228 while (!PatternStr.empty()) {
229 // RegEx matches.
230 if (PatternStr.startswith("{{")) {
231 // This is the start of a regex match. Scan for the }}.
232 size_t End = PatternStr.find("}}");
233 if (End == StringRef::npos) {
234 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
235 SourceMgr::DK_Error,
236 "found start of regex string with no end '}}'");
237 return true;
238 }
239
240 // Enclose {{}} patterns in parens just like [[]] even though we're not
241 // capturing the result for any purpose. This is required in case the
242 // expression contains an alternation like: CHECK: abc{{x|z}}def. We
243 // want this to turn into: "abc(x|z)def" not "abcx|zdef".
244 RegExStr += '(';
245 ++CurParen;
246
247 if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM))
248 return true;
249 RegExStr += ')';
250
251 PatternStr = PatternStr.substr(End+2);
252 continue;
253 }
254
255 // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .*
256 // (or some other regex) and assigns it to the FileCheck variable 'foo'. The
257 // second form is [[foo]] which is a reference to foo. The variable name
258 // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject
259 // it. This is to catch some common errors.
260 if (PatternStr.startswith("[[")) {
261 // Find the closing bracket pair ending the match. End is going to be an
262 // offset relative to the beginning of the match string.
263 size_t End = FindRegexVarEnd(PatternStr.substr(2), SM);
264
265 if (End == StringRef::npos) {
266 SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()),
267 SourceMgr::DK_Error,
268 "invalid named regex reference, no ]] found");
269 return true;
270 }
271
272 StringRef MatchStr = PatternStr.substr(2, End);
273 PatternStr = PatternStr.substr(End+4);
274
275 // Get the regex name (e.g. "foo").
276 size_t NameEnd = MatchStr.find(':');
277 StringRef Name = MatchStr.substr(0, NameEnd);
278
279 if (Name.empty()) {
280 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
281 "invalid name in named regex: empty name");
282 return true;
283 }
284
285 // Verify that the name/expression is well formed. FileCheck currently
286 // supports @LINE, @LINE+number, @LINE-number expressions. The check here
287 // is relaxed, more strict check is performed in \c EvaluateExpression.
288 bool IsExpression = false;
289 for (unsigned i = 0, e = Name.size(); i != e; ++i) {
290 if (i == 0 && Name[i] == '@') {
291 if (NameEnd != StringRef::npos) {
292 SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
293 SourceMgr::DK_Error,
294 "invalid name in named regex definition");
295 return true;
296 }
297 IsExpression = true;
298 continue;
299 }
300 if (Name[i] != '_' && !isalnum(Name[i]) &&
301 (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) {
302 SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i),
303 SourceMgr::DK_Error, "invalid name in named regex");
304 return true;
305 }
306 }
307
308 // Name can't start with a digit.
309 if (isdigit(static_cast<unsigned char>(Name[0]))) {
310 SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
311 "invalid name in named regex");
312 return true;
313 }
314
315 // Handle [[foo]].
316 if (NameEnd == StringRef::npos) {
317 // Handle variables that were defined earlier on the same line by
318 // emitting a backreference.
319 if (VariableDefs.find(Name) != VariableDefs.end()) {
320 unsigned VarParenNum = VariableDefs[Name];
321 if (VarParenNum < 1 || VarParenNum > 9) {
322 SM.PrintMessage(SMLoc::getFromPointer(Name.data()),
323 SourceMgr::DK_Error,
324 "Can't back-reference more than 9 variables");
325 return true;
326 }
327 AddBackrefToRegEx(VarParenNum);
328 } else {
329 VariableUses.push_back(std::make_pair(Name, RegExStr.size()));
330 }
331 continue;
332 }
333
334 // Handle [[foo:.*]].
335 VariableDefs[Name] = CurParen;
336 RegExStr += '(';
337 ++CurParen;
338
339 if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM))
340 return true;
341
342 RegExStr += ')';
343 }
344
345 // Handle fixed string matches.
346 // Find the end, which is the start of the next regex.
347 size_t FixedMatchEnd = PatternStr.find("{{");
348 FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[["));
349 RegExStr += Regex::escape(PatternStr.substr(0, FixedMatchEnd));
350 PatternStr = PatternStr.substr(FixedMatchEnd);
351 }
352
353 if (MatchFullLinesHere) {
354 if (!NoCanonicalizeWhiteSpace)
355 RegExStr += " *";
356 RegExStr += '$';
357 }
358
359 return false;
360 }
361
AddRegExToRegEx(StringRef RS,unsigned & CurParen,SourceMgr & SM)362 bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen,
363 SourceMgr &SM) {
364 Regex R(RS);
365 std::string Error;
366 if (!R.isValid(Error)) {
367 SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error,
368 "invalid regex: " + Error);
369 return true;
370 }
371
372 RegExStr += RS.str();
373 CurParen += R.getNumMatches();
374 return false;
375 }
376
AddBackrefToRegEx(unsigned BackrefNum)377 void Pattern::AddBackrefToRegEx(unsigned BackrefNum) {
378 assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number");
379 std::string Backref = std::string("\\") +
380 std::string(1, '0' + BackrefNum);
381 RegExStr += Backref;
382 }
383
EvaluateExpression(StringRef Expr,std::string & Value) const384 bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const {
385 // The only supported expression is @LINE([\+-]\d+)?
386 if (!Expr.startswith("@LINE"))
387 return false;
388 Expr = Expr.substr(StringRef("@LINE").size());
389 int Offset = 0;
390 if (!Expr.empty()) {
391 if (Expr[0] == '+')
392 Expr = Expr.substr(1);
393 else if (Expr[0] != '-')
394 return false;
395 if (Expr.getAsInteger(10, Offset))
396 return false;
397 }
398 Value = llvm::itostr(LineNumber + Offset);
399 return true;
400 }
401
402 /// Match - Match the pattern string against the input buffer Buffer. This
403 /// returns the position that is matched or npos if there is no match. If
404 /// there is a match, the size of the matched string is returned in MatchLen.
Match(StringRef Buffer,size_t & MatchLen,StringMap<StringRef> & VariableTable) const405 size_t Pattern::Match(StringRef Buffer, size_t &MatchLen,
406 StringMap<StringRef> &VariableTable) const {
407 // If this is the EOF pattern, match it immediately.
408 if (CheckTy == Check::CheckEOF) {
409 MatchLen = 0;
410 return Buffer.size();
411 }
412
413 // If this is a fixed string pattern, just match it now.
414 if (!FixedStr.empty()) {
415 MatchLen = FixedStr.size();
416 return Buffer.find(FixedStr);
417 }
418
419 // Regex match.
420
421 // If there are variable uses, we need to create a temporary string with the
422 // actual value.
423 StringRef RegExToMatch = RegExStr;
424 std::string TmpStr;
425 if (!VariableUses.empty()) {
426 TmpStr = RegExStr;
427
428 unsigned InsertOffset = 0;
429 for (const auto &VariableUse : VariableUses) {
430 std::string Value;
431
432 if (VariableUse.first[0] == '@') {
433 if (!EvaluateExpression(VariableUse.first, Value))
434 return StringRef::npos;
435 } else {
436 StringMap<StringRef>::iterator it =
437 VariableTable.find(VariableUse.first);
438 // If the variable is undefined, return an error.
439 if (it == VariableTable.end())
440 return StringRef::npos;
441
442 // Look up the value and escape it so that we can put it into the regex.
443 Value += Regex::escape(it->second);
444 }
445
446 // Plop it into the regex at the adjusted offset.
447 TmpStr.insert(TmpStr.begin() + VariableUse.second + InsertOffset,
448 Value.begin(), Value.end());
449 InsertOffset += Value.size();
450 }
451
452 // Match the newly constructed regex.
453 RegExToMatch = TmpStr;
454 }
455
456
457 SmallVector<StringRef, 4> MatchInfo;
458 if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo))
459 return StringRef::npos;
460
461 // Successful regex match.
462 assert(!MatchInfo.empty() && "Didn't get any match");
463 StringRef FullMatch = MatchInfo[0];
464
465 // If this defines any variables, remember their values.
466 for (const auto &VariableDef : VariableDefs) {
467 assert(VariableDef.second < MatchInfo.size() && "Internal paren error");
468 VariableTable[VariableDef.first] = MatchInfo[VariableDef.second];
469 }
470
471 MatchLen = FullMatch.size();
472 return FullMatch.data()-Buffer.data();
473 }
474
ComputeMatchDistance(StringRef Buffer,const StringMap<StringRef> & VariableTable) const475 unsigned Pattern::ComputeMatchDistance(StringRef Buffer,
476 const StringMap<StringRef> &VariableTable) const {
477 // Just compute the number of matching characters. For regular expressions, we
478 // just compare against the regex itself and hope for the best.
479 //
480 // FIXME: One easy improvement here is have the regex lib generate a single
481 // example regular expression which matches, and use that as the example
482 // string.
483 StringRef ExampleString(FixedStr);
484 if (ExampleString.empty())
485 ExampleString = RegExStr;
486
487 // Only compare up to the first line in the buffer, or the string size.
488 StringRef BufferPrefix = Buffer.substr(0, ExampleString.size());
489 BufferPrefix = BufferPrefix.split('\n').first;
490 return BufferPrefix.edit_distance(ExampleString);
491 }
492
PrintFailureInfo(const SourceMgr & SM,StringRef Buffer,const StringMap<StringRef> & VariableTable) const493 void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer,
494 const StringMap<StringRef> &VariableTable) const{
495 // If this was a regular expression using variables, print the current
496 // variable values.
497 if (!VariableUses.empty()) {
498 for (const auto &VariableUse : VariableUses) {
499 SmallString<256> Msg;
500 raw_svector_ostream OS(Msg);
501 StringRef Var = VariableUse.first;
502 if (Var[0] == '@') {
503 std::string Value;
504 if (EvaluateExpression(Var, Value)) {
505 OS << "with expression \"";
506 OS.write_escaped(Var) << "\" equal to \"";
507 OS.write_escaped(Value) << "\"";
508 } else {
509 OS << "uses incorrect expression \"";
510 OS.write_escaped(Var) << "\"";
511 }
512 } else {
513 StringMap<StringRef>::const_iterator it = VariableTable.find(Var);
514
515 // Check for undefined variable references.
516 if (it == VariableTable.end()) {
517 OS << "uses undefined variable \"";
518 OS.write_escaped(Var) << "\"";
519 } else {
520 OS << "with variable \"";
521 OS.write_escaped(Var) << "\" equal to \"";
522 OS.write_escaped(it->second) << "\"";
523 }
524 }
525
526 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
527 OS.str());
528 }
529 }
530
531 // Attempt to find the closest/best fuzzy match. Usually an error happens
532 // because some string in the output didn't exactly match. In these cases, we
533 // would like to show the user a best guess at what "should have" matched, to
534 // save them having to actually check the input manually.
535 size_t NumLinesForward = 0;
536 size_t Best = StringRef::npos;
537 double BestQuality = 0;
538
539 // Use an arbitrary 4k limit on how far we will search.
540 for (size_t i = 0, e = std::min(size_t(4096), Buffer.size()); i != e; ++i) {
541 if (Buffer[i] == '\n')
542 ++NumLinesForward;
543
544 // Patterns have leading whitespace stripped, so skip whitespace when
545 // looking for something which looks like a pattern.
546 if (Buffer[i] == ' ' || Buffer[i] == '\t')
547 continue;
548
549 // Compute the "quality" of this match as an arbitrary combination of the
550 // match distance and the number of lines skipped to get to this match.
551 unsigned Distance = ComputeMatchDistance(Buffer.substr(i), VariableTable);
552 double Quality = Distance + (NumLinesForward / 100.);
553
554 if (Quality < BestQuality || Best == StringRef::npos) {
555 Best = i;
556 BestQuality = Quality;
557 }
558 }
559
560 // Print the "possible intended match here" line if we found something
561 // reasonable and not equal to what we showed in the "scanning from here"
562 // line.
563 if (Best && Best != StringRef::npos && BestQuality < 50) {
564 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + Best),
565 SourceMgr::DK_Note, "possible intended match here");
566
567 // FIXME: If we wanted to be really friendly we would show why the match
568 // failed, as it can be hard to spot simple one character differences.
569 }
570 }
571
FindRegexVarEnd(StringRef Str,SourceMgr & SM)572 size_t Pattern::FindRegexVarEnd(StringRef Str, SourceMgr &SM) {
573 // Offset keeps track of the current offset within the input Str
574 size_t Offset = 0;
575 // [...] Nesting depth
576 size_t BracketDepth = 0;
577
578 while (!Str.empty()) {
579 if (Str.startswith("]]") && BracketDepth == 0)
580 return Offset;
581 if (Str[0] == '\\') {
582 // Backslash escapes the next char within regexes, so skip them both.
583 Str = Str.substr(2);
584 Offset += 2;
585 } else {
586 switch (Str[0]) {
587 default:
588 break;
589 case '[':
590 BracketDepth++;
591 break;
592 case ']':
593 if (BracketDepth == 0) {
594 SM.PrintMessage(SMLoc::getFromPointer(Str.data()),
595 SourceMgr::DK_Error,
596 "missing closing \"]\" for regex variable");
597 exit(1);
598 }
599 BracketDepth--;
600 break;
601 }
602 Str = Str.substr(1);
603 Offset++;
604 }
605 }
606
607 return StringRef::npos;
608 }
609
610
611 //===----------------------------------------------------------------------===//
612 // Check Strings.
613 //===----------------------------------------------------------------------===//
614
615 /// CheckString - This is a check that we found in the input file.
616 struct CheckString {
617 /// Pat - The pattern to match.
618 Pattern Pat;
619
620 /// Prefix - Which prefix name this check matched.
621 StringRef Prefix;
622
623 /// Loc - The location in the match file that the check string was specified.
624 SMLoc Loc;
625
626 /// CheckTy - Specify what kind of check this is. e.g. CHECK-NEXT: directive,
627 /// as opposed to a CHECK: directive.
628 // Check::CheckType CheckTy;
629
630 /// DagNotStrings - These are all of the strings that are disallowed from
631 /// occurring between this match string and the previous one (or start of
632 /// file).
633 std::vector<Pattern> DagNotStrings;
634
CheckStringCheckString635 CheckString(const Pattern &P, StringRef S, SMLoc L)
636 : Pat(P), Prefix(S), Loc(L) {}
637
638 /// Check - Match check string and its "not strings" and/or "dag strings".
639 size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
640 size_t &MatchLen, StringMap<StringRef> &VariableTable) const;
641
642 /// CheckNext - Verify there is a single line in the given buffer.
643 bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
644
645 /// CheckSame - Verify there is no newline in the given buffer.
646 bool CheckSame(const SourceMgr &SM, StringRef Buffer) const;
647
648 /// CheckNot - Verify there's no "not strings" in the given buffer.
649 bool CheckNot(const SourceMgr &SM, StringRef Buffer,
650 const std::vector<const Pattern *> &NotStrings,
651 StringMap<StringRef> &VariableTable) const;
652
653 /// CheckDag - Match "dag strings" and their mixed "not strings".
654 size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
655 std::vector<const Pattern *> &NotStrings,
656 StringMap<StringRef> &VariableTable) const;
657 };
658
659 /// Canonicalize whitespaces in the input file. Line endings are replaced
660 /// with UNIX-style '\n'.
661 ///
662 /// \param PreserveHorizontal Don't squash consecutive horizontal whitespace
663 /// characters to a single space.
664 static std::unique_ptr<MemoryBuffer>
CanonicalizeInputFile(std::unique_ptr<MemoryBuffer> MB,bool PreserveHorizontal)665 CanonicalizeInputFile(std::unique_ptr<MemoryBuffer> MB,
666 bool PreserveHorizontal) {
667 SmallString<128> NewFile;
668 NewFile.reserve(MB->getBufferSize());
669
670 for (const char *Ptr = MB->getBufferStart(), *End = MB->getBufferEnd();
671 Ptr != End; ++Ptr) {
672 // Eliminate trailing dosish \r.
673 if (Ptr <= End - 2 && Ptr[0] == '\r' && Ptr[1] == '\n') {
674 continue;
675 }
676
677 // If current char is not a horizontal whitespace or if horizontal
678 // whitespace canonicalization is disabled, dump it to output as is.
679 if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) {
680 NewFile.push_back(*Ptr);
681 continue;
682 }
683
684 // Otherwise, add one space and advance over neighboring space.
685 NewFile.push_back(' ');
686 while (Ptr+1 != End &&
687 (Ptr[1] == ' ' || Ptr[1] == '\t'))
688 ++Ptr;
689 }
690
691 return std::unique_ptr<MemoryBuffer>(
692 MemoryBuffer::getMemBufferCopy(NewFile.str(), MB->getBufferIdentifier()));
693 }
694
/// Return true if \p c can be part of a word: an alphanumeric character, '-'
/// or '_'.
static bool IsPartOfWord(char c) {
  // isalnum requires a value representable as unsigned char; a plain char
  // holding a byte >= 0x80 may be negative, so convert first.
  return (isalnum(static_cast<unsigned char>(c)) || c == '-' || c == '_');
}
698
699 // Get the size of the prefix extension.
CheckTypeSize(Check::CheckType Ty)700 static size_t CheckTypeSize(Check::CheckType Ty) {
701 switch (Ty) {
702 case Check::CheckNone:
703 case Check::CheckBadNot:
704 return 0;
705
706 case Check::CheckPlain:
707 return sizeof(":") - 1;
708
709 case Check::CheckNext:
710 return sizeof("-NEXT:") - 1;
711
712 case Check::CheckSame:
713 return sizeof("-SAME:") - 1;
714
715 case Check::CheckNot:
716 return sizeof("-NOT:") - 1;
717
718 case Check::CheckDAG:
719 return sizeof("-DAG:") - 1;
720
721 case Check::CheckLabel:
722 return sizeof("-LABEL:") - 1;
723
724 case Check::CheckEOF:
725 llvm_unreachable("Should not be using EOF size");
726 }
727
728 llvm_unreachable("Bad check type");
729 }
730
FindCheckType(StringRef Buffer,StringRef Prefix)731 static Check::CheckType FindCheckType(StringRef Buffer, StringRef Prefix) {
732 char NextChar = Buffer[Prefix.size()];
733
734 // Verify that the : is present after the prefix.
735 if (NextChar == ':')
736 return Check::CheckPlain;
737
738 if (NextChar != '-')
739 return Check::CheckNone;
740
741 StringRef Rest = Buffer.drop_front(Prefix.size() + 1);
742 if (Rest.startswith("NEXT:"))
743 return Check::CheckNext;
744
745 if (Rest.startswith("SAME:"))
746 return Check::CheckSame;
747
748 if (Rest.startswith("NOT:"))
749 return Check::CheckNot;
750
751 if (Rest.startswith("DAG:"))
752 return Check::CheckDAG;
753
754 if (Rest.startswith("LABEL:"))
755 return Check::CheckLabel;
756
757 // You can't combine -NOT with another suffix.
758 if (Rest.startswith("DAG-NOT:") || Rest.startswith("NOT-DAG:") ||
759 Rest.startswith("NEXT-NOT:") || Rest.startswith("NOT-NEXT:") ||
760 Rest.startswith("SAME-NOT:") || Rest.startswith("NOT-SAME:"))
761 return Check::CheckBadNot;
762
763 return Check::CheckNone;
764 }
765
766 // From the given position, find the next character after the word.
SkipWord(StringRef Str,size_t Loc)767 static size_t SkipWord(StringRef Str, size_t Loc) {
768 while (Loc < Str.size() && IsPartOfWord(Str[Loc]))
769 ++Loc;
770 return Loc;
771 }
772
773 // Try to find the first match in buffer for any prefix. If a valid match is
774 // found, return that prefix and set its type and location. If there are almost
775 // matches (e.g. the actual prefix string is found, but is not an actual check
776 // string), but no valid match, return an empty string and set the position to
777 // resume searching from. If no partial matches are found, return an empty
778 // string and the location will be StringRef::npos. If one prefix is a substring
779 // of another, the maximal match should be found. e.g. if "A" and "AA" are
780 // prefixes then AA-CHECK: should match the second one.
FindFirstCandidateMatch(StringRef & Buffer,Check::CheckType & CheckTy,size_t & CheckLoc)781 static StringRef FindFirstCandidateMatch(StringRef &Buffer,
782 Check::CheckType &CheckTy,
783 size_t &CheckLoc) {
784 StringRef FirstPrefix;
785 size_t FirstLoc = StringRef::npos;
786 size_t SearchLoc = StringRef::npos;
787 Check::CheckType FirstTy = Check::CheckNone;
788
789 CheckTy = Check::CheckNone;
790 CheckLoc = StringRef::npos;
791
792 for (StringRef Prefix : CheckPrefixes) {
793 size_t PrefixLoc = Buffer.find(Prefix);
794
795 if (PrefixLoc == StringRef::npos)
796 continue;
797
798 // Track where we are searching for invalid prefixes that look almost right.
799 // We need to only advance to the first partial match on the next attempt
800 // since a partial match could be a substring of a later, valid prefix.
801 // Need to skip to the end of the word, otherwise we could end up
802 // matching a prefix in a substring later.
803 if (PrefixLoc < SearchLoc)
804 SearchLoc = SkipWord(Buffer, PrefixLoc);
805
806 // We only want to find the first match to avoid skipping some.
807 if (PrefixLoc > FirstLoc)
808 continue;
809 // If one matching check-prefix is a prefix of another, choose the
810 // longer one.
811 if (PrefixLoc == FirstLoc && Prefix.size() < FirstPrefix.size())
812 continue;
813
814 StringRef Rest = Buffer.drop_front(PrefixLoc);
815 // Make sure we have actually found the prefix, and not a word containing
816 // it. This should also prevent matching the wrong prefix when one is a
817 // substring of another.
818 if (PrefixLoc != 0 && IsPartOfWord(Buffer[PrefixLoc - 1]))
819 FirstTy = Check::CheckNone;
820 else
821 FirstTy = FindCheckType(Rest, Prefix);
822
823 FirstLoc = PrefixLoc;
824 FirstPrefix = Prefix;
825 }
826
827 // If the first prefix is invalid, we should continue the search after it.
828 if (FirstTy == Check::CheckNone) {
829 CheckLoc = SearchLoc;
830 return "";
831 }
832
833 CheckTy = FirstTy;
834 CheckLoc = FirstLoc;
835 return FirstPrefix;
836 }
837
FindFirstMatchingPrefix(StringRef & Buffer,unsigned & LineNumber,Check::CheckType & CheckTy,size_t & CheckLoc)838 static StringRef FindFirstMatchingPrefix(StringRef &Buffer,
839 unsigned &LineNumber,
840 Check::CheckType &CheckTy,
841 size_t &CheckLoc) {
842 while (!Buffer.empty()) {
843 StringRef Prefix = FindFirstCandidateMatch(Buffer, CheckTy, CheckLoc);
844 // If we found a real match, we are done.
845 if (!Prefix.empty()) {
846 LineNumber += Buffer.substr(0, CheckLoc).count('\n');
847 return Prefix;
848 }
849
850 // We didn't find any almost matches either, we are also done.
851 if (CheckLoc == StringRef::npos)
852 return StringRef();
853
854 LineNumber += Buffer.substr(0, CheckLoc + 1).count('\n');
855
856 // Advance to the last possible match we found and try again.
857 Buffer = Buffer.drop_front(CheckLoc + 1);
858 }
859
860 return StringRef();
861 }
862
863 /// ReadCheckFile - Read the check file, which specifies the sequence of
864 /// expected strings. The strings are added to the CheckStrings vector.
865 /// Returns true in case of an error, false otherwise.
ReadCheckFile(SourceMgr & SM,std::vector<CheckString> & CheckStrings)866 static bool ReadCheckFile(SourceMgr &SM,
867 std::vector<CheckString> &CheckStrings) {
868 ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
869 MemoryBuffer::getFileOrSTDIN(CheckFilename);
870 if (std::error_code EC = FileOrErr.getError()) {
871 errs() << "Could not open check file '" << CheckFilename
872 << "': " << EC.message() << '\n';
873 return true;
874 }
875
876 // If we want to canonicalize whitespace, strip excess whitespace from the
877 // buffer containing the CHECK lines. Remove DOS style line endings.
878 std::unique_ptr<MemoryBuffer> F = CanonicalizeInputFile(
879 std::move(FileOrErr.get()), NoCanonicalizeWhiteSpace);
880
881 // Find all instances of CheckPrefix followed by : in the file.
882 StringRef Buffer = F->getBuffer();
883
884 SM.AddNewSourceBuffer(std::move(F), SMLoc());
885
886 std::vector<Pattern> ImplicitNegativeChecks;
887 for (const auto &PatternString : ImplicitCheckNot) {
888 // Create a buffer with fake command line content in order to display the
889 // command line option responsible for the specific implicit CHECK-NOT.
890 std::string Prefix = (Twine("-") + ImplicitCheckNot.ArgStr + "='").str();
891 std::string Suffix = "'";
892 std::unique_ptr<MemoryBuffer> CmdLine = MemoryBuffer::getMemBufferCopy(
893 Prefix + PatternString + Suffix, "command line");
894
895 StringRef PatternInBuffer =
896 CmdLine->getBuffer().substr(Prefix.size(), PatternString.size());
897 SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc());
898
899 ImplicitNegativeChecks.push_back(Pattern(Check::CheckNot));
900 ImplicitNegativeChecks.back().ParsePattern(PatternInBuffer,
901 "IMPLICIT-CHECK", SM, 0);
902 }
903
904
905 std::vector<Pattern> DagNotMatches = ImplicitNegativeChecks;
906
907 // LineNumber keeps track of the line on which CheckPrefix instances are
908 // found.
909 unsigned LineNumber = 1;
910
911 while (1) {
912 Check::CheckType CheckTy;
913 size_t PrefixLoc;
914
915 // See if a prefix occurs in the memory buffer.
916 StringRef UsedPrefix = FindFirstMatchingPrefix(Buffer,
917 LineNumber,
918 CheckTy,
919 PrefixLoc);
920 if (UsedPrefix.empty())
921 break;
922
923 Buffer = Buffer.drop_front(PrefixLoc);
924
925 // Location to use for error messages.
926 const char *UsedPrefixStart = Buffer.data() + (PrefixLoc == 0 ? 0 : 1);
927
928 // PrefixLoc is to the start of the prefix. Skip to the end.
929 Buffer = Buffer.drop_front(UsedPrefix.size() + CheckTypeSize(CheckTy));
930
931 // Complain about useful-looking but unsupported suffixes.
932 if (CheckTy == Check::CheckBadNot) {
933 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()),
934 SourceMgr::DK_Error,
935 "unsupported -NOT combo on prefix '" + UsedPrefix + "'");
936 return true;
937 }
938
939 // Okay, we found the prefix, yay. Remember the rest of the line, but ignore
940 // leading and trailing whitespace.
941 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t"));
942
943 // Scan ahead to the end of line.
944 size_t EOL = Buffer.find_first_of("\n\r");
945
946 // Remember the location of the start of the pattern, for diagnostics.
947 SMLoc PatternLoc = SMLoc::getFromPointer(Buffer.data());
948
949 // Parse the pattern.
950 Pattern P(CheckTy);
951 if (P.ParsePattern(Buffer.substr(0, EOL), UsedPrefix, SM, LineNumber))
952 return true;
953
954 // Verify that CHECK-LABEL lines do not define or use variables
955 if ((CheckTy == Check::CheckLabel) && P.hasVariable()) {
956 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
957 SourceMgr::DK_Error,
958 "found '" + UsedPrefix + "-LABEL:'"
959 " with variable definition or use");
960 return true;
961 }
962
963 Buffer = Buffer.substr(EOL);
964
965 // Verify that CHECK-NEXT lines have at least one CHECK line before them.
966 if ((CheckTy == Check::CheckNext || CheckTy == Check::CheckSame) &&
967 CheckStrings.empty()) {
968 StringRef Type = CheckTy == Check::CheckNext ? "NEXT" : "SAME";
969 SM.PrintMessage(SMLoc::getFromPointer(UsedPrefixStart),
970 SourceMgr::DK_Error,
971 "found '" + UsedPrefix + "-" + Type + "' without previous '"
972 + UsedPrefix + ": line");
973 return true;
974 }
975
976 // Handle CHECK-DAG/-NOT.
977 if (CheckTy == Check::CheckDAG || CheckTy == Check::CheckNot) {
978 DagNotMatches.push_back(P);
979 continue;
980 }
981
982 // Okay, add the string we captured to the output vector and move on.
983 CheckStrings.emplace_back(P, UsedPrefix, PatternLoc);
984 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
985 DagNotMatches = ImplicitNegativeChecks;
986 }
987
988 // Add an EOF pattern for any trailing CHECK-DAG/-NOTs, and use the first
989 // prefix as a filler for the error message.
990 if (!DagNotMatches.empty()) {
991 CheckStrings.emplace_back(Pattern(Check::CheckEOF), *CheckPrefixes.begin(),
992 SMLoc::getFromPointer(Buffer.data()));
993 std::swap(DagNotMatches, CheckStrings.back().DagNotStrings);
994 }
995
996 if (CheckStrings.empty()) {
997 errs() << "error: no check strings found with prefix"
998 << (CheckPrefixes.size() > 1 ? "es " : " ");
999 prefix_iterator I = CheckPrefixes.begin();
1000 prefix_iterator E = CheckPrefixes.end();
1001 if (I != E) {
1002 errs() << "\'" << *I << ":'";
1003 ++I;
1004 }
1005 for (; I != E; ++I)
1006 errs() << ", \'" << *I << ":'";
1007
1008 errs() << '\n';
1009 return true;
1010 }
1011
1012 return false;
1013 }
1014
PrintCheckFailed(const SourceMgr & SM,SMLoc Loc,const Pattern & Pat,StringRef Buffer,StringMap<StringRef> & VariableTable)1015 static void PrintCheckFailed(const SourceMgr &SM, SMLoc Loc,
1016 const Pattern &Pat, StringRef Buffer,
1017 StringMap<StringRef> &VariableTable) {
1018 // Otherwise, we have an error, emit an error message.
1019 SM.PrintMessage(Loc, SourceMgr::DK_Error,
1020 "expected string not found in input");
1021
1022 // Print the "scanning from here" line. If the current position is at the
1023 // end of a line, advance to the start of the next line.
1024 Buffer = Buffer.substr(Buffer.find_first_not_of(" \t\n\r"));
1025
1026 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1027 "scanning from here");
1028
1029 // Allow the pattern to print additional information if desired.
1030 Pat.PrintFailureInfo(SM, Buffer, VariableTable);
1031 }
1032
PrintCheckFailed(const SourceMgr & SM,const CheckString & CheckStr,StringRef Buffer,StringMap<StringRef> & VariableTable)1033 static void PrintCheckFailed(const SourceMgr &SM, const CheckString &CheckStr,
1034 StringRef Buffer,
1035 StringMap<StringRef> &VariableTable) {
1036 PrintCheckFailed(SM, CheckStr.Loc, CheckStr.Pat, Buffer, VariableTable);
1037 }
1038
1039 /// CountNumNewlinesBetween - Count the number of newlines in the specified
1040 /// range.
CountNumNewlinesBetween(StringRef Range,const char * & FirstNewLine)1041 static unsigned CountNumNewlinesBetween(StringRef Range,
1042 const char *&FirstNewLine) {
1043 unsigned NumNewLines = 0;
1044 while (1) {
1045 // Scan for newline.
1046 Range = Range.substr(Range.find_first_of("\n\r"));
1047 if (Range.empty()) return NumNewLines;
1048
1049 ++NumNewLines;
1050
1051 // Handle \n\r and \r\n as a single newline.
1052 if (Range.size() > 1 &&
1053 (Range[1] == '\n' || Range[1] == '\r') &&
1054 (Range[0] != Range[1]))
1055 Range = Range.substr(1);
1056 Range = Range.substr(1);
1057
1058 if (NumNewLines == 1)
1059 FirstNewLine = Range.begin();
1060 }
1061 }
1062
Check(const SourceMgr & SM,StringRef Buffer,bool IsLabelScanMode,size_t & MatchLen,StringMap<StringRef> & VariableTable) const1063 size_t CheckString::Check(const SourceMgr &SM, StringRef Buffer,
1064 bool IsLabelScanMode, size_t &MatchLen,
1065 StringMap<StringRef> &VariableTable) const {
1066 size_t LastPos = 0;
1067 std::vector<const Pattern *> NotStrings;
1068
1069 // IsLabelScanMode is true when we are scanning forward to find CHECK-LABEL
1070 // bounds; we have not processed variable definitions within the bounded block
1071 // yet so cannot handle any final CHECK-DAG yet; this is handled when going
1072 // over the block again (including the last CHECK-LABEL) in normal mode.
1073 if (!IsLabelScanMode) {
1074 // Match "dag strings" (with mixed "not strings" if any).
1075 LastPos = CheckDag(SM, Buffer, NotStrings, VariableTable);
1076 if (LastPos == StringRef::npos)
1077 return StringRef::npos;
1078 }
1079
1080 // Match itself from the last position after matching CHECK-DAG.
1081 StringRef MatchBuffer = Buffer.substr(LastPos);
1082 size_t MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1083 if (MatchPos == StringRef::npos) {
1084 PrintCheckFailed(SM, *this, MatchBuffer, VariableTable);
1085 return StringRef::npos;
1086 }
1087
1088 // Similar to the above, in "label-scan mode" we can't yet handle CHECK-NEXT
1089 // or CHECK-NOT
1090 if (!IsLabelScanMode) {
1091 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1092
1093 // If this check is a "CHECK-NEXT", verify that the previous match was on
1094 // the previous line (i.e. that there is one newline between them).
1095 if (CheckNext(SM, SkippedRegion))
1096 return StringRef::npos;
1097
1098 // If this check is a "CHECK-SAME", verify that the previous match was on
1099 // the same line (i.e. that there is no newline between them).
1100 if (CheckSame(SM, SkippedRegion))
1101 return StringRef::npos;
1102
1103 // If this match had "not strings", verify that they don't exist in the
1104 // skipped region.
1105 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1106 return StringRef::npos;
1107 }
1108
1109 return LastPos + MatchPos;
1110 }
1111
CheckNext(const SourceMgr & SM,StringRef Buffer) const1112 bool CheckString::CheckNext(const SourceMgr &SM, StringRef Buffer) const {
1113 if (Pat.getCheckTy() != Check::CheckNext)
1114 return false;
1115
1116 // Count the number of newlines between the previous match and this one.
1117 assert(Buffer.data() !=
1118 SM.getMemoryBuffer(
1119 SM.FindBufferContainingLoc(
1120 SMLoc::getFromPointer(Buffer.data())))->getBufferStart() &&
1121 "CHECK-NEXT can't be the first check in a file");
1122
1123 const char *FirstNewLine = nullptr;
1124 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1125
1126 if (NumNewLines == 0) {
1127 SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
1128 "-NEXT: is on the same line as previous match");
1129 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1130 SourceMgr::DK_Note, "'next' match was here");
1131 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1132 "previous match ended here");
1133 return true;
1134 }
1135
1136 if (NumNewLines != 1) {
1137 SM.PrintMessage(Loc, SourceMgr::DK_Error, Prefix +
1138 "-NEXT: is not on the line after the previous match");
1139 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()),
1140 SourceMgr::DK_Note, "'next' match was here");
1141 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1142 "previous match ended here");
1143 SM.PrintMessage(SMLoc::getFromPointer(FirstNewLine), SourceMgr::DK_Note,
1144 "non-matching line after previous match is here");
1145 return true;
1146 }
1147
1148 return false;
1149 }
1150
CheckSame(const SourceMgr & SM,StringRef Buffer) const1151 bool CheckString::CheckSame(const SourceMgr &SM, StringRef Buffer) const {
1152 if (Pat.getCheckTy() != Check::CheckSame)
1153 return false;
1154
1155 // Count the number of newlines between the previous match and this one.
1156 assert(Buffer.data() !=
1157 SM.getMemoryBuffer(SM.FindBufferContainingLoc(
1158 SMLoc::getFromPointer(Buffer.data())))
1159 ->getBufferStart() &&
1160 "CHECK-SAME can't be the first check in a file");
1161
1162 const char *FirstNewLine = nullptr;
1163 unsigned NumNewLines = CountNumNewlinesBetween(Buffer, FirstNewLine);
1164
1165 if (NumNewLines != 0) {
1166 SM.PrintMessage(Loc, SourceMgr::DK_Error,
1167 Prefix +
1168 "-SAME: is not on the same line as the previous match");
1169 SM.PrintMessage(SMLoc::getFromPointer(Buffer.end()), SourceMgr::DK_Note,
1170 "'next' match was here");
1171 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note,
1172 "previous match ended here");
1173 return true;
1174 }
1175
1176 return false;
1177 }
1178
CheckNot(const SourceMgr & SM,StringRef Buffer,const std::vector<const Pattern * > & NotStrings,StringMap<StringRef> & VariableTable) const1179 bool CheckString::CheckNot(const SourceMgr &SM, StringRef Buffer,
1180 const std::vector<const Pattern *> &NotStrings,
1181 StringMap<StringRef> &VariableTable) const {
1182 for (const Pattern *Pat : NotStrings) {
1183 assert((Pat->getCheckTy() == Check::CheckNot) && "Expect CHECK-NOT!");
1184
1185 size_t MatchLen = 0;
1186 size_t Pos = Pat->Match(Buffer, MatchLen, VariableTable);
1187
1188 if (Pos == StringRef::npos) continue;
1189
1190 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()+Pos),
1191 SourceMgr::DK_Error,
1192 Prefix + "-NOT: string occurred!");
1193 SM.PrintMessage(Pat->getLoc(), SourceMgr::DK_Note,
1194 Prefix + "-NOT: pattern specified here");
1195 return true;
1196 }
1197
1198 return false;
1199 }
1200
CheckDag(const SourceMgr & SM,StringRef Buffer,std::vector<const Pattern * > & NotStrings,StringMap<StringRef> & VariableTable) const1201 size_t CheckString::CheckDag(const SourceMgr &SM, StringRef Buffer,
1202 std::vector<const Pattern *> &NotStrings,
1203 StringMap<StringRef> &VariableTable) const {
1204 if (DagNotStrings.empty())
1205 return 0;
1206
1207 size_t LastPos = 0;
1208 size_t StartPos = LastPos;
1209
1210 for (const Pattern &Pat : DagNotStrings) {
1211 assert((Pat.getCheckTy() == Check::CheckDAG ||
1212 Pat.getCheckTy() == Check::CheckNot) &&
1213 "Invalid CHECK-DAG or CHECK-NOT!");
1214
1215 if (Pat.getCheckTy() == Check::CheckNot) {
1216 NotStrings.push_back(&Pat);
1217 continue;
1218 }
1219
1220 assert((Pat.getCheckTy() == Check::CheckDAG) && "Expect CHECK-DAG!");
1221
1222 size_t MatchLen = 0, MatchPos;
1223
1224 // CHECK-DAG always matches from the start.
1225 StringRef MatchBuffer = Buffer.substr(StartPos);
1226 MatchPos = Pat.Match(MatchBuffer, MatchLen, VariableTable);
1227 // With a group of CHECK-DAGs, a single mismatching means the match on
1228 // that group of CHECK-DAGs fails immediately.
1229 if (MatchPos == StringRef::npos) {
1230 PrintCheckFailed(SM, Pat.getLoc(), Pat, MatchBuffer, VariableTable);
1231 return StringRef::npos;
1232 }
1233 // Re-calc it as the offset relative to the start of the original string.
1234 MatchPos += StartPos;
1235
1236 if (!NotStrings.empty()) {
1237 if (MatchPos < LastPos) {
1238 // Reordered?
1239 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + MatchPos),
1240 SourceMgr::DK_Error,
1241 Prefix + "-DAG: found a match of CHECK-DAG"
1242 " reordering across a CHECK-NOT");
1243 SM.PrintMessage(SMLoc::getFromPointer(Buffer.data() + LastPos),
1244 SourceMgr::DK_Note,
1245 Prefix + "-DAG: the farthest match of CHECK-DAG"
1246 " is found here");
1247 SM.PrintMessage(NotStrings[0]->getLoc(), SourceMgr::DK_Note,
1248 Prefix + "-NOT: the crossed pattern specified"
1249 " here");
1250 SM.PrintMessage(Pat.getLoc(), SourceMgr::DK_Note,
1251 Prefix + "-DAG: the reordered pattern specified"
1252 " here");
1253 return StringRef::npos;
1254 }
1255 // All subsequent CHECK-DAGs should be matched from the farthest
1256 // position of all precedent CHECK-DAGs (including this one.)
1257 StartPos = LastPos;
1258 // If there's CHECK-NOTs between two CHECK-DAGs or from CHECK to
1259 // CHECK-DAG, verify that there's no 'not' strings occurred in that
1260 // region.
1261 StringRef SkippedRegion = Buffer.substr(LastPos, MatchPos);
1262 if (CheckNot(SM, SkippedRegion, NotStrings, VariableTable))
1263 return StringRef::npos;
1264 // Clear "not strings".
1265 NotStrings.clear();
1266 }
1267
1268 // Update the last position with CHECK-DAG matches.
1269 LastPos = std::max(MatchPos + MatchLen, LastPos);
1270 }
1271
1272 return LastPos;
1273 }
1274
1275 // A check prefix must contain only alphanumeric, hyphens and underscores.
ValidateCheckPrefix(StringRef CheckPrefix)1276 static bool ValidateCheckPrefix(StringRef CheckPrefix) {
1277 Regex Validator("^[a-zA-Z0-9_-]*$");
1278 return Validator.match(CheckPrefix);
1279 }
1280
ValidateCheckPrefixes()1281 static bool ValidateCheckPrefixes() {
1282 StringSet<> PrefixSet;
1283
1284 for (StringRef Prefix : CheckPrefixes) {
1285 // Reject empty prefixes.
1286 if (Prefix == "")
1287 return false;
1288
1289 if (!PrefixSet.insert(Prefix).second)
1290 return false;
1291
1292 if (!ValidateCheckPrefix(Prefix))
1293 return false;
1294 }
1295
1296 return true;
1297 }
1298
1299 // I don't think there's a way to specify an initial value for cl::list,
1300 // so if nothing was specified, add the default
AddCheckPrefixIfNeeded()1301 static void AddCheckPrefixIfNeeded() {
1302 if (CheckPrefixes.empty())
1303 CheckPrefixes.push_back("CHECK");
1304 }
1305
DumpCommandLine(int argc,char ** argv)1306 static void DumpCommandLine(int argc, char **argv) {
1307 errs() << "FileCheck command line: ";
1308 for (int I = 0; I < argc; I++)
1309 errs() << " " << argv[I];
1310 errs() << "\n";
1311 }
1312
main(int argc,char ** argv)1313 int main(int argc, char **argv) {
1314 sys::PrintStackTraceOnErrorSignal(argv[0]);
1315 PrettyStackTraceProgram X(argc, argv);
1316 cl::ParseCommandLineOptions(argc, argv);
1317
1318 if (!ValidateCheckPrefixes()) {
1319 errs() << "Supplied check-prefix is invalid! Prefixes must be unique and "
1320 "start with a letter and contain only alphanumeric characters, "
1321 "hyphens and underscores\n";
1322 return 2;
1323 }
1324
1325 AddCheckPrefixIfNeeded();
1326
1327 SourceMgr SM;
1328
1329 // Read the expected strings from the check file.
1330 std::vector<CheckString> CheckStrings;
1331 if (ReadCheckFile(SM, CheckStrings))
1332 return 2;
1333
1334 // Open the file to check and add it to SourceMgr.
1335 ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
1336 MemoryBuffer::getFileOrSTDIN(InputFilename);
1337 if (std::error_code EC = FileOrErr.getError()) {
1338 errs() << "Could not open input file '" << InputFilename
1339 << "': " << EC.message() << '\n';
1340 return 2;
1341 }
1342 std::unique_ptr<MemoryBuffer> &File = FileOrErr.get();
1343
1344 if (File->getBufferSize() == 0 && !AllowEmptyInput) {
1345 errs() << "FileCheck error: '" << InputFilename << "' is empty.\n";
1346 DumpCommandLine(argc, argv);
1347 return 2;
1348 }
1349
1350 // Remove duplicate spaces in the input file if requested.
1351 // Remove DOS style line endings.
1352 std::unique_ptr<MemoryBuffer> F =
1353 CanonicalizeInputFile(std::move(File), NoCanonicalizeWhiteSpace);
1354
1355 // Check that we have all of the expected strings, in order, in the input
1356 // file.
1357 StringRef Buffer = F->getBuffer();
1358
1359 SM.AddNewSourceBuffer(std::move(F), SMLoc());
1360
1361 /// VariableTable - This holds all the current filecheck variables.
1362 StringMap<StringRef> VariableTable;
1363
1364 bool hasError = false;
1365
1366 unsigned i = 0, j = 0, e = CheckStrings.size();
1367
1368 while (true) {
1369 StringRef CheckRegion;
1370 if (j == e) {
1371 CheckRegion = Buffer;
1372 } else {
1373 const CheckString &CheckLabelStr = CheckStrings[j];
1374 if (CheckLabelStr.Pat.getCheckTy() != Check::CheckLabel) {
1375 ++j;
1376 continue;
1377 }
1378
1379 // Scan to next CHECK-LABEL match, ignoring CHECK-NOT and CHECK-DAG
1380 size_t MatchLabelLen = 0;
1381 size_t MatchLabelPos = CheckLabelStr.Check(SM, Buffer, true,
1382 MatchLabelLen, VariableTable);
1383 if (MatchLabelPos == StringRef::npos) {
1384 hasError = true;
1385 break;
1386 }
1387
1388 CheckRegion = Buffer.substr(0, MatchLabelPos + MatchLabelLen);
1389 Buffer = Buffer.substr(MatchLabelPos + MatchLabelLen);
1390 ++j;
1391 }
1392
1393 for ( ; i != j; ++i) {
1394 const CheckString &CheckStr = CheckStrings[i];
1395
1396 // Check each string within the scanned region, including a second check
1397 // of any final CHECK-LABEL (to verify CHECK-NOT and CHECK-DAG)
1398 size_t MatchLen = 0;
1399 size_t MatchPos = CheckStr.Check(SM, CheckRegion, false, MatchLen,
1400 VariableTable);
1401
1402 if (MatchPos == StringRef::npos) {
1403 hasError = true;
1404 i = j;
1405 break;
1406 }
1407
1408 CheckRegion = CheckRegion.substr(MatchPos + MatchLen);
1409 }
1410
1411 if (j == e)
1412 break;
1413 }
1414
1415 return hasError ? 1 : 0;
1416 }
1417