1 //===- unittests/Lex/LexerTest.cpp ------ Lexer tests ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
#include "clang/Lex/Lexer.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Basic/TargetOptions.h"
#include "clang/Basic/TokenKinds.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/HeaderSearchOptions.h"
#include "clang/Lex/MacroArgs.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/ModuleLoader.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/PreprocessorOptions.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include <array>
#include <cstring>
#include <memory>
#include <string>
#include <utility>
#include <vector>
29 
30 namespace {
31 using namespace clang;
32 using testing::ElementsAre;
33 
34 // The test fixture.
35 class LexerTest : public ::testing::Test {
36 protected:
LexerTest()37   LexerTest()
38     : FileMgr(FileMgrOpts),
39       DiagID(new DiagnosticIDs()),
40       Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()),
41       SourceMgr(Diags, FileMgr),
42       TargetOpts(new TargetOptions)
43   {
44     TargetOpts->Triple = "x86_64-apple-darwin11.1.0";
45     Target = TargetInfo::CreateTargetInfo(Diags, TargetOpts);
46   }
47 
CreatePP(StringRef Source,TrivialModuleLoader & ModLoader)48   std::unique_ptr<Preprocessor> CreatePP(StringRef Source,
49                                          TrivialModuleLoader &ModLoader) {
50     std::unique_ptr<llvm::MemoryBuffer> Buf =
51         llvm::MemoryBuffer::getMemBuffer(Source);
52     SourceMgr.setMainFileID(SourceMgr.createFileID(std::move(Buf)));
53 
54     HeaderSearch HeaderInfo(std::make_shared<HeaderSearchOptions>(), SourceMgr,
55                             Diags, LangOpts, Target.get());
56     std::unique_ptr<Preprocessor> PP = std::make_unique<Preprocessor>(
57         std::make_shared<PreprocessorOptions>(), Diags, LangOpts, SourceMgr,
58         HeaderInfo, ModLoader,
59         /*IILookup =*/nullptr,
60         /*OwnsHeaderSearch =*/false);
61     PP->Initialize(*Target);
62     PP->EnterMainSourceFile();
63     return PP;
64   }
65 
Lex(StringRef Source)66   std::vector<Token> Lex(StringRef Source) {
67     TrivialModuleLoader ModLoader;
68     auto PP = CreatePP(Source, ModLoader);
69 
70     std::vector<Token> toks;
71     while (1) {
72       Token tok;
73       PP->Lex(tok);
74       if (tok.is(tok::eof))
75         break;
76       toks.push_back(tok);
77     }
78 
79     return toks;
80   }
81 
CheckLex(StringRef Source,ArrayRef<tok::TokenKind> ExpectedTokens)82   std::vector<Token> CheckLex(StringRef Source,
83                               ArrayRef<tok::TokenKind> ExpectedTokens) {
84     auto toks = Lex(Source);
85     EXPECT_EQ(ExpectedTokens.size(), toks.size());
86     for (unsigned i = 0, e = ExpectedTokens.size(); i != e; ++i) {
87       EXPECT_EQ(ExpectedTokens[i], toks[i].getKind());
88     }
89 
90     return toks;
91   }
92 
getSourceText(Token Begin,Token End)93   std::string getSourceText(Token Begin, Token End) {
94     bool Invalid;
95     StringRef Str =
96         Lexer::getSourceText(CharSourceRange::getTokenRange(SourceRange(
97                                     Begin.getLocation(), End.getLocation())),
98                              SourceMgr, LangOpts, &Invalid);
99     if (Invalid)
100       return "<INVALID>";
101     return std::string(Str);
102   }
103 
104   FileSystemOptions FileMgrOpts;
105   FileManager FileMgr;
106   IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
107   DiagnosticsEngine Diags;
108   SourceManager SourceMgr;
109   LangOptions LangOpts;
110   std::shared_ptr<TargetOptions> TargetOpts;
111   IntrusiveRefCntPtr<TargetInfo> Target;
112 };
113 
// For `M(f(M(i)))`, the source text of the third lexed token is expected to
// be the nested call "M(i)".
TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgument) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::l_paren, tok::identifier, tok::r_paren};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "M(f(M(i)))",
                                     ExpectedTokens);
  // CheckLex reports a count mismatch non-fatally; stop here so the indexing
  // below cannot run past the end of toks.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("M(i)", getSourceText(toks[2], toks[2]));
}
127 
// For `M(M(i) c)`, the source text of the first lexed token is expected to be
// the nested call "M(i)".
TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgumentForEndOfMacro) {
  const std::vector<tok::TokenKind> ExpectedTokens = {tok::identifier,
                                                      tok::identifier};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "M(M(i) c)",
                                     ExpectedTokens);
  // CheckLex reports a count mismatch non-fatally; stop here so the indexing
  // below cannot run past the end of toks.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("M(i)", getSourceText(toks[0], toks[0]));
}
139 
// For `M(c c M(i))`, the range from the second to the third lexed token is
// expected to read back as "c M(i)".
TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForBeginOfMacro) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::identifier, tok::identifier};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "M(c c M(i))",
                                     ExpectedTokens);
  // CheckLex reports a count mismatch non-fatally; stop here so the indexing
  // below cannot run past the end of toks.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("c M(i)", getSourceText(toks[1], toks[2]));
}
152 
// For `M(M(i) c c)`, the range from the first to the second lexed token is
// expected to read back as "M(i) c".
TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForEndOfMacro) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::identifier, tok::identifier};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "M(M(i) c c)",
                                     ExpectedTokens);
  // CheckLex reports a count mismatch non-fatally; stop here so the indexing
  // below cannot run past the end of toks.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("M(i) c", getSourceText(toks[0], toks[1]));
}
165 
// For `M(c M(i)) M(M(i) c)`, a range that starts at the second lexed token
// and ends at the third spans two separate macro calls and is expected to be
// reported as invalid.
TEST_F(LexerTest, GetSourceTextInSeparateFnMacros) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::identifier, tok::identifier, tok::identifier};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "M(c M(i)) M(M(i) c)",
                                     ExpectedTokens);
  // CheckLex reports a count mismatch non-fatally; stop here so the indexing
  // below cannot run past the end of toks.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("<INVALID>", getSourceText(toks[1], toks[2]));
}
179 
// With `C(x)` defined as `M(x##c)`, the third lexed token of `M(f(C(i)))` is
// expected to read back as "C(i)".
TEST_F(LexerTest, GetSourceTextWorksAcrossTokenPastes) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::l_paren, tok::identifier, tok::r_paren};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "#define C(x) M(x##c)\n"
                                     "M(f(C(i)))",
                                     ExpectedTokens);
  // CheckLex reports a count mismatch non-fatally; stop here so the indexing
  // below cannot run past the end of toks.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("C(i)", getSourceText(toks[2], toks[2]));
}
194 
// For `f(M(M(i)))`, the third lexed token is expected to read back as the
// whole nested call chain "M(M(i))".
TEST_F(LexerTest, GetSourceTextExpandsAcrossMultipleMacroCalls) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::l_paren, tok::identifier, tok::r_paren};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "f(M(M(i)))",
                                     ExpectedTokens);
  // CheckLex reports a count mismatch non-fatally; stop here so the indexing
  // below cannot run past the end of toks.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("M(M(i))", getSourceText(toks[2], toks[2]));
}
207 
// For `M(f(i))`, the third lexed token sits in the middle of the macro
// argument and is expected to read back as just "i".
TEST_F(LexerTest, GetSourceTextInMiddleOfMacroArgument) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::l_paren, tok::identifier, tok::r_paren};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "M(f(i))",
                                     ExpectedTokens);
  // CheckLex reports a count mismatch non-fatally; stop here so the indexing
  // below cannot run past the end of toks.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("i", getSourceText(toks[2], toks[2]));
}
220 
// With two distinct identity macros M and C, the third lexed token of
// `f(C(M(i)))` is expected to read back as "C(M(i))".
TEST_F(LexerTest, GetSourceTextExpandsAroundDifferentMacroCalls) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::l_paren, tok::identifier, tok::r_paren};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "#define C(x) x\n"
                                     "f(C(M(i)))",
                                     ExpectedTokens);
  // CheckLex reports a count mismatch non-fatally; stop here so the indexing
  // below cannot run past the end of toks.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("C(M(i))", getSourceText(toks[2], toks[2]));
}
234 
// With `C(x)` defined as `c x`, the argument is not the first token of C's
// expansion; the fourth lexed token of `f(C(M(i)))` is expected to read back
// as "M(i)" only.
TEST_F(LexerTest, GetSourceTextOnlyExpandsIfFirstTokenInMacro) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::l_paren, tok::identifier, tok::identifier,
      tok::r_paren};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "#define C(x) c x\n"
                                     "f(C(M(i)))",
                                     ExpectedTokens);
  // CheckLex reports a count mismatch non-fatally; stop here so the indexing
  // below cannot run past the end of toks.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("M(i)", getSourceText(toks[3], toks[3]));
}
249 
// With `C(x)` defined as `c M(x)`, the fourth lexed token of `C(f(M(i)))` is
// expected to read back as "M(i)".
TEST_F(LexerTest, GetSourceTextExpandsRecursively) {
  const std::vector<tok::TokenKind> ExpectedTokens = {
      tok::identifier, tok::identifier, tok::l_paren, tok::identifier,
      tok::r_paren};

  std::vector<Token> toks = CheckLex("#define M(x) x\n"
                                     "#define C(x) c M(x)\n"
                                     "C(f(M(i)))",
                                     ExpectedTokens);
  // CheckLex reports a count mismatch non-fatally; stop here so the indexing
  // below cannot run past the end of toks.
  ASSERT_EQ(ExpectedTokens.size(), toks.size());

  EXPECT_EQ("M(i)", getSourceText(toks[3], toks[3]));
}
264 
TEST_F(LexerTest,LexAPI)265 TEST_F(LexerTest, LexAPI) {
266   std::vector<tok::TokenKind> ExpectedTokens;
267   ExpectedTokens.push_back(tok::l_square);
268   ExpectedTokens.push_back(tok::identifier);
269   ExpectedTokens.push_back(tok::r_square);
270   ExpectedTokens.push_back(tok::l_square);
271   ExpectedTokens.push_back(tok::identifier);
272   ExpectedTokens.push_back(tok::r_square);
273   ExpectedTokens.push_back(tok::identifier);
274   ExpectedTokens.push_back(tok::identifier);
275   ExpectedTokens.push_back(tok::identifier);
276   ExpectedTokens.push_back(tok::identifier);
277 
278   std::vector<Token> toks = CheckLex("#define M(x) [x]\n"
279                                      "#define N(x) x\n"
280                                      "#define INN(x) x\n"
281                                      "#define NOF1 INN(val)\n"
282                                      "#define NOF2 val\n"
283                                      "M(foo) N([bar])\n"
284                                      "N(INN(val)) N(NOF1) N(NOF2) N(val)",
285                                      ExpectedTokens);
286 
287   SourceLocation lsqrLoc = toks[0].getLocation();
288   SourceLocation idLoc = toks[1].getLocation();
289   SourceLocation rsqrLoc = toks[2].getLocation();
290   CharSourceRange macroRange = SourceMgr.getExpansionRange(lsqrLoc);
291 
292   SourceLocation Loc;
293   EXPECT_TRUE(Lexer::isAtStartOfMacroExpansion(lsqrLoc, SourceMgr, LangOpts, &Loc));
294   EXPECT_EQ(Loc, macroRange.getBegin());
295   EXPECT_FALSE(Lexer::isAtStartOfMacroExpansion(idLoc, SourceMgr, LangOpts));
296   EXPECT_FALSE(Lexer::isAtEndOfMacroExpansion(idLoc, SourceMgr, LangOpts));
297   EXPECT_TRUE(Lexer::isAtEndOfMacroExpansion(rsqrLoc, SourceMgr, LangOpts, &Loc));
298   EXPECT_EQ(Loc, macroRange.getEnd());
299   EXPECT_TRUE(macroRange.isTokenRange());
300 
301   CharSourceRange range = Lexer::makeFileCharRange(
302            CharSourceRange::getTokenRange(lsqrLoc, idLoc), SourceMgr, LangOpts);
303   EXPECT_TRUE(range.isInvalid());
304   range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(idLoc, rsqrLoc),
305                                    SourceMgr, LangOpts);
306   EXPECT_TRUE(range.isInvalid());
307   range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(lsqrLoc, rsqrLoc),
308                                    SourceMgr, LangOpts);
309   EXPECT_TRUE(!range.isTokenRange());
310   EXPECT_EQ(range.getAsRange(),
311             SourceRange(macroRange.getBegin(),
312                         macroRange.getEnd().getLocWithOffset(1)));
313 
314   StringRef text = Lexer::getSourceText(
315                                CharSourceRange::getTokenRange(lsqrLoc, rsqrLoc),
316                                SourceMgr, LangOpts);
317   EXPECT_EQ(text, "M(foo)");
318 
319   SourceLocation macroLsqrLoc = toks[3].getLocation();
320   SourceLocation macroIdLoc = toks[4].getLocation();
321   SourceLocation macroRsqrLoc = toks[5].getLocation();
322   SourceLocation fileLsqrLoc = SourceMgr.getSpellingLoc(macroLsqrLoc);
323   SourceLocation fileIdLoc = SourceMgr.getSpellingLoc(macroIdLoc);
324   SourceLocation fileRsqrLoc = SourceMgr.getSpellingLoc(macroRsqrLoc);
325 
326   range = Lexer::makeFileCharRange(
327       CharSourceRange::getTokenRange(macroLsqrLoc, macroIdLoc),
328       SourceMgr, LangOpts);
329   EXPECT_EQ(SourceRange(fileLsqrLoc, fileIdLoc.getLocWithOffset(3)),
330             range.getAsRange());
331 
332   range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(macroIdLoc, macroRsqrLoc),
333                                    SourceMgr, LangOpts);
334   EXPECT_EQ(SourceRange(fileIdLoc, fileRsqrLoc.getLocWithOffset(1)),
335             range.getAsRange());
336 
337   macroRange = SourceMgr.getExpansionRange(macroLsqrLoc);
338   range = Lexer::makeFileCharRange(
339                      CharSourceRange::getTokenRange(macroLsqrLoc, macroRsqrLoc),
340                      SourceMgr, LangOpts);
341   EXPECT_EQ(SourceRange(macroRange.getBegin(), macroRange.getEnd().getLocWithOffset(1)),
342             range.getAsRange());
343 
344   text = Lexer::getSourceText(
345           CharSourceRange::getTokenRange(SourceRange(macroLsqrLoc, macroIdLoc)),
346           SourceMgr, LangOpts);
347   EXPECT_EQ(text, "[bar");
348 
349 
350   SourceLocation idLoc1 = toks[6].getLocation();
351   SourceLocation idLoc2 = toks[7].getLocation();
352   SourceLocation idLoc3 = toks[8].getLocation();
353   SourceLocation idLoc4 = toks[9].getLocation();
354   EXPECT_EQ("INN", Lexer::getImmediateMacroName(idLoc1, SourceMgr, LangOpts));
355   EXPECT_EQ("INN", Lexer::getImmediateMacroName(idLoc2, SourceMgr, LangOpts));
356   EXPECT_EQ("NOF2", Lexer::getImmediateMacroName(idLoc3, SourceMgr, LangOpts));
357   EXPECT_EQ("N", Lexer::getImmediateMacroName(idLoc4, SourceMgr, LangOpts));
358 }
359 
TEST_F(LexerTest, DontMergeMacroArgsFromDifferentMacroFiles) {
  std::vector<Token> toks =
      Lex("#define helper1 0\n"
          "void helper2(const char *, ...);\n"
          "#define M1(a, ...) helper2(a, ##__VA_ARGS__)\n"
          "#define M2(a, ...) M1(a, helper1, ##__VA_ARGS__)\n"
          "void f1() { M2(\"a\", \"b\"); }");

  // Lex() performs no checks of its own, so make sure the token inspected
  // below actually exists before indexing into the vector.
  ASSERT_GT(toks.size(), 20U);

  // Check the file corresponding to the "helper1" macro arg in M2.
  //
  // The lexer used to report its size as 31, meaning that the end of the
  // expansion would be on the *next line* (just past `M2("a", "b")`). Make
  // sure that we get the correct end location (the comma after "helper1").
  SourceLocation helper1ArgLoc = toks[20].getLocation();
  EXPECT_EQ(SourceMgr.getFileIDSize(SourceMgr.getFileID(helper1ArgLoc)), 8U);
}
376 
// Builds a three-parameter function-like macro by hand, feeds it the
// arguments `"StrArg"`, `5` and `'C'`, and checks that each one stringifies
// to the expected string literal.
TEST_F(LexerTest, DontOverallocateStringifyArgs) {
  TrivialModuleLoader ModLoader;
  auto PP = CreatePP("\"StrArg\", 5, 'C'", ModLoader);

  llvm::BumpPtrAllocator Allocator;
  // Value-initialize so setParameterList copies null pointers rather than
  // indeterminate ones; only the parameter count is checked in this test.
  std::array<IdentifierInfo *, 3> ParamList{};
  MacroInfo *MI = PP->AllocateMacroInfo({});
  MI->setIsFunctionLike();
  MI->setParameterList(ParamList, Allocator);
  EXPECT_EQ(3u, MI->getNumParams());
  EXPECT_TRUE(MI->isFunctionLike());

  // Argument separator: each comma in the input, and the final eof, is
  // replaced by this token. Reset it first so every field is defined before
  // it is copied around.
  Token Eof;
  Eof.startToken();
  Eof.setKind(tok::eof);
  std::vector<Token> ArgTokens;
  for (;;) {
    Token tok;
    PP->Lex(tok);
    if (tok.is(tok::eof)) {
      ArgTokens.push_back(Eof);
      break;
    }
    if (tok.is(tok::comma))
      ArgTokens.push_back(Eof);
    else
      ArgTokens.push_back(tok);
  }

  // MacroArgs is released through destroy(), not delete, hence the custom
  // deleter.
  auto MacroArgsDeleter = [&PP](MacroArgs *M) { M->destroy(*PP); };
  std::unique_ptr<MacroArgs, decltype(MacroArgsDeleter)> MA(
      MacroArgs::create(MI, ArgTokens, false, *PP), MacroArgsDeleter);
  auto StringifyArg = [&](int ArgNo) {
    return MA->StringifyArgument(MA->getUnexpArgument(ArgNo), *PP,
                                 /*Charify=*/false, {}, {});
  };
  Token Result = StringifyArg(0);
  EXPECT_EQ(tok::string_literal, Result.getKind());
  EXPECT_STREQ("\"\\\"StrArg\\\"\"", Result.getLiteralData());
  Result = StringifyArg(1);
  EXPECT_EQ(tok::string_literal, Result.getKind());
  EXPECT_STREQ("\"5\"", Result.getLiteralData());
  Result = StringifyArg(2);
  EXPECT_EQ(tok::string_literal, Result.getKind());
  EXPECT_STREQ("\"'C'\"", Result.getLiteralData());
#if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST
  EXPECT_DEATH(StringifyArg(3), "Invalid arg #");
#endif
}
425 
// Checks Lexer::isNewLineEscaped on a set of short buffers, always querying
// the last character of the buffer.
TEST_F(LexerTest, IsNewLineEscapedValid) {
  // S must be a non-empty NUL-terminated string: the second argument points
  // at its last character.
  auto hasNewLineEscaped = [](const char *S) {
    return Lexer::isNewLineEscaped(S, S + std::strlen(S) - 1);
  };

  // A backslash followed by a newline sequence, optionally with whitespace
  // in between.
  EXPECT_TRUE(hasNewLineEscaped("\\\r"));
  EXPECT_TRUE(hasNewLineEscaped("\\\n"));
  EXPECT_TRUE(hasNewLineEscaped("\\\r\n"));
  EXPECT_TRUE(hasNewLineEscaped("\\\n\r"));
  EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r"));
  EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r\n"));

  // A second newline after the escaped one, or no backslash at all.
  EXPECT_FALSE(hasNewLineEscaped("\\\r\r"));
  EXPECT_FALSE(hasNewLineEscaped("\\\r\r\n"));
  EXPECT_FALSE(hasNewLineEscaped("\\\n\n"));
  EXPECT_FALSE(hasNewLineEscaped("\r"));
  EXPECT_FALSE(hasNewLineEscaped("\n"));
  EXPECT_FALSE(hasNewLineEscaped("\r\n"));
  EXPECT_FALSE(hasNewLineEscaped("\n\r"));
  EXPECT_FALSE(hasNewLineEscaped("\r\r"));
  EXPECT_FALSE(hasNewLineEscaped("\n\n"));
}
448 
// Every offset inside an identifier, including identifiers split by an
// escaped newline, must map back to the identifier's start.
TEST_F(LexerTest, GetBeginningOfTokenWithEscapedNewLine) {
  // All five identifiers span the same number of characters, which keeps the
  // offset arithmetic below simple.
  const unsigned IdentifierLength = 8;
  std::string TextToLex = "rabarbar\n"
                          "foo\\\nbar\n"
                          "foo\\\rbar\n"
                          "fo\\\r\nbar\n"
                          "foo\\\n\rba\n";
  std::vector<tok::TokenKind> ExpectedTokens(5, tok::identifier);
  std::vector<Token> LexedTokens = CheckLex(TextToLex, ExpectedTokens);

  for (const Token &Tok : LexedTokens) {
    const SourceLocation TokStart = Tok.getLocation();
    const unsigned ExpectedOffset = SourceMgr.getDecomposedLoc(TokStart).second;

    for (unsigned Delta = 0; Delta != IdentifierLength; ++Delta) {
      const SourceLocation Begin = Lexer::GetBeginningOfToken(
          TokStart.getLocWithOffset(Delta), SourceMgr, LangOpts);
      const unsigned FoundOffset =
          SourceMgr.getDecomposedExpansionLoc(Begin).second;

      // The location reported by GetBeginningOfToken has to match the token
      // location originally produced by the lexer.
      EXPECT_EQ(FoundOffset, ExpectedOffset);
    }
  }
}
478 
// Inputs that end in a backslash sequence must lex to no tokens at all.
TEST_F(LexerTest, AvoidPastEndOfStringDereference) {
  const std::vector<Token> LineCommentToks = Lex("  //  \\\n");
  EXPECT_TRUE(LineCommentToks.empty());

  const std::vector<Token> UnterminatedIncludeToks = Lex("#include <\\\\");
  EXPECT_TRUE(UnterminatedIncludeToks.empty());

  const std::vector<Token> IncludeWithNewlineToks = Lex("#include <\\\\\n");
  EXPECT_TRUE(IncludeWithNewlineToks.empty());
}
484 
// Runs three multi-line raw strings through both Stringify overloads:
//   "std::string Lexer::Stringify(StringRef Str, bool Charify)" and
//   "void Lexer::Stringify(SmallVectorImpl<char> &Str)".
TEST_F(LexerTest, StringizingRasString) {
  // Ordinary text with embedded quotes and newlines.
  const std::string Plain = R"(foo
    {"bar":[]}
    baz)";
  const std::string PlainOut = Lexer::Stringify(StringRef(Plain));
  SmallString<128> PlainBuf;
  PlainBuf += Plain.c_str();
  Lexer::Stringify(PlainBuf);

  // Corner cases: backslashes next to newlines and literal "\n" sequences.
  const std::string Slashes = R"(\
    \n
    \\n
    \\)";
  const std::string SlashesOut = Lexer::Stringify(StringRef(Slashes));
  SmallString<128> SlashesBuf;
  SlashesBuf += Slashes.c_str();
  Lexer::Stringify(SlashesBuf);

  // A backslash followed by several consecutive newlines.
  const std::string Gaps = R"(a\


    \\b)";
  const std::string GapsOut = Lexer::Stringify(StringRef(Gaps));
  SmallString<128> GapsBuf;
  GapsBuf += Gaps.c_str();
  Lexer::Stringify(GapsBuf);

  EXPECT_EQ(PlainOut, R"(foo\n    {\"bar\":[]}\n    baz)");
  EXPECT_EQ(PlainBuf, R"(foo\n    {\"bar\":[]}\n    baz)");
  EXPECT_EQ(SlashesOut, R"(\\\n    \\n\n    \\\\n\n    \\\\)");
  EXPECT_EQ(SlashesBuf, R"(\\\n    \\n\n    \\\\n\n    \\\\)");
  EXPECT_EQ(GapsOut, R"(a\\\n\n\n    \\\\b)");
  EXPECT_EQ(GapsBuf, R"(a\\\n\n\n    \\\\b)");
}
522 
// A token range covering a single object-like macro expansion must convert
// to a character range that spans the whole macro name.
TEST_F(LexerTest, CharRangeOffByOne) {
  std::vector<Token> toks = Lex(R"(#define MOO 1
    void foo() { MOO; })");
  // Lex() performs no checks of its own, so make sure the token inspected
  // below actually exists before indexing into the vector.
  ASSERT_GT(toks.size(), 5U);
  const Token &moo = toks[5];

  EXPECT_EQ(getSourceText(moo, moo), "MOO");

  SourceRange R{moo.getLocation(), moo.getLocation()};

  EXPECT_TRUE(
      Lexer::isAtStartOfMacroExpansion(R.getBegin(), SourceMgr, LangOpts));
  EXPECT_TRUE(
      Lexer::isAtEndOfMacroExpansion(R.getEnd(), SourceMgr, LangOpts));

  CharSourceRange CR = Lexer::getAsCharRange(R, SourceMgr, LangOpts);

  EXPECT_EQ(Lexer::getSourceText(CR, SourceMgr, LangOpts), "MOO"); // Was "MO".
}
541 
// Walks the main file with Lexer::findNextToken, starting at the beginning of
// the file, and records the spelling of every token found before eof.
TEST_F(LexerTest, FindNextToken) {
  Lex("int abcd = 0;\n"
      "int xyz = abcd;\n");

  SourceLocation Cursor =
      SourceMgr.getLocForStartOfFile(SourceMgr.getMainFileID());
  std::vector<std::string> Spellings;
  for (;;) {
    auto Next = Lexer::findNextToken(Cursor, SourceMgr, LangOpts);
    ASSERT_TRUE(Next.hasValue());
    if (Next->is(tok::eof))
      break;
    Spellings.push_back(getSourceText(*Next, *Next));
    // Continue the search from the token that was just found.
    Cursor = Next->getLocation();
  }

  // The token at the starting location itself ("int") is skipped, since only
  // tokens *after* the given location are returned.
  EXPECT_THAT(Spellings, ElementsAre("abcd", "=", "0", ";", "int", "xyz", "=",
                                     "abcd", ";"));
}
559 
// After preprocessing an empty main file to eof, exactly one FileID must have
// been created for the predefines buffer.
TEST_F(LexerTest, CreatedFIDCountForPredefinedBuffer) {
  TrivialModuleLoader ModLoader;
  auto PP = CreatePP("", ModLoader);

  // Drain the preprocessor; the tokens themselves are irrelevant.
  for (bool ReachedEof = false; !ReachedEof;) {
    Token Tok;
    PP->Lex(Tok);
    ReachedEof = Tok.is(tok::eof);
  }

  EXPECT_EQ(SourceMgr.getNumCreatedFIDsForFileID(PP->getPredefinesFileID()),
            1U);
}
572 } // anonymous namespace
573