1 //===- unittests/Lex/LexerTest.cpp ------ Lexer tests ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "clang/Lex/Lexer.h"
11 #include "clang/Basic/Diagnostic.h"
12 #include "clang/Basic/DiagnosticOptions.h"
13 #include "clang/Basic/FileManager.h"
14 #include "clang/Basic/LangOptions.h"
15 #include "clang/Basic/SourceManager.h"
16 #include "clang/Basic/TargetInfo.h"
17 #include "clang/Basic/TargetOptions.h"
18 #include "clang/Lex/HeaderSearch.h"
19 #include "clang/Lex/HeaderSearchOptions.h"
20 #include "clang/Lex/ModuleLoader.h"
21 #include "clang/Lex/Preprocessor.h"
22 #include "clang/Lex/PreprocessorOptions.h"
23 #include "gtest/gtest.h"
24 
25 using namespace clang;
26 
27 namespace {
28 
29 class VoidModuleLoader : public ModuleLoader {
loadModule(SourceLocation ImportLoc,ModuleIdPath Path,Module::NameVisibilityKind Visibility,bool IsInclusionDirective)30   ModuleLoadResult loadModule(SourceLocation ImportLoc,
31                               ModuleIdPath Path,
32                               Module::NameVisibilityKind Visibility,
33                               bool IsInclusionDirective) override {
34     return ModuleLoadResult();
35   }
36 
makeModuleVisible(Module * Mod,Module::NameVisibilityKind Visibility,SourceLocation ImportLoc)37   void makeModuleVisible(Module *Mod,
38                          Module::NameVisibilityKind Visibility,
39                          SourceLocation ImportLoc) override { }
40 
loadGlobalModuleIndex(SourceLocation TriggerLoc)41   GlobalModuleIndex *loadGlobalModuleIndex(SourceLocation TriggerLoc) override
42     { return nullptr; }
lookupMissingImports(StringRef Name,SourceLocation TriggerLoc)43   bool lookupMissingImports(StringRef Name, SourceLocation TriggerLoc) override
44     { return 0; }
45 };
46 
47 // The test fixture.
48 class LexerTest : public ::testing::Test {
49 protected:
LexerTest()50   LexerTest()
51     : FileMgr(FileMgrOpts),
52       DiagID(new DiagnosticIDs()),
53       Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()),
54       SourceMgr(Diags, FileMgr),
55       TargetOpts(new TargetOptions)
56   {
57     TargetOpts->Triple = "x86_64-apple-darwin11.1.0";
58     Target = TargetInfo::CreateTargetInfo(Diags, TargetOpts);
59   }
60 
Lex(StringRef Source)61   std::vector<Token> Lex(StringRef Source) {
62     std::unique_ptr<llvm::MemoryBuffer> Buf =
63         llvm::MemoryBuffer::getMemBuffer(Source);
64     SourceMgr.setMainFileID(SourceMgr.createFileID(std::move(Buf)));
65 
66     VoidModuleLoader ModLoader;
67     HeaderSearch HeaderInfo(new HeaderSearchOptions, SourceMgr, Diags, LangOpts,
68                             Target.get());
69     Preprocessor PP(new PreprocessorOptions(), Diags, LangOpts, SourceMgr,
70                     HeaderInfo, ModLoader, /*IILookup =*/nullptr,
71                     /*OwnsHeaderSearch =*/false);
72     PP.Initialize(*Target);
73     PP.EnterMainSourceFile();
74 
75     std::vector<Token> toks;
76     while (1) {
77       Token tok;
78       PP.Lex(tok);
79       if (tok.is(tok::eof))
80         break;
81       toks.push_back(tok);
82     }
83 
84     return toks;
85   }
86 
CheckLex(StringRef Source,ArrayRef<tok::TokenKind> ExpectedTokens)87   std::vector<Token> CheckLex(StringRef Source,
88                               ArrayRef<tok::TokenKind> ExpectedTokens) {
89     auto toks = Lex(Source);
90     EXPECT_EQ(ExpectedTokens.size(), toks.size());
91     for (unsigned i = 0, e = ExpectedTokens.size(); i != e; ++i) {
92       EXPECT_EQ(ExpectedTokens[i], toks[i].getKind());
93     }
94 
95     return toks;
96   }
97 
getSourceText(Token Begin,Token End)98   std::string getSourceText(Token Begin, Token End) {
99     bool Invalid;
100     StringRef Str =
101         Lexer::getSourceText(CharSourceRange::getTokenRange(SourceRange(
102                                     Begin.getLocation(), End.getLocation())),
103                              SourceMgr, LangOpts, &Invalid);
104     if (Invalid)
105       return "<INVALID>";
106     return Str;
107   }
108 
109   FileSystemOptions FileMgrOpts;
110   FileManager FileMgr;
111   IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
112   DiagnosticsEngine Diags;
113   SourceManager SourceMgr;
114   LangOptions LangOpts;
115   std::shared_ptr<TargetOptions> TargetOpts;
116   IntrusiveRefCntPtr<TargetInfo> Target;
117 };
118 
// "M(f(M(i)))" must lex as `f ( i )`. The `i` comes from the nested M(i), and
// its source text is expected to be the whole nested call "M(i)".
TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgument) {
  const std::vector<tok::TokenKind> ExpectedKinds = {
      tok::identifier, tok::l_paren, tok::identifier, tok::r_paren};

  const std::vector<Token> Tokens = CheckLex("#define M(x) x\n"
                                             "M(f(M(i)))",
                                             ExpectedKinds);

  const Token &Inner = Tokens[2];
  EXPECT_EQ("M(i)", getSourceText(Inner, Inner));
}
132 
// "M(M(i) c)" must lex as `i c`. The leading `i` is the entire expansion of
// the nested M(i), and its source text is expected to be "M(i)".
TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgumentForEndOfMacro) {
  const std::vector<tok::TokenKind> ExpectedKinds = {tok::identifier,
                                                     tok::identifier};

  const std::vector<Token> Tokens = CheckLex("#define M(x) x\n"
                                             "M(M(i) c)",
                                             ExpectedKinds);

  const Token &First = Tokens[0];
  EXPECT_EQ("M(i)", getSourceText(First, First));
}
144 
// "M(c c M(i))" must lex as `c c i`. The range from the second `c` through
// the `i` is expected to map back to the argument text "c M(i)".
TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForBeginOfMacro) {
  const std::vector<tok::TokenKind> ExpectedKinds = {
      tok::identifier, tok::identifier, tok::identifier};

  const std::vector<Token> Tokens = CheckLex("#define M(x) x\n"
                                             "M(c c M(i))",
                                             ExpectedKinds);

  EXPECT_EQ("c M(i)", getSourceText(Tokens[1], Tokens[2]));
}
157 
// "M(M(i) c c)" must lex as `i c c`. The range from the `i` through the first
// `c` is expected to map back to the argument text "M(i) c".
TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForEndOfMacro) {
  const std::vector<tok::TokenKind> ExpectedKinds = {
      tok::identifier, tok::identifier, tok::identifier};

  const std::vector<Token> Tokens = CheckLex("#define M(x) x\n"
                                             "M(M(i) c c)",
                                             ExpectedKinds);

  EXPECT_EQ("M(i) c", getSourceText(Tokens[0], Tokens[1]));
}
170 
// "M(c M(i)) M(M(i) c)" must lex as `c i i c`. The two `i` tokens come from
// different top-level M(...) invocations, so a range spanning them is expected
// to be reported as invalid.
TEST_F(LexerTest, GetSourceTextInSeparateFnMacros) {
  const std::vector<tok::TokenKind> ExpectedKinds = {
      tok::identifier, tok::identifier, tok::identifier, tok::identifier};

  const std::vector<Token> Tokens = CheckLex("#define M(x) x\n"
                                             "M(c M(i)) M(M(i) c)",
                                             ExpectedKinds);

  EXPECT_EQ("<INVALID>", getSourceText(Tokens[1], Tokens[2]));
}
184 
// C(x) forwards the pasted token x##c to M. "M(f(C(i)))" must lex as
// `f ( <identifier> )`, and the identifier's source text is expected to be
// the call "C(i)".
TEST_F(LexerTest, GetSourceTextWorksAcrossTokenPastes) {
  const std::vector<tok::TokenKind> ExpectedKinds = {
      tok::identifier, tok::l_paren, tok::identifier, tok::r_paren};

  const std::vector<Token> Tokens = CheckLex("#define M(x) x\n"
                                             "#define C(x) M(x##c)\n"
                                             "M(f(C(i)))",
                                             ExpectedKinds);

  const Token &Pasted = Tokens[2];
  EXPECT_EQ("C(i)", getSourceText(Pasted, Pasted));
}
199 
// "f(M(M(i)))" must lex as `f ( i )`. The `i` is reached through two nested M
// calls, and its source text is expected to be the outermost call "M(M(i))".
TEST_F(LexerTest, GetSourceTextExpandsAcrossMultipleMacroCalls) {
  const std::vector<tok::TokenKind> ExpectedKinds = {
      tok::identifier, tok::l_paren, tok::identifier, tok::r_paren};

  const std::vector<Token> Tokens = CheckLex("#define M(x) x\n"
                                             "f(M(M(i)))",
                                             ExpectedKinds);

  const Token &Inner = Tokens[2];
  EXPECT_EQ("M(M(i))", getSourceText(Inner, Inner));
}
212 
// "M(f(i))" must lex as `f ( i )`. The `i` sits in the middle of M's
// argument, and its source text is expected to be just "i".
TEST_F(LexerTest, GetSourceTextInMiddleOfMacroArgument) {
  const std::vector<tok::TokenKind> ExpectedKinds = {
      tok::identifier, tok::l_paren, tok::identifier, tok::r_paren};

  const std::vector<Token> Tokens = CheckLex("#define M(x) x\n"
                                             "M(f(i))",
                                             ExpectedKinds);

  const Token &Inner = Tokens[2];
  EXPECT_EQ("i", getSourceText(Inner, Inner));
}
225 
// "f(C(M(i)))" must lex as `f ( i )`. The `i` is reached through two
// different macros, and its source text is expected to be the outermost call
// "C(M(i))".
TEST_F(LexerTest, GetSourceTextExpandsAroundDifferentMacroCalls) {
  const std::vector<tok::TokenKind> ExpectedKinds = {
      tok::identifier, tok::l_paren, tok::identifier, tok::r_paren};

  const std::vector<Token> Tokens = CheckLex("#define M(x) x\n"
                                             "#define C(x) x\n"
                                             "f(C(M(i)))",
                                             ExpectedKinds);

  const Token &Inner = Tokens[2];
  EXPECT_EQ("C(M(i))", getSourceText(Inner, Inner));
}
239 
// C(x) expands to `c x`, so "f(C(M(i)))" must lex as `f ( c i )`. The `i` is
// not the first token of C's expansion, and its source text is expected to
// stop at "M(i)".
TEST_F(LexerTest, GetSourceTextOnlyExpandsIfFirstTokenInMacro) {
  const std::vector<tok::TokenKind> ExpectedKinds = {
      tok::identifier, tok::l_paren, tok::identifier, tok::identifier,
      tok::r_paren};

  const std::vector<Token> Tokens = CheckLex("#define M(x) x\n"
                                             "#define C(x) c x\n"
                                             "f(C(M(i)))",
                                             ExpectedKinds);

  const Token &Inner = Tokens[3];
  EXPECT_EQ("M(i)", getSourceText(Inner, Inner));
}
254 
// C(x) expands to `c M(x)`, so "C(f(M(i)))" must lex as `c f ( i )`. The
// source text of the `i` is expected to be the nested call "M(i)".
TEST_F(LexerTest, GetSourceTextExpandsRecursively) {
  const std::vector<tok::TokenKind> ExpectedKinds = {
      tok::identifier, tok::identifier, tok::l_paren, tok::identifier,
      tok::r_paren};

  const std::vector<Token> Tokens = CheckLex("#define M(x) x\n"
                                             "#define C(x) c M(x)\n"
                                             "C(f(M(i)))",
                                             ExpectedKinds);

  const Token &Inner = Tokens[3];
  EXPECT_EQ("M(i)", getSourceText(Inner, Inner));
}
269 
TEST_F(LexerTest,LexAPI)270 TEST_F(LexerTest, LexAPI) {
271   std::vector<tok::TokenKind> ExpectedTokens;
272   ExpectedTokens.push_back(tok::l_square);
273   ExpectedTokens.push_back(tok::identifier);
274   ExpectedTokens.push_back(tok::r_square);
275   ExpectedTokens.push_back(tok::l_square);
276   ExpectedTokens.push_back(tok::identifier);
277   ExpectedTokens.push_back(tok::r_square);
278   ExpectedTokens.push_back(tok::identifier);
279   ExpectedTokens.push_back(tok::identifier);
280   ExpectedTokens.push_back(tok::identifier);
281   ExpectedTokens.push_back(tok::identifier);
282 
283   std::vector<Token> toks = CheckLex("#define M(x) [x]\n"
284                                      "#define N(x) x\n"
285                                      "#define INN(x) x\n"
286                                      "#define NOF1 INN(val)\n"
287                                      "#define NOF2 val\n"
288                                      "M(foo) N([bar])\n"
289                                      "N(INN(val)) N(NOF1) N(NOF2) N(val)",
290                                      ExpectedTokens);
291 
292   SourceLocation lsqrLoc = toks[0].getLocation();
293   SourceLocation idLoc = toks[1].getLocation();
294   SourceLocation rsqrLoc = toks[2].getLocation();
295   std::pair<SourceLocation,SourceLocation>
296     macroPair = SourceMgr.getExpansionRange(lsqrLoc);
297   SourceRange macroRange = SourceRange(macroPair.first, macroPair.second);
298 
299   SourceLocation Loc;
300   EXPECT_TRUE(Lexer::isAtStartOfMacroExpansion(lsqrLoc, SourceMgr, LangOpts, &Loc));
301   EXPECT_EQ(Loc, macroRange.getBegin());
302   EXPECT_FALSE(Lexer::isAtStartOfMacroExpansion(idLoc, SourceMgr, LangOpts));
303   EXPECT_FALSE(Lexer::isAtEndOfMacroExpansion(idLoc, SourceMgr, LangOpts));
304   EXPECT_TRUE(Lexer::isAtEndOfMacroExpansion(rsqrLoc, SourceMgr, LangOpts, &Loc));
305   EXPECT_EQ(Loc, macroRange.getEnd());
306 
307   CharSourceRange range = Lexer::makeFileCharRange(
308            CharSourceRange::getTokenRange(lsqrLoc, idLoc), SourceMgr, LangOpts);
309   EXPECT_TRUE(range.isInvalid());
310   range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(idLoc, rsqrLoc),
311                                    SourceMgr, LangOpts);
312   EXPECT_TRUE(range.isInvalid());
313   range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(lsqrLoc, rsqrLoc),
314                                    SourceMgr, LangOpts);
315   EXPECT_TRUE(!range.isTokenRange());
316   EXPECT_EQ(range.getAsRange(),
317             SourceRange(macroRange.getBegin(),
318                         macroRange.getEnd().getLocWithOffset(1)));
319 
320   StringRef text = Lexer::getSourceText(
321                                CharSourceRange::getTokenRange(lsqrLoc, rsqrLoc),
322                                SourceMgr, LangOpts);
323   EXPECT_EQ(text, "M(foo)");
324 
325   SourceLocation macroLsqrLoc = toks[3].getLocation();
326   SourceLocation macroIdLoc = toks[4].getLocation();
327   SourceLocation macroRsqrLoc = toks[5].getLocation();
328   SourceLocation fileLsqrLoc = SourceMgr.getSpellingLoc(macroLsqrLoc);
329   SourceLocation fileIdLoc = SourceMgr.getSpellingLoc(macroIdLoc);
330   SourceLocation fileRsqrLoc = SourceMgr.getSpellingLoc(macroRsqrLoc);
331 
332   range = Lexer::makeFileCharRange(
333       CharSourceRange::getTokenRange(macroLsqrLoc, macroIdLoc),
334       SourceMgr, LangOpts);
335   EXPECT_EQ(SourceRange(fileLsqrLoc, fileIdLoc.getLocWithOffset(3)),
336             range.getAsRange());
337 
338   range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(macroIdLoc, macroRsqrLoc),
339                                    SourceMgr, LangOpts);
340   EXPECT_EQ(SourceRange(fileIdLoc, fileRsqrLoc.getLocWithOffset(1)),
341             range.getAsRange());
342 
343   macroPair = SourceMgr.getExpansionRange(macroLsqrLoc);
344   range = Lexer::makeFileCharRange(
345                      CharSourceRange::getTokenRange(macroLsqrLoc, macroRsqrLoc),
346                      SourceMgr, LangOpts);
347   EXPECT_EQ(SourceRange(macroPair.first, macroPair.second.getLocWithOffset(1)),
348             range.getAsRange());
349 
350   text = Lexer::getSourceText(
351           CharSourceRange::getTokenRange(SourceRange(macroLsqrLoc, macroIdLoc)),
352           SourceMgr, LangOpts);
353   EXPECT_EQ(text, "[bar");
354 
355 
356   SourceLocation idLoc1 = toks[6].getLocation();
357   SourceLocation idLoc2 = toks[7].getLocation();
358   SourceLocation idLoc3 = toks[8].getLocation();
359   SourceLocation idLoc4 = toks[9].getLocation();
360   EXPECT_EQ("INN", Lexer::getImmediateMacroName(idLoc1, SourceMgr, LangOpts));
361   EXPECT_EQ("INN", Lexer::getImmediateMacroName(idLoc2, SourceMgr, LangOpts));
362   EXPECT_EQ("NOF2", Lexer::getImmediateMacroName(idLoc3, SourceMgr, LangOpts));
363   EXPECT_EQ("N", Lexer::getImmediateMacroName(idLoc4, SourceMgr, LangOpts));
364 }
365 
// Regression test for the size recorded for a macro-argument expansion when
// the argument is forwarded through nested variadic macros.
TEST_F(LexerTest, DontMergeMacroArgsFromDifferentMacroFiles) {
  std::vector<Token> toks =
      Lex("#define helper1 0\n"
          "void helper2(const char *, ...);\n"
          "#define M1(a, ...) helper2(a, ##__VA_ARGS__)\n"
          "#define M2(a, ...) M1(a, helper1, ##__VA_ARGS__)\n"
          "void f1() { M2(\"a\", \"b\"); }");

  // Stop here with a clear failure instead of indexing past the end if the
  // lexer produced fewer tokens than the check below relies on.
  ASSERT_GT(toks.size(), 20U);

  // Check the file corresponding to the "helper1" macro arg in M2.
  //
  // The lexer used to report its size as 31, meaning that the end of the
  // expansion would be on the *next line* (just past `M2("a", "b")`). Make
  // sure that we get the correct end location (the comma after "helper1").
  SourceLocation helper1ArgLoc = toks[20].getLocation();
  EXPECT_EQ(SourceMgr.getFileIDSize(SourceMgr.getFileID(helper1ArgLoc)), 8U);
}
382 
383 } // anonymous namespace
384