1 //===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //  This file implements the Preprocessor interface.
11 //
12 //===----------------------------------------------------------------------===//
13 //
14 // Options to support:
15 //   -H       - Print the name of each header file used.
16 //   -d[DNI] - Dump various things.
17 //   -fworking-directory - #line's with preprocessor's working dir.
18 //   -fpreprocessed
19 //   -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
20 //   -W*
21 //   -w
22 //
23 // Messages to emit:
24 //   "Multiple include guards may be useful for:\n"
25 //
26 //===----------------------------------------------------------------------===//
27 
28 #include "clang/Lex/Preprocessor.h"
29 #include "clang/Basic/FileManager.h"
30 #include "clang/Basic/FileSystemStatCache.h"
31 #include "clang/Basic/SourceManager.h"
32 #include "clang/Basic/TargetInfo.h"
33 #include "clang/Lex/CodeCompletionHandler.h"
34 #include "clang/Lex/ExternalPreprocessorSource.h"
35 #include "clang/Lex/HeaderSearch.h"
36 #include "clang/Lex/LexDiagnostic.h"
37 #include "clang/Lex/LiteralSupport.h"
38 #include "clang/Lex/MacroArgs.h"
39 #include "clang/Lex/MacroInfo.h"
40 #include "clang/Lex/ModuleLoader.h"
41 #include "clang/Lex/Pragma.h"
42 #include "clang/Lex/PreprocessingRecord.h"
43 #include "clang/Lex/PreprocessorOptions.h"
44 #include "clang/Lex/ScratchBuffer.h"
45 #include "llvm/ADT/APFloat.h"
46 #include "llvm/ADT/STLExtras.h"
47 #include "llvm/ADT/SmallString.h"
48 #include "llvm/ADT/StringExtras.h"
49 #include "llvm/Support/Capacity.h"
50 #include "llvm/Support/ConvertUTF.h"
51 #include "llvm/Support/MemoryBuffer.h"
52 #include "llvm/Support/raw_ostream.h"
53 using namespace clang;
54 
55 //===----------------------------------------------------------------------===//
~ExternalPreprocessorSource()56 ExternalPreprocessorSource::~ExternalPreprocessorSource() { }
57 
/// Construct a preprocessor bound to the given options, diagnostics engine,
/// language options, source manager, header search and module loader.
///
/// Besides wiring up the references, the constructor resets all statistics
/// counters, establishes the default lexing flags (comments discarded, macro
/// expansion enabled, pragmas enabled), poisons __VA_ARGS__, and registers the
/// builtin pragmas and builtin macros.
///
/// \param IILookup    forwarded to the identifier table together with \p opts.
/// \param OwnsHeaders recorded in OwnsHeaderSearch; when true the destructor
///                    deletes the HeaderSearch object passed as \p Headers.
/// \param TUKind      stored as the translation unit kind.
Preprocessor::Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts,
                           DiagnosticsEngine &diags, LangOptions &opts,
                           SourceManager &SM, HeaderSearch &Headers,
                           ModuleLoader &TheModuleLoader,
                           IdentifierInfoLookup *IILookup, bool OwnsHeaders,
                           TranslationUnitKind TUKind)
    : PPOpts(PPOpts), Diags(&diags), LangOpts(opts), Target(nullptr),
      FileMgr(Headers.getFileMgr()), SourceMgr(SM),
      ScratchBuf(new ScratchBuffer(SourceMgr)),HeaderInfo(Headers),
      TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
      Identifiers(opts, IILookup),
      PragmaHandlers(new PragmaNamespace(StringRef())),
      IncrementalProcessing(false), TUKind(TUKind),
      CodeComplete(nullptr), CodeCompletionFile(nullptr),
      CodeCompletionOffset(0), LastTokenWasAt(false),
      ModuleImportExpectsIdentifier(false), CodeCompletionReached(0),
      MainFileDir(nullptr), SkipMainFilePreamble(0, true), CurPPLexer(nullptr),
      CurDirLookup(nullptr), CurLexerKind(CLK_Lexer), CurSubmodule(nullptr),
      Callbacks(nullptr), MacroArgCache(nullptr), Record(nullptr),
      MIChainHead(nullptr), DeserialMIChainHead(nullptr) {
  // Remember whether the destructor is responsible for deleting HeaderInfo.
  OwnsHeaderSearch = OwnsHeaders;

  CounterValue = 0; // __COUNTER__ starts at 0.

  // Clear stats.
  NumDirectives = NumDefined = NumUndefined = NumPragma = 0;
  NumIf = NumElse = NumEndif = 0;
  NumEnteredSourceFiles = 0;
  NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0;
  NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0;
  MaxIncludeStackDepth = 0;
  NumSkipped = 0;

  // Default to discarding comments.
  KeepComments = false;
  KeepMacroComments = false;
  SuppressIncludeNotFoundError = false;

  // Macro expansion is enabled.
  DisableMacroExpansion = false;
  MacroExpansionInDirectivesOverride = false;
  InMacroArgs = false;
  InMacroArgPreExpansion = false;
  NumCachedTokenLexers = 0;
  PragmasEnabled = true;
  ParsingIfOrElifDirective = false;
  PreprocessedOutput = false;

  // No tokens have been cached yet.
  CachedLexPos = 0;

  // We haven't read anything from the external source.
  ReadMacrosFromExternalSource = false;

  // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro.
  // This gets unpoisoned where it is allowed.
  (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
  SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use);

  // Initialize the pragma handlers.
  RegisterBuiltinPragmas();

  // Initialize builtin macros like __LINE__ and friends.
  RegisterBuiltinMacros();

  // The exception-handling identifiers are only looked up when the Borland
  // language option is set; otherwise every one of them is left null.
  if(LangOpts.Borland) {
    Ident__exception_info        = getIdentifierInfo("_exception_info");
    Ident___exception_info       = getIdentifierInfo("__exception_info");
    Ident_GetExceptionInfo       = getIdentifierInfo("GetExceptionInformation");
    Ident__exception_code        = getIdentifierInfo("_exception_code");
    Ident___exception_code       = getIdentifierInfo("__exception_code");
    Ident_GetExceptionCode       = getIdentifierInfo("GetExceptionCode");
    Ident__abnormal_termination  = getIdentifierInfo("_abnormal_termination");
    Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination");
    Ident_AbnormalTermination    = getIdentifierInfo("AbnormalTermination");
  } else {
    Ident__exception_info = Ident__exception_code = nullptr;
    Ident__abnormal_termination = Ident___exception_info = nullptr;
    Ident___exception_code = Ident___abnormal_termination = nullptr;
    Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
    Ident_AbnormalTermination = nullptr;
  }
}
140 
~Preprocessor()141 Preprocessor::~Preprocessor() {
142   assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");
143 
144   IncludeMacroStack.clear();
145 
146   // Destroy any macro definitions.
147   while (MacroInfoChain *I = MIChainHead) {
148     MIChainHead = I->Next;
149     I->~MacroInfoChain();
150   }
151 
152   // Free any cached macro expanders.
153   // This populates MacroArgCache, so all TokenLexers need to be destroyed
154   // before the code below that frees up the MacroArgCache list.
155   std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr);
156   CurTokenLexer.reset();
157 
158   while (DeserializedMacroInfoChain *I = DeserialMIChainHead) {
159     DeserialMIChainHead = I->Next;
160     I->~DeserializedMacroInfoChain();
161   }
162 
163   // Free any cached MacroArgs.
164   for (MacroArgs *ArgList = MacroArgCache; ArgList;)
165     ArgList = ArgList->deallocate();
166 
167   // Delete the header search info, if we own it.
168   if (OwnsHeaderSearch)
169     delete &HeaderInfo;
170 }
171 
Initialize(const TargetInfo & Target)172 void Preprocessor::Initialize(const TargetInfo &Target) {
173   assert((!this->Target || this->Target == &Target) &&
174          "Invalid override of target information");
175   this->Target = &Target;
176 
177   // Initialize information about built-ins.
178   BuiltinInfo.InitializeTarget(Target);
179   HeaderInfo.setTarget(Target);
180 }
181 
InitializeForModelFile()182 void Preprocessor::InitializeForModelFile() {
183   NumEnteredSourceFiles = 0;
184 
185   // Reset pragmas
186   PragmaHandlersBackup = std::move(PragmaHandlers);
187   PragmaHandlers = llvm::make_unique<PragmaNamespace>(StringRef());
188   RegisterBuiltinPragmas();
189 
190   // Reset PredefinesFileID
191   PredefinesFileID = FileID();
192 }
193 
FinalizeForModelFile()194 void Preprocessor::FinalizeForModelFile() {
195   NumEnteredSourceFiles = 1;
196 
197   PragmaHandlers = std::move(PragmaHandlersBackup);
198 }
199 
setPTHManager(PTHManager * pm)200 void Preprocessor::setPTHManager(PTHManager* pm) {
201   PTH.reset(pm);
202   FileMgr.addStatCache(PTH->createStatCache());
203 }
204 
DumpToken(const Token & Tok,bool DumpFlags) const205 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
206   llvm::errs() << tok::getTokenName(Tok.getKind()) << " '"
207                << getSpelling(Tok) << "'";
208 
209   if (!DumpFlags) return;
210 
211   llvm::errs() << "\t";
212   if (Tok.isAtStartOfLine())
213     llvm::errs() << " [StartOfLine]";
214   if (Tok.hasLeadingSpace())
215     llvm::errs() << " [LeadingSpace]";
216   if (Tok.isExpandDisabled())
217     llvm::errs() << " [ExpandDisabled]";
218   if (Tok.needsCleaning()) {
219     const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
220     llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
221                  << "']";
222   }
223 
224   llvm::errs() << "\tLoc=<";
225   DumpLocation(Tok.getLocation());
226   llvm::errs() << ">";
227 }
228 
DumpLocation(SourceLocation Loc) const229 void Preprocessor::DumpLocation(SourceLocation Loc) const {
230   Loc.dump(SourceMgr);
231 }
232 
DumpMacro(const MacroInfo & MI) const233 void Preprocessor::DumpMacro(const MacroInfo &MI) const {
234   llvm::errs() << "MACRO: ";
235   for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
236     DumpToken(MI.getReplacementToken(i));
237     llvm::errs() << "  ";
238   }
239   llvm::errs() << "\n";
240 }
241 
PrintStats()242 void Preprocessor::PrintStats() {
243   llvm::errs() << "\n*** Preprocessor Stats:\n";
244   llvm::errs() << NumDirectives << " directives found:\n";
245   llvm::errs() << "  " << NumDefined << " #define.\n";
246   llvm::errs() << "  " << NumUndefined << " #undef.\n";
247   llvm::errs() << "  #include/#include_next/#import:\n";
248   llvm::errs() << "    " << NumEnteredSourceFiles << " source files entered.\n";
249   llvm::errs() << "    " << MaxIncludeStackDepth << " max include stack depth\n";
250   llvm::errs() << "  " << NumIf << " #if/#ifndef/#ifdef.\n";
251   llvm::errs() << "  " << NumElse << " #else/#elif.\n";
252   llvm::errs() << "  " << NumEndif << " #endif.\n";
253   llvm::errs() << "  " << NumPragma << " #pragma.\n";
254   llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
255 
256   llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
257              << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
258              << NumFastMacroExpanded << " on the fast path.\n";
259   llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
260              << " token paste (##) operations performed, "
261              << NumFastTokenPaste << " on the fast path.\n";
262 
263   llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
264 
265   llvm::errs() << "\n  BumpPtr: " << BP.getTotalMemory();
266   llvm::errs() << "\n  Macro Expanded Tokens: "
267                << llvm::capacity_in_bytes(MacroExpandedTokens);
268   llvm::errs() << "\n  Predefines Buffer: " << Predefines.capacity();
269   llvm::errs() << "\n  Macros: " << llvm::capacity_in_bytes(Macros);
270   llvm::errs() << "\n  #pragma push_macro Info: "
271                << llvm::capacity_in_bytes(PragmaPushMacroInfo);
272   llvm::errs() << "\n  Poison Reasons: "
273                << llvm::capacity_in_bytes(PoisonReasons);
274   llvm::errs() << "\n  Comment Handlers: "
275                << llvm::capacity_in_bytes(CommentHandlers) << "\n";
276 }
277 
278 Preprocessor::macro_iterator
macro_begin(bool IncludeExternalMacros) const279 Preprocessor::macro_begin(bool IncludeExternalMacros) const {
280   if (IncludeExternalMacros && ExternalSource &&
281       !ReadMacrosFromExternalSource) {
282     ReadMacrosFromExternalSource = true;
283     ExternalSource->ReadDefinedMacros();
284   }
285 
286   return Macros.begin();
287 }
288 
getTotalMemory() const289 size_t Preprocessor::getTotalMemory() const {
290   return BP.getTotalMemory()
291     + llvm::capacity_in_bytes(MacroExpandedTokens)
292     + Predefines.capacity() /* Predefines buffer. */
293     + llvm::capacity_in_bytes(Macros)
294     + llvm::capacity_in_bytes(PragmaPushMacroInfo)
295     + llvm::capacity_in_bytes(PoisonReasons)
296     + llvm::capacity_in_bytes(CommentHandlers);
297 }
298 
299 Preprocessor::macro_iterator
macro_end(bool IncludeExternalMacros) const300 Preprocessor::macro_end(bool IncludeExternalMacros) const {
301   if (IncludeExternalMacros && ExternalSource &&
302       !ReadMacrosFromExternalSource) {
303     ReadMacrosFromExternalSource = true;
304     ExternalSource->ReadDefinedMacros();
305   }
306 
307   return Macros.end();
308 }
309 
310 /// \brief Compares macro tokens with a specified token value sequence.
MacroDefinitionEquals(const MacroInfo * MI,ArrayRef<TokenValue> Tokens)311 static bool MacroDefinitionEquals(const MacroInfo *MI,
312                                   ArrayRef<TokenValue> Tokens) {
313   return Tokens.size() == MI->getNumTokens() &&
314       std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin());
315 }
316 
getLastMacroWithSpelling(SourceLocation Loc,ArrayRef<TokenValue> Tokens) const317 StringRef Preprocessor::getLastMacroWithSpelling(
318                                     SourceLocation Loc,
319                                     ArrayRef<TokenValue> Tokens) const {
320   SourceLocation BestLocation;
321   StringRef BestSpelling;
322   for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end();
323        I != E; ++I) {
324     const MacroDirective::DefInfo
325       Def = I->second->findDirectiveAtLoc(Loc, SourceMgr);
326     if (!Def || !Def.getMacroInfo())
327       continue;
328     if (!Def.getMacroInfo()->isObjectLike())
329       continue;
330     if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens))
331       continue;
332     SourceLocation Location = Def.getLocation();
333     // Choose the macro defined latest.
334     if (BestLocation.isInvalid() ||
335         (Location.isValid() &&
336          SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) {
337       BestLocation = Location;
338       BestSpelling = I->first->getName();
339     }
340   }
341   return BestSpelling;
342 }
343 
recomputeCurLexerKind()344 void Preprocessor::recomputeCurLexerKind() {
345   if (CurLexer)
346     CurLexerKind = CLK_Lexer;
347   else if (CurPTHLexer)
348     CurLexerKind = CLK_PTHLexer;
349   else if (CurTokenLexer)
350     CurLexerKind = CLK_TokenLexer;
351   else
352     CurLexerKind = CLK_CachingLexer;
353 }
354 
SetCodeCompletionPoint(const FileEntry * File,unsigned CompleteLine,unsigned CompleteColumn)355 bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,
356                                           unsigned CompleteLine,
357                                           unsigned CompleteColumn) {
358   assert(File);
359   assert(CompleteLine && CompleteColumn && "Starts from 1:1");
360   assert(!CodeCompletionFile && "Already set");
361 
362   using llvm::MemoryBuffer;
363 
364   // Load the actual file's contents.
365   bool Invalid = false;
366   const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid);
367   if (Invalid)
368     return true;
369 
370   // Find the byte position of the truncation point.
371   const char *Position = Buffer->getBufferStart();
372   for (unsigned Line = 1; Line < CompleteLine; ++Line) {
373     for (; *Position; ++Position) {
374       if (*Position != '\r' && *Position != '\n')
375         continue;
376 
377       // Eat \r\n or \n\r as a single line.
378       if ((Position[1] == '\r' || Position[1] == '\n') &&
379           Position[0] != Position[1])
380         ++Position;
381       ++Position;
382       break;
383     }
384   }
385 
386   Position += CompleteColumn - 1;
387 
388   // If pointing inside the preamble, adjust the position at the beginning of
389   // the file after the preamble.
390   if (SkipMainFilePreamble.first &&
391       SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) {
392     if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
393       Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
394   }
395 
396   if (Position > Buffer->getBufferEnd())
397     Position = Buffer->getBufferEnd();
398 
399   CodeCompletionFile = File;
400   CodeCompletionOffset = Position - Buffer->getBufferStart();
401 
402   std::unique_ptr<MemoryBuffer> NewBuffer =
403       MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1,
404                                           Buffer->getBufferIdentifier());
405   char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart());
406   char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf);
407   *NewPos = '\0';
408   std::copy(Position, Buffer->getBufferEnd(), NewPos+1);
409   SourceMgr.overrideFileContents(File, std::move(NewBuffer));
410 
411   return false;
412 }
413 
CodeCompleteNaturalLanguage()414 void Preprocessor::CodeCompleteNaturalLanguage() {
415   if (CodeComplete)
416     CodeComplete->CodeCompleteNaturalLanguage();
417   setCodeCompletionReached();
418 }
419 
420 /// getSpelling - This method is used to get the spelling of a token into a
421 /// SmallVector. Note that the returned StringRef may not point to the
422 /// supplied buffer if a copy can be avoided.
getSpelling(const Token & Tok,SmallVectorImpl<char> & Buffer,bool * Invalid) const423 StringRef Preprocessor::getSpelling(const Token &Tok,
424                                           SmallVectorImpl<char> &Buffer,
425                                           bool *Invalid) const {
426   // NOTE: this has to be checked *before* testing for an IdentifierInfo.
427   if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) {
428     // Try the fast path.
429     if (const IdentifierInfo *II = Tok.getIdentifierInfo())
430       return II->getName();
431   }
432 
433   // Resize the buffer if we need to copy into it.
434   if (Tok.needsCleaning())
435     Buffer.resize(Tok.getLength());
436 
437   const char *Ptr = Buffer.data();
438   unsigned Len = getSpelling(Tok, Ptr, Invalid);
439   return StringRef(Ptr, Len);
440 }
441 
442 /// CreateString - Plop the specified string into a scratch buffer and return a
443 /// location for it.  If specified, the source location provides a source
444 /// location for the token.
CreateString(StringRef Str,Token & Tok,SourceLocation ExpansionLocStart,SourceLocation ExpansionLocEnd)445 void Preprocessor::CreateString(StringRef Str, Token &Tok,
446                                 SourceLocation ExpansionLocStart,
447                                 SourceLocation ExpansionLocEnd) {
448   Tok.setLength(Str.size());
449 
450   const char *DestPtr;
451   SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr);
452 
453   if (ExpansionLocStart.isValid())
454     Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart,
455                                        ExpansionLocEnd, Str.size());
456   Tok.setLocation(Loc);
457 
458   // If this is a raw identifier or a literal token, set the pointer data.
459   if (Tok.is(tok::raw_identifier))
460     Tok.setRawIdentifierData(DestPtr);
461   else if (Tok.isLiteral())
462     Tok.setLiteralData(DestPtr);
463 }
464 
getCurrentModule()465 Module *Preprocessor::getCurrentModule() {
466   if (getLangOpts().CurrentModule.empty())
467     return nullptr;
468 
469   return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule);
470 }
471 
472 //===----------------------------------------------------------------------===//
473 // Preprocessor Initialization Methods
474 //===----------------------------------------------------------------------===//
475 
476 
477 /// EnterMainSourceFile - Enter the specified FileID as the main source file,
478 /// which implicitly adds the builtin defines etc.
EnterMainSourceFile()479 void Preprocessor::EnterMainSourceFile() {
480   // We do not allow the preprocessor to reenter the main file.  Doing so will
481   // cause FileID's to accumulate information from both runs (e.g. #line
482   // information) and predefined macros aren't guaranteed to be set properly.
483   assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
484   FileID MainFileID = SourceMgr.getMainFileID();
485 
486   // If MainFileID is loaded it means we loaded an AST file, no need to enter
487   // a main file.
488   if (!SourceMgr.isLoadedFileID(MainFileID)) {
489     // Enter the main file source buffer.
490     EnterSourceFile(MainFileID, nullptr, SourceLocation());
491 
492     // If we've been asked to skip bytes in the main file (e.g., as part of a
493     // precompiled preamble), do so now.
494     if (SkipMainFilePreamble.first > 0)
495       CurLexer->SkipBytes(SkipMainFilePreamble.first,
496                           SkipMainFilePreamble.second);
497 
498     // Tell the header info that the main file was entered.  If the file is later
499     // #imported, it won't be re-entered.
500     if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
501       HeaderInfo.IncrementIncludeCount(FE);
502   }
503 
504   // Preprocess Predefines to populate the initial preprocessor state.
505   std::unique_ptr<llvm::MemoryBuffer> SB =
506     llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>");
507   assert(SB && "Cannot create predefined source buffer");
508   FileID FID = SourceMgr.createFileID(std::move(SB));
509   assert(!FID.isInvalid() && "Could not create FileID for predefines?");
510   setPredefinesFileID(FID);
511 
512   // Start parsing the predefines.
513   EnterSourceFile(FID, nullptr, SourceLocation());
514 }
515 
EndSourceFile()516 void Preprocessor::EndSourceFile() {
517   // Notify the client that we reached the end of the source file.
518   if (Callbacks)
519     Callbacks->EndOfMainFile();
520 }
521 
522 //===----------------------------------------------------------------------===//
523 // Lexer Event Handling.
524 //===----------------------------------------------------------------------===//
525 
526 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
527 /// identifier information for the token and install it into the token,
528 /// updating the token kind accordingly.
LookUpIdentifierInfo(Token & Identifier) const529 IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
530   assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
531 
532   // Look up this token, see if it is a macro, or if it is a language keyword.
533   IdentifierInfo *II;
534   if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
535     // No cleaning needed, just use the characters from the lexed buffer.
536     II = getIdentifierInfo(Identifier.getRawIdentifier());
537   } else {
538     // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
539     SmallString<64> IdentifierBuffer;
540     StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
541 
542     if (Identifier.hasUCN()) {
543       SmallString<64> UCNIdentifierBuffer;
544       expandUCNs(UCNIdentifierBuffer, CleanedStr);
545       II = getIdentifierInfo(UCNIdentifierBuffer);
546     } else {
547       II = getIdentifierInfo(CleanedStr);
548     }
549   }
550 
551   // Update the token info (identifier info and appropriate token kind).
552   Identifier.setIdentifierInfo(II);
553   Identifier.setKind(II->getTokenID());
554 
555   return II;
556 }
557 
SetPoisonReason(IdentifierInfo * II,unsigned DiagID)558 void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) {
559   PoisonReasons[II] = DiagID;
560 }
561 
PoisonSEHIdentifiers(bool Poison)562 void Preprocessor::PoisonSEHIdentifiers(bool Poison) {
563   assert(Ident__exception_code && Ident__exception_info);
564   assert(Ident___exception_code && Ident___exception_info);
565   Ident__exception_code->setIsPoisoned(Poison);
566   Ident___exception_code->setIsPoisoned(Poison);
567   Ident_GetExceptionCode->setIsPoisoned(Poison);
568   Ident__exception_info->setIsPoisoned(Poison);
569   Ident___exception_info->setIsPoisoned(Poison);
570   Ident_GetExceptionInfo->setIsPoisoned(Poison);
571   Ident__abnormal_termination->setIsPoisoned(Poison);
572   Ident___abnormal_termination->setIsPoisoned(Poison);
573   Ident_AbnormalTermination->setIsPoisoned(Poison);
574 }
575 
HandlePoisonedIdentifier(Token & Identifier)576 void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) {
577   assert(Identifier.getIdentifierInfo() &&
578          "Can't handle identifiers without identifier info!");
579   llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it =
580     PoisonReasons.find(Identifier.getIdentifierInfo());
581   if(it == PoisonReasons.end())
582     Diag(Identifier, diag::err_pp_used_poisoned_id);
583   else
584     Diag(Identifier,it->second) << Identifier.getIdentifierInfo();
585 }
586 
/// HandleIdentifier - This callback is invoked when the lexer reads an
/// identifier.  This callback looks up the identifier in the map and/or
/// potentially macro expands it or turns it into a named token (like 'for').
///
/// Note that callers of this method are guarded by checking the
/// IdentifierInfo's 'isHandleIdentifierCase' bit.  If this method changes, the
/// IdentifierInfo methods that compute these properties will need to change to
/// match.
///
/// \returns the result of HandleMacroExpandedIdentifier() when the identifier
/// is macro expanded, and true in every other case.
bool Preprocessor::HandleIdentifier(Token &Identifier) {
  assert(Identifier.getIdentifierInfo() &&
         "Can't handle identifiers without identifier info!");

  IdentifierInfo &II = *Identifier.getIdentifierInfo();

  // If the information about this identifier is out of date, update it from
  // the external source.
  // We have to treat __VA_ARGS__ in a special way, since it gets
  // serialized with isPoisoned = true, but our preprocessor may have
  // unpoisoned it if we're defining a C99 macro.
  if (II.isOutOfDate()) {
    // Remember the current poison state of __VA_ARGS__ so it can be restored
    // after the update below.
    bool CurrentIsPoisoned = false;
    if (&II == Ident__VA_ARGS__)
      CurrentIsPoisoned = Ident__VA_ARGS__->isPoisoned();

    // NOTE(review): ExternalSource is dereferenced without a null check;
    // presumably an identifier can only be out of date when an external source
    // is attached -- confirm.
    ExternalSource->updateOutOfDateIdentifier(II);
    Identifier.setKind(II.getTokenID());

    if (&II == Ident__VA_ARGS__)
      II.setIsPoisoned(CurrentIsPoisoned);
  }

  // If this identifier was poisoned, and if it was not produced from a macro
  // expansion, emit an error.
  if (II.isPoisoned() && CurPPLexer) {
    HandlePoisonedIdentifier(Identifier);
  }

  // If this is a macro to be expanded, do it.
  if (MacroDirective *MD = getMacroDirective(&II)) {
    MacroInfo *MI = MD->getMacroInfo();
    if (!DisableMacroExpansion) {
      if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
        // C99 6.10.3p10: If the preprocessing token immediately after the
        // macro name isn't a '(', this macro should not be expanded.
        if (!MI->isFunctionLike() || isNextPPTokenLParen())
          return HandleMacroExpandedIdentifier(Identifier, MD);
      } else {
        // C99 6.10.3.4p2 says that a disabled macro may never again be
        // expanded, even if it's in a context where it could be expanded in the
        // future.
        Identifier.setFlag(Token::DisableExpand);
        if (MI->isObjectLike() || isNextPPTokenLParen())
          Diag(Identifier, diag::pp_disabled_macro_expansion);
      }
    }
  }

  // If this identifier is a keyword in C++11, produce a warning. Don't warn if
  // we're not considering macro expansion, since this identifier might be the
  // name of a macro.
  // FIXME: This warning is disabled in cases where it shouldn't be, like
  //   "#define constexpr constexpr", "int constexpr;"
  if (II.isCXX11CompatKeyword() && !DisableMacroExpansion) {
    Diag(Identifier, diag::warn_cxx11_keyword) << II.getName();
    // Don't diagnose this keyword again in this translation unit.
    II.setIsCXX11CompatKeyword(false);
  }

  // C++ 2.11p2: If this is an alternative representation of a C++ operator,
  // then we act as if it is the actual operator and not the textual
  // representation of it.
  if (II.isCPlusPlusOperatorKeyword())
    Identifier.setIdentifierInfo(nullptr);

  // If this is an extension token, diagnose its use.
  // We avoid diagnosing tokens that originate from macro definitions.
  // FIXME: This warning is disabled in cases where it shouldn't be,
  // like "#define TY typeof", "TY(1) x".
  if (II.isExtensionToken() && !DisableMacroExpansion)
    Diag(Identifier, diag::ext_token_used);

  // If this is the 'import' contextual keyword following an '@', note
  // that the next token indicates a module name.
  //
  // Note that we do not treat 'import' as a contextual
  // keyword when we're in a caching lexer, because caching lexers only get
  // used in contexts where import declarations are disallowed.
  if (LastTokenWasAt && II.isModulesImport() && !InMacroArgs &&
      !DisableMacroExpansion &&
      (getLangOpts().Modules || getLangOpts().DebuggerSupport) &&
      CurLexerKind != CLK_CachingLexer) {
    // Reset the import state and switch the lexer kind so that the following
    // tokens are routed through LexAfterModuleImport().
    ModuleImportLoc = Identifier.getLocation();
    ModuleImportPath.clear();
    ModuleImportExpectsIdentifier = true;
    CurLexerKind = CLK_LexAfterModuleImport;
  }
  return true;
}
685 
Lex(Token & Result)686 void Preprocessor::Lex(Token &Result) {
687   // We loop here until a lex function retuns a token; this avoids recursion.
688   bool ReturnedToken;
689   do {
690     switch (CurLexerKind) {
691     case CLK_Lexer:
692       ReturnedToken = CurLexer->Lex(Result);
693       break;
694     case CLK_PTHLexer:
695       ReturnedToken = CurPTHLexer->Lex(Result);
696       break;
697     case CLK_TokenLexer:
698       ReturnedToken = CurTokenLexer->Lex(Result);
699       break;
700     case CLK_CachingLexer:
701       CachingLex(Result);
702       ReturnedToken = true;
703       break;
704     case CLK_LexAfterModuleImport:
705       LexAfterModuleImport(Result);
706       ReturnedToken = true;
707       break;
708     }
709   } while (!ReturnedToken);
710 
711   LastTokenWasAt = Result.is(tok::at);
712 }
713 
714 
715 /// \brief Lex a token following the 'import' contextual keyword.
716 ///
LexAfterModuleImport(Token & Result)717 void Preprocessor::LexAfterModuleImport(Token &Result) {
718   // Figure out what kind of lexer we actually have.
719   recomputeCurLexerKind();
720 
721   // Lex the next token.
722   Lex(Result);
723 
724   // The token sequence
725   //
726   //   import identifier (. identifier)*
727   //
728   // indicates a module import directive. We already saw the 'import'
729   // contextual keyword, so now we're looking for the identifiers.
730   if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
731     // We expected to see an identifier here, and we did; continue handling
732     // identifiers.
733     ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(),
734                                               Result.getLocation()));
735     ModuleImportExpectsIdentifier = false;
736     CurLexerKind = CLK_LexAfterModuleImport;
737     return;
738   }
739 
740   // If we're expecting a '.' or a ';', and we got a '.', then wait until we
741   // see the next identifier.
742   if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
743     ModuleImportExpectsIdentifier = true;
744     CurLexerKind = CLK_LexAfterModuleImport;
745     return;
746   }
747 
748   // If we have a non-empty module path, load the named module.
749   if (!ModuleImportPath.empty()) {
750     Module *Imported = nullptr;
751     if (getLangOpts().Modules)
752       Imported = TheModuleLoader.loadModule(ModuleImportLoc,
753                                             ModuleImportPath,
754                                             Module::MacrosVisible,
755                                             /*IsIncludeDirective=*/false);
756     if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport))
757       Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);
758   }
759 }
760 
FinishLexStringLiteral(Token & Result,std::string & String,const char * DiagnosticTag,bool AllowMacroExpansion)761 bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
762                                           const char *DiagnosticTag,
763                                           bool AllowMacroExpansion) {
764   // We need at least one string literal.
765   if (Result.isNot(tok::string_literal)) {
766     Diag(Result, diag::err_expected_string_literal)
767       << /*Source='in...'*/0 << DiagnosticTag;
768     return false;
769   }
770 
771   // Lex string literal tokens, optionally with macro expansion.
772   SmallVector<Token, 4> StrToks;
773   do {
774     StrToks.push_back(Result);
775 
776     if (Result.hasUDSuffix())
777       Diag(Result, diag::err_invalid_string_udl);
778 
779     if (AllowMacroExpansion)
780       Lex(Result);
781     else
782       LexUnexpandedToken(Result);
783   } while (Result.is(tok::string_literal));
784 
785   // Concatenate and parse the strings.
786   StringLiteralParser Literal(StrToks, *this);
787   assert(Literal.isAscii() && "Didn't allow wide strings in");
788 
789   if (Literal.hadError)
790     return false;
791 
792   if (Literal.Pascal) {
793     Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
794       << /*Source='in...'*/0 << DiagnosticTag;
795     return false;
796   }
797 
798   String = Literal.GetString();
799   return true;
800 }
801 
parseSimpleIntegerLiteral(Token & Tok,uint64_t & Value)802 bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
803   assert(Tok.is(tok::numeric_constant));
804   SmallString<8> IntegerBuffer;
805   bool NumberInvalid = false;
806   StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid);
807   if (NumberInvalid)
808     return false;
809   NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this);
810   if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
811     return false;
812   llvm::APInt APVal(64, 0);
813   if (Literal.GetIntegerValue(APVal))
814     return false;
815   Lex(Tok);
816   Value = APVal.getLimitedValue();
817   return true;
818 }
819 
addCommentHandler(CommentHandler * Handler)820 void Preprocessor::addCommentHandler(CommentHandler *Handler) {
821   assert(Handler && "NULL comment handler");
822   assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) ==
823          CommentHandlers.end() && "Comment handler already registered");
824   CommentHandlers.push_back(Handler);
825 }
826 
removeCommentHandler(CommentHandler * Handler)827 void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
828   std::vector<CommentHandler *>::iterator Pos
829   = std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler);
830   assert(Pos != CommentHandlers.end() && "Comment handler not registered");
831   CommentHandlers.erase(Pos);
832 }
833 
HandleComment(Token & result,SourceRange Comment)834 bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
835   bool AnyPendingTokens = false;
836   for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(),
837        HEnd = CommentHandlers.end();
838        H != HEnd; ++H) {
839     if ((*H)->HandleComment(*this, Comment))
840       AnyPendingTokens = true;
841   }
842   if (!AnyPendingTokens || getCommentRetentionState())
843     return false;
844   Lex(result);
845   return true;
846 }
847 
~ModuleLoader()848 ModuleLoader::~ModuleLoader() { }
849 
~CommentHandler()850 CommentHandler::~CommentHandler() { }
851 
~CodeCompletionHandler()852 CodeCompletionHandler::~CodeCompletionHandler() { }
853 
createPreprocessingRecord()854 void Preprocessor::createPreprocessingRecord() {
855   if (Record)
856     return;
857 
858   Record = new PreprocessingRecord(getSourceManager());
859   addPPCallbacks(std::unique_ptr<PPCallbacks>(Record));
860 }
861