1 //===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //  This file implements the Preprocessor interface.
11 //
12 //===----------------------------------------------------------------------===//
13 //
14 // Options to support:
15 //   -H       - Print the name of each header file used.
16 //   -d[DNI] - Dump various things.
17 //   -fworking-directory - #line's with preprocessor's working dir.
18 //   -fpreprocessed
19 //   -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
20 //   -W*
21 //   -w
22 //
23 // Messages to emit:
24 //   "Multiple include guards may be useful for:\n"
25 //
26 //===----------------------------------------------------------------------===//
27 
28 #include "clang/Lex/Preprocessor.h"
29 #include "clang/Basic/FileManager.h"
30 #include "clang/Basic/FileSystemStatCache.h"
31 #include "clang/Basic/SourceManager.h"
32 #include "clang/Basic/TargetInfo.h"
33 #include "clang/Lex/CodeCompletionHandler.h"
34 #include "clang/Lex/ExternalPreprocessorSource.h"
35 #include "clang/Lex/HeaderSearch.h"
36 #include "clang/Lex/LexDiagnostic.h"
37 #include "clang/Lex/LiteralSupport.h"
38 #include "clang/Lex/MacroArgs.h"
39 #include "clang/Lex/MacroInfo.h"
40 #include "clang/Lex/ModuleLoader.h"
41 #include "clang/Lex/PTHManager.h"
42 #include "clang/Lex/Pragma.h"
43 #include "clang/Lex/PreprocessingRecord.h"
44 #include "clang/Lex/PreprocessorOptions.h"
45 #include "clang/Lex/ScratchBuffer.h"
46 #include "llvm/ADT/APFloat.h"
47 #include "llvm/ADT/STLExtras.h"
48 #include "llvm/ADT/SmallString.h"
49 #include "llvm/ADT/StringExtras.h"
50 #include "llvm/Support/Capacity.h"
51 #include "llvm/Support/ConvertUTF.h"
52 #include "llvm/Support/MemoryBuffer.h"
53 #include "llvm/Support/raw_ostream.h"
54 #include <utility>
55 using namespace clang;
56 
57 template class llvm::Registry<clang::PragmaHandler>;
58 
59 //===----------------------------------------------------------------------===//
~ExternalPreprocessorSource()60 ExternalPreprocessorSource::~ExternalPreprocessorSource() { }
61 
Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts,DiagnosticsEngine & diags,LangOptions & opts,SourceManager & SM,HeaderSearch & Headers,ModuleLoader & TheModuleLoader,IdentifierInfoLookup * IILookup,bool OwnsHeaders,TranslationUnitKind TUKind)62 Preprocessor::Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts,
63                            DiagnosticsEngine &diags, LangOptions &opts,
64                            SourceManager &SM, HeaderSearch &Headers,
65                            ModuleLoader &TheModuleLoader,
66                            IdentifierInfoLookup *IILookup, bool OwnsHeaders,
67                            TranslationUnitKind TUKind)
68     : PPOpts(std::move(PPOpts)), Diags(&diags), LangOpts(opts), Target(nullptr),
69       AuxTarget(nullptr), FileMgr(Headers.getFileMgr()), SourceMgr(SM),
70       ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers),
71       TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
72       Identifiers(opts, IILookup),
73       PragmaHandlers(new PragmaNamespace(StringRef())),
74       IncrementalProcessing(false), TUKind(TUKind), CodeComplete(nullptr),
75       CodeCompletionFile(nullptr), CodeCompletionOffset(0),
76       LastTokenWasAt(false), ModuleImportExpectsIdentifier(false),
77       CodeCompletionReached(0), MainFileDir(nullptr),
78       SkipMainFilePreamble(0, true), CurPPLexer(nullptr), CurDirLookup(nullptr),
79       CurLexerKind(CLK_Lexer), CurSubmodule(nullptr), Callbacks(nullptr),
80       CurSubmoduleState(&NullSubmoduleState), MacroArgCache(nullptr),
81       Record(nullptr), MIChainHead(nullptr), DeserialMIChainHead(nullptr) {
82   OwnsHeaderSearch = OwnsHeaders;
83 
84   CounterValue = 0; // __COUNTER__ starts at 0.
85 
86   // Clear stats.
87   NumDirectives = NumDefined = NumUndefined = NumPragma = 0;
88   NumIf = NumElse = NumEndif = 0;
89   NumEnteredSourceFiles = 0;
90   NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0;
91   NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0;
92   MaxIncludeStackDepth = 0;
93   NumSkipped = 0;
94 
95   // Default to discarding comments.
96   KeepComments = false;
97   KeepMacroComments = false;
98   SuppressIncludeNotFoundError = false;
99 
100   // Macro expansion is enabled.
101   DisableMacroExpansion = false;
102   MacroExpansionInDirectivesOverride = false;
103   InMacroArgs = false;
104   InMacroArgPreExpansion = false;
105   NumCachedTokenLexers = 0;
106   PragmasEnabled = true;
107   ParsingIfOrElifDirective = false;
108   PreprocessedOutput = false;
109 
110   CachedLexPos = 0;
111 
112   // We haven't read anything from the external source.
113   ReadMacrosFromExternalSource = false;
114 
115   // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro.
116   // This gets unpoisoned where it is allowed.
117   (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
118   SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use);
119 
120   // Initialize the pragma handlers.
121   RegisterBuiltinPragmas();
122 
123   // Initialize builtin macros like __LINE__ and friends.
124   RegisterBuiltinMacros();
125 
126   if(LangOpts.Borland) {
127     Ident__exception_info        = getIdentifierInfo("_exception_info");
128     Ident___exception_info       = getIdentifierInfo("__exception_info");
129     Ident_GetExceptionInfo       = getIdentifierInfo("GetExceptionInformation");
130     Ident__exception_code        = getIdentifierInfo("_exception_code");
131     Ident___exception_code       = getIdentifierInfo("__exception_code");
132     Ident_GetExceptionCode       = getIdentifierInfo("GetExceptionCode");
133     Ident__abnormal_termination  = getIdentifierInfo("_abnormal_termination");
134     Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination");
135     Ident_AbnormalTermination    = getIdentifierInfo("AbnormalTermination");
136   } else {
137     Ident__exception_info = Ident__exception_code = nullptr;
138     Ident__abnormal_termination = Ident___exception_info = nullptr;
139     Ident___exception_code = Ident___abnormal_termination = nullptr;
140     Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
141     Ident_AbnormalTermination = nullptr;
142   }
143 }
144 
~Preprocessor()145 Preprocessor::~Preprocessor() {
146   assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");
147 
148   IncludeMacroStack.clear();
149 
150   // Destroy any macro definitions.
151   while (MacroInfoChain *I = MIChainHead) {
152     MIChainHead = I->Next;
153     I->~MacroInfoChain();
154   }
155 
156   // Free any cached macro expanders.
157   // This populates MacroArgCache, so all TokenLexers need to be destroyed
158   // before the code below that frees up the MacroArgCache list.
159   std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr);
160   CurTokenLexer.reset();
161 
162   while (DeserializedMacroInfoChain *I = DeserialMIChainHead) {
163     DeserialMIChainHead = I->Next;
164     I->~DeserializedMacroInfoChain();
165   }
166 
167   // Free any cached MacroArgs.
168   for (MacroArgs *ArgList = MacroArgCache; ArgList;)
169     ArgList = ArgList->deallocate();
170 
171   // Delete the header search info, if we own it.
172   if (OwnsHeaderSearch)
173     delete &HeaderInfo;
174 }
175 
Initialize(const TargetInfo & Target,const TargetInfo * AuxTarget)176 void Preprocessor::Initialize(const TargetInfo &Target,
177                               const TargetInfo *AuxTarget) {
178   assert((!this->Target || this->Target == &Target) &&
179          "Invalid override of target information");
180   this->Target = &Target;
181 
182   assert((!this->AuxTarget || this->AuxTarget == AuxTarget) &&
183          "Invalid override of aux target information.");
184   this->AuxTarget = AuxTarget;
185 
186   // Initialize information about built-ins.
187   BuiltinInfo.InitializeTarget(Target, AuxTarget);
188   HeaderInfo.setTarget(Target);
189 }
190 
InitializeForModelFile()191 void Preprocessor::InitializeForModelFile() {
192   NumEnteredSourceFiles = 0;
193 
194   // Reset pragmas
195   PragmaHandlersBackup = std::move(PragmaHandlers);
196   PragmaHandlers = llvm::make_unique<PragmaNamespace>(StringRef());
197   RegisterBuiltinPragmas();
198 
199   // Reset PredefinesFileID
200   PredefinesFileID = FileID();
201 }
202 
FinalizeForModelFile()203 void Preprocessor::FinalizeForModelFile() {
204   NumEnteredSourceFiles = 1;
205 
206   PragmaHandlers = std::move(PragmaHandlersBackup);
207 }
208 
setPTHManager(PTHManager * pm)209 void Preprocessor::setPTHManager(PTHManager* pm) {
210   PTH.reset(pm);
211   FileMgr.addStatCache(PTH->createStatCache());
212 }
213 
DumpToken(const Token & Tok,bool DumpFlags) const214 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
215   llvm::errs() << tok::getTokenName(Tok.getKind()) << " '"
216                << getSpelling(Tok) << "'";
217 
218   if (!DumpFlags) return;
219 
220   llvm::errs() << "\t";
221   if (Tok.isAtStartOfLine())
222     llvm::errs() << " [StartOfLine]";
223   if (Tok.hasLeadingSpace())
224     llvm::errs() << " [LeadingSpace]";
225   if (Tok.isExpandDisabled())
226     llvm::errs() << " [ExpandDisabled]";
227   if (Tok.needsCleaning()) {
228     const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
229     llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
230                  << "']";
231   }
232 
233   llvm::errs() << "\tLoc=<";
234   DumpLocation(Tok.getLocation());
235   llvm::errs() << ">";
236 }
237 
DumpLocation(SourceLocation Loc) const238 void Preprocessor::DumpLocation(SourceLocation Loc) const {
239   Loc.dump(SourceMgr);
240 }
241 
DumpMacro(const MacroInfo & MI) const242 void Preprocessor::DumpMacro(const MacroInfo &MI) const {
243   llvm::errs() << "MACRO: ";
244   for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
245     DumpToken(MI.getReplacementToken(i));
246     llvm::errs() << "  ";
247   }
248   llvm::errs() << "\n";
249 }
250 
PrintStats()251 void Preprocessor::PrintStats() {
252   llvm::errs() << "\n*** Preprocessor Stats:\n";
253   llvm::errs() << NumDirectives << " directives found:\n";
254   llvm::errs() << "  " << NumDefined << " #define.\n";
255   llvm::errs() << "  " << NumUndefined << " #undef.\n";
256   llvm::errs() << "  #include/#include_next/#import:\n";
257   llvm::errs() << "    " << NumEnteredSourceFiles << " source files entered.\n";
258   llvm::errs() << "    " << MaxIncludeStackDepth << " max include stack depth\n";
259   llvm::errs() << "  " << NumIf << " #if/#ifndef/#ifdef.\n";
260   llvm::errs() << "  " << NumElse << " #else/#elif.\n";
261   llvm::errs() << "  " << NumEndif << " #endif.\n";
262   llvm::errs() << "  " << NumPragma << " #pragma.\n";
263   llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
264 
265   llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
266              << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
267              << NumFastMacroExpanded << " on the fast path.\n";
268   llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
269              << " token paste (##) operations performed, "
270              << NumFastTokenPaste << " on the fast path.\n";
271 
272   llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
273 
274   llvm::errs() << "\n  BumpPtr: " << BP.getTotalMemory();
275   llvm::errs() << "\n  Macro Expanded Tokens: "
276                << llvm::capacity_in_bytes(MacroExpandedTokens);
277   llvm::errs() << "\n  Predefines Buffer: " << Predefines.capacity();
278   // FIXME: List information for all submodules.
279   llvm::errs() << "\n  Macros: "
280                << llvm::capacity_in_bytes(CurSubmoduleState->Macros);
281   llvm::errs() << "\n  #pragma push_macro Info: "
282                << llvm::capacity_in_bytes(PragmaPushMacroInfo);
283   llvm::errs() << "\n  Poison Reasons: "
284                << llvm::capacity_in_bytes(PoisonReasons);
285   llvm::errs() << "\n  Comment Handlers: "
286                << llvm::capacity_in_bytes(CommentHandlers) << "\n";
287 }
288 
289 Preprocessor::macro_iterator
macro_begin(bool IncludeExternalMacros) const290 Preprocessor::macro_begin(bool IncludeExternalMacros) const {
291   if (IncludeExternalMacros && ExternalSource &&
292       !ReadMacrosFromExternalSource) {
293     ReadMacrosFromExternalSource = true;
294     ExternalSource->ReadDefinedMacros();
295   }
296 
297   // Make sure we cover all macros in visible modules.
298   for (const ModuleMacro &Macro : ModuleMacros)
299     CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState()));
300 
301   return CurSubmoduleState->Macros.begin();
302 }
303 
getTotalMemory() const304 size_t Preprocessor::getTotalMemory() const {
305   return BP.getTotalMemory()
306     + llvm::capacity_in_bytes(MacroExpandedTokens)
307     + Predefines.capacity() /* Predefines buffer. */
308     // FIXME: Include sizes from all submodules, and include MacroInfo sizes,
309     // and ModuleMacros.
310     + llvm::capacity_in_bytes(CurSubmoduleState->Macros)
311     + llvm::capacity_in_bytes(PragmaPushMacroInfo)
312     + llvm::capacity_in_bytes(PoisonReasons)
313     + llvm::capacity_in_bytes(CommentHandlers);
314 }
315 
316 Preprocessor::macro_iterator
macro_end(bool IncludeExternalMacros) const317 Preprocessor::macro_end(bool IncludeExternalMacros) const {
318   if (IncludeExternalMacros && ExternalSource &&
319       !ReadMacrosFromExternalSource) {
320     ReadMacrosFromExternalSource = true;
321     ExternalSource->ReadDefinedMacros();
322   }
323 
324   return CurSubmoduleState->Macros.end();
325 }
326 
327 /// \brief Compares macro tokens with a specified token value sequence.
MacroDefinitionEquals(const MacroInfo * MI,ArrayRef<TokenValue> Tokens)328 static bool MacroDefinitionEquals(const MacroInfo *MI,
329                                   ArrayRef<TokenValue> Tokens) {
330   return Tokens.size() == MI->getNumTokens() &&
331       std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin());
332 }
333 
getLastMacroWithSpelling(SourceLocation Loc,ArrayRef<TokenValue> Tokens) const334 StringRef Preprocessor::getLastMacroWithSpelling(
335                                     SourceLocation Loc,
336                                     ArrayRef<TokenValue> Tokens) const {
337   SourceLocation BestLocation;
338   StringRef BestSpelling;
339   for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end();
340        I != E; ++I) {
341     const MacroDirective::DefInfo
342       Def = I->second.findDirectiveAtLoc(Loc, SourceMgr);
343     if (!Def || !Def.getMacroInfo())
344       continue;
345     if (!Def.getMacroInfo()->isObjectLike())
346       continue;
347     if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens))
348       continue;
349     SourceLocation Location = Def.getLocation();
350     // Choose the macro defined latest.
351     if (BestLocation.isInvalid() ||
352         (Location.isValid() &&
353          SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) {
354       BestLocation = Location;
355       BestSpelling = I->first->getName();
356     }
357   }
358   return BestSpelling;
359 }
360 
recomputeCurLexerKind()361 void Preprocessor::recomputeCurLexerKind() {
362   if (CurLexer)
363     CurLexerKind = CLK_Lexer;
364   else if (CurPTHLexer)
365     CurLexerKind = CLK_PTHLexer;
366   else if (CurTokenLexer)
367     CurLexerKind = CLK_TokenLexer;
368   else
369     CurLexerKind = CLK_CachingLexer;
370 }
371 
SetCodeCompletionPoint(const FileEntry * File,unsigned CompleteLine,unsigned CompleteColumn)372 bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,
373                                           unsigned CompleteLine,
374                                           unsigned CompleteColumn) {
375   assert(File);
376   assert(CompleteLine && CompleteColumn && "Starts from 1:1");
377   assert(!CodeCompletionFile && "Already set");
378 
379   using llvm::MemoryBuffer;
380 
381   // Load the actual file's contents.
382   bool Invalid = false;
383   const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid);
384   if (Invalid)
385     return true;
386 
387   // Find the byte position of the truncation point.
388   const char *Position = Buffer->getBufferStart();
389   for (unsigned Line = 1; Line < CompleteLine; ++Line) {
390     for (; *Position; ++Position) {
391       if (*Position != '\r' && *Position != '\n')
392         continue;
393 
394       // Eat \r\n or \n\r as a single line.
395       if ((Position[1] == '\r' || Position[1] == '\n') &&
396           Position[0] != Position[1])
397         ++Position;
398       ++Position;
399       break;
400     }
401   }
402 
403   Position += CompleteColumn - 1;
404 
405   // If pointing inside the preamble, adjust the position at the beginning of
406   // the file after the preamble.
407   if (SkipMainFilePreamble.first &&
408       SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) {
409     if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
410       Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
411   }
412 
413   if (Position > Buffer->getBufferEnd())
414     Position = Buffer->getBufferEnd();
415 
416   CodeCompletionFile = File;
417   CodeCompletionOffset = Position - Buffer->getBufferStart();
418 
419   std::unique_ptr<MemoryBuffer> NewBuffer =
420       MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1,
421                                           Buffer->getBufferIdentifier());
422   char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart());
423   char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf);
424   *NewPos = '\0';
425   std::copy(Position, Buffer->getBufferEnd(), NewPos+1);
426   SourceMgr.overrideFileContents(File, std::move(NewBuffer));
427 
428   return false;
429 }
430 
CodeCompleteNaturalLanguage()431 void Preprocessor::CodeCompleteNaturalLanguage() {
432   if (CodeComplete)
433     CodeComplete->CodeCompleteNaturalLanguage();
434   setCodeCompletionReached();
435 }
436 
437 /// getSpelling - This method is used to get the spelling of a token into a
438 /// SmallVector. Note that the returned StringRef may not point to the
439 /// supplied buffer if a copy can be avoided.
getSpelling(const Token & Tok,SmallVectorImpl<char> & Buffer,bool * Invalid) const440 StringRef Preprocessor::getSpelling(const Token &Tok,
441                                           SmallVectorImpl<char> &Buffer,
442                                           bool *Invalid) const {
443   // NOTE: this has to be checked *before* testing for an IdentifierInfo.
444   if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) {
445     // Try the fast path.
446     if (const IdentifierInfo *II = Tok.getIdentifierInfo())
447       return II->getName();
448   }
449 
450   // Resize the buffer if we need to copy into it.
451   if (Tok.needsCleaning())
452     Buffer.resize(Tok.getLength());
453 
454   const char *Ptr = Buffer.data();
455   unsigned Len = getSpelling(Tok, Ptr, Invalid);
456   return StringRef(Ptr, Len);
457 }
458 
459 /// CreateString - Plop the specified string into a scratch buffer and return a
460 /// location for it.  If specified, the source location provides a source
461 /// location for the token.
CreateString(StringRef Str,Token & Tok,SourceLocation ExpansionLocStart,SourceLocation ExpansionLocEnd)462 void Preprocessor::CreateString(StringRef Str, Token &Tok,
463                                 SourceLocation ExpansionLocStart,
464                                 SourceLocation ExpansionLocEnd) {
465   Tok.setLength(Str.size());
466 
467   const char *DestPtr;
468   SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr);
469 
470   if (ExpansionLocStart.isValid())
471     Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart,
472                                        ExpansionLocEnd, Str.size());
473   Tok.setLocation(Loc);
474 
475   // If this is a raw identifier or a literal token, set the pointer data.
476   if (Tok.is(tok::raw_identifier))
477     Tok.setRawIdentifierData(DestPtr);
478   else if (Tok.isLiteral())
479     Tok.setLiteralData(DestPtr);
480 }
481 
getCurrentModule()482 Module *Preprocessor::getCurrentModule() {
483   if (!getLangOpts().CompilingModule)
484     return nullptr;
485 
486   return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule);
487 }
488 
489 //===----------------------------------------------------------------------===//
490 // Preprocessor Initialization Methods
491 //===----------------------------------------------------------------------===//
492 
493 
494 /// EnterMainSourceFile - Enter the specified FileID as the main source file,
495 /// which implicitly adds the builtin defines etc.
EnterMainSourceFile()496 void Preprocessor::EnterMainSourceFile() {
497   // We do not allow the preprocessor to reenter the main file.  Doing so will
498   // cause FileID's to accumulate information from both runs (e.g. #line
499   // information) and predefined macros aren't guaranteed to be set properly.
500   assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
501   FileID MainFileID = SourceMgr.getMainFileID();
502 
503   // If MainFileID is loaded it means we loaded an AST file, no need to enter
504   // a main file.
505   if (!SourceMgr.isLoadedFileID(MainFileID)) {
506     // Enter the main file source buffer.
507     EnterSourceFile(MainFileID, nullptr, SourceLocation());
508 
509     // If we've been asked to skip bytes in the main file (e.g., as part of a
510     // precompiled preamble), do so now.
511     if (SkipMainFilePreamble.first > 0)
512       CurLexer->SkipBytes(SkipMainFilePreamble.first,
513                           SkipMainFilePreamble.second);
514 
515     // Tell the header info that the main file was entered.  If the file is later
516     // #imported, it won't be re-entered.
517     if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
518       HeaderInfo.IncrementIncludeCount(FE);
519   }
520 
521   // Preprocess Predefines to populate the initial preprocessor state.
522   std::unique_ptr<llvm::MemoryBuffer> SB =
523     llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>");
524   assert(SB && "Cannot create predefined source buffer");
525   FileID FID = SourceMgr.createFileID(std::move(SB));
526   assert(FID.isValid() && "Could not create FileID for predefines?");
527   setPredefinesFileID(FID);
528 
529   // Start parsing the predefines.
530   EnterSourceFile(FID, nullptr, SourceLocation());
531 }
532 
EndSourceFile()533 void Preprocessor::EndSourceFile() {
534   // Notify the client that we reached the end of the source file.
535   if (Callbacks)
536     Callbacks->EndOfMainFile();
537 }
538 
539 //===----------------------------------------------------------------------===//
540 // Lexer Event Handling.
541 //===----------------------------------------------------------------------===//
542 
543 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
544 /// identifier information for the token and install it into the token,
545 /// updating the token kind accordingly.
LookUpIdentifierInfo(Token & Identifier) const546 IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
547   assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
548 
549   // Look up this token, see if it is a macro, or if it is a language keyword.
550   IdentifierInfo *II;
551   if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
552     // No cleaning needed, just use the characters from the lexed buffer.
553     II = getIdentifierInfo(Identifier.getRawIdentifier());
554   } else {
555     // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
556     SmallString<64> IdentifierBuffer;
557     StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
558 
559     if (Identifier.hasUCN()) {
560       SmallString<64> UCNIdentifierBuffer;
561       expandUCNs(UCNIdentifierBuffer, CleanedStr);
562       II = getIdentifierInfo(UCNIdentifierBuffer);
563     } else {
564       II = getIdentifierInfo(CleanedStr);
565     }
566   }
567 
568   // Update the token info (identifier info and appropriate token kind).
569   Identifier.setIdentifierInfo(II);
570   Identifier.setKind(II->getTokenID());
571 
572   return II;
573 }
574 
SetPoisonReason(IdentifierInfo * II,unsigned DiagID)575 void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) {
576   PoisonReasons[II] = DiagID;
577 }
578 
PoisonSEHIdentifiers(bool Poison)579 void Preprocessor::PoisonSEHIdentifiers(bool Poison) {
580   assert(Ident__exception_code && Ident__exception_info);
581   assert(Ident___exception_code && Ident___exception_info);
582   Ident__exception_code->setIsPoisoned(Poison);
583   Ident___exception_code->setIsPoisoned(Poison);
584   Ident_GetExceptionCode->setIsPoisoned(Poison);
585   Ident__exception_info->setIsPoisoned(Poison);
586   Ident___exception_info->setIsPoisoned(Poison);
587   Ident_GetExceptionInfo->setIsPoisoned(Poison);
588   Ident__abnormal_termination->setIsPoisoned(Poison);
589   Ident___abnormal_termination->setIsPoisoned(Poison);
590   Ident_AbnormalTermination->setIsPoisoned(Poison);
591 }
592 
HandlePoisonedIdentifier(Token & Identifier)593 void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) {
594   assert(Identifier.getIdentifierInfo() &&
595          "Can't handle identifiers without identifier info!");
596   llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it =
597     PoisonReasons.find(Identifier.getIdentifierInfo());
598   if(it == PoisonReasons.end())
599     Diag(Identifier, diag::err_pp_used_poisoned_id);
600   else
601     Diag(Identifier,it->second) << Identifier.getIdentifierInfo();
602 }
603 
604 /// \brief Returns a diagnostic message kind for reporting a future keyword as
605 /// appropriate for the identifier and specified language.
getFutureCompatDiagKind(const IdentifierInfo & II,const LangOptions & LangOpts)606 static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II,
607                                           const LangOptions &LangOpts) {
608   assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");
609 
610   if (LangOpts.CPlusPlus)
611     return llvm::StringSwitch<diag::kind>(II.getName())
612 #define CXX11_KEYWORD(NAME, FLAGS)                                             \
613         .Case(#NAME, diag::warn_cxx11_keyword)
614 #include "clang/Basic/TokenKinds.def"
615         ;
616 
617   llvm_unreachable(
618       "Keyword not known to come from a newer Standard or proposed Standard");
619 }
620 
621 /// HandleIdentifier - This callback is invoked when the lexer reads an
622 /// identifier.  This callback looks up the identifier in the map and/or
623 /// potentially macro expands it or turns it into a named token (like 'for').
624 ///
625 /// Note that callers of this method are guarded by checking the
626 /// IdentifierInfo's 'isHandleIdentifierCase' bit.  If this method changes, the
627 /// IdentifierInfo methods that compute these properties will need to change to
628 /// match.
HandleIdentifier(Token & Identifier)629 bool Preprocessor::HandleIdentifier(Token &Identifier) {
630   assert(Identifier.getIdentifierInfo() &&
631          "Can't handle identifiers without identifier info!");
632 
633   IdentifierInfo &II = *Identifier.getIdentifierInfo();
634 
635   // If the information about this identifier is out of date, update it from
636   // the external source.
637   // We have to treat __VA_ARGS__ in a special way, since it gets
638   // serialized with isPoisoned = true, but our preprocessor may have
639   // unpoisoned it if we're defining a C99 macro.
640   if (II.isOutOfDate()) {
641     bool CurrentIsPoisoned = false;
642     if (&II == Ident__VA_ARGS__)
643       CurrentIsPoisoned = Ident__VA_ARGS__->isPoisoned();
644 
645     ExternalSource->updateOutOfDateIdentifier(II);
646     Identifier.setKind(II.getTokenID());
647 
648     if (&II == Ident__VA_ARGS__)
649       II.setIsPoisoned(CurrentIsPoisoned);
650   }
651 
652   // If this identifier was poisoned, and if it was not produced from a macro
653   // expansion, emit an error.
654   if (II.isPoisoned() && CurPPLexer) {
655     HandlePoisonedIdentifier(Identifier);
656   }
657 
658   // If this is a macro to be expanded, do it.
659   if (MacroDefinition MD = getMacroDefinition(&II)) {
660     auto *MI = MD.getMacroInfo();
661     assert(MI && "macro definition with no macro info?");
662     if (!DisableMacroExpansion) {
663       if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
664         // C99 6.10.3p10: If the preprocessing token immediately after the
665         // macro name isn't a '(', this macro should not be expanded.
666         if (!MI->isFunctionLike() || isNextPPTokenLParen())
667           return HandleMacroExpandedIdentifier(Identifier, MD);
668       } else {
669         // C99 6.10.3.4p2 says that a disabled macro may never again be
670         // expanded, even if it's in a context where it could be expanded in the
671         // future.
672         Identifier.setFlag(Token::DisableExpand);
673         if (MI->isObjectLike() || isNextPPTokenLParen())
674           Diag(Identifier, diag::pp_disabled_macro_expansion);
675       }
676     }
677   }
678 
679   // If this identifier is a keyword in a newer Standard or proposed Standard,
680   // produce a warning. Don't warn if we're not considering macro expansion,
681   // since this identifier might be the name of a macro.
682   // FIXME: This warning is disabled in cases where it shouldn't be, like
683   //   "#define constexpr constexpr", "int constexpr;"
684   if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {
685     Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts()))
686         << II.getName();
687     // Don't diagnose this keyword again in this translation unit.
688     II.setIsFutureCompatKeyword(false);
689   }
690 
691   // C++ 2.11p2: If this is an alternative representation of a C++ operator,
692   // then we act as if it is the actual operator and not the textual
693   // representation of it.
694   if (II.isCPlusPlusOperatorKeyword())
695     Identifier.setIdentifierInfo(nullptr);
696 
697   // If this is an extension token, diagnose its use.
698   // We avoid diagnosing tokens that originate from macro definitions.
699   // FIXME: This warning is disabled in cases where it shouldn't be,
700   // like "#define TY typeof", "TY(1) x".
701   if (II.isExtensionToken() && !DisableMacroExpansion)
702     Diag(Identifier, diag::ext_token_used);
703 
704   // If this is the 'import' contextual keyword following an '@', note
705   // that the next token indicates a module name.
706   //
707   // Note that we do not treat 'import' as a contextual
708   // keyword when we're in a caching lexer, because caching lexers only get
709   // used in contexts where import declarations are disallowed.
710   if (LastTokenWasAt && II.isModulesImport() && !InMacroArgs &&
711       !DisableMacroExpansion &&
712       (getLangOpts().Modules || getLangOpts().DebuggerSupport) &&
713       CurLexerKind != CLK_CachingLexer) {
714     ModuleImportLoc = Identifier.getLocation();
715     ModuleImportPath.clear();
716     ModuleImportExpectsIdentifier = true;
717     CurLexerKind = CLK_LexAfterModuleImport;
718   }
719   return true;
720 }
721 
Lex(Token & Result)722 void Preprocessor::Lex(Token &Result) {
723   // We loop here until a lex function returns a token; this avoids recursion.
724   bool ReturnedToken;
725   do {
726     switch (CurLexerKind) {
727     case CLK_Lexer:
728       ReturnedToken = CurLexer->Lex(Result);
729       break;
730     case CLK_PTHLexer:
731       ReturnedToken = CurPTHLexer->Lex(Result);
732       break;
733     case CLK_TokenLexer:
734       ReturnedToken = CurTokenLexer->Lex(Result);
735       break;
736     case CLK_CachingLexer:
737       CachingLex(Result);
738       ReturnedToken = true;
739       break;
740     case CLK_LexAfterModuleImport:
741       LexAfterModuleImport(Result);
742       ReturnedToken = true;
743       break;
744     }
745   } while (!ReturnedToken);
746 
747   LastTokenWasAt = Result.is(tok::at);
748 }
749 
750 
751 /// \brief Lex a token following the 'import' contextual keyword.
752 ///
LexAfterModuleImport(Token & Result)753 void Preprocessor::LexAfterModuleImport(Token &Result) {
754   // Figure out what kind of lexer we actually have.
755   recomputeCurLexerKind();
756 
757   // Lex the next token.
758   Lex(Result);
759 
760   // The token sequence
761   //
762   //   import identifier (. identifier)*
763   //
764   // indicates a module import directive. We already saw the 'import'
765   // contextual keyword, so now we're looking for the identifiers.
766   if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
767     // We expected to see an identifier here, and we did; continue handling
768     // identifiers.
769     ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(),
770                                               Result.getLocation()));
771     ModuleImportExpectsIdentifier = false;
772     CurLexerKind = CLK_LexAfterModuleImport;
773     return;
774   }
775 
776   // If we're expecting a '.' or a ';', and we got a '.', then wait until we
777   // see the next identifier.
778   if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
779     ModuleImportExpectsIdentifier = true;
780     CurLexerKind = CLK_LexAfterModuleImport;
781     return;
782   }
783 
784   // If we have a non-empty module path, load the named module.
785   if (!ModuleImportPath.empty()) {
786     Module *Imported = nullptr;
787     if (getLangOpts().Modules) {
788       Imported = TheModuleLoader.loadModule(ModuleImportLoc,
789                                             ModuleImportPath,
790                                             Module::Hidden,
791                                             /*IsIncludeDirective=*/false);
792       if (Imported)
793         makeModuleVisible(Imported, ModuleImportLoc);
794     }
795     if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport))
796       Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);
797   }
798 }
799 
makeModuleVisible(Module * M,SourceLocation Loc)800 void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) {
801   CurSubmoduleState->VisibleModules.setVisible(
802       M, Loc, [](Module *) {},
803       [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
804         // FIXME: Include the path in the diagnostic.
805         // FIXME: Include the import location for the conflicting module.
806         Diag(ModuleImportLoc, diag::warn_module_conflict)
807             << Path[0]->getFullModuleName()
808             << Conflict->getFullModuleName()
809             << Message;
810       });
811 
812   // Add this module to the imports list of the currently-built submodule.
813   if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
814     BuildingSubmoduleStack.back().M->Imports.insert(M);
815 }
816 
FinishLexStringLiteral(Token & Result,std::string & String,const char * DiagnosticTag,bool AllowMacroExpansion)817 bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
818                                           const char *DiagnosticTag,
819                                           bool AllowMacroExpansion) {
820   // We need at least one string literal.
821   if (Result.isNot(tok::string_literal)) {
822     Diag(Result, diag::err_expected_string_literal)
823       << /*Source='in...'*/0 << DiagnosticTag;
824     return false;
825   }
826 
827   // Lex string literal tokens, optionally with macro expansion.
828   SmallVector<Token, 4> StrToks;
829   do {
830     StrToks.push_back(Result);
831 
832     if (Result.hasUDSuffix())
833       Diag(Result, diag::err_invalid_string_udl);
834 
835     if (AllowMacroExpansion)
836       Lex(Result);
837     else
838       LexUnexpandedToken(Result);
839   } while (Result.is(tok::string_literal));
840 
841   // Concatenate and parse the strings.
842   StringLiteralParser Literal(StrToks, *this);
843   assert(Literal.isAscii() && "Didn't allow wide strings in");
844 
845   if (Literal.hadError)
846     return false;
847 
848   if (Literal.Pascal) {
849     Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
850       << /*Source='in...'*/0 << DiagnosticTag;
851     return false;
852   }
853 
854   String = Literal.GetString();
855   return true;
856 }
857 
parseSimpleIntegerLiteral(Token & Tok,uint64_t & Value)858 bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
859   assert(Tok.is(tok::numeric_constant));
860   SmallString<8> IntegerBuffer;
861   bool NumberInvalid = false;
862   StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid);
863   if (NumberInvalid)
864     return false;
865   NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this);
866   if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
867     return false;
868   llvm::APInt APVal(64, 0);
869   if (Literal.GetIntegerValue(APVal))
870     return false;
871   Lex(Tok);
872   Value = APVal.getLimitedValue();
873   return true;
874 }
875 
addCommentHandler(CommentHandler * Handler)876 void Preprocessor::addCommentHandler(CommentHandler *Handler) {
877   assert(Handler && "NULL comment handler");
878   assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) ==
879          CommentHandlers.end() && "Comment handler already registered");
880   CommentHandlers.push_back(Handler);
881 }
882 
removeCommentHandler(CommentHandler * Handler)883 void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
884   std::vector<CommentHandler *>::iterator Pos
885   = std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler);
886   assert(Pos != CommentHandlers.end() && "Comment handler not registered");
887   CommentHandlers.erase(Pos);
888 }
889 
HandleComment(Token & result,SourceRange Comment)890 bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
891   bool AnyPendingTokens = false;
892   for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(),
893        HEnd = CommentHandlers.end();
894        H != HEnd; ++H) {
895     if ((*H)->HandleComment(*this, Comment))
896       AnyPendingTokens = true;
897   }
898   if (!AnyPendingTokens || getCommentRetentionState())
899     return false;
900   Lex(result);
901   return true;
902 }
903 
~ModuleLoader()904 ModuleLoader::~ModuleLoader() { }
905 
~CommentHandler()906 CommentHandler::~CommentHandler() { }
907 
~CodeCompletionHandler()908 CodeCompletionHandler::~CodeCompletionHandler() { }
909 
createPreprocessingRecord()910 void Preprocessor::createPreprocessingRecord() {
911   if (Record)
912     return;
913 
914   Record = new PreprocessingRecord(getSourceManager());
915   addPPCallbacks(std::unique_ptr<PPCallbacks>(Record));
916 }
917