1 //===--- Preprocessor.h - C Language Family Preprocessor --------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief Defines the clang::Preprocessor interface.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_LEX_PREPROCESSOR_H
16 #define LLVM_CLANG_LEX_PREPROCESSOR_H
17 
18 #include "clang/Basic/Builtins.h"
19 #include "clang/Basic/Diagnostic.h"
20 #include "clang/Basic/IdentifierTable.h"
21 #include "clang/Basic/SourceLocation.h"
22 #include "clang/Lex/Lexer.h"
23 #include "clang/Lex/MacroInfo.h"
24 #include "clang/Lex/ModuleMap.h"
25 #include "clang/Lex/PPCallbacks.h"
26 #include "clang/Lex/PTHLexer.h"
27 #include "clang/Lex/TokenLexer.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/DenseMap.h"
30 #include "llvm/ADT/IntrusiveRefCntPtr.h"
31 #include "llvm/ADT/SmallPtrSet.h"
32 #include "llvm/ADT/SmallVector.h"
33 #include "llvm/ADT/TinyPtrVector.h"
34 #include "llvm/Support/Allocator.h"
35 #include <memory>
36 #include <vector>
37 
38 namespace llvm {
39   template<unsigned InternalLen> class SmallString;
40 }
41 
42 namespace clang {
43 
44 class SourceManager;
45 class ExternalPreprocessorSource;
46 class FileManager;
47 class FileEntry;
48 class HeaderSearch;
49 class PragmaNamespace;
50 class PragmaHandler;
51 class CommentHandler;
52 class ScratchBuffer;
53 class TargetInfo;
54 class PPCallbacks;
55 class CodeCompletionHandler;
56 class DirectoryLookup;
57 class PreprocessingRecord;
58 class ModuleLoader;
59 class PTHManager;
60 class PreprocessorOptions;
61 
62 /// \brief Stores token information for comparing actual tokens with
63 /// predefined values.  Only handles simple tokens and identifiers.
64 class TokenValue {
65   tok::TokenKind Kind;
66   IdentifierInfo *II;
67 
68 public:
TokenValue(tok::TokenKind Kind)69   TokenValue(tok::TokenKind Kind) : Kind(Kind), II(nullptr) {
70     assert(Kind != tok::raw_identifier && "Raw identifiers are not supported.");
71     assert(Kind != tok::identifier &&
72            "Identifiers should be created by TokenValue(IdentifierInfo *)");
73     assert(!tok::isLiteral(Kind) && "Literals are not supported.");
74     assert(!tok::isAnnotation(Kind) && "Annotations are not supported.");
75   }
TokenValue(IdentifierInfo * II)76   TokenValue(IdentifierInfo *II) : Kind(tok::identifier), II(II) {}
77   bool operator==(const Token &Tok) const {
78     return Tok.getKind() == Kind &&
79         (!II || II == Tok.getIdentifierInfo());
80   }
81 };
82 
83 /// \brief Context in which macro name is used.
enum MacroUse {
  /// Any use other than in \#define or \#undef.
  MU_Other = 0,
  /// The macro name is specified in a \#define.
  MU_Define = 1,
  /// The macro name is specified in an \#undef.
  MU_Undef = 2
};
89 
90 /// \brief Engages in a tight little dance with the lexer to efficiently
91 /// preprocess tokens.
92 ///
93 /// Lexers know only about tokens within a single source file, and don't
94 /// know anything about preprocessor-level issues like the \#include stack,
95 /// token expansion, etc.
96 class Preprocessor : public RefCountedBase<Preprocessor> {
97   IntrusiveRefCntPtr<PreprocessorOptions> PPOpts;
98   DiagnosticsEngine        *Diags;
99   LangOptions       &LangOpts;
100   const TargetInfo  *Target;
101   const TargetInfo  *AuxTarget;
102   FileManager       &FileMgr;
103   SourceManager     &SourceMgr;
104   std::unique_ptr<ScratchBuffer> ScratchBuf;
105   HeaderSearch      &HeaderInfo;
106   ModuleLoader      &TheModuleLoader;
107 
108   /// \brief External source of macros.
109   ExternalPreprocessorSource *ExternalSource;
110 
111 
112   /// An optional PTHManager object used for getting tokens from
113   /// a token cache rather than lexing the original source file.
114   std::unique_ptr<PTHManager> PTH;
115 
116   /// A BumpPtrAllocator object used to quickly allocate and release
117   /// objects internal to the Preprocessor.
118   llvm::BumpPtrAllocator BP;
119 
120   /// Identifiers for builtin macros and other builtins.
121   IdentifierInfo *Ident__LINE__, *Ident__FILE__;   // __LINE__, __FILE__
122   IdentifierInfo *Ident__DATE__, *Ident__TIME__;   // __DATE__, __TIME__
123   IdentifierInfo *Ident__INCLUDE_LEVEL__;          // __INCLUDE_LEVEL__
124   IdentifierInfo *Ident__BASE_FILE__;              // __BASE_FILE__
125   IdentifierInfo *Ident__TIMESTAMP__;              // __TIMESTAMP__
126   IdentifierInfo *Ident__COUNTER__;                // __COUNTER__
127   IdentifierInfo *Ident_Pragma, *Ident__pragma;    // _Pragma, __pragma
128   IdentifierInfo *Ident__identifier;               // __identifier
129   IdentifierInfo *Ident__VA_ARGS__;                // __VA_ARGS__
130   IdentifierInfo *Ident__has_feature;              // __has_feature
131   IdentifierInfo *Ident__has_extension;            // __has_extension
132   IdentifierInfo *Ident__has_builtin;              // __has_builtin
133   IdentifierInfo *Ident__has_attribute;            // __has_attribute
134   IdentifierInfo *Ident__has_include;              // __has_include
135   IdentifierInfo *Ident__has_include_next;         // __has_include_next
136   IdentifierInfo *Ident__has_warning;              // __has_warning
137   IdentifierInfo *Ident__is_identifier;            // __is_identifier
138   IdentifierInfo *Ident__building_module;          // __building_module
139   IdentifierInfo *Ident__MODULE__;                 // __MODULE__
140   IdentifierInfo *Ident__has_cpp_attribute;        // __has_cpp_attribute
141   IdentifierInfo *Ident__has_declspec;             // __has_declspec_attribute
142 
143   SourceLocation DATELoc, TIMELoc;
144   unsigned CounterValue;  // Next __COUNTER__ value.
145 
/// \brief Maximum depth of \#includes.
enum { MaxAllowedIncludeStackDepth = 200 };
150 
151   // State that is set before the preprocessor begins.
152   bool KeepComments : 1;
153   bool KeepMacroComments : 1;
154   bool SuppressIncludeNotFoundError : 1;
155 
156   // State that changes while the preprocessor runs:
157   bool InMacroArgs : 1;            // True if parsing fn macro invocation args.
158 
159   /// Whether the preprocessor owns the header search object.
160   bool OwnsHeaderSearch : 1;
161 
162   /// True if macro expansion is disabled.
163   bool DisableMacroExpansion : 1;
164 
165   /// Temporarily disables DisableMacroExpansion (i.e. enables expansion)
166   /// when parsing preprocessor directives.
167   bool MacroExpansionInDirectivesOverride : 1;
168 
169   class ResetMacroExpansionHelper;
170 
171   /// \brief Whether we have already loaded macros from the external source.
172   mutable bool ReadMacrosFromExternalSource : 1;
173 
174   /// \brief True if pragmas are enabled.
175   bool PragmasEnabled : 1;
176 
177   /// \brief True if the current build action is a preprocessing action.
178   bool PreprocessedOutput : 1;
179 
180   /// \brief True if we are currently preprocessing a #if or #elif directive
181   bool ParsingIfOrElifDirective;
182 
183   /// \brief True if we are pre-expanding macro arguments.
184   bool InMacroArgPreExpansion;
185 
186   /// \brief Mapping/lookup information for all identifiers in
187   /// the program, including program keywords.
188   mutable IdentifierTable Identifiers;
189 
190   /// \brief This table contains all the selectors in the program.
191   ///
192   /// Unlike IdentifierTable above, this table *isn't* populated by the
193   /// preprocessor. It is declared/expanded here because its role/lifetime is
194   /// conceptually similar to the IdentifierTable. In addition, the current
195   /// control flow (in clang::ParseAST()), makes it convenient to put here.
196   ///
197   /// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to
198   /// the lifetime of the preprocessor.
199   SelectorTable Selectors;
200 
201   /// \brief Information about builtins.
202   Builtin::Context BuiltinInfo;
203 
204   /// \brief Tracks all of the pragmas that the client registered
205   /// with this preprocessor.
206   std::unique_ptr<PragmaNamespace> PragmaHandlers;
207 
208   /// \brief Pragma handlers of the original source are stored here during the
209   /// parsing of a model file.
210   std::unique_ptr<PragmaNamespace> PragmaHandlersBackup;
211 
212   /// \brief Tracks all of the comment handlers that the client registered
213   /// with this preprocessor.
214   std::vector<CommentHandler *> CommentHandlers;
215 
216   /// \brief True if we want to ignore EOF token and continue later on (thus
217   /// avoid tearing the Lexer and etc. down).
218   bool IncrementalProcessing;
219 
220   /// The kind of translation unit we are processing.
221   TranslationUnitKind TUKind;
222 
223   /// \brief The code-completion handler.
224   CodeCompletionHandler *CodeComplete;
225 
226   /// \brief The file that we're performing code-completion for, if any.
227   const FileEntry *CodeCompletionFile;
228 
229   /// \brief The offset in file for the code-completion point.
230   unsigned CodeCompletionOffset;
231 
232   /// \brief The location for the code-completion point. This gets instantiated
233   /// when the CodeCompletionFile gets \#include'ed for preprocessing.
234   SourceLocation CodeCompletionLoc;
235 
236   /// \brief The start location for the file of the code-completion point.
237   ///
238   /// This gets instantiated when the CodeCompletionFile gets \#include'ed
239   /// for preprocessing.
240   SourceLocation CodeCompletionFileLoc;
241 
242   /// \brief The source location of the \c import contextual keyword we just
243   /// lexed, if any.
244   SourceLocation ModuleImportLoc;
245 
246   /// \brief The module import path that we're currently processing.
247   SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> ModuleImportPath;
248 
249   /// \brief Whether the last token we lexed was an '@'.
250   bool LastTokenWasAt;
251 
252   /// \brief Whether the module import expects an identifier next. Otherwise,
253   /// it expects a '.' or ';'.
254   bool ModuleImportExpectsIdentifier;
255 
256   /// \brief The source location of the currently-active
257   /// \#pragma clang arc_cf_code_audited begin.
258   SourceLocation PragmaARCCFCodeAuditedLoc;
259 
260   /// \brief The source location of the currently-active
261   /// \#pragma clang assume_nonnull begin.
262   SourceLocation PragmaAssumeNonNullLoc;
263 
264   /// \brief True if we hit the code-completion point.
265   bool CodeCompletionReached;
266 
267   /// \brief The directory that the main file should be considered to occupy,
268   /// if it does not correspond to a real file (as happens when building a
269   /// module).
270   const DirectoryEntry *MainFileDir;
271 
272   /// \brief The number of bytes that we will initially skip when entering the
273   /// main file, along with a flag that indicates whether skipping this number
274   /// of bytes will place the lexer at the start of a line.
275   ///
276   /// This is used when loading a precompiled preamble.
277   std::pair<int, bool> SkipMainFilePreamble;
278 
279   /// \brief The current top of the stack that we're lexing from if
280   /// not expanding a macro and we are lexing directly from source code.
281   ///
282   /// Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null.
283   std::unique_ptr<Lexer> CurLexer;
284 
285   /// \brief The current top of stack that we're lexing from if
286   /// not expanding from a macro and we are lexing from a PTH cache.
287   ///
288   /// Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null.
289   std::unique_ptr<PTHLexer> CurPTHLexer;
290 
291   /// \brief The current top of the stack that we're lexing from
292   /// if not expanding a macro.
293   ///
294   /// This is an alias for either CurLexer or  CurPTHLexer.
295   PreprocessorLexer *CurPPLexer;
296 
297   /// \brief Used to find the current FileEntry, if CurLexer is non-null
298   /// and if applicable.
299   ///
300   /// This allows us to implement \#include_next and find directory-specific
301   /// properties.
302   const DirectoryLookup *CurDirLookup;
303 
304   /// \brief The current macro we are expanding, if we are expanding a macro.
305   ///
306   /// One of CurLexer and CurTokenLexer must be null.
307   std::unique_ptr<TokenLexer> CurTokenLexer;
308 
309   /// \brief The kind of lexer we're currently working with.
enum CurLexerKind {
  // Values are spelled out; they match the implicit numbering.
  CLK_Lexer = 0,
  CLK_PTHLexer = 1,
  CLK_TokenLexer = 2,
  CLK_CachingLexer = 3,
  CLK_LexAfterModuleImport = 4
} CurLexerKind;
317 
318   /// \brief If the current lexer is for a submodule that is being built, this
319   /// is that submodule.
320   Module *CurSubmodule;
321 
322   /// \brief Keeps track of the stack of files currently
323   /// \#included, and macros currently being expanded from, not counting
324   /// CurLexer/CurTokenLexer.
325   struct IncludeStackInfo {
326     enum CurLexerKind           CurLexerKind;
327     Module                     *TheSubmodule;
328     std::unique_ptr<Lexer>      TheLexer;
329     std::unique_ptr<PTHLexer>   ThePTHLexer;
330     PreprocessorLexer          *ThePPLexer;
331     std::unique_ptr<TokenLexer> TheTokenLexer;
332     const DirectoryLookup      *TheDirLookup;
333 
334     // The following constructors are completely useless copies of the default
335     // versions, only needed to pacify MSVC.
IncludeStackInfoIncludeStackInfo336     IncludeStackInfo(enum CurLexerKind CurLexerKind, Module *TheSubmodule,
337                      std::unique_ptr<Lexer> &&TheLexer,
338                      std::unique_ptr<PTHLexer> &&ThePTHLexer,
339                      PreprocessorLexer *ThePPLexer,
340                      std::unique_ptr<TokenLexer> &&TheTokenLexer,
341                      const DirectoryLookup *TheDirLookup)
342         : CurLexerKind(std::move(CurLexerKind)),
343           TheSubmodule(std::move(TheSubmodule)), TheLexer(std::move(TheLexer)),
344           ThePTHLexer(std::move(ThePTHLexer)),
345           ThePPLexer(std::move(ThePPLexer)),
346           TheTokenLexer(std::move(TheTokenLexer)),
347           TheDirLookup(std::move(TheDirLookup)) {}
IncludeStackInfoIncludeStackInfo348     IncludeStackInfo(IncludeStackInfo &&RHS)
349         : CurLexerKind(std::move(RHS.CurLexerKind)),
350           TheSubmodule(std::move(RHS.TheSubmodule)),
351           TheLexer(std::move(RHS.TheLexer)),
352           ThePTHLexer(std::move(RHS.ThePTHLexer)),
353           ThePPLexer(std::move(RHS.ThePPLexer)),
354           TheTokenLexer(std::move(RHS.TheTokenLexer)),
355           TheDirLookup(std::move(RHS.TheDirLookup)) {}
356   };
357   std::vector<IncludeStackInfo> IncludeMacroStack;
358 
359   /// \brief Actions invoked when some preprocessor activity is
360   /// encountered (e.g. a file is \#included, etc).
361   std::unique_ptr<PPCallbacks> Callbacks;
362 
363   struct MacroExpandsInfo {
364     Token Tok;
365     MacroDefinition MD;
366     SourceRange Range;
MacroExpandsInfoMacroExpandsInfo367     MacroExpandsInfo(Token Tok, MacroDefinition MD, SourceRange Range)
368       : Tok(Tok), MD(MD), Range(Range) { }
369   };
370   SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks;
371 
372   /// Information about a name that has been used to define a module macro.
373   struct ModuleMacroInfo {
ModuleMacroInfoModuleMacroInfo374     ModuleMacroInfo(MacroDirective *MD)
375         : MD(MD), ActiveModuleMacrosGeneration(0), IsAmbiguous(false) {}
376 
377     /// The most recent macro directive for this identifier.
378     MacroDirective *MD;
379     /// The active module macros for this identifier.
380     llvm::TinyPtrVector<ModuleMacro*> ActiveModuleMacros;
381     /// The generation number at which we last updated ActiveModuleMacros.
382     /// \see Preprocessor::VisibleModules.
383     unsigned ActiveModuleMacrosGeneration;
384     /// Whether this macro name is ambiguous.
385     bool IsAmbiguous;
386     /// The module macros that are overridden by this macro.
387     llvm::TinyPtrVector<ModuleMacro*> OverriddenMacros;
388   };
389 
390   /// The state of a macro for an identifier.
391   class MacroState {
392     mutable llvm::PointerUnion<MacroDirective *, ModuleMacroInfo *> State;
393 
getModuleInfo(Preprocessor & PP,const IdentifierInfo * II)394     ModuleMacroInfo *getModuleInfo(Preprocessor &PP,
395                                    const IdentifierInfo *II) const {
396       // FIXME: Find a spare bit on IdentifierInfo and store a
397       //        HasModuleMacros flag.
398       if (!II->hasMacroDefinition() ||
399           (!PP.getLangOpts().Modules &&
400            !PP.getLangOpts().ModulesLocalVisibility) ||
401           !PP.CurSubmoduleState->VisibleModules.getGeneration())
402         return nullptr;
403 
404       auto *Info = State.dyn_cast<ModuleMacroInfo*>();
405       if (!Info) {
406         Info = new (PP.getPreprocessorAllocator())
407             ModuleMacroInfo(State.get<MacroDirective *>());
408         State = Info;
409       }
410 
411       if (PP.CurSubmoduleState->VisibleModules.getGeneration() !=
412           Info->ActiveModuleMacrosGeneration)
413         PP.updateModuleMacroInfo(II, *Info);
414       return Info;
415     }
416 
417   public:
MacroState()418     MacroState() : MacroState(nullptr) {}
MacroState(MacroDirective * MD)419     MacroState(MacroDirective *MD) : State(MD) {}
MacroState(MacroState && O)420     MacroState(MacroState &&O) LLVM_NOEXCEPT : State(O.State) {
421       O.State = (MacroDirective *)nullptr;
422     }
423     MacroState &operator=(MacroState &&O) LLVM_NOEXCEPT {
424       auto S = O.State;
425       O.State = (MacroDirective *)nullptr;
426       State = S;
427       return *this;
428     }
~MacroState()429     ~MacroState() {
430       if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
431         Info->~ModuleMacroInfo();
432     }
433 
getLatest()434     MacroDirective *getLatest() const {
435       if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
436         return Info->MD;
437       return State.get<MacroDirective*>();
438     }
setLatest(MacroDirective * MD)439     void setLatest(MacroDirective *MD) {
440       if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
441         Info->MD = MD;
442       else
443         State = MD;
444     }
445 
isAmbiguous(Preprocessor & PP,const IdentifierInfo * II)446     bool isAmbiguous(Preprocessor &PP, const IdentifierInfo *II) const {
447       auto *Info = getModuleInfo(PP, II);
448       return Info ? Info->IsAmbiguous : false;
449     }
450     ArrayRef<ModuleMacro *>
getActiveModuleMacros(Preprocessor & PP,const IdentifierInfo * II)451     getActiveModuleMacros(Preprocessor &PP, const IdentifierInfo *II) const {
452       if (auto *Info = getModuleInfo(PP, II))
453         return Info->ActiveModuleMacros;
454       return None;
455     }
456 
findDirectiveAtLoc(SourceLocation Loc,SourceManager & SourceMgr)457     MacroDirective::DefInfo findDirectiveAtLoc(SourceLocation Loc,
458                                                SourceManager &SourceMgr) const {
459       // FIXME: Incorporate module macros into the result of this.
460       if (auto *Latest = getLatest())
461         return Latest->findDirectiveAtLoc(Loc, SourceMgr);
462       return MacroDirective::DefInfo();
463     }
464 
overrideActiveModuleMacros(Preprocessor & PP,IdentifierInfo * II)465     void overrideActiveModuleMacros(Preprocessor &PP, IdentifierInfo *II) {
466       if (auto *Info = getModuleInfo(PP, II)) {
467         Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
468                                       Info->ActiveModuleMacros.begin(),
469                                       Info->ActiveModuleMacros.end());
470         Info->ActiveModuleMacros.clear();
471         Info->IsAmbiguous = false;
472       }
473     }
getOverriddenMacros()474     ArrayRef<ModuleMacro*> getOverriddenMacros() const {
475       if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
476         return Info->OverriddenMacros;
477       return None;
478     }
setOverriddenMacros(Preprocessor & PP,ArrayRef<ModuleMacro * > Overrides)479     void setOverriddenMacros(Preprocessor &PP,
480                              ArrayRef<ModuleMacro *> Overrides) {
481       auto *Info = State.dyn_cast<ModuleMacroInfo*>();
482       if (!Info) {
483         if (Overrides.empty())
484           return;
485         Info = new (PP.getPreprocessorAllocator())
486             ModuleMacroInfo(State.get<MacroDirective *>());
487         State = Info;
488       }
489       Info->OverriddenMacros.clear();
490       Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
491                                     Overrides.begin(), Overrides.end());
492       Info->ActiveModuleMacrosGeneration = 0;
493     }
494   };
495 
496   /// For each IdentifierInfo that was associated with a macro, we
497   /// keep a mapping to the history of all macro definitions and #undefs in
498   /// the reverse order (the latest one is in the head of the list).
499   ///
500   /// This mapping lives within the \p CurSubmoduleState.
501   typedef llvm::DenseMap<const IdentifierInfo *, MacroState> MacroMap;
502 
503   friend class ASTReader;
504 
505   struct SubmoduleState;
506 
507   /// \brief Information about a submodule that we're currently building.
508   struct BuildingSubmoduleInfo {
BuildingSubmoduleInfoBuildingSubmoduleInfo509     BuildingSubmoduleInfo(Module *M, SourceLocation ImportLoc,
510                           SubmoduleState *OuterSubmoduleState)
511         : M(M), ImportLoc(ImportLoc), OuterSubmoduleState(OuterSubmoduleState) {
512     }
513 
514     /// The module that we are building.
515     Module *M;
516     /// The location at which the module was included.
517     SourceLocation ImportLoc;
518     /// The previous SubmoduleState.
519     SubmoduleState *OuterSubmoduleState;
520   };
521   SmallVector<BuildingSubmoduleInfo, 8> BuildingSubmoduleStack;
522 
523   /// \brief Information about a submodule's preprocessor state.
524   struct SubmoduleState {
525     /// The macros for the submodule.
526     MacroMap Macros;
527     /// The set of modules that are visible within the submodule.
528     VisibleModuleSet VisibleModules;
529     // FIXME: CounterValue?
530     // FIXME: PragmaPushMacroInfo?
531   };
532   std::map<Module*, SubmoduleState> Submodules;
533 
534   /// The preprocessor state for preprocessing outside of any submodule.
535   SubmoduleState NullSubmoduleState;
536 
537   /// The current submodule state. Will be \p NullSubmoduleState if we're not
538   /// in a submodule.
539   SubmoduleState *CurSubmoduleState;
540 
541   /// The set of known macros exported from modules.
542   llvm::FoldingSet<ModuleMacro> ModuleMacros;
543 
544   /// The list of module macros, for each identifier, that are not overridden by
545   /// any other module macro.
546   llvm::DenseMap<const IdentifierInfo *, llvm::TinyPtrVector<ModuleMacro*>>
547       LeafModuleMacros;
548 
549   /// \brief Macros that we want to warn because they are not used at the end
550   /// of the translation unit.
551   ///
552   /// We store just their SourceLocations instead of
553   /// something like MacroInfo*. The benefit of this is that when we are
554   /// deserializing from PCH, we don't need to deserialize identifier & macros
555   /// just so that we can report that they are unused, we just warn using
556   /// the SourceLocations of this set (that will be filled by the ASTReader).
557   /// We are using SmallPtrSet instead of a vector for faster removal.
558   typedef llvm::SmallPtrSet<SourceLocation, 32> WarnUnusedMacroLocsTy;
559   WarnUnusedMacroLocsTy WarnUnusedMacroLocs;
560 
561   /// \brief A "freelist" of MacroArg objects that can be
562   /// reused for quick allocation.
563   MacroArgs *MacroArgCache;
564   friend class MacroArgs;
565 
566   /// For each IdentifierInfo used in a \#pragma push_macro directive,
567   /// we keep a MacroInfo stack used to restore the previous macro value.
568   llvm::DenseMap<IdentifierInfo*, std::vector<MacroInfo*> > PragmaPushMacroInfo;
569 
570   // Various statistics we track for performance analysis.
571   unsigned NumDirectives, NumDefined, NumUndefined, NumPragma;
572   unsigned NumIf, NumElse, NumEndif;
573   unsigned NumEnteredSourceFiles, MaxIncludeStackDepth;
574   unsigned NumMacroExpanded, NumFnMacroExpanded, NumBuiltinMacroExpanded;
575   unsigned NumFastMacroExpanded, NumTokenPaste, NumFastTokenPaste;
576   unsigned NumSkipped;
577 
578   /// \brief The predefined macros that preprocessor should use from the
579   /// command line etc.
580   std::string Predefines;
581 
582   /// \brief The file ID for the preprocessor predefines.
583   FileID PredefinesFileID;
584 
585   /// \{
586   /// \brief Cache of macro expanders to reduce malloc traffic.
enum {
  // Number of slots in the TokenLexerCache array.
  TokenLexerCacheSize = 8
};
588   unsigned NumCachedTokenLexers;
589   std::unique_ptr<TokenLexer> TokenLexerCache[TokenLexerCacheSize];
590   /// \}
591 
592   /// \brief Keeps macro expanded tokens for TokenLexers.
593   ///
594   /// Works like a stack; a TokenLexer adds the macro expanded tokens that it is
595   /// going to lex in the cache and when it finishes the tokens are removed
596   /// from the end of the cache.
597   SmallVector<Token, 16> MacroExpandedTokens;
598   std::vector<std::pair<TokenLexer *, size_t> > MacroExpandingLexersStack;
599 
600   /// \brief A record of the macro definitions and expansions that
601   /// occurred during preprocessing.
602   ///
603   /// This is an optional side structure that can be enabled with
604   /// \c createPreprocessingRecord() prior to preprocessing.
605   PreprocessingRecord *Record;
606 
607   /// Cached tokens state.
608   typedef SmallVector<Token, 1> CachedTokensTy;
609 
610   /// \brief Cached tokens are stored here when we do backtracking or
611   /// lookahead. They are "lexed" by the CachingLex() method.
612   CachedTokensTy CachedTokens;
613 
614   /// \brief The position of the cached token that CachingLex() should
615   /// "lex" next.
616   ///
617   /// If it points beyond the CachedTokens vector, it means that a normal
618   /// Lex() should be invoked.
619   CachedTokensTy::size_type CachedLexPos;
620 
621   /// \brief Stack of backtrack positions, allowing nested backtracks.
622   ///
623   /// The EnableBacktrackAtThisPos() method pushes a position to
624   /// indicate where CachedLexPos should be set when the BackTrack() method is
625   /// invoked (at which point the last position is popped).
626   std::vector<CachedTokensTy::size_type> BacktrackPositions;
627 
628   struct MacroInfoChain {
629     MacroInfo MI;
630     MacroInfoChain *Next;
631   };
632 
633   /// MacroInfos are managed as a chain for easy disposal.  This is the head
634   /// of that list.
635   MacroInfoChain *MIChainHead;
636 
637   struct DeserializedMacroInfoChain {
638     MacroInfo MI;
639     unsigned OwningModuleID; // MUST be immediately after the MacroInfo object
640                      // so it can be accessed by MacroInfo::getOwningModuleID().
641     DeserializedMacroInfoChain *Next;
642   };
643   DeserializedMacroInfoChain *DeserialMIChainHead;
644 
645 public:
646   Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts,
647                DiagnosticsEngine &diags, LangOptions &opts,
648                SourceManager &SM, HeaderSearch &Headers,
649                ModuleLoader &TheModuleLoader,
650                IdentifierInfoLookup *IILookup = nullptr,
651                bool OwnsHeaderSearch = false,
652                TranslationUnitKind TUKind = TU_Complete);
653 
654   ~Preprocessor();
655 
656   /// \brief Initialize the preprocessor using information about the target.
657   ///
658   /// \param Target is owned by the caller and must remain valid for the
659   /// lifetime of the preprocessor.
660   /// \param AuxTarget is owned by the caller and must remain valid for
661   /// the lifetime of the preprocessor.
662   void Initialize(const TargetInfo &Target,
663                   const TargetInfo *AuxTarget = nullptr);
664 
665   /// \brief Initialize the preprocessor to parse a model file
666   ///
667   /// To parse model files the preprocessor of the original source is reused to
668   /// preserve the identifier table. However to avoid some duplicate
669   /// information in the preprocessor some cleanup is needed before it is used
670   /// to parse model files. This method does that cleanup.
671   void InitializeForModelFile();
672 
673   /// \brief Cleanup after model file parsing
674   void FinalizeForModelFile();
675 
676   /// \brief Retrieve the preprocessor options used to initialize this
677   /// preprocessor.
getPreprocessorOpts()678   PreprocessorOptions &getPreprocessorOpts() const { return *PPOpts; }
679 
getDiagnostics()680   DiagnosticsEngine &getDiagnostics() const { return *Diags; }
setDiagnostics(DiagnosticsEngine & D)681   void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; }
682 
getLangOpts()683   const LangOptions &getLangOpts() const { return LangOpts; }
getTargetInfo()684   const TargetInfo &getTargetInfo() const { return *Target; }
getAuxTargetInfo()685   const TargetInfo *getAuxTargetInfo() const { return AuxTarget; }
getFileManager()686   FileManager &getFileManager() const { return FileMgr; }
getSourceManager()687   SourceManager &getSourceManager() const { return SourceMgr; }
getHeaderSearchInfo()688   HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; }
689 
getIdentifierTable()690   IdentifierTable &getIdentifierTable() { return Identifiers; }
getIdentifierTable()691   const IdentifierTable &getIdentifierTable() const { return Identifiers; }
getSelectorTable()692   SelectorTable &getSelectorTable() { return Selectors; }
getBuiltinInfo()693   Builtin::Context &getBuiltinInfo() { return BuiltinInfo; }
getPreprocessorAllocator()694   llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; }
695 
696   void setPTHManager(PTHManager* pm);
697 
getPTHManager()698   PTHManager *getPTHManager() { return PTH.get(); }
699 
setExternalSource(ExternalPreprocessorSource * Source)700   void setExternalSource(ExternalPreprocessorSource *Source) {
701     ExternalSource = Source;
702   }
703 
getExternalSource()704   ExternalPreprocessorSource *getExternalSource() const {
705     return ExternalSource;
706   }
707 
708   /// \brief Retrieve the module loader associated with this preprocessor.
getModuleLoader()709   ModuleLoader &getModuleLoader() const { return TheModuleLoader; }
710 
hadModuleLoaderFatalFailure()711   bool hadModuleLoaderFatalFailure() const {
712     return TheModuleLoader.HadFatalFailure;
713   }
714 
715   /// \brief True if we are currently preprocessing a #if or #elif directive
isParsingIfOrElifDirective()716   bool isParsingIfOrElifDirective() const {
717     return ParsingIfOrElifDirective;
718   }
719 
720   /// \brief Control whether the preprocessor retains comments in output.
SetCommentRetentionState(bool KeepComments,bool KeepMacroComments)721   void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) {
722     this->KeepComments = KeepComments | KeepMacroComments;
723     this->KeepMacroComments = KeepMacroComments;
724   }
725 
getCommentRetentionState()726   bool getCommentRetentionState() const { return KeepComments; }
727 
setPragmasEnabled(bool Enabled)728   void setPragmasEnabled(bool Enabled) { PragmasEnabled = Enabled; }
getPragmasEnabled()729   bool getPragmasEnabled() const { return PragmasEnabled; }
730 
SetSuppressIncludeNotFoundError(bool Suppress)731   void SetSuppressIncludeNotFoundError(bool Suppress) {
732     SuppressIncludeNotFoundError = Suppress;
733   }
734 
GetSuppressIncludeNotFoundError()735   bool GetSuppressIncludeNotFoundError() {
736     return SuppressIncludeNotFoundError;
737   }
738 
739   /// Sets whether the preprocessor is responsible for producing output or if
740   /// it is producing tokens to be consumed by Parse and Sema.
setPreprocessedOutput(bool IsPreprocessedOutput)741   void setPreprocessedOutput(bool IsPreprocessedOutput) {
742     PreprocessedOutput = IsPreprocessedOutput;
743   }
744 
745   /// Returns true if the preprocessor is responsible for generating output,
746   /// false if it is producing tokens to be consumed by Parse and Sema.
isPreprocessedOutput()747   bool isPreprocessedOutput() const { return PreprocessedOutput; }
748 
749   /// \brief Return true if we are lexing directly from the specified lexer.
isCurrentLexer(const PreprocessorLexer * L)750   bool isCurrentLexer(const PreprocessorLexer *L) const {
751     return CurPPLexer == L;
752   }
753 
754   /// \brief Return the current lexer being lexed from.
755   ///
756   /// Note that this ignores any potentially active macro expansions and _Pragma
757   /// expansions going on at the time.
getCurrentLexer()758   PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; }
759 
760   /// \brief Return the current file lexer being lexed from.
761   ///
762   /// Note that this ignores any potentially active macro expansions and _Pragma
763   /// expansions going on at the time.
764   PreprocessorLexer *getCurrentFileLexer() const;
765 
766   /// \brief Return the submodule owning the file being lexed.
getCurrentSubmodule()767   Module *getCurrentSubmodule() const { return CurSubmodule; }
768 
769   /// \brief Returns the FileID for the preprocessor predefines.
getPredefinesFileID()770   FileID getPredefinesFileID() const { return PredefinesFileID; }
771 
772   /// \{
773   /// \brief Accessors for preprocessor callbacks.
774   ///
775   /// Note that this class takes ownership of any PPCallbacks object given to
776   /// it.
getPPCallbacks()777   PPCallbacks *getPPCallbacks() const { return Callbacks.get(); }
addPPCallbacks(std::unique_ptr<PPCallbacks> C)778   void addPPCallbacks(std::unique_ptr<PPCallbacks> C) {
779     if (Callbacks)
780       C = llvm::make_unique<PPChainedCallbacks>(std::move(C),
781                                                 std::move(Callbacks));
782     Callbacks = std::move(C);
783   }
784   /// \}
785 
isMacroDefined(StringRef Id)786   bool isMacroDefined(StringRef Id) {
787     return isMacroDefined(&Identifiers.get(Id));
788   }
isMacroDefined(const IdentifierInfo * II)789   bool isMacroDefined(const IdentifierInfo *II) {
790     return II->hasMacroDefinition() &&
791            (!getLangOpts().Modules || (bool)getMacroDefinition(II));
792   }
793 
794   /// \brief Determine whether II is defined as a macro within the module M,
795   /// if that is a module that we've already preprocessed. Does not check for
796   /// macros imported into M.
isMacroDefinedInLocalModule(const IdentifierInfo * II,Module * M)797   bool isMacroDefinedInLocalModule(const IdentifierInfo *II, Module *M) {
798     if (!II->hasMacroDefinition())
799       return false;
800     auto I = Submodules.find(M);
801     if (I == Submodules.end())
802       return false;
803     auto J = I->second.Macros.find(II);
804     if (J == I->second.Macros.end())
805       return false;
806     auto *MD = J->second.getLatest();
807     return MD && MD->isDefined();
808   }
809 
getMacroDefinition(const IdentifierInfo * II)810   MacroDefinition getMacroDefinition(const IdentifierInfo *II) {
811     if (!II->hasMacroDefinition())
812       return MacroDefinition();
813 
814     MacroState &S = CurSubmoduleState->Macros[II];
815     auto *MD = S.getLatest();
816     while (MD && isa<VisibilityMacroDirective>(MD))
817       MD = MD->getPrevious();
818     return MacroDefinition(dyn_cast_or_null<DefMacroDirective>(MD),
819                            S.getActiveModuleMacros(*this, II),
820                            S.isAmbiguous(*this, II));
821   }
822 
getMacroDefinitionAtLoc(const IdentifierInfo * II,SourceLocation Loc)823   MacroDefinition getMacroDefinitionAtLoc(const IdentifierInfo *II,
824                                           SourceLocation Loc) {
825     if (!II->hadMacroDefinition())
826       return MacroDefinition();
827 
828     MacroState &S = CurSubmoduleState->Macros[II];
829     MacroDirective::DefInfo DI;
830     if (auto *MD = S.getLatest())
831       DI = MD->findDirectiveAtLoc(Loc, getSourceManager());
832     // FIXME: Compute the set of active module macros at the specified location.
833     return MacroDefinition(DI.getDirective(),
834                            S.getActiveModuleMacros(*this, II),
835                            S.isAmbiguous(*this, II));
836   }
837 
838   /// \brief Given an identifier, return its latest non-imported MacroDirective
839   /// if it is \#define'd and not \#undef'd, or null if it isn't \#define'd.
getLocalMacroDirective(const IdentifierInfo * II)840   MacroDirective *getLocalMacroDirective(const IdentifierInfo *II) const {
841     if (!II->hasMacroDefinition())
842       return nullptr;
843 
844     auto *MD = getLocalMacroDirectiveHistory(II);
845     if (!MD || MD->getDefinition().isUndefined())
846       return nullptr;
847 
848     return MD;
849   }
850 
getMacroInfo(const IdentifierInfo * II)851   const MacroInfo *getMacroInfo(const IdentifierInfo *II) const {
852     return const_cast<Preprocessor*>(this)->getMacroInfo(II);
853   }
854 
getMacroInfo(const IdentifierInfo * II)855   MacroInfo *getMacroInfo(const IdentifierInfo *II) {
856     if (!II->hasMacroDefinition())
857       return nullptr;
858     if (auto MD = getMacroDefinition(II))
859       return MD.getMacroInfo();
860     return nullptr;
861   }
862 
863   /// \brief Given an identifier, return the latest non-imported macro
864   /// directive for that identifier.
865   ///
866   /// One can iterate over all previous macro directives from the most recent
867   /// one.
868   MacroDirective *getLocalMacroDirectiveHistory(const IdentifierInfo *II) const;
869 
870   /// \brief Add a directive to the macro directive history for this identifier.
871   void appendMacroDirective(IdentifierInfo *II, MacroDirective *MD);
appendDefMacroDirective(IdentifierInfo * II,MacroInfo * MI,SourceLocation Loc)872   DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI,
873                                              SourceLocation Loc) {
874     DefMacroDirective *MD = AllocateDefMacroDirective(MI, Loc);
875     appendMacroDirective(II, MD);
876     return MD;
877   }
appendDefMacroDirective(IdentifierInfo * II,MacroInfo * MI)878   DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II,
879                                              MacroInfo *MI) {
880     return appendDefMacroDirective(II, MI, MI->getDefinitionLoc());
881   }
882   /// \brief Set a MacroDirective that was loaded from a PCH file.
883   void setLoadedMacroDirective(IdentifierInfo *II, MacroDirective *MD);
884 
885   /// \brief Register an exported macro for a module and identifier.
886   ModuleMacro *addModuleMacro(Module *Mod, IdentifierInfo *II, MacroInfo *Macro,
887                               ArrayRef<ModuleMacro *> Overrides, bool &IsNew);
888   ModuleMacro *getModuleMacro(Module *Mod, IdentifierInfo *II);
889 
890   /// \brief Get the list of leaf (non-overridden) module macros for a name.
getLeafModuleMacros(const IdentifierInfo * II)891   ArrayRef<ModuleMacro*> getLeafModuleMacros(const IdentifierInfo *II) const {
892     auto I = LeafModuleMacros.find(II);
893     if (I != LeafModuleMacros.end())
894       return I->second;
895     return None;
896   }
897 
898   /// \{
899   /// Iterators for the macro history table. Currently defined macros have
900   /// IdentifierInfo::hasMacroDefinition() set and an empty
901   /// MacroInfo::getUndefLoc() at the head of the list.
902   typedef MacroMap::const_iterator macro_iterator;
903   macro_iterator macro_begin(bool IncludeExternalMacros = true) const;
904   macro_iterator macro_end(bool IncludeExternalMacros = true) const;
905   llvm::iterator_range<macro_iterator>
906   macros(bool IncludeExternalMacros = true) const {
907     return llvm::make_range(macro_begin(IncludeExternalMacros),
908                             macro_end(IncludeExternalMacros));
909   }
910   /// \}
911 
912   /// \brief Return the name of the macro defined before \p Loc that has
913   /// spelling \p Tokens.  If there are multiple macros with same spelling,
914   /// return the last one defined.
915   StringRef getLastMacroWithSpelling(SourceLocation Loc,
916                                      ArrayRef<TokenValue> Tokens) const;
917 
getPredefines()918   const std::string &getPredefines() const { return Predefines; }
919   /// \brief Set the predefines for this Preprocessor.
920   ///
921   /// These predefines are automatically injected when parsing the main file.
setPredefines(const char * P)922   void setPredefines(const char *P) { Predefines = P; }
setPredefines(StringRef P)923   void setPredefines(StringRef P) { Predefines = P; }
924 
925   /// Return information about the specified preprocessor
926   /// identifier token.
getIdentifierInfo(StringRef Name)927   IdentifierInfo *getIdentifierInfo(StringRef Name) const {
928     return &Identifiers.get(Name);
929   }
930 
931   /// \brief Add the specified pragma handler to this preprocessor.
932   ///
933   /// If \p Namespace is non-null, then it is a token required to exist on the
934   /// pragma line before the pragma string starts, e.g. "STDC" or "GCC".
935   void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler);
AddPragmaHandler(PragmaHandler * Handler)936   void AddPragmaHandler(PragmaHandler *Handler) {
937     AddPragmaHandler(StringRef(), Handler);
938   }
939 
940   /// \brief Remove the specific pragma handler from this preprocessor.
941   ///
942   /// If \p Namespace is non-null, then it should be the namespace that
943   /// \p Handler was added to. It is an error to remove a handler that
944   /// has not been registered.
945   void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler);
RemovePragmaHandler(PragmaHandler * Handler)946   void RemovePragmaHandler(PragmaHandler *Handler) {
947     RemovePragmaHandler(StringRef(), Handler);
948   }
949 
950   /// Install empty handlers for all pragmas (making them ignored).
951   void IgnorePragmas();
952 
953   /// \brief Add the specified comment handler to the preprocessor.
954   void addCommentHandler(CommentHandler *Handler);
955 
956   /// \brief Remove the specified comment handler.
957   ///
958   /// It is an error to remove a handler that has not been registered.
959   void removeCommentHandler(CommentHandler *Handler);
960 
961   /// \brief Set the code completion handler to the given object.
setCodeCompletionHandler(CodeCompletionHandler & Handler)962   void setCodeCompletionHandler(CodeCompletionHandler &Handler) {
963     CodeComplete = &Handler;
964   }
965 
966   /// \brief Retrieve the current code-completion handler.
getCodeCompletionHandler()967   CodeCompletionHandler *getCodeCompletionHandler() const {
968     return CodeComplete;
969   }
970 
971   /// \brief Clear out the code completion handler.
clearCodeCompletionHandler()972   void clearCodeCompletionHandler() {
973     CodeComplete = nullptr;
974   }
975 
976   /// \brief Hook used by the lexer to invoke the "natural language" code
977   /// completion point.
978   void CodeCompleteNaturalLanguage();
979 
980   /// \brief Retrieve the preprocessing record, or NULL if there is no
981   /// preprocessing record.
getPreprocessingRecord()982   PreprocessingRecord *getPreprocessingRecord() const { return Record; }
983 
984   /// \brief Create a new preprocessing record, which will keep track of
985   /// all macro expansions, macro definitions, etc.
986   void createPreprocessingRecord();
987 
988   /// \brief Enter the specified FileID as the main source file,
989   /// which implicitly adds the builtin defines etc.
990   void EnterMainSourceFile();
991 
992   /// \brief Inform the preprocessor callbacks that processing is complete.
993   void EndSourceFile();
994 
995   /// \brief Add a source file to the top of the include stack and
996   /// start lexing tokens from it instead of the current buffer.
997   ///
998   /// Emits a diagnostic, doesn't enter the file, and returns true on error.
999   bool EnterSourceFile(FileID CurFileID, const DirectoryLookup *Dir,
1000                        SourceLocation Loc);
1001 
1002   /// \brief Add a Macro to the top of the include stack and start lexing
1003   /// tokens from it instead of the current buffer.
1004   ///
1005   /// \param Args specifies the tokens input to a function-like macro.
1006   /// \param ILEnd specifies the location of the ')' for a function-like macro
1007   /// or the identifier for an object-like macro.
1008   void EnterMacro(Token &Identifier, SourceLocation ILEnd, MacroInfo *Macro,
1009                   MacroArgs *Args);
1010 
1011   /// \brief Add a "macro" context to the top of the include stack,
1012   /// which will cause the lexer to start returning the specified tokens.
1013   ///
1014   /// If \p DisableMacroExpansion is true, tokens lexed from the token stream
1015   /// will not be subject to further macro expansion. Otherwise, these tokens
1016   /// will be re-macro-expanded when/if expansion is enabled.
1017   ///
1018   /// If \p OwnsTokens is false, this method assumes that the specified stream
1019   /// of tokens has a permanent owner somewhere, so they do not need to be
1020   /// copied. If it is true, it assumes the array of tokens is allocated with
1021   /// \c new[] and must be freed.
1022   void EnterTokenStream(const Token *Toks, unsigned NumToks,
1023                         bool DisableMacroExpansion, bool OwnsTokens);
1024 
1025   /// \brief Pop the current lexer/macro exp off the top of the lexer stack.
1026   ///
1027   /// This should only be used in situations where the current state of the
1028   /// top-of-stack lexer is known.
1029   void RemoveTopOfLexerStack();
1030 
1031   /// From the point that this method is called, and until
1032   /// CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor
1033   /// keeps track of the lexed tokens so that a subsequent Backtrack() call will
1034   /// make the Preprocessor re-lex the same tokens.
1035   ///
1036   /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can
1037   /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will
1038   /// be combined with the EnableBacktrackAtThisPos calls in reverse order.
1039   ///
1040   /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack
1041   /// at some point after EnableBacktrackAtThisPos. If you don't, caching of
1042   /// tokens will continue indefinitely.
1043   ///
1044   void EnableBacktrackAtThisPos();
1045 
1046   /// \brief Disable the last EnableBacktrackAtThisPos call.
1047   void CommitBacktrackedTokens();
1048 
1049   /// \brief Make Preprocessor re-lex the tokens that were lexed since
1050   /// EnableBacktrackAtThisPos() was previously called.
1051   void Backtrack();
1052 
1053   /// \brief True if EnableBacktrackAtThisPos() was called and
1054   /// caching of tokens is on.
isBacktrackEnabled()1055   bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); }
1056 
1057   /// \brief Lex the next token for this preprocessor.
1058   void Lex(Token &Result);
1059 
1060   void LexAfterModuleImport(Token &Result);
1061 
1062   void makeModuleVisible(Module *M, SourceLocation Loc);
1063 
getModuleImportLoc(Module * M)1064   SourceLocation getModuleImportLoc(Module *M) const {
1065     return CurSubmoduleState->VisibleModules.getImportLoc(M);
1066   }
1067 
1068   /// \brief Lex a string literal, which may be the concatenation of multiple
1069   /// string literals and may even come from macro expansion.
1070   /// \returns true on success, false if an error diagnostic has been generated.
LexStringLiteral(Token & Result,std::string & String,const char * DiagnosticTag,bool AllowMacroExpansion)1071   bool LexStringLiteral(Token &Result, std::string &String,
1072                         const char *DiagnosticTag, bool AllowMacroExpansion) {
1073     if (AllowMacroExpansion)
1074       Lex(Result);
1075     else
1076       LexUnexpandedToken(Result);
1077     return FinishLexStringLiteral(Result, String, DiagnosticTag,
1078                                   AllowMacroExpansion);
1079   }
1080 
1081   /// \brief Complete the lexing of a string literal where the first token has
1082   /// already been lexed (see LexStringLiteral).
1083   bool FinishLexStringLiteral(Token &Result, std::string &String,
1084                               const char *DiagnosticTag,
1085                               bool AllowMacroExpansion);
1086 
1087   /// \brief Lex a token.  If it's a comment, keep lexing until we get
1088   /// something not a comment.
1089   ///
1090   /// This is useful in -E -C mode where comments would foul up preprocessor
1091   /// directive handling.
LexNonComment(Token & Result)1092   void LexNonComment(Token &Result) {
1093     do
1094       Lex(Result);
1095     while (Result.getKind() == tok::comment);
1096   }
1097 
1098   /// \brief Just like Lex, but disables macro expansion of identifier tokens.
LexUnexpandedToken(Token & Result)1099   void LexUnexpandedToken(Token &Result) {
1100     // Disable macro expansion.
1101     bool OldVal = DisableMacroExpansion;
1102     DisableMacroExpansion = true;
1103     // Lex the token.
1104     Lex(Result);
1105 
1106     // Reenable it.
1107     DisableMacroExpansion = OldVal;
1108   }
1109 
1110   /// \brief Like LexNonComment, but this disables macro expansion of
1111   /// identifier tokens.
LexUnexpandedNonComment(Token & Result)1112   void LexUnexpandedNonComment(Token &Result) {
1113     do
1114       LexUnexpandedToken(Result);
1115     while (Result.getKind() == tok::comment);
1116   }
1117 
1118   /// \brief Parses a simple integer literal to get its numeric value.  Floating
1119   /// point literals and user defined literals are rejected.  Used primarily to
1120   /// handle pragmas that accept integer arguments.
1121   bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value);
1122 
1123   /// Disables macro expansion everywhere except for preprocessor directives.
SetMacroExpansionOnlyInDirectives()1124   void SetMacroExpansionOnlyInDirectives() {
1125     DisableMacroExpansion = true;
1126     MacroExpansionInDirectivesOverride = true;
1127   }
1128 
1129   /// \brief Peeks ahead N tokens and returns that token without consuming any
1130   /// tokens.
1131   ///
1132   /// LookAhead(0) returns the next token that would be returned by Lex(),
1133   /// LookAhead(1) returns the token after it, etc.  This returns normal
1134   /// tokens after phase 5.  As such, it is equivalent to using
1135   /// 'Lex', not 'LexUnexpandedToken'.
LookAhead(unsigned N)1136   const Token &LookAhead(unsigned N) {
1137     if (CachedLexPos + N < CachedTokens.size())
1138       return CachedTokens[CachedLexPos+N];
1139     else
1140       return PeekAhead(N+1);
1141   }
1142 
1143   /// \brief When backtracking is enabled and tokens are cached,
1144   /// this allows reverting a specific number of tokens.
1145   ///
1146   /// Note that the number of tokens being reverted should be up to the last
1147   /// backtrack position, not more.
RevertCachedTokens(unsigned N)1148   void RevertCachedTokens(unsigned N) {
1149     assert(isBacktrackEnabled() &&
1150            "Should only be called when tokens are cached for backtracking");
1151     assert(signed(CachedLexPos) - signed(N) >= signed(BacktrackPositions.back())
1152          && "Should revert tokens up to the last backtrack position, not more");
1153     assert(signed(CachedLexPos) - signed(N) >= 0 &&
1154            "Corrupted backtrack positions ?");
1155     CachedLexPos -= N;
1156   }
1157 
1158   /// \brief Enters a token in the token stream to be lexed next.
1159   ///
1160   /// If Backtrack() is called afterwards, the token will remain at the
1161   /// insertion point.
EnterToken(const Token & Tok)1162   void EnterToken(const Token &Tok) {
1163     EnterCachingLexMode();
1164     CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok);
1165   }
1166 
1167   /// We notify the Preprocessor that if it is caching tokens (because
1168   /// backtrack is enabled) it should replace the most recent cached tokens
1169   /// with the given annotation token. This function has no effect if
1170   /// backtracking is not enabled.
1171   ///
1172   /// Note that the use of this function is just for optimization, so that the
1173   /// cached tokens don't get re-parsed and re-resolved after a backtrack is
1174   /// invoked.
AnnotateCachedTokens(const Token & Tok)1175   void AnnotateCachedTokens(const Token &Tok) {
1176     assert(Tok.isAnnotation() && "Expected annotation token");
1177     if (CachedLexPos != 0 && isBacktrackEnabled())
1178       AnnotatePreviousCachedTokens(Tok);
1179   }
1180 
1181   /// Get the location of the last cached token, suitable for setting the end
1182   /// location of an annotation token.
getLastCachedTokenLocation()1183   SourceLocation getLastCachedTokenLocation() const {
1184     assert(CachedLexPos != 0);
1185     return CachedTokens[CachedLexPos-1].getLastLoc();
1186   }
1187 
1188   /// \brief Replace the last token with an annotation token.
1189   ///
1190   /// Like AnnotateCachedTokens(), this routine replaces an
1191   /// already-parsed (and resolved) token with an annotation
1192   /// token. However, this routine only replaces the last token with
1193   /// the annotation token; it does not affect any other cached
1194   /// tokens. This function has no effect if backtracking is not
1195   /// enabled.
ReplaceLastTokenWithAnnotation(const Token & Tok)1196   void ReplaceLastTokenWithAnnotation(const Token &Tok) {
1197     assert(Tok.isAnnotation() && "Expected annotation token");
1198     if (CachedLexPos != 0 && isBacktrackEnabled())
1199       CachedTokens[CachedLexPos-1] = Tok;
1200   }
1201 
1202   /// Update the current token to represent the provided
1203   /// identifier, in order to cache an action performed by typo correction.
TypoCorrectToken(const Token & Tok)1204   void TypoCorrectToken(const Token &Tok) {
1205     assert(Tok.getIdentifierInfo() && "Expected identifier token");
1206     if (CachedLexPos != 0 && isBacktrackEnabled())
1207       CachedTokens[CachedLexPos-1] = Tok;
1208   }
1209 
1210   /// \brief Recompute the current lexer kind based on the CurLexer/CurPTHLexer/
1211   /// CurTokenLexer pointers.
1212   void recomputeCurLexerKind();
1213 
1214   /// \brief Returns true if incremental processing is enabled
isIncrementalProcessingEnabled()1215   bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; }
1216 
1217   /// \brief Enables the incremental processing
1218   void enableIncrementalProcessing(bool value = true) {
1219     IncrementalProcessing = value;
1220   }
1221 
1222   /// \brief Specify the point at which code-completion will be performed.
1223   ///
1224   /// \param File the file in which code completion should occur. If
1225   /// this file is included multiple times, code-completion will
1226   /// perform completion the first time it is included. If NULL, this
1227   /// function clears out the code-completion point.
1228   ///
1229   /// \param Line the line at which code completion should occur
1230   /// (1-based).
1231   ///
1232   /// \param Column the column at which code completion should occur
1233   /// (1-based).
1234   ///
1235   /// \returns true if an error occurred, false otherwise.
1236   bool SetCodeCompletionPoint(const FileEntry *File,
1237                               unsigned Line, unsigned Column);
1238 
1239   /// \brief Determine if we are performing code completion.
isCodeCompletionEnabled()1240   bool isCodeCompletionEnabled() const { return CodeCompletionFile != nullptr; }
1241 
1242   /// \brief Returns the location of the code-completion point.
1243   ///
1244   /// Returns an invalid location if code-completion is not enabled or the file
1245   /// containing the code-completion point has not been lexed yet.
getCodeCompletionLoc()1246   SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; }
1247 
1248   /// \brief Returns the start location of the file of code-completion point.
1249   ///
1250   /// Returns an invalid location if code-completion is not enabled or the file
1251   /// containing the code-completion point has not been lexed yet.
getCodeCompletionFileLoc()1252   SourceLocation getCodeCompletionFileLoc() const {
1253     return CodeCompletionFileLoc;
1254   }
1255 
1256   /// \brief Returns true if code-completion is enabled and we have hit the
1257   /// code-completion point.
isCodeCompletionReached()1258   bool isCodeCompletionReached() const { return CodeCompletionReached; }
1259 
1260   /// \brief Note that we hit the code-completion point.
setCodeCompletionReached()1261   void setCodeCompletionReached() {
1262     assert(isCodeCompletionEnabled() && "Code-completion not enabled!");
1263     CodeCompletionReached = true;
1264     // Silence any diagnostics that occur after we hit the code-completion.
1265     getDiagnostics().setSuppressAllDiagnostics(true);
1266   }
1267 
1268   /// \brief The location of the currently-active \#pragma clang
1269   /// arc_cf_code_audited begin.
1270   ///
1271   /// Returns an invalid location if there is no such pragma active.
getPragmaARCCFCodeAuditedLoc()1272   SourceLocation getPragmaARCCFCodeAuditedLoc() const {
1273     return PragmaARCCFCodeAuditedLoc;
1274   }
1275 
1276   /// \brief Set the location of the currently-active \#pragma clang
1277   /// arc_cf_code_audited begin.  An invalid location ends the pragma.
setPragmaARCCFCodeAuditedLoc(SourceLocation Loc)1278   void setPragmaARCCFCodeAuditedLoc(SourceLocation Loc) {
1279     PragmaARCCFCodeAuditedLoc = Loc;
1280   }
1281 
1282   /// \brief The location of the currently-active \#pragma clang
1283   /// assume_nonnull begin.
1284   ///
1285   /// Returns an invalid location if there is no such pragma active.
getPragmaAssumeNonNullLoc()1286   SourceLocation getPragmaAssumeNonNullLoc() const {
1287     return PragmaAssumeNonNullLoc;
1288   }
1289 
1290   /// \brief Set the location of the currently-active \#pragma clang
1291   /// assume_nonnull begin.  An invalid location ends the pragma.
setPragmaAssumeNonNullLoc(SourceLocation Loc)1292   void setPragmaAssumeNonNullLoc(SourceLocation Loc) {
1293     PragmaAssumeNonNullLoc = Loc;
1294   }
1295 
1296   /// \brief Set the directory in which the main file should be considered
1297   /// to have been found, if it is not a real file.
setMainFileDir(const DirectoryEntry * Dir)1298   void setMainFileDir(const DirectoryEntry *Dir) {
1299     MainFileDir = Dir;
1300   }
1301 
1302   /// \brief Instruct the preprocessor to skip part of the main source file.
1303   ///
1304   /// \param Bytes The number of bytes in the preamble to skip.
1305   ///
1306   /// \param StartOfLine Whether skipping these bytes puts the lexer at the
1307   /// start of a line.
setSkipMainFilePreamble(unsigned Bytes,bool StartOfLine)1308   void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) {
1309     SkipMainFilePreamble.first = Bytes;
1310     SkipMainFilePreamble.second = StartOfLine;
1311   }
1312 
1313   /// Forwarding function for diagnostics.  This emits a diagnostic at
1314   /// the specified Token's location, translating the token's start
1315   /// position in the current buffer into a SourcePosition object for rendering.
Diag(SourceLocation Loc,unsigned DiagID)1316   DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const {
1317     return Diags->Report(Loc, DiagID);
1318   }
1319 
Diag(const Token & Tok,unsigned DiagID)1320   DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const {
1321     return Diags->Report(Tok.getLocation(), DiagID);
1322   }
1323 
1324   /// Return the 'spelling' of the token at the given
1325   /// location; does not go up to the spelling location or down to the
1326   /// expansion location.
1327   ///
1328   /// \param buffer A buffer which will be used only if the token requires
1329   ///   "cleaning", e.g. if it contains trigraphs or escaped newlines
1330   /// \param invalid If non-null, will be set \c true if an error occurs.
1331   StringRef getSpelling(SourceLocation loc,
1332                         SmallVectorImpl<char> &buffer,
1333                         bool *invalid = nullptr) const {
1334     return Lexer::getSpelling(loc, buffer, SourceMgr, LangOpts, invalid);
1335   }
1336 
1337   /// \brief Return the 'spelling' of the Tok token.
1338   ///
1339   /// The spelling of a token is the characters used to represent the token in
1340   /// the source file after trigraph expansion and escaped-newline folding.  In
1341   /// particular, this wants to get the true, uncanonicalized, spelling of
1342   /// things like digraphs, UCNs, etc.
1343   ///
1344   /// \param Invalid If non-null, will be set \c true if an error occurs.
1345   std::string getSpelling(const Token &Tok, bool *Invalid = nullptr) const {
1346     return Lexer::getSpelling(Tok, SourceMgr, LangOpts, Invalid);
1347   }
1348 
1349   /// \brief Get the spelling of a token into a preallocated buffer, instead
1350   /// of as an std::string.
1351   ///
1352   /// The caller is required to allocate enough space for the token, which is
1353   /// guaranteed to be at least Tok.getLength() bytes long. The length of the
1354   /// actual result is returned.
1355   ///
1356   /// Note that this method may do two possible things: it may either fill in
1357   /// the buffer specified with characters, or it may *change the input pointer*
1358   /// to point to a constant buffer with the data already in it (avoiding a
1359   /// copy).  The caller is not allowed to modify the returned buffer pointer
1360   /// if an internal buffer is returned.
1361   unsigned getSpelling(const Token &Tok, const char *&Buffer,
1362                        bool *Invalid = nullptr) const {
1363     return Lexer::getSpelling(Tok, Buffer, SourceMgr, LangOpts, Invalid);
1364   }
1365 
1366   /// \brief Get the spelling of a token into a SmallVector.
1367   ///
1368   /// Note that the returned StringRef may not point to the
1369   /// supplied buffer if a copy can be avoided.
1370   StringRef getSpelling(const Token &Tok,
1371                         SmallVectorImpl<char> &Buffer,
1372                         bool *Invalid = nullptr) const;
1373 
1374   /// \brief Relex the token at the specified location.
1375   /// \returns true if there was a failure, false on success.
1376   bool getRawToken(SourceLocation Loc, Token &Result,
1377                    bool IgnoreWhiteSpace = false) {
1378     return Lexer::getRawToken(Loc, Result, SourceMgr, LangOpts, IgnoreWhiteSpace);
1379   }
1380 
1381   /// \brief Given a Token \p Tok that is a numeric constant with length 1,
1382   /// return the character.
1383   char
1384   getSpellingOfSingleCharacterNumericConstant(const Token &Tok,
1385                                               bool *Invalid = nullptr) const {
1386     assert(Tok.is(tok::numeric_constant) &&
1387            Tok.getLength() == 1 && "Called on unsupported token");
1388     assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1");
1389 
1390     // If the token is carrying a literal data pointer, just use it.
1391     if (const char *D = Tok.getLiteralData())
1392       return *D;
1393 
1394     // Otherwise, fall back on getCharacterData, which is slower, but always
1395     // works.
1396     return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid);
1397   }
1398 
1399   /// \brief Retrieve the name of the immediate macro expansion.
1400   ///
1401   /// This routine starts from a source location, and finds the name of the
1402   /// macro responsible for its immediate expansion. It looks through any
1403   /// intervening macro argument expansions to compute this. It returns a
1404   /// StringRef that refers to the SourceManager-owned buffer of the source
1405   /// where that macro name is spelled. Thus, the result shouldn't out-live
1406   /// the SourceManager.
getImmediateMacroName(SourceLocation Loc)1407   StringRef getImmediateMacroName(SourceLocation Loc) {
1408     return Lexer::getImmediateMacroName(Loc, SourceMgr, getLangOpts());
1409   }
1410 
1411   /// \brief Plop the specified string into a scratch buffer and set the
1412   /// specified token's location and length to it.
1413   ///
1414   /// If specified, the source location provides a location of the expansion
1415   /// point of the token.
1416   void CreateString(StringRef Str, Token &Tok,
1417                     SourceLocation ExpansionLocStart = SourceLocation(),
1418                     SourceLocation ExpansionLocEnd = SourceLocation());
1419 
1420   /// \brief Computes the source location just past the end of the
1421   /// token at this source location.
1422   ///
1423   /// This routine can be used to produce a source location that
1424   /// points just past the end of the token referenced by \p Loc, and
1425   /// is generally used when a diagnostic needs to point just after a
  /// token where it expected something different than it received. If
1427   /// the returned source location would not be meaningful (e.g., if
1428   /// it points into a macro), this routine returns an invalid
1429   /// source location.
1430   ///
1431   /// \param Offset an offset from the end of the token, where the source
1432   /// location should refer to. The default offset (0) produces a source
1433   /// location pointing just past the end of the token; an offset of 1 produces
1434   /// a source location pointing to the last character in the token, etc.
1435   SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0) {
1436     return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, LangOpts);
1437   }
1438 
1439   /// \brief Returns true if the given MacroID location points at the first
1440   /// token of the macro expansion.
1441   ///
1442   /// \param MacroBegin If non-null and function returns true, it is set to
1443   /// begin location of the macro.
1444   bool isAtStartOfMacroExpansion(SourceLocation loc,
1445                                  SourceLocation *MacroBegin = nullptr) const {
1446     return Lexer::isAtStartOfMacroExpansion(loc, SourceMgr, LangOpts,
1447                                             MacroBegin);
1448   }
1449 
1450   /// \brief Returns true if the given MacroID location points at the last
1451   /// token of the macro expansion.
1452   ///
1453   /// \param MacroEnd If non-null and function returns true, it is set to
1454   /// end location of the macro.
1455   bool isAtEndOfMacroExpansion(SourceLocation loc,
1456                                SourceLocation *MacroEnd = nullptr) const {
1457     return Lexer::isAtEndOfMacroExpansion(loc, SourceMgr, LangOpts, MacroEnd);
1458   }
1459 
1460   /// \brief Print the token to stderr, used for debugging.
1461   void DumpToken(const Token &Tok, bool DumpFlags = false) const;
1462   void DumpLocation(SourceLocation Loc) const;
1463   void DumpMacro(const MacroInfo &MI) const;
1464   void dumpMacroInfo(const IdentifierInfo *II);
1465 
1466   /// \brief Given a location that specifies the start of a
1467   /// token, return a new location that specifies a character within the token.
AdvanceToTokenCharacter(SourceLocation TokStart,unsigned Char)1468   SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart,
1469                                          unsigned Char) const {
1470     return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, LangOpts);
1471   }
1472 
1473   /// \brief Increment the counters for the number of token paste operations
1474   /// performed.
1475   ///
1476   /// If fast was specified, this is a 'fast paste' case we handled.
IncrementPasteCounter(bool isFast)1477   void IncrementPasteCounter(bool isFast) {
1478     if (isFast)
1479       ++NumFastTokenPaste;
1480     else
1481       ++NumTokenPaste;
1482   }
1483 
1484   void PrintStats();
1485 
1486   size_t getTotalMemory() const;
1487 
1488   /// When the macro expander pastes together a comment (/##/) in Microsoft
1489   /// mode, this method handles updating the current state, returning the
1490   /// token on the next source line.
1491   void HandleMicrosoftCommentPaste(Token &Tok);
1492 
1493   //===--------------------------------------------------------------------===//
1494   // Preprocessor callback methods.  These are invoked by a lexer as various
1495   // directives and events are found.
1496 
1497   /// Given a tok::raw_identifier token, look up the
1498   /// identifier information for the token and install it into the token,
1499   /// updating the token kind accordingly.
1500   IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const;
1501 
1502 private:
1503   llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons;
1504 
1505 public:
1506 
1507   /// \brief Specifies the reason for poisoning an identifier.
1508   ///
1509   /// If that identifier is accessed while poisoned, then this reason will be
1510   /// used instead of the default "poisoned" diagnostic.
1511   void SetPoisonReason(IdentifierInfo *II, unsigned DiagID);
1512 
1513   /// \brief Display reason for poisoned identifier.
1514   void HandlePoisonedIdentifier(Token & Tok);
1515 
MaybeHandlePoisonedIdentifier(Token & Identifier)1516   void MaybeHandlePoisonedIdentifier(Token & Identifier) {
1517     if(IdentifierInfo * II = Identifier.getIdentifierInfo()) {
1518       if(II->isPoisoned()) {
1519         HandlePoisonedIdentifier(Identifier);
1520       }
1521     }
1522   }
1523 
1524 private:
1525   /// Identifiers used for SEH handling in Borland. These are only
1526   /// allowed in particular circumstances
1527   // __except block
1528   IdentifierInfo *Ident__exception_code,
1529                  *Ident___exception_code,
1530                  *Ident_GetExceptionCode;
1531   // __except filter expression
1532   IdentifierInfo *Ident__exception_info,
1533                  *Ident___exception_info,
1534                  *Ident_GetExceptionInfo;
1535   // __finally
1536   IdentifierInfo *Ident__abnormal_termination,
1537                  *Ident___abnormal_termination,
1538                  *Ident_AbnormalTermination;
1539 
1540   const char *getCurLexerEndPos();
1541 
1542 public:
1543   void PoisonSEHIdentifiers(bool Poison = true); // Borland
1544 
1545   /// \brief Callback invoked when the lexer reads an identifier and has
  /// filled in the token's IdentifierInfo member.
1547   ///
1548   /// This callback potentially macro expands it or turns it into a named
1549   /// token (like 'for').
1550   ///
1551   /// \returns true if we actually computed a token, false if we need to
1552   /// lex again.
1553   bool HandleIdentifier(Token &Identifier);
1554 
1555 
1556   /// \brief Callback invoked when the lexer hits the end of the current file.
1557   ///
1558   /// This either returns the EOF token and returns true, or
1559   /// pops a level off the include stack and returns false, at which point the
1560   /// client should call lex again.
1561   bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false);
1562 
1563   /// \brief Callback invoked when the current TokenLexer hits the end of its
1564   /// token stream.
1565   bool HandleEndOfTokenLexer(Token &Result);
1566 
1567   /// \brief Callback invoked when the lexer sees a # token at the start of a
1568   /// line.
1569   ///
1570   /// This consumes the directive, modifies the lexer/preprocessor state, and
1571   /// advances the lexer(s) so that the next token read is the correct one.
1572   void HandleDirective(Token &Result);
1573 
1574   /// \brief Ensure that the next token is a tok::eod token.
1575   ///
1576   /// If not, emit a diagnostic and consume up until the eod.
1577   /// If \p EnableMacros is true, then we consider macros that expand to zero
1578   /// tokens as being ok.
1579   void CheckEndOfDirective(const char *Directive, bool EnableMacros = false);
1580 
1581   /// \brief Read and discard all tokens remaining on the current line until
1582   /// the tok::eod token is found.
1583   void DiscardUntilEndOfDirective();
1584 
1585   /// \brief Returns true if the preprocessor has seen a use of
1586   /// __DATE__ or __TIME__ in the file so far.
SawDateOrTime()1587   bool SawDateOrTime() const {
1588     return DATELoc != SourceLocation() || TIMELoc != SourceLocation();
1589   }
getCounterValue()1590   unsigned getCounterValue() const { return CounterValue; }
setCounterValue(unsigned V)1591   void setCounterValue(unsigned V) { CounterValue = V; }
1592 
1593   /// \brief Retrieves the module that we're currently building, if any.
1594   Module *getCurrentModule();
1595 
1596   /// \brief Allocate a new MacroInfo object with the provided SourceLocation.
1597   MacroInfo *AllocateMacroInfo(SourceLocation L);
1598 
1599   /// \brief Allocate a new MacroInfo object loaded from an AST file.
1600   MacroInfo *AllocateDeserializedMacroInfo(SourceLocation L,
1601                                            unsigned SubModuleID);
1602 
1603   /// \brief Turn the specified lexer token into a fully checked and spelled
1604   /// filename, e.g. as an operand of \#include.
1605   ///
1606   /// The caller is expected to provide a buffer that is large enough to hold
1607   /// the spelling of the filename, but is also expected to handle the case
1608   /// when this method decides to use a different buffer.
1609   ///
1610   /// \returns true if the input filename was in <>'s or false if it was
1611   /// in ""'s.
1612   bool GetIncludeFilenameSpelling(SourceLocation Loc,StringRef &Filename);
1613 
1614   /// \brief Given a "foo" or \<foo> reference, look up the indicated file.
1615   ///
1616   /// Returns null on failure.  \p isAngled indicates whether the file
1617   /// reference is for system \#include's or not (i.e. using <> instead of "").
1618   const FileEntry *LookupFile(SourceLocation FilenameLoc, StringRef Filename,
1619                               bool isAngled, const DirectoryLookup *FromDir,
1620                               const FileEntry *FromFile,
1621                               const DirectoryLookup *&CurDir,
1622                               SmallVectorImpl<char> *SearchPath,
1623                               SmallVectorImpl<char> *RelativePath,
1624                               ModuleMap::KnownHeader *SuggestedModule,
1625                               bool SkipCache = false);
1626 
1627   /// \brief Get the DirectoryLookup structure used to find the current
1628   /// FileEntry, if CurLexer is non-null and if applicable.
1629   ///
1630   /// This allows us to implement \#include_next and find directory-specific
1631   /// properties.
GetCurDirLookup()1632   const DirectoryLookup *GetCurDirLookup() { return CurDirLookup; }
1633 
1634   /// \brief Return true if we're in the top-level file, not in a \#include.
1635   bool isInPrimaryFile() const;
1636 
1637   /// \brief Handle cases where the \#include name is expanded
1638   /// from a macro as multiple tokens, which need to be glued together.
1639   ///
1640   /// This occurs for code like:
1641   /// \code
1642   ///    \#define FOO <x/y.h>
1643   ///    \#include FOO
1644   /// \endcode
1645   /// because in this case, "<x/y.h>" is returned as 7 tokens, not one.
1646   ///
1647   /// This code concatenates and consumes tokens up to the '>' token.  It
1648   /// returns false if the > was found, otherwise it returns true if it finds
1649   /// and consumes the EOD marker.
1650   bool ConcatenateIncludeName(SmallString<128> &FilenameBuffer,
1651                               SourceLocation &End);
1652 
1653   /// \brief Lex an on-off-switch (C99 6.10.6p2) and verify that it is
1654   /// followed by EOD.  Return true if the token is not a valid on-off-switch.
1655   bool LexOnOffSwitch(tok::OnOffSwitch &OOS);
1656 
1657   bool CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
1658                       bool *ShadowFlag = nullptr);
1659 
1660 private:
1661 
PushIncludeMacroStack()1662   void PushIncludeMacroStack() {
1663     assert(CurLexerKind != CLK_CachingLexer && "cannot push a caching lexer");
1664     IncludeMacroStack.emplace_back(
1665         CurLexerKind, CurSubmodule, std::move(CurLexer), std::move(CurPTHLexer),
1666         CurPPLexer, std::move(CurTokenLexer), CurDirLookup);
1667     CurPPLexer = nullptr;
1668   }
1669 
PopIncludeMacroStack()1670   void PopIncludeMacroStack() {
1671     CurLexer = std::move(IncludeMacroStack.back().TheLexer);
1672     CurPTHLexer = std::move(IncludeMacroStack.back().ThePTHLexer);
1673     CurPPLexer = IncludeMacroStack.back().ThePPLexer;
1674     CurTokenLexer = std::move(IncludeMacroStack.back().TheTokenLexer);
1675     CurDirLookup  = IncludeMacroStack.back().TheDirLookup;
1676     CurSubmodule = IncludeMacroStack.back().TheSubmodule;
1677     CurLexerKind = IncludeMacroStack.back().CurLexerKind;
1678     IncludeMacroStack.pop_back();
1679   }
1680 
1681   void PropagateLineStartLeadingSpaceInfo(Token &Result);
1682 
1683   void EnterSubmodule(Module *M, SourceLocation ImportLoc);
1684   void LeaveSubmodule();
1685 
1686   /// Update the set of active module macros and ambiguity flag for a module
1687   /// macro name.
1688   void updateModuleMacroInfo(const IdentifierInfo *II, ModuleMacroInfo &Info);
1689 
1690   /// \brief Allocate a new MacroInfo object.
1691   MacroInfo *AllocateMacroInfo();
1692 
1693   DefMacroDirective *AllocateDefMacroDirective(MacroInfo *MI,
1694                                                SourceLocation Loc);
1695   UndefMacroDirective *AllocateUndefMacroDirective(SourceLocation UndefLoc);
1696   VisibilityMacroDirective *AllocateVisibilityMacroDirective(SourceLocation Loc,
1697                                                              bool isPublic);
1698 
1699   /// \brief Lex and validate a macro name, which occurs after a
1700   /// \#define or \#undef.
1701   ///
1702   /// \param MacroNameTok Token that represents the name defined or undefined.
  /// \param IsDefineUndef Kind of preprocessor directive.
1704   /// \param ShadowFlag Points to flag that is set if macro name shadows
1705   ///                   a keyword.
1706   ///
1707   /// This emits a diagnostic, sets the token kind to eod,
1708   /// and discards the rest of the macro line if the macro name is invalid.
1709   void ReadMacroName(Token &MacroNameTok, MacroUse IsDefineUndef = MU_Other,
1710                      bool *ShadowFlag = nullptr);
1711 
1712   /// The ( starting an argument list of a macro definition has just been read.
1713   /// Lex the rest of the arguments and the closing ), updating \p MI with
1714   /// what we learn and saving in \p LastTok the last token read.
1715   /// Return true if an error occurs parsing the arg list.
1716   bool ReadMacroDefinitionArgList(MacroInfo *MI, Token& LastTok);
1717 
1718   /// We just read a \#if or related directive and decided that the
1719   /// subsequent tokens are in the \#if'd out portion of the
1720   /// file.  Lex the rest of the file, until we see an \#endif.  If \p
1721   /// FoundNonSkipPortion is true, then we have already emitted code for part of
1722   /// this \#if directive, so \#else/\#elif blocks should never be entered. If
1723   /// \p FoundElse is false, then \#else directives are ok, if not, then we have
1724   /// already seen one so a \#else directive is a duplicate.  When this returns,
1725   /// the caller can lex the first valid token.
1726   void SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
1727                                     bool FoundNonSkipPortion, bool FoundElse,
1728                                     SourceLocation ElseLoc = SourceLocation());
1729 
1730   /// \brief A fast PTH version of SkipExcludedConditionalBlock.
1731   void PTHSkipExcludedConditionalBlock();
1732 
1733   /// \brief Evaluate an integer constant expression that may occur after a
1734   /// \#if or \#elif directive and return it as a bool.
1735   ///
1736   /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
1737   bool EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro);
1738 
1739   /// \brief Install the standard preprocessor pragmas:
1740   /// \#pragma GCC poison/system_header/dependency and \#pragma once.
1741   void RegisterBuiltinPragmas();
1742 
1743   /// \brief Register builtin macros such as __LINE__ with the identifier table.
1744   void RegisterBuiltinMacros();
1745 
1746   /// If an identifier token is read that is to be expanded as a macro, handle
1747   /// it and return the next token as 'Tok'.  If we lexed a token, return true;
1748   /// otherwise the caller should lex again.
1749   bool HandleMacroExpandedIdentifier(Token &Tok, const MacroDefinition &MD);
1750 
1751   /// \brief Cache macro expanded tokens for TokenLexers.
  ///
  /// Works like a stack; a TokenLexer adds the macro expanded tokens that it is
1754   /// going to lex in the cache and when it finishes the tokens are removed
1755   /// from the end of the cache.
1756   Token *cacheMacroExpandedTokens(TokenLexer *tokLexer,
1757                                   ArrayRef<Token> tokens);
1758   void removeCachedMacroExpandedTokensOfLastLexer();
1759   friend void TokenLexer::ExpandFunctionArguments();
1760 
1761   /// Determine whether the next preprocessor token to be
1762   /// lexed is a '('.  If so, consume the token and return true, if not, this
1763   /// method should have no observable side-effect on the lexed tokens.
1764   bool isNextPPTokenLParen();
1765 
1766   /// After reading "MACRO(", this method is invoked to read all of the formal
1767   /// arguments specified for the macro invocation.  Returns null on error.
1768   MacroArgs *ReadFunctionLikeMacroArgs(Token &MacroName, MacroInfo *MI,
1769                                        SourceLocation &ExpansionEnd);
1770 
1771   /// \brief If an identifier token is read that is to be expanded
1772   /// as a builtin macro, handle it and return the next token as 'Tok'.
1773   void ExpandBuiltinMacro(Token &Tok);
1774 
1775   /// \brief Read a \c _Pragma directive, slice it up, process it, then
1776   /// return the first token after the directive.
1777   /// This assumes that the \c _Pragma token has just been read into \p Tok.
1778   void Handle_Pragma(Token &Tok);
1779 
1780   /// \brief Like Handle_Pragma except the pragma text is not enclosed within
1781   /// a string literal.
1782   void HandleMicrosoft__pragma(Token &Tok);
1783 
1784   /// \brief Add a lexer to the top of the include stack and
1785   /// start lexing tokens from it instead of the current buffer.
1786   void EnterSourceFileWithLexer(Lexer *TheLexer, const DirectoryLookup *Dir);
1787 
1788   /// \brief Add a lexer to the top of the include stack and
1789   /// start getting tokens from it using the PTH cache.
1790   void EnterSourceFileWithPTH(PTHLexer *PL, const DirectoryLookup *Dir);
1791 
1792   /// \brief Set the FileID for the preprocessor predefines.
setPredefinesFileID(FileID FID)1793   void setPredefinesFileID(FileID FID) {
1794     assert(PredefinesFileID.isInvalid() && "PredefinesFileID already set!");
1795     PredefinesFileID = FID;
1796   }
1797 
1798   /// \brief Returns true if we are lexing from a file and not a
1799   /// pragma or a macro.
IsFileLexer(const Lexer * L,const PreprocessorLexer * P)1800   static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) {
1801     return L ? !L->isPragmaLexer() : P != nullptr;
1802   }
1803 
IsFileLexer(const IncludeStackInfo & I)1804   static bool IsFileLexer(const IncludeStackInfo& I) {
1805     return IsFileLexer(I.TheLexer.get(), I.ThePPLexer);
1806   }
1807 
IsFileLexer()1808   bool IsFileLexer() const {
1809     return IsFileLexer(CurLexer.get(), CurPPLexer);
1810   }
1811 
1812   //===--------------------------------------------------------------------===//
1813   // Caching stuff.
1814   void CachingLex(Token &Result);
InCachingLexMode()1815   bool InCachingLexMode() const {
1816     // If the Lexer pointers are 0 and IncludeMacroStack is empty, it means
1817     // that we are past EOF, not that we are in CachingLex mode.
1818     return !CurPPLexer && !CurTokenLexer && !CurPTHLexer &&
1819            !IncludeMacroStack.empty();
1820   }
1821   void EnterCachingLexMode();
ExitCachingLexMode()1822   void ExitCachingLexMode() {
1823     if (InCachingLexMode())
1824       RemoveTopOfLexerStack();
1825   }
1826   const Token &PeekAhead(unsigned N);
1827   void AnnotatePreviousCachedTokens(const Token &Tok);
1828 
1829   //===--------------------------------------------------------------------===//
1830   /// Handle*Directive - implement the various preprocessor directives.  These
1831   /// should side-effect the current preprocessor object so that the next call
1832   /// to Lex() will return the appropriate token next.
1833   void HandleLineDirective(Token &Tok);
1834   void HandleDigitDirective(Token &Tok);
1835   void HandleUserDiagnosticDirective(Token &Tok, bool isWarning);
1836   void HandleIdentSCCSDirective(Token &Tok);
1837   void HandleMacroPublicDirective(Token &Tok);
1838   void HandleMacroPrivateDirective(Token &Tok);
1839 
1840   // File inclusion.
1841   void HandleIncludeDirective(SourceLocation HashLoc,
1842                               Token &Tok,
1843                               const DirectoryLookup *LookupFrom = nullptr,
1844                               const FileEntry *LookupFromFile = nullptr,
1845                               bool isImport = false);
1846   void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok);
1847   void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok);
1848   void HandleImportDirective(SourceLocation HashLoc, Token &Tok);
1849   void HandleMicrosoftImportDirective(Token &Tok);
1850 
1851 public:
1852   // Module inclusion testing.
1853   /// \brief Find the module that owns the source or header file that
1854   /// \p Loc points to. If the location is in a file that was included
1855   /// into a module, or is outside any module, returns nullptr.
1856   Module *getModuleForLocation(SourceLocation Loc);
1857 
1858   /// \brief Find the module that contains the specified location, either
1859   /// directly or indirectly.
1860   Module *getModuleContainingLocation(SourceLocation Loc);
1861 
1862 private:
1863   // Macro handling.
1864   void HandleDefineDirective(Token &Tok, bool ImmediatelyAfterTopLevelIfndef);
1865   void HandleUndefDirective(Token &Tok);
1866 
1867   // Conditional Inclusion.
1868   void HandleIfdefDirective(Token &Tok, bool isIfndef,
1869                             bool ReadAnyTokensBeforeDirective);
1870   void HandleIfDirective(Token &Tok, bool ReadAnyTokensBeforeDirective);
1871   void HandleEndifDirective(Token &Tok);
1872   void HandleElseDirective(Token &Tok);
1873   void HandleElifDirective(Token &Tok);
1874 
1875   // Pragmas.
1876   void HandlePragmaDirective(SourceLocation IntroducerLoc,
1877                              PragmaIntroducerKind Introducer);
1878 public:
1879   void HandlePragmaOnce(Token &OnceTok);
1880   void HandlePragmaMark();
1881   void HandlePragmaPoison(Token &PoisonTok);
1882   void HandlePragmaSystemHeader(Token &SysHeaderTok);
1883   void HandlePragmaDependency(Token &DependencyTok);
1884   void HandlePragmaPushMacro(Token &Tok);
1885   void HandlePragmaPopMacro(Token &Tok);
1886   void HandlePragmaIncludeAlias(Token &Tok);
1887   IdentifierInfo *ParsePragmaPushOrPopMacro(Token &Tok);
1888 
1889   // Return true and store the first token only if any CommentHandler
1890   // has inserted some tokens and getCommentRetentionState() is false.
1891   bool HandleComment(Token &Token, SourceRange Comment);
1892 
1893   /// \brief A macro is used, update information about macros that need unused
1894   /// warnings.
1895   void markMacroAsUsed(MacroInfo *MI);
1896 };
1897 
1898 /// \brief Abstract base class that describes a handler that will receive
1899 /// source ranges for each of the comments encountered in the source file.
1900 class CommentHandler {
1901 public:
1902   virtual ~CommentHandler();
1903 
1904   // The handler shall return true if it has pushed any tokens
1905   // to be read using e.g. EnterToken or EnterTokenStream.
1906   virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0;
1907 };
1908 
1909 }  // end namespace clang
1910 
1911 #endif
1912