1 //===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the Preprocessor interface.
11 //
12 //===----------------------------------------------------------------------===//
13 //
14 // Options to support:
15 // -H - Print the name of each header file used.
16 // -d[DNI] - Dump various things.
17 // -fworking-directory - #line's with preprocessor's working dir.
18 // -fpreprocessed
19 // -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
20 // -W*
21 // -w
22 //
23 // Messages to emit:
24 // "Multiple include guards may be useful for:\n"
25 //
26 //===----------------------------------------------------------------------===//
27
28 #include "clang/Lex/Preprocessor.h"
29 #include "clang/Basic/FileManager.h"
30 #include "clang/Basic/FileSystemStatCache.h"
31 #include "clang/Basic/SourceManager.h"
32 #include "clang/Basic/TargetInfo.h"
33 #include "clang/Lex/CodeCompletionHandler.h"
34 #include "clang/Lex/ExternalPreprocessorSource.h"
35 #include "clang/Lex/HeaderSearch.h"
36 #include "clang/Lex/LexDiagnostic.h"
37 #include "clang/Lex/LiteralSupport.h"
38 #include "clang/Lex/MacroArgs.h"
39 #include "clang/Lex/MacroInfo.h"
40 #include "clang/Lex/ModuleLoader.h"
41 #include "clang/Lex/PTHManager.h"
42 #include "clang/Lex/Pragma.h"
43 #include "clang/Lex/PreprocessingRecord.h"
44 #include "clang/Lex/PreprocessorOptions.h"
45 #include "clang/Lex/ScratchBuffer.h"
46 #include "llvm/ADT/APFloat.h"
47 #include "llvm/ADT/STLExtras.h"
48 #include "llvm/ADT/SmallString.h"
49 #include "llvm/ADT/StringExtras.h"
50 #include "llvm/Support/Capacity.h"
51 #include "llvm/Support/ConvertUTF.h"
52 #include "llvm/Support/MemoryBuffer.h"
53 #include "llvm/Support/raw_ostream.h"
54 using namespace clang;
55
56 //===----------------------------------------------------------------------===//
~ExternalPreprocessorSource()57 ExternalPreprocessorSource::~ExternalPreprocessorSource() { }
58
Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts,DiagnosticsEngine & diags,LangOptions & opts,SourceManager & SM,HeaderSearch & Headers,ModuleLoader & TheModuleLoader,IdentifierInfoLookup * IILookup,bool OwnsHeaders,TranslationUnitKind TUKind)59 Preprocessor::Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts,
60 DiagnosticsEngine &diags, LangOptions &opts,
61 SourceManager &SM, HeaderSearch &Headers,
62 ModuleLoader &TheModuleLoader,
63 IdentifierInfoLookup *IILookup, bool OwnsHeaders,
64 TranslationUnitKind TUKind)
65 : PPOpts(PPOpts), Diags(&diags), LangOpts(opts), Target(nullptr),
66 AuxTarget(nullptr), FileMgr(Headers.getFileMgr()), SourceMgr(SM),
67 ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers),
68 TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
69 Identifiers(opts, IILookup),
70 PragmaHandlers(new PragmaNamespace(StringRef())),
71 IncrementalProcessing(false), TUKind(TUKind), CodeComplete(nullptr),
72 CodeCompletionFile(nullptr), CodeCompletionOffset(0),
73 LastTokenWasAt(false), ModuleImportExpectsIdentifier(false),
74 CodeCompletionReached(0), MainFileDir(nullptr),
75 SkipMainFilePreamble(0, true), CurPPLexer(nullptr), CurDirLookup(nullptr),
76 CurLexerKind(CLK_Lexer), CurSubmodule(nullptr), Callbacks(nullptr),
77 CurSubmoduleState(&NullSubmoduleState), MacroArgCache(nullptr),
78 Record(nullptr), MIChainHead(nullptr), DeserialMIChainHead(nullptr) {
79 OwnsHeaderSearch = OwnsHeaders;
80
81 CounterValue = 0; // __COUNTER__ starts at 0.
82
83 // Clear stats.
84 NumDirectives = NumDefined = NumUndefined = NumPragma = 0;
85 NumIf = NumElse = NumEndif = 0;
86 NumEnteredSourceFiles = 0;
87 NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0;
88 NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0;
89 MaxIncludeStackDepth = 0;
90 NumSkipped = 0;
91
92 // Default to discarding comments.
93 KeepComments = false;
94 KeepMacroComments = false;
95 SuppressIncludeNotFoundError = false;
96
97 // Macro expansion is enabled.
98 DisableMacroExpansion = false;
99 MacroExpansionInDirectivesOverride = false;
100 InMacroArgs = false;
101 InMacroArgPreExpansion = false;
102 NumCachedTokenLexers = 0;
103 PragmasEnabled = true;
104 ParsingIfOrElifDirective = false;
105 PreprocessedOutput = false;
106
107 CachedLexPos = 0;
108
109 // We haven't read anything from the external source.
110 ReadMacrosFromExternalSource = false;
111
112 // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro.
113 // This gets unpoisoned where it is allowed.
114 (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
115 SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use);
116
117 // Initialize the pragma handlers.
118 RegisterBuiltinPragmas();
119
120 // Initialize builtin macros like __LINE__ and friends.
121 RegisterBuiltinMacros();
122
123 if(LangOpts.Borland) {
124 Ident__exception_info = getIdentifierInfo("_exception_info");
125 Ident___exception_info = getIdentifierInfo("__exception_info");
126 Ident_GetExceptionInfo = getIdentifierInfo("GetExceptionInformation");
127 Ident__exception_code = getIdentifierInfo("_exception_code");
128 Ident___exception_code = getIdentifierInfo("__exception_code");
129 Ident_GetExceptionCode = getIdentifierInfo("GetExceptionCode");
130 Ident__abnormal_termination = getIdentifierInfo("_abnormal_termination");
131 Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination");
132 Ident_AbnormalTermination = getIdentifierInfo("AbnormalTermination");
133 } else {
134 Ident__exception_info = Ident__exception_code = nullptr;
135 Ident__abnormal_termination = Ident___exception_info = nullptr;
136 Ident___exception_code = Ident___abnormal_termination = nullptr;
137 Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
138 Ident_AbnormalTermination = nullptr;
139 }
140 }
141
~Preprocessor()142 Preprocessor::~Preprocessor() {
143 assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");
144
145 IncludeMacroStack.clear();
146
147 // Destroy any macro definitions.
148 while (MacroInfoChain *I = MIChainHead) {
149 MIChainHead = I->Next;
150 I->~MacroInfoChain();
151 }
152
153 // Free any cached macro expanders.
154 // This populates MacroArgCache, so all TokenLexers need to be destroyed
155 // before the code below that frees up the MacroArgCache list.
156 std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr);
157 CurTokenLexer.reset();
158
159 while (DeserializedMacroInfoChain *I = DeserialMIChainHead) {
160 DeserialMIChainHead = I->Next;
161 I->~DeserializedMacroInfoChain();
162 }
163
164 // Free any cached MacroArgs.
165 for (MacroArgs *ArgList = MacroArgCache; ArgList;)
166 ArgList = ArgList->deallocate();
167
168 // Delete the header search info, if we own it.
169 if (OwnsHeaderSearch)
170 delete &HeaderInfo;
171 }
172
Initialize(const TargetInfo & Target,const TargetInfo * AuxTarget)173 void Preprocessor::Initialize(const TargetInfo &Target,
174 const TargetInfo *AuxTarget) {
175 assert((!this->Target || this->Target == &Target) &&
176 "Invalid override of target information");
177 this->Target = &Target;
178
179 assert((!this->AuxTarget || this->AuxTarget == AuxTarget) &&
180 "Invalid override of aux target information.");
181 this->AuxTarget = AuxTarget;
182
183 // Initialize information about built-ins.
184 BuiltinInfo.InitializeTarget(Target, AuxTarget);
185 HeaderInfo.setTarget(Target);
186 }
187
InitializeForModelFile()188 void Preprocessor::InitializeForModelFile() {
189 NumEnteredSourceFiles = 0;
190
191 // Reset pragmas
192 PragmaHandlersBackup = std::move(PragmaHandlers);
193 PragmaHandlers = llvm::make_unique<PragmaNamespace>(StringRef());
194 RegisterBuiltinPragmas();
195
196 // Reset PredefinesFileID
197 PredefinesFileID = FileID();
198 }
199
FinalizeForModelFile()200 void Preprocessor::FinalizeForModelFile() {
201 NumEnteredSourceFiles = 1;
202
203 PragmaHandlers = std::move(PragmaHandlersBackup);
204 }
205
setPTHManager(PTHManager * pm)206 void Preprocessor::setPTHManager(PTHManager* pm) {
207 PTH.reset(pm);
208 FileMgr.addStatCache(PTH->createStatCache());
209 }
210
DumpToken(const Token & Tok,bool DumpFlags) const211 void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
212 llvm::errs() << tok::getTokenName(Tok.getKind()) << " '"
213 << getSpelling(Tok) << "'";
214
215 if (!DumpFlags) return;
216
217 llvm::errs() << "\t";
218 if (Tok.isAtStartOfLine())
219 llvm::errs() << " [StartOfLine]";
220 if (Tok.hasLeadingSpace())
221 llvm::errs() << " [LeadingSpace]";
222 if (Tok.isExpandDisabled())
223 llvm::errs() << " [ExpandDisabled]";
224 if (Tok.needsCleaning()) {
225 const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
226 llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
227 << "']";
228 }
229
230 llvm::errs() << "\tLoc=<";
231 DumpLocation(Tok.getLocation());
232 llvm::errs() << ">";
233 }
234
DumpLocation(SourceLocation Loc) const235 void Preprocessor::DumpLocation(SourceLocation Loc) const {
236 Loc.dump(SourceMgr);
237 }
238
DumpMacro(const MacroInfo & MI) const239 void Preprocessor::DumpMacro(const MacroInfo &MI) const {
240 llvm::errs() << "MACRO: ";
241 for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
242 DumpToken(MI.getReplacementToken(i));
243 llvm::errs() << " ";
244 }
245 llvm::errs() << "\n";
246 }
247
PrintStats()248 void Preprocessor::PrintStats() {
249 llvm::errs() << "\n*** Preprocessor Stats:\n";
250 llvm::errs() << NumDirectives << " directives found:\n";
251 llvm::errs() << " " << NumDefined << " #define.\n";
252 llvm::errs() << " " << NumUndefined << " #undef.\n";
253 llvm::errs() << " #include/#include_next/#import:\n";
254 llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n";
255 llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n";
256 llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n";
257 llvm::errs() << " " << NumElse << " #else/#elif.\n";
258 llvm::errs() << " " << NumEndif << " #endif.\n";
259 llvm::errs() << " " << NumPragma << " #pragma.\n";
260 llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
261
262 llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
263 << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
264 << NumFastMacroExpanded << " on the fast path.\n";
265 llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
266 << " token paste (##) operations performed, "
267 << NumFastTokenPaste << " on the fast path.\n";
268
269 llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
270
271 llvm::errs() << "\n BumpPtr: " << BP.getTotalMemory();
272 llvm::errs() << "\n Macro Expanded Tokens: "
273 << llvm::capacity_in_bytes(MacroExpandedTokens);
274 llvm::errs() << "\n Predefines Buffer: " << Predefines.capacity();
275 // FIXME: List information for all submodules.
276 llvm::errs() << "\n Macros: "
277 << llvm::capacity_in_bytes(CurSubmoduleState->Macros);
278 llvm::errs() << "\n #pragma push_macro Info: "
279 << llvm::capacity_in_bytes(PragmaPushMacroInfo);
280 llvm::errs() << "\n Poison Reasons: "
281 << llvm::capacity_in_bytes(PoisonReasons);
282 llvm::errs() << "\n Comment Handlers: "
283 << llvm::capacity_in_bytes(CommentHandlers) << "\n";
284 }
285
286 Preprocessor::macro_iterator
macro_begin(bool IncludeExternalMacros) const287 Preprocessor::macro_begin(bool IncludeExternalMacros) const {
288 if (IncludeExternalMacros && ExternalSource &&
289 !ReadMacrosFromExternalSource) {
290 ReadMacrosFromExternalSource = true;
291 ExternalSource->ReadDefinedMacros();
292 }
293
294 // Make sure we cover all macros in visible modules.
295 for (const ModuleMacro &Macro : ModuleMacros)
296 CurSubmoduleState->Macros.insert(std::make_pair(Macro.II, MacroState()));
297
298 return CurSubmoduleState->Macros.begin();
299 }
300
getTotalMemory() const301 size_t Preprocessor::getTotalMemory() const {
302 return BP.getTotalMemory()
303 + llvm::capacity_in_bytes(MacroExpandedTokens)
304 + Predefines.capacity() /* Predefines buffer. */
305 // FIXME: Include sizes from all submodules, and include MacroInfo sizes,
306 // and ModuleMacros.
307 + llvm::capacity_in_bytes(CurSubmoduleState->Macros)
308 + llvm::capacity_in_bytes(PragmaPushMacroInfo)
309 + llvm::capacity_in_bytes(PoisonReasons)
310 + llvm::capacity_in_bytes(CommentHandlers);
311 }
312
313 Preprocessor::macro_iterator
macro_end(bool IncludeExternalMacros) const314 Preprocessor::macro_end(bool IncludeExternalMacros) const {
315 if (IncludeExternalMacros && ExternalSource &&
316 !ReadMacrosFromExternalSource) {
317 ReadMacrosFromExternalSource = true;
318 ExternalSource->ReadDefinedMacros();
319 }
320
321 return CurSubmoduleState->Macros.end();
322 }
323
324 /// \brief Compares macro tokens with a specified token value sequence.
MacroDefinitionEquals(const MacroInfo * MI,ArrayRef<TokenValue> Tokens)325 static bool MacroDefinitionEquals(const MacroInfo *MI,
326 ArrayRef<TokenValue> Tokens) {
327 return Tokens.size() == MI->getNumTokens() &&
328 std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin());
329 }
330
getLastMacroWithSpelling(SourceLocation Loc,ArrayRef<TokenValue> Tokens) const331 StringRef Preprocessor::getLastMacroWithSpelling(
332 SourceLocation Loc,
333 ArrayRef<TokenValue> Tokens) const {
334 SourceLocation BestLocation;
335 StringRef BestSpelling;
336 for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end();
337 I != E; ++I) {
338 const MacroDirective::DefInfo
339 Def = I->second.findDirectiveAtLoc(Loc, SourceMgr);
340 if (!Def || !Def.getMacroInfo())
341 continue;
342 if (!Def.getMacroInfo()->isObjectLike())
343 continue;
344 if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens))
345 continue;
346 SourceLocation Location = Def.getLocation();
347 // Choose the macro defined latest.
348 if (BestLocation.isInvalid() ||
349 (Location.isValid() &&
350 SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) {
351 BestLocation = Location;
352 BestSpelling = I->first->getName();
353 }
354 }
355 return BestSpelling;
356 }
357
recomputeCurLexerKind()358 void Preprocessor::recomputeCurLexerKind() {
359 if (CurLexer)
360 CurLexerKind = CLK_Lexer;
361 else if (CurPTHLexer)
362 CurLexerKind = CLK_PTHLexer;
363 else if (CurTokenLexer)
364 CurLexerKind = CLK_TokenLexer;
365 else
366 CurLexerKind = CLK_CachingLexer;
367 }
368
SetCodeCompletionPoint(const FileEntry * File,unsigned CompleteLine,unsigned CompleteColumn)369 bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File,
370 unsigned CompleteLine,
371 unsigned CompleteColumn) {
372 assert(File);
373 assert(CompleteLine && CompleteColumn && "Starts from 1:1");
374 assert(!CodeCompletionFile && "Already set");
375
376 using llvm::MemoryBuffer;
377
378 // Load the actual file's contents.
379 bool Invalid = false;
380 const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid);
381 if (Invalid)
382 return true;
383
384 // Find the byte position of the truncation point.
385 const char *Position = Buffer->getBufferStart();
386 for (unsigned Line = 1; Line < CompleteLine; ++Line) {
387 for (; *Position; ++Position) {
388 if (*Position != '\r' && *Position != '\n')
389 continue;
390
391 // Eat \r\n or \n\r as a single line.
392 if ((Position[1] == '\r' || Position[1] == '\n') &&
393 Position[0] != Position[1])
394 ++Position;
395 ++Position;
396 break;
397 }
398 }
399
400 Position += CompleteColumn - 1;
401
402 // If pointing inside the preamble, adjust the position at the beginning of
403 // the file after the preamble.
404 if (SkipMainFilePreamble.first &&
405 SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) {
406 if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first)
407 Position = Buffer->getBufferStart() + SkipMainFilePreamble.first;
408 }
409
410 if (Position > Buffer->getBufferEnd())
411 Position = Buffer->getBufferEnd();
412
413 CodeCompletionFile = File;
414 CodeCompletionOffset = Position - Buffer->getBufferStart();
415
416 std::unique_ptr<MemoryBuffer> NewBuffer =
417 MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1,
418 Buffer->getBufferIdentifier());
419 char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart());
420 char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf);
421 *NewPos = '\0';
422 std::copy(Position, Buffer->getBufferEnd(), NewPos+1);
423 SourceMgr.overrideFileContents(File, std::move(NewBuffer));
424
425 return false;
426 }
427
CodeCompleteNaturalLanguage()428 void Preprocessor::CodeCompleteNaturalLanguage() {
429 if (CodeComplete)
430 CodeComplete->CodeCompleteNaturalLanguage();
431 setCodeCompletionReached();
432 }
433
434 /// getSpelling - This method is used to get the spelling of a token into a
435 /// SmallVector. Note that the returned StringRef may not point to the
436 /// supplied buffer if a copy can be avoided.
getSpelling(const Token & Tok,SmallVectorImpl<char> & Buffer,bool * Invalid) const437 StringRef Preprocessor::getSpelling(const Token &Tok,
438 SmallVectorImpl<char> &Buffer,
439 bool *Invalid) const {
440 // NOTE: this has to be checked *before* testing for an IdentifierInfo.
441 if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) {
442 // Try the fast path.
443 if (const IdentifierInfo *II = Tok.getIdentifierInfo())
444 return II->getName();
445 }
446
447 // Resize the buffer if we need to copy into it.
448 if (Tok.needsCleaning())
449 Buffer.resize(Tok.getLength());
450
451 const char *Ptr = Buffer.data();
452 unsigned Len = getSpelling(Tok, Ptr, Invalid);
453 return StringRef(Ptr, Len);
454 }
455
456 /// CreateString - Plop the specified string into a scratch buffer and return a
457 /// location for it. If specified, the source location provides a source
458 /// location for the token.
CreateString(StringRef Str,Token & Tok,SourceLocation ExpansionLocStart,SourceLocation ExpansionLocEnd)459 void Preprocessor::CreateString(StringRef Str, Token &Tok,
460 SourceLocation ExpansionLocStart,
461 SourceLocation ExpansionLocEnd) {
462 Tok.setLength(Str.size());
463
464 const char *DestPtr;
465 SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr);
466
467 if (ExpansionLocStart.isValid())
468 Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart,
469 ExpansionLocEnd, Str.size());
470 Tok.setLocation(Loc);
471
472 // If this is a raw identifier or a literal token, set the pointer data.
473 if (Tok.is(tok::raw_identifier))
474 Tok.setRawIdentifierData(DestPtr);
475 else if (Tok.isLiteral())
476 Tok.setLiteralData(DestPtr);
477 }
478
getCurrentModule()479 Module *Preprocessor::getCurrentModule() {
480 if (getLangOpts().CurrentModule.empty())
481 return nullptr;
482
483 return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule);
484 }
485
486 //===----------------------------------------------------------------------===//
487 // Preprocessor Initialization Methods
488 //===----------------------------------------------------------------------===//
489
490
491 /// EnterMainSourceFile - Enter the specified FileID as the main source file,
492 /// which implicitly adds the builtin defines etc.
EnterMainSourceFile()493 void Preprocessor::EnterMainSourceFile() {
494 // We do not allow the preprocessor to reenter the main file. Doing so will
495 // cause FileID's to accumulate information from both runs (e.g. #line
496 // information) and predefined macros aren't guaranteed to be set properly.
497 assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!");
498 FileID MainFileID = SourceMgr.getMainFileID();
499
500 // If MainFileID is loaded it means we loaded an AST file, no need to enter
501 // a main file.
502 if (!SourceMgr.isLoadedFileID(MainFileID)) {
503 // Enter the main file source buffer.
504 EnterSourceFile(MainFileID, nullptr, SourceLocation());
505
506 // If we've been asked to skip bytes in the main file (e.g., as part of a
507 // precompiled preamble), do so now.
508 if (SkipMainFilePreamble.first > 0)
509 CurLexer->SkipBytes(SkipMainFilePreamble.first,
510 SkipMainFilePreamble.second);
511
512 // Tell the header info that the main file was entered. If the file is later
513 // #imported, it won't be re-entered.
514 if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
515 HeaderInfo.IncrementIncludeCount(FE);
516 }
517
518 // Preprocess Predefines to populate the initial preprocessor state.
519 std::unique_ptr<llvm::MemoryBuffer> SB =
520 llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>");
521 assert(SB && "Cannot create predefined source buffer");
522 FileID FID = SourceMgr.createFileID(std::move(SB));
523 assert(FID.isValid() && "Could not create FileID for predefines?");
524 setPredefinesFileID(FID);
525
526 // Start parsing the predefines.
527 EnterSourceFile(FID, nullptr, SourceLocation());
528 }
529
EndSourceFile()530 void Preprocessor::EndSourceFile() {
531 // Notify the client that we reached the end of the source file.
532 if (Callbacks)
533 Callbacks->EndOfMainFile();
534 }
535
536 //===----------------------------------------------------------------------===//
537 // Lexer Event Handling.
538 //===----------------------------------------------------------------------===//
539
540 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
541 /// identifier information for the token and install it into the token,
542 /// updating the token kind accordingly.
LookUpIdentifierInfo(Token & Identifier) const543 IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
544 assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
545
546 // Look up this token, see if it is a macro, or if it is a language keyword.
547 IdentifierInfo *II;
548 if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
549 // No cleaning needed, just use the characters from the lexed buffer.
550 II = getIdentifierInfo(Identifier.getRawIdentifier());
551 } else {
552 // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
553 SmallString<64> IdentifierBuffer;
554 StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
555
556 if (Identifier.hasUCN()) {
557 SmallString<64> UCNIdentifierBuffer;
558 expandUCNs(UCNIdentifierBuffer, CleanedStr);
559 II = getIdentifierInfo(UCNIdentifierBuffer);
560 } else {
561 II = getIdentifierInfo(CleanedStr);
562 }
563 }
564
565 // Update the token info (identifier info and appropriate token kind).
566 Identifier.setIdentifierInfo(II);
567 Identifier.setKind(II->getTokenID());
568
569 return II;
570 }
571
SetPoisonReason(IdentifierInfo * II,unsigned DiagID)572 void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) {
573 PoisonReasons[II] = DiagID;
574 }
575
PoisonSEHIdentifiers(bool Poison)576 void Preprocessor::PoisonSEHIdentifiers(bool Poison) {
577 assert(Ident__exception_code && Ident__exception_info);
578 assert(Ident___exception_code && Ident___exception_info);
579 Ident__exception_code->setIsPoisoned(Poison);
580 Ident___exception_code->setIsPoisoned(Poison);
581 Ident_GetExceptionCode->setIsPoisoned(Poison);
582 Ident__exception_info->setIsPoisoned(Poison);
583 Ident___exception_info->setIsPoisoned(Poison);
584 Ident_GetExceptionInfo->setIsPoisoned(Poison);
585 Ident__abnormal_termination->setIsPoisoned(Poison);
586 Ident___abnormal_termination->setIsPoisoned(Poison);
587 Ident_AbnormalTermination->setIsPoisoned(Poison);
588 }
589
HandlePoisonedIdentifier(Token & Identifier)590 void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) {
591 assert(Identifier.getIdentifierInfo() &&
592 "Can't handle identifiers without identifier info!");
593 llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it =
594 PoisonReasons.find(Identifier.getIdentifierInfo());
595 if(it == PoisonReasons.end())
596 Diag(Identifier, diag::err_pp_used_poisoned_id);
597 else
598 Diag(Identifier,it->second) << Identifier.getIdentifierInfo();
599 }
600
601 /// \brief Returns a diagnostic message kind for reporting a future keyword as
602 /// appropriate for the identifier and specified language.
getFutureCompatDiagKind(const IdentifierInfo & II,const LangOptions & LangOpts)603 static diag::kind getFutureCompatDiagKind(const IdentifierInfo &II,
604 const LangOptions &LangOpts) {
605 assert(II.isFutureCompatKeyword() && "diagnostic should not be needed");
606
607 if (LangOpts.CPlusPlus)
608 return llvm::StringSwitch<diag::kind>(II.getName())
609 #define CXX11_KEYWORD(NAME, FLAGS) \
610 .Case(#NAME, diag::warn_cxx11_keyword)
611 #include "clang/Basic/TokenKinds.def"
612 ;
613
614 llvm_unreachable(
615 "Keyword not known to come from a newer Standard or proposed Standard");
616 }
617
618 /// HandleIdentifier - This callback is invoked when the lexer reads an
619 /// identifier. This callback looks up the identifier in the map and/or
620 /// potentially macro expands it or turns it into a named token (like 'for').
621 ///
622 /// Note that callers of this method are guarded by checking the
623 /// IdentifierInfo's 'isHandleIdentifierCase' bit. If this method changes, the
624 /// IdentifierInfo methods that compute these properties will need to change to
625 /// match.
HandleIdentifier(Token & Identifier)626 bool Preprocessor::HandleIdentifier(Token &Identifier) {
627 assert(Identifier.getIdentifierInfo() &&
628 "Can't handle identifiers without identifier info!");
629
630 IdentifierInfo &II = *Identifier.getIdentifierInfo();
631
632 // If the information about this identifier is out of date, update it from
633 // the external source.
634 // We have to treat __VA_ARGS__ in a special way, since it gets
635 // serialized with isPoisoned = true, but our preprocessor may have
636 // unpoisoned it if we're defining a C99 macro.
637 if (II.isOutOfDate()) {
638 bool CurrentIsPoisoned = false;
639 if (&II == Ident__VA_ARGS__)
640 CurrentIsPoisoned = Ident__VA_ARGS__->isPoisoned();
641
642 ExternalSource->updateOutOfDateIdentifier(II);
643 Identifier.setKind(II.getTokenID());
644
645 if (&II == Ident__VA_ARGS__)
646 II.setIsPoisoned(CurrentIsPoisoned);
647 }
648
649 // If this identifier was poisoned, and if it was not produced from a macro
650 // expansion, emit an error.
651 if (II.isPoisoned() && CurPPLexer) {
652 HandlePoisonedIdentifier(Identifier);
653 }
654
655 // If this is a macro to be expanded, do it.
656 if (MacroDefinition MD = getMacroDefinition(&II)) {
657 auto *MI = MD.getMacroInfo();
658 assert(MI && "macro definition with no macro info?");
659 if (!DisableMacroExpansion) {
660 if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
661 // C99 6.10.3p10: If the preprocessing token immediately after the
662 // macro name isn't a '(', this macro should not be expanded.
663 if (!MI->isFunctionLike() || isNextPPTokenLParen())
664 return HandleMacroExpandedIdentifier(Identifier, MD);
665 } else {
666 // C99 6.10.3.4p2 says that a disabled macro may never again be
667 // expanded, even if it's in a context where it could be expanded in the
668 // future.
669 Identifier.setFlag(Token::DisableExpand);
670 if (MI->isObjectLike() || isNextPPTokenLParen())
671 Diag(Identifier, diag::pp_disabled_macro_expansion);
672 }
673 }
674 }
675
676 // If this identifier is a keyword in a newer Standard or proposed Standard,
677 // produce a warning. Don't warn if we're not considering macro expansion,
678 // since this identifier might be the name of a macro.
679 // FIXME: This warning is disabled in cases where it shouldn't be, like
680 // "#define constexpr constexpr", "int constexpr;"
681 if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {
682 Diag(Identifier, getFutureCompatDiagKind(II, getLangOpts()))
683 << II.getName();
684 // Don't diagnose this keyword again in this translation unit.
685 II.setIsFutureCompatKeyword(false);
686 }
687
688 // C++ 2.11p2: If this is an alternative representation of a C++ operator,
689 // then we act as if it is the actual operator and not the textual
690 // representation of it.
691 if (II.isCPlusPlusOperatorKeyword())
692 Identifier.setIdentifierInfo(nullptr);
693
694 // If this is an extension token, diagnose its use.
695 // We avoid diagnosing tokens that originate from macro definitions.
696 // FIXME: This warning is disabled in cases where it shouldn't be,
697 // like "#define TY typeof", "TY(1) x".
698 if (II.isExtensionToken() && !DisableMacroExpansion)
699 Diag(Identifier, diag::ext_token_used);
700
701 // If this is the 'import' contextual keyword following an '@', note
702 // that the next token indicates a module name.
703 //
704 // Note that we do not treat 'import' as a contextual
705 // keyword when we're in a caching lexer, because caching lexers only get
706 // used in contexts where import declarations are disallowed.
707 if (LastTokenWasAt && II.isModulesImport() && !InMacroArgs &&
708 !DisableMacroExpansion &&
709 (getLangOpts().Modules || getLangOpts().DebuggerSupport) &&
710 CurLexerKind != CLK_CachingLexer) {
711 ModuleImportLoc = Identifier.getLocation();
712 ModuleImportPath.clear();
713 ModuleImportExpectsIdentifier = true;
714 CurLexerKind = CLK_LexAfterModuleImport;
715 }
716 return true;
717 }
718
Lex(Token & Result)719 void Preprocessor::Lex(Token &Result) {
720 // We loop here until a lex function returns a token; this avoids recursion.
721 bool ReturnedToken;
722 do {
723 switch (CurLexerKind) {
724 case CLK_Lexer:
725 ReturnedToken = CurLexer->Lex(Result);
726 break;
727 case CLK_PTHLexer:
728 ReturnedToken = CurPTHLexer->Lex(Result);
729 break;
730 case CLK_TokenLexer:
731 ReturnedToken = CurTokenLexer->Lex(Result);
732 break;
733 case CLK_CachingLexer:
734 CachingLex(Result);
735 ReturnedToken = true;
736 break;
737 case CLK_LexAfterModuleImport:
738 LexAfterModuleImport(Result);
739 ReturnedToken = true;
740 break;
741 }
742 } while (!ReturnedToken);
743
744 LastTokenWasAt = Result.is(tok::at);
745 }
746
747
748 /// \brief Lex a token following the 'import' contextual keyword.
749 ///
LexAfterModuleImport(Token & Result)750 void Preprocessor::LexAfterModuleImport(Token &Result) {
751 // Figure out what kind of lexer we actually have.
752 recomputeCurLexerKind();
753
754 // Lex the next token.
755 Lex(Result);
756
757 // The token sequence
758 //
759 // import identifier (. identifier)*
760 //
761 // indicates a module import directive. We already saw the 'import'
762 // contextual keyword, so now we're looking for the identifiers.
763 if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
764 // We expected to see an identifier here, and we did; continue handling
765 // identifiers.
766 ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(),
767 Result.getLocation()));
768 ModuleImportExpectsIdentifier = false;
769 CurLexerKind = CLK_LexAfterModuleImport;
770 return;
771 }
772
773 // If we're expecting a '.' or a ';', and we got a '.', then wait until we
774 // see the next identifier.
775 if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
776 ModuleImportExpectsIdentifier = true;
777 CurLexerKind = CLK_LexAfterModuleImport;
778 return;
779 }
780
781 // If we have a non-empty module path, load the named module.
782 if (!ModuleImportPath.empty()) {
783 Module *Imported = nullptr;
784 if (getLangOpts().Modules) {
785 Imported = TheModuleLoader.loadModule(ModuleImportLoc,
786 ModuleImportPath,
787 Module::Hidden,
788 /*IsIncludeDirective=*/false);
789 if (Imported)
790 makeModuleVisible(Imported, ModuleImportLoc);
791 }
792 if (Callbacks && (getLangOpts().Modules || getLangOpts().DebuggerSupport))
793 Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported);
794 }
795 }
796
makeModuleVisible(Module * M,SourceLocation Loc)797 void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) {
798 CurSubmoduleState->VisibleModules.setVisible(
799 M, Loc, [](Module *) {},
800 [&](ArrayRef<Module *> Path, Module *Conflict, StringRef Message) {
801 // FIXME: Include the path in the diagnostic.
802 // FIXME: Include the import location for the conflicting module.
803 Diag(ModuleImportLoc, diag::warn_module_conflict)
804 << Path[0]->getFullModuleName()
805 << Conflict->getFullModuleName()
806 << Message;
807 });
808
809 // Add this module to the imports list of the currently-built submodule.
810 if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
811 BuildingSubmoduleStack.back().M->Imports.insert(M);
812 }
813
FinishLexStringLiteral(Token & Result,std::string & String,const char * DiagnosticTag,bool AllowMacroExpansion)814 bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
815 const char *DiagnosticTag,
816 bool AllowMacroExpansion) {
817 // We need at least one string literal.
818 if (Result.isNot(tok::string_literal)) {
819 Diag(Result, diag::err_expected_string_literal)
820 << /*Source='in...'*/0 << DiagnosticTag;
821 return false;
822 }
823
824 // Lex string literal tokens, optionally with macro expansion.
825 SmallVector<Token, 4> StrToks;
826 do {
827 StrToks.push_back(Result);
828
829 if (Result.hasUDSuffix())
830 Diag(Result, diag::err_invalid_string_udl);
831
832 if (AllowMacroExpansion)
833 Lex(Result);
834 else
835 LexUnexpandedToken(Result);
836 } while (Result.is(tok::string_literal));
837
838 // Concatenate and parse the strings.
839 StringLiteralParser Literal(StrToks, *this);
840 assert(Literal.isAscii() && "Didn't allow wide strings in");
841
842 if (Literal.hadError)
843 return false;
844
845 if (Literal.Pascal) {
846 Diag(StrToks[0].getLocation(), diag::err_expected_string_literal)
847 << /*Source='in...'*/0 << DiagnosticTag;
848 return false;
849 }
850
851 String = Literal.GetString();
852 return true;
853 }
854
parseSimpleIntegerLiteral(Token & Tok,uint64_t & Value)855 bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
856 assert(Tok.is(tok::numeric_constant));
857 SmallString<8> IntegerBuffer;
858 bool NumberInvalid = false;
859 StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid);
860 if (NumberInvalid)
861 return false;
862 NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this);
863 if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix())
864 return false;
865 llvm::APInt APVal(64, 0);
866 if (Literal.GetIntegerValue(APVal))
867 return false;
868 Lex(Tok);
869 Value = APVal.getLimitedValue();
870 return true;
871 }
872
addCommentHandler(CommentHandler * Handler)873 void Preprocessor::addCommentHandler(CommentHandler *Handler) {
874 assert(Handler && "NULL comment handler");
875 assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) ==
876 CommentHandlers.end() && "Comment handler already registered");
877 CommentHandlers.push_back(Handler);
878 }
879
removeCommentHandler(CommentHandler * Handler)880 void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
881 std::vector<CommentHandler *>::iterator Pos
882 = std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler);
883 assert(Pos != CommentHandlers.end() && "Comment handler not registered");
884 CommentHandlers.erase(Pos);
885 }
886
HandleComment(Token & result,SourceRange Comment)887 bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
888 bool AnyPendingTokens = false;
889 for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(),
890 HEnd = CommentHandlers.end();
891 H != HEnd; ++H) {
892 if ((*H)->HandleComment(*this, Comment))
893 AnyPendingTokens = true;
894 }
895 if (!AnyPendingTokens || getCommentRetentionState())
896 return false;
897 Lex(result);
898 return true;
899 }
900
~ModuleLoader()901 ModuleLoader::~ModuleLoader() { }
902
~CommentHandler()903 CommentHandler::~CommentHandler() { }
904
~CodeCompletionHandler()905 CodeCompletionHandler::~CodeCompletionHandler() { }
906
createPreprocessingRecord()907 void Preprocessor::createPreprocessingRecord() {
908 if (Record)
909 return;
910
911 Record = new PreprocessingRecord(getSourceManager());
912 addPPCallbacks(std::unique_ptr<PPCallbacks>(Record));
913 }
914