1 //===--- PTHLexer.cpp - Lex from a token stream ---------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the PTHLexer interface.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Lex/PTHLexer.h"
15 #include "clang/Basic/FileManager.h"
16 #include "clang/Basic/FileSystemStatCache.h"
17 #include "clang/Basic/IdentifierTable.h"
18 #include "clang/Basic/TokenKinds.h"
19 #include "clang/Lex/LexDiagnostic.h"
20 #include "clang/Lex/PTHManager.h"
21 #include "clang/Lex/Preprocessor.h"
22 #include "clang/Lex/Token.h"
23 #include "llvm/ADT/StringExtras.h"
24 #include "llvm/ADT/StringMap.h"
25 #include "llvm/Support/EndianStream.h"
26 #include "llvm/Support/MemoryBuffer.h"
27 #include <memory>
28 #include <system_error>
29 using namespace clang;
30 
// Size in bytes of one serialized token record in the PTH token buffer:
// a 1-byte token kind, a 1-byte flag set and a 2-byte length (packed into the
// first 32-bit word), followed by a 32-bit identifier ID / spelling offset and
// a 32-bit file offset.
static const unsigned StoredTokenSize =
    sizeof(uint8_t) + sizeof(uint8_t) + sizeof(uint16_t) +
    sizeof(uint32_t) + sizeof(uint32_t);
32 
33 //===----------------------------------------------------------------------===//
34 // PTHLexer methods.
35 //===----------------------------------------------------------------------===//
36 
PTHLexer(Preprocessor & PP,FileID FID,const unsigned char * D,const unsigned char * ppcond,PTHManager & PM)37 PTHLexer::PTHLexer(Preprocessor &PP, FileID FID, const unsigned char *D,
38                    const unsigned char *ppcond, PTHManager &PM)
39   : PreprocessorLexer(&PP, FID), TokBuf(D), CurPtr(D), LastHashTokPtr(nullptr),
40     PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM) {
41 
42   FileStartLoc = PP.getSourceManager().getLocForStartOfFile(FID);
43 }
44 
Lex(Token & Tok)45 bool PTHLexer::Lex(Token& Tok) {
46   //===--------------------------------------==//
47   // Read the raw token data.
48   //===--------------------------------------==//
49   using namespace llvm::support;
50 
51   // Shadow CurPtr into an automatic variable.
52   const unsigned char *CurPtrShadow = CurPtr;
53 
54   // Read in the data for the token.
55   unsigned Word0 = endian::readNext<uint32_t, little, aligned>(CurPtrShadow);
56   uint32_t IdentifierID =
57       endian::readNext<uint32_t, little, aligned>(CurPtrShadow);
58   uint32_t FileOffset =
59       endian::readNext<uint32_t, little, aligned>(CurPtrShadow);
60 
61   tok::TokenKind TKind = (tok::TokenKind) (Word0 & 0xFF);
62   Token::TokenFlags TFlags = (Token::TokenFlags) ((Word0 >> 8) & 0xFF);
63   uint32_t Len = Word0 >> 16;
64 
65   CurPtr = CurPtrShadow;
66 
67   //===--------------------------------------==//
68   // Construct the token itself.
69   //===--------------------------------------==//
70 
71   Tok.startToken();
72   Tok.setKind(TKind);
73   Tok.setFlag(TFlags);
74   assert(!LexingRawMode);
75   Tok.setLocation(FileStartLoc.getLocWithOffset(FileOffset));
76   Tok.setLength(Len);
77 
78   // Handle identifiers.
79   if (Tok.isLiteral()) {
80     Tok.setLiteralData((const char*) (PTHMgr.SpellingBase + IdentifierID));
81   }
82   else if (IdentifierID) {
83     MIOpt.ReadToken();
84     IdentifierInfo *II = PTHMgr.GetIdentifierInfo(IdentifierID-1);
85 
86     Tok.setIdentifierInfo(II);
87 
88     // Change the kind of this identifier to the appropriate token kind, e.g.
89     // turning "for" into a keyword.
90     Tok.setKind(II->getTokenID());
91 
92     if (II->isHandleIdentifierCase())
93       return PP->HandleIdentifier(Tok);
94 
95     return true;
96   }
97 
98   //===--------------------------------------==//
99   // Process the token.
100   //===--------------------------------------==//
101   if (TKind == tok::eof) {
102     // Save the end-of-file token.
103     EofToken = Tok;
104 
105     assert(!ParsingPreprocessorDirective);
106     assert(!LexingRawMode);
107 
108     return LexEndOfFile(Tok);
109   }
110 
111   if (TKind == tok::hash && Tok.isAtStartOfLine()) {
112     LastHashTokPtr = CurPtr - StoredTokenSize;
113     assert(!LexingRawMode);
114     PP->HandleDirective(Tok);
115 
116     return false;
117   }
118 
119   if (TKind == tok::eod) {
120     assert(ParsingPreprocessorDirective);
121     ParsingPreprocessorDirective = false;
122     return true;
123   }
124 
125   MIOpt.ReadToken();
126   return true;
127 }
128 
LexEndOfFile(Token & Result)129 bool PTHLexer::LexEndOfFile(Token &Result) {
130   // If we hit the end of the file while parsing a preprocessor directive,
131   // end the preprocessor directive first.  The next token returned will
132   // then be the end of file.
133   if (ParsingPreprocessorDirective) {
134     ParsingPreprocessorDirective = false; // Done parsing the "line".
135     return true;  // Have a token.
136   }
137 
138   assert(!LexingRawMode);
139 
140   // If we are in a #if directive, emit an error.
141   while (!ConditionalStack.empty()) {
142     if (PP->getCodeCompletionFileLoc() != FileStartLoc)
143       PP->Diag(ConditionalStack.back().IfLoc,
144                diag::err_pp_unterminated_conditional);
145     ConditionalStack.pop_back();
146   }
147 
148   // Finally, let the preprocessor handle this.
149   return PP->HandleEndOfFile(Result);
150 }
151 
152 // FIXME: We can just grab the last token instead of storing a copy
153 // into EofToken.
getEOF(Token & Tok)154 void PTHLexer::getEOF(Token& Tok) {
155   assert(EofToken.is(tok::eof));
156   Tok = EofToken;
157 }
158 
DiscardToEndOfLine()159 void PTHLexer::DiscardToEndOfLine() {
160   assert(ParsingPreprocessorDirective && ParsingFilename == false &&
161          "Must be in a preprocessing directive!");
162 
163   // We assume that if the preprocessor wishes to discard to the end of
164   // the line that it also means to end the current preprocessor directive.
165   ParsingPreprocessorDirective = false;
166 
167   // Skip tokens by only peeking at their token kind and the flags.
168   // We don't need to actually reconstruct full tokens from the token buffer.
169   // This saves some copies and it also reduces IdentifierInfo* lookup.
170   const unsigned char* p = CurPtr;
171   while (1) {
172     // Read the token kind.  Are we at the end of the file?
173     tok::TokenKind x = (tok::TokenKind) (uint8_t) *p;
174     if (x == tok::eof) break;
175 
176     // Read the token flags.  Are we at the start of the next line?
177     Token::TokenFlags y = (Token::TokenFlags) (uint8_t) p[1];
178     if (y & Token::StartOfLine) break;
179 
180     // Skip to the next token.
181     p += StoredTokenSize;
182   }
183 
184   CurPtr = p;
185 }
186 
187 /// SkipBlock - Used by Preprocessor to skip the current conditional block.
SkipBlock()188 bool PTHLexer::SkipBlock() {
189   using namespace llvm::support;
190   assert(CurPPCondPtr && "No cached PP conditional information.");
191   assert(LastHashTokPtr && "No known '#' token.");
192 
193   const unsigned char *HashEntryI = nullptr;
194   uint32_t TableIdx;
195 
196   do {
197     // Read the token offset from the side-table.
198     uint32_t Offset = endian::readNext<uint32_t, little, aligned>(CurPPCondPtr);
199 
200     // Read the target table index from the side-table.
201     TableIdx = endian::readNext<uint32_t, little, aligned>(CurPPCondPtr);
202 
203     // Compute the actual memory address of the '#' token data for this entry.
204     HashEntryI = TokBuf + Offset;
205 
206     // Optmization: "Sibling jumping".  #if...#else...#endif blocks can
207     //  contain nested blocks.  In the side-table we can jump over these
208     //  nested blocks instead of doing a linear search if the next "sibling"
209     //  entry is not at a location greater than LastHashTokPtr.
210     if (HashEntryI < LastHashTokPtr && TableIdx) {
211       // In the side-table we are still at an entry for a '#' token that
212       // is earlier than the last one we saw.  Check if the location we would
213       // stride gets us closer.
214       const unsigned char* NextPPCondPtr =
215         PPCond + TableIdx*(sizeof(uint32_t)*2);
216       assert(NextPPCondPtr >= CurPPCondPtr);
217       // Read where we should jump to.
218       const unsigned char *HashEntryJ =
219           TokBuf + endian::readNext<uint32_t, little, aligned>(NextPPCondPtr);
220 
221       if (HashEntryJ <= LastHashTokPtr) {
222         // Jump directly to the next entry in the side table.
223         HashEntryI = HashEntryJ;
224         TableIdx = endian::readNext<uint32_t, little, aligned>(NextPPCondPtr);
225         CurPPCondPtr = NextPPCondPtr;
226       }
227     }
228   }
229   while (HashEntryI < LastHashTokPtr);
230   assert(HashEntryI == LastHashTokPtr && "No PP-cond entry found for '#'");
231   assert(TableIdx && "No jumping from #endifs.");
232 
233   // Update our side-table iterator.
234   const unsigned char* NextPPCondPtr = PPCond + TableIdx*(sizeof(uint32_t)*2);
235   assert(NextPPCondPtr >= CurPPCondPtr);
236   CurPPCondPtr = NextPPCondPtr;
237 
238   // Read where we should jump to.
239   HashEntryI =
240       TokBuf + endian::readNext<uint32_t, little, aligned>(NextPPCondPtr);
241   uint32_t NextIdx = endian::readNext<uint32_t, little, aligned>(NextPPCondPtr);
242 
243   // By construction NextIdx will be zero if this is a #endif.  This is useful
244   // to know to obviate lexing another token.
245   bool isEndif = NextIdx == 0;
246 
247   // This case can occur when we see something like this:
248   //
249   //  #if ...
250   //   /* a comment or nothing */
251   //  #elif
252   //
253   // If we are skipping the first #if block it will be the case that CurPtr
254   // already points 'elif'.  Just return.
255 
256   if (CurPtr > HashEntryI) {
257     assert(CurPtr == HashEntryI + StoredTokenSize);
258     // Did we reach a #endif?  If so, go ahead and consume that token as well.
259     if (isEndif)
260       CurPtr += StoredTokenSize * 2;
261     else
262       LastHashTokPtr = HashEntryI;
263 
264     return isEndif;
265   }
266 
267   // Otherwise, we need to advance.  Update CurPtr to point to the '#' token.
268   CurPtr = HashEntryI;
269 
270   // Update the location of the last observed '#'.  This is useful if we
271   // are skipping multiple blocks.
272   LastHashTokPtr = CurPtr;
273 
274   // Skip the '#' token.
275   assert(((tok::TokenKind)*CurPtr) == tok::hash);
276   CurPtr += StoredTokenSize;
277 
278   // Did we reach a #endif?  If so, go ahead and consume that token as well.
279   if (isEndif) {
280     CurPtr += StoredTokenSize * 2;
281   }
282 
283   return isEndif;
284 }
285 
getSourceLocation()286 SourceLocation PTHLexer::getSourceLocation() {
287   // getSourceLocation is not on the hot path.  It is used to get the location
288   // of the next token when transitioning back to this lexer when done
289   // handling a #included file.  Just read the necessary data from the token
290   // data buffer to construct the SourceLocation object.
291   // NOTE: This is a virtual function; hence it is defined out-of-line.
292   using namespace llvm::support;
293 
294   const unsigned char *OffsetPtr = CurPtr + (StoredTokenSize - 4);
295   uint32_t Offset = endian::readNext<uint32_t, little, aligned>(OffsetPtr);
296   return FileStartLoc.getLocWithOffset(Offset);
297 }
298 
299 //===----------------------------------------------------------------------===//
300 // PTH file lookup: map from strings to file data.
301 //===----------------------------------------------------------------------===//
302 
303 /// PTHFileLookup - This internal data structure is used by the PTHManager
304 ///  to map from FileEntry objects managed by FileManager to offsets within
305 ///  the PTH file.
306 namespace {
/// Immutable pair of offsets stored for a file in the PTH file table: one for
/// the file's token data and one for its preprocessor-conditional table.
class PTHFileData {
  const uint32_t TokenDataOffset;
  const uint32_t CondTableOffset;

public:
  PTHFileData(uint32_t tokenOff, uint32_t ppCondOff)
      : TokenDataOffset(tokenOff), CondTableOffset(ppCondOff) {}

  /// Offset of the file's token data.
  uint32_t getTokenOffset() const { return TokenDataOffset; }

  /// Offset of the file's preprocessor-conditional table.
  uint32_t getPPCondOffset() const { return CondTableOffset; }
};
317 
318 
319 class PTHFileLookupCommonTrait {
320 public:
321   typedef std::pair<unsigned char, const char*> internal_key_type;
322   typedef unsigned hash_value_type;
323   typedef unsigned offset_type;
324 
ComputeHash(internal_key_type x)325   static hash_value_type ComputeHash(internal_key_type x) {
326     return llvm::HashString(x.second);
327   }
328 
329   static std::pair<unsigned, unsigned>
ReadKeyDataLength(const unsigned char * & d)330   ReadKeyDataLength(const unsigned char*& d) {
331     using namespace llvm::support;
332     unsigned keyLen =
333         (unsigned)endian::readNext<uint16_t, little, unaligned>(d);
334     unsigned dataLen = (unsigned) *(d++);
335     return std::make_pair(keyLen, dataLen);
336   }
337 
ReadKey(const unsigned char * d,unsigned)338   static internal_key_type ReadKey(const unsigned char* d, unsigned) {
339     unsigned char k = *(d++); // Read the entry kind.
340     return std::make_pair(k, (const char*) d);
341   }
342 };
343 
344 } // end anonymous namespace
345 
346 class PTHManager::PTHFileLookupTrait : public PTHFileLookupCommonTrait {
347 public:
348   typedef const FileEntry* external_key_type;
349   typedef PTHFileData      data_type;
350 
GetInternalKey(const FileEntry * FE)351   static internal_key_type GetInternalKey(const FileEntry* FE) {
352     return std::make_pair((unsigned char) 0x1, FE->getName());
353   }
354 
EqualKey(internal_key_type a,internal_key_type b)355   static bool EqualKey(internal_key_type a, internal_key_type b) {
356     return a.first == b.first && strcmp(a.second, b.second) == 0;
357   }
358 
ReadData(const internal_key_type & k,const unsigned char * d,unsigned)359   static PTHFileData ReadData(const internal_key_type& k,
360                               const unsigned char* d, unsigned) {
361     assert(k.first == 0x1 && "Only file lookups can match!");
362     using namespace llvm::support;
363     uint32_t x = endian::readNext<uint32_t, little, unaligned>(d);
364     uint32_t y = endian::readNext<uint32_t, little, unaligned>(d);
365     return PTHFileData(x, y);
366   }
367 };
368 
369 class PTHManager::PTHStringLookupTrait {
370 public:
371   typedef uint32_t data_type;
372   typedef const std::pair<const char*, unsigned> external_key_type;
373   typedef external_key_type internal_key_type;
374   typedef uint32_t hash_value_type;
375   typedef unsigned offset_type;
376 
EqualKey(const internal_key_type & a,const internal_key_type & b)377   static bool EqualKey(const internal_key_type& a,
378                        const internal_key_type& b) {
379     return (a.second == b.second) ? memcmp(a.first, b.first, a.second) == 0
380                                   : false;
381   }
382 
ComputeHash(const internal_key_type & a)383   static hash_value_type ComputeHash(const internal_key_type& a) {
384     return llvm::HashString(StringRef(a.first, a.second));
385   }
386 
387   // This hopefully will just get inlined and removed by the optimizer.
388   static const internal_key_type&
GetInternalKey(const external_key_type & x)389   GetInternalKey(const external_key_type& x) { return x; }
390 
391   static std::pair<unsigned, unsigned>
ReadKeyDataLength(const unsigned char * & d)392   ReadKeyDataLength(const unsigned char*& d) {
393     using namespace llvm::support;
394     return std::make_pair(
395         (unsigned)endian::readNext<uint16_t, little, unaligned>(d),
396         sizeof(uint32_t));
397   }
398 
399   static std::pair<const char*, unsigned>
ReadKey(const unsigned char * d,unsigned n)400   ReadKey(const unsigned char* d, unsigned n) {
401       assert(n >= 2 && d[n-1] == '\0');
402       return std::make_pair((const char*) d, n-1);
403     }
404 
ReadData(const internal_key_type & k,const unsigned char * d,unsigned)405   static uint32_t ReadData(const internal_key_type& k, const unsigned char* d,
406                            unsigned) {
407     using namespace llvm::support;
408     return endian::readNext<uint32_t, little, unaligned>(d);
409   }
410 };
411 
412 //===----------------------------------------------------------------------===//
413 // PTHManager methods.
414 //===----------------------------------------------------------------------===//
415 
PTHManager(std::unique_ptr<const llvm::MemoryBuffer> buf,std::unique_ptr<PTHFileLookup> fileLookup,const unsigned char * idDataTable,std::unique_ptr<IdentifierInfo * [],llvm::FreeDeleter> perIDCache,std::unique_ptr<PTHStringIdLookup> stringIdLookup,unsigned numIds,const unsigned char * spellingBase,const char * originalSourceFile)416 PTHManager::PTHManager(
417     std::unique_ptr<const llvm::MemoryBuffer> buf,
418     std::unique_ptr<PTHFileLookup> fileLookup, const unsigned char *idDataTable,
419     std::unique_ptr<IdentifierInfo *[], llvm::FreeDeleter> perIDCache,
420     std::unique_ptr<PTHStringIdLookup> stringIdLookup, unsigned numIds,
421     const unsigned char *spellingBase, const char *originalSourceFile)
422     : Buf(std::move(buf)), PerIDCache(std::move(perIDCache)),
423       FileLookup(std::move(fileLookup)), IdDataTable(idDataTable),
424       StringIdLookup(std::move(stringIdLookup)), NumIds(numIds), PP(nullptr),
425       SpellingBase(spellingBase), OriginalSourceFile(originalSourceFile) {}
426 
~PTHManager()427 PTHManager::~PTHManager() {
428 }
429 
InvalidPTH(DiagnosticsEngine & Diags,const char * Msg)430 static void InvalidPTH(DiagnosticsEngine &Diags, const char *Msg) {
431   Diags.Report(Diags.getCustomDiagID(DiagnosticsEngine::Error, "%0")) << Msg;
432 }
433 
Create(StringRef file,DiagnosticsEngine & Diags)434 PTHManager *PTHManager::Create(StringRef file, DiagnosticsEngine &Diags) {
435   // Memory map the PTH file.
436   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> FileOrErr =
437       llvm::MemoryBuffer::getFile(file);
438 
439   if (!FileOrErr) {
440     // FIXME: Add ec.message() to this diag.
441     Diags.Report(diag::err_invalid_pth_file) << file;
442     return nullptr;
443   }
444   std::unique_ptr<llvm::MemoryBuffer> File = std::move(FileOrErr.get());
445 
446   using namespace llvm::support;
447 
448   // Get the buffer ranges and check if there are at least three 32-bit
449   // words at the end of the file.
450   const unsigned char *BufBeg = (const unsigned char*)File->getBufferStart();
451   const unsigned char *BufEnd = (const unsigned char*)File->getBufferEnd();
452 
453   // Check the prologue of the file.
454   if ((BufEnd - BufBeg) < (signed)(sizeof("cfe-pth") + 4 + 4) ||
455       memcmp(BufBeg, "cfe-pth", sizeof("cfe-pth")) != 0) {
456     Diags.Report(diag::err_invalid_pth_file) << file;
457     return nullptr;
458   }
459 
460   // Read the PTH version.
461   const unsigned char *p = BufBeg + (sizeof("cfe-pth"));
462   unsigned Version = endian::readNext<uint32_t, little, aligned>(p);
463 
464   if (Version < PTHManager::Version) {
465     InvalidPTH(Diags,
466         Version < PTHManager::Version
467         ? "PTH file uses an older PTH format that is no longer supported"
468         : "PTH file uses a newer PTH format that cannot be read");
469     return nullptr;
470   }
471 
472   // Compute the address of the index table at the end of the PTH file.
473   const unsigned char *PrologueOffset = p;
474 
475   if (PrologueOffset >= BufEnd) {
476     Diags.Report(diag::err_invalid_pth_file) << file;
477     return nullptr;
478   }
479 
480   // Construct the file lookup table.  This will be used for mapping from
481   // FileEntry*'s to cached tokens.
482   const unsigned char* FileTableOffset = PrologueOffset + sizeof(uint32_t)*2;
483   const unsigned char *FileTable =
484       BufBeg + endian::readNext<uint32_t, little, aligned>(FileTableOffset);
485 
486   if (!(FileTable > BufBeg && FileTable < BufEnd)) {
487     Diags.Report(diag::err_invalid_pth_file) << file;
488     return nullptr; // FIXME: Proper error diagnostic?
489   }
490 
491   std::unique_ptr<PTHFileLookup> FL(PTHFileLookup::Create(FileTable, BufBeg));
492 
493   // Warn if the PTH file is empty.  We still want to create a PTHManager
494   // as the PTH could be used with -include-pth.
495   if (FL->isEmpty())
496     InvalidPTH(Diags, "PTH file contains no cached source data");
497 
498   // Get the location of the table mapping from persistent ids to the
499   // data needed to reconstruct identifiers.
500   const unsigned char* IDTableOffset = PrologueOffset + sizeof(uint32_t)*0;
501   const unsigned char *IData =
502       BufBeg + endian::readNext<uint32_t, little, aligned>(IDTableOffset);
503 
504   if (!(IData >= BufBeg && IData < BufEnd)) {
505     Diags.Report(diag::err_invalid_pth_file) << file;
506     return nullptr;
507   }
508 
509   // Get the location of the hashtable mapping between strings and
510   // persistent IDs.
511   const unsigned char* StringIdTableOffset = PrologueOffset + sizeof(uint32_t)*1;
512   const unsigned char *StringIdTable =
513       BufBeg + endian::readNext<uint32_t, little, aligned>(StringIdTableOffset);
514   if (!(StringIdTable >= BufBeg && StringIdTable < BufEnd)) {
515     Diags.Report(diag::err_invalid_pth_file) << file;
516     return nullptr;
517   }
518 
519   std::unique_ptr<PTHStringIdLookup> SL(
520       PTHStringIdLookup::Create(StringIdTable, BufBeg));
521 
522   // Get the location of the spelling cache.
523   const unsigned char* spellingBaseOffset = PrologueOffset + sizeof(uint32_t)*3;
524   const unsigned char *spellingBase =
525       BufBeg + endian::readNext<uint32_t, little, aligned>(spellingBaseOffset);
526   if (!(spellingBase >= BufBeg && spellingBase < BufEnd)) {
527     Diags.Report(diag::err_invalid_pth_file) << file;
528     return nullptr;
529   }
530 
531   // Get the number of IdentifierInfos and pre-allocate the identifier cache.
532   uint32_t NumIds = endian::readNext<uint32_t, little, aligned>(IData);
533 
534   // Pre-allocate the persistent ID -> IdentifierInfo* cache.  We use calloc()
535   // so that we in the best case only zero out memory once when the OS returns
536   // us new pages.
537   std::unique_ptr<IdentifierInfo *[], llvm::FreeDeleter> PerIDCache;
538 
539   if (NumIds) {
540     PerIDCache.reset((IdentifierInfo **)calloc(NumIds, sizeof(PerIDCache[0])));
541     if (!PerIDCache) {
542       InvalidPTH(Diags, "Could not allocate memory for processing PTH file");
543       return nullptr;
544     }
545   }
546 
547   // Compute the address of the original source file.
548   const unsigned char* originalSourceBase = PrologueOffset + sizeof(uint32_t)*4;
549   unsigned len =
550       endian::readNext<uint16_t, little, unaligned>(originalSourceBase);
551   if (!len) originalSourceBase = nullptr;
552 
553   // Create the new PTHManager.
554   return new PTHManager(std::move(File), std::move(FL), IData,
555                         std::move(PerIDCache), std::move(SL), NumIds,
556                         spellingBase, (const char *)originalSourceBase);
557 }
558 
LazilyCreateIdentifierInfo(unsigned PersistentID)559 IdentifierInfo* PTHManager::LazilyCreateIdentifierInfo(unsigned PersistentID) {
560   using namespace llvm::support;
561   // Look in the PTH file for the string data for the IdentifierInfo object.
562   const unsigned char* TableEntry = IdDataTable + sizeof(uint32_t)*PersistentID;
563   const unsigned char *IDData =
564       (const unsigned char *)Buf->getBufferStart() +
565       endian::readNext<uint32_t, little, aligned>(TableEntry);
566   assert(IDData < (const unsigned char*)Buf->getBufferEnd());
567 
568   // Allocate the object.
569   std::pair<IdentifierInfo,const unsigned char*> *Mem =
570     Alloc.Allocate<std::pair<IdentifierInfo,const unsigned char*> >();
571 
572   Mem->second = IDData;
573   assert(IDData[0] != '\0');
574   IdentifierInfo *II = new ((void*) Mem) IdentifierInfo();
575 
576   // Store the new IdentifierInfo in the cache.
577   PerIDCache[PersistentID] = II;
578   assert(II->getNameStart() && II->getNameStart()[0] != '\0');
579   return II;
580 }
581 
get(StringRef Name)582 IdentifierInfo* PTHManager::get(StringRef Name) {
583   // Double check our assumption that the last character isn't '\0'.
584   assert(Name.empty() || Name.back() != '\0');
585   PTHStringIdLookup::iterator I =
586       StringIdLookup->find(std::make_pair(Name.data(), Name.size()));
587   if (I == StringIdLookup->end()) // No identifier found?
588     return nullptr;
589 
590   // Match found.  Return the identifier!
591   assert(*I > 0);
592   return GetIdentifierInfo(*I-1);
593 }
594 
CreateLexer(FileID FID)595 PTHLexer *PTHManager::CreateLexer(FileID FID) {
596   const FileEntry *FE = PP->getSourceManager().getFileEntryForID(FID);
597   if (!FE)
598     return nullptr;
599 
600   using namespace llvm::support;
601 
602   // Lookup the FileEntry object in our file lookup data structure.  It will
603   // return a variant that indicates whether or not there is an offset within
604   // the PTH file that contains cached tokens.
605   PTHFileLookup::iterator I = FileLookup->find(FE);
606 
607   if (I == FileLookup->end()) // No tokens available?
608     return nullptr;
609 
610   const PTHFileData& FileData = *I;
611 
612   const unsigned char *BufStart = (const unsigned char *)Buf->getBufferStart();
613   // Compute the offset of the token data within the buffer.
614   const unsigned char* data = BufStart + FileData.getTokenOffset();
615 
616   // Get the location of pp-conditional table.
617   const unsigned char* ppcond = BufStart + FileData.getPPCondOffset();
618   uint32_t Len = endian::readNext<uint32_t, little, aligned>(ppcond);
619   if (Len == 0) ppcond = nullptr;
620 
621   assert(PP && "No preprocessor set yet!");
622   return new PTHLexer(*PP, FID, data, ppcond, *this);
623 }
624 
625 //===----------------------------------------------------------------------===//
626 // 'stat' caching.
627 //===----------------------------------------------------------------------===//
628 
629 namespace {
630 class PTHStatData {
631 public:
632   const bool HasData;
633   uint64_t Size;
634   time_t ModTime;
635   llvm::sys::fs::UniqueID UniqueID;
636   bool IsDirectory;
637 
PTHStatData(uint64_t Size,time_t ModTime,llvm::sys::fs::UniqueID UniqueID,bool IsDirectory)638   PTHStatData(uint64_t Size, time_t ModTime, llvm::sys::fs::UniqueID UniqueID,
639               bool IsDirectory)
640       : HasData(true), Size(Size), ModTime(ModTime), UniqueID(UniqueID),
641         IsDirectory(IsDirectory) {}
642 
PTHStatData()643   PTHStatData() : HasData(false) {}
644 };
645 
646 class PTHStatLookupTrait : public PTHFileLookupCommonTrait {
647 public:
648   typedef const char* external_key_type;  // const char*
649   typedef PTHStatData data_type;
650 
GetInternalKey(const char * path)651   static internal_key_type GetInternalKey(const char *path) {
652     // The key 'kind' doesn't matter here because it is ignored in EqualKey.
653     return std::make_pair((unsigned char) 0x0, path);
654   }
655 
EqualKey(internal_key_type a,internal_key_type b)656   static bool EqualKey(internal_key_type a, internal_key_type b) {
657     // When doing 'stat' lookups we don't care about the kind of 'a' and 'b',
658     // just the paths.
659     return strcmp(a.second, b.second) == 0;
660   }
661 
ReadData(const internal_key_type & k,const unsigned char * d,unsigned)662   static data_type ReadData(const internal_key_type& k, const unsigned char* d,
663                             unsigned) {
664 
665     if (k.first /* File or Directory */) {
666       bool IsDirectory = true;
667       if (k.first == 0x1 /* File */) {
668         IsDirectory = false;
669         d += 4 * 2; // Skip the first 2 words.
670       }
671 
672       using namespace llvm::support;
673 
674       uint64_t File = endian::readNext<uint64_t, little, unaligned>(d);
675       uint64_t Device = endian::readNext<uint64_t, little, unaligned>(d);
676       llvm::sys::fs::UniqueID UniqueID(Device, File);
677       time_t ModTime = endian::readNext<uint64_t, little, unaligned>(d);
678       uint64_t Size = endian::readNext<uint64_t, little, unaligned>(d);
679       return data_type(Size, ModTime, UniqueID, IsDirectory);
680     }
681 
682     // Negative stat.  Don't read anything.
683     return data_type();
684   }
685 };
686 } // end anonymous namespace
687 
688 namespace clang {
689 class PTHStatCache : public FileSystemStatCache {
690   typedef llvm::OnDiskChainedHashTable<PTHStatLookupTrait> CacheTy;
691   CacheTy Cache;
692 
693 public:
PTHStatCache(PTHManager::PTHFileLookup & FL)694   PTHStatCache(PTHManager::PTHFileLookup &FL)
695       : Cache(FL.getNumBuckets(), FL.getNumEntries(), FL.getBuckets(),
696               FL.getBase()) {}
697 
getStat(const char * Path,FileData & Data,bool isFile,std::unique_ptr<vfs::File> * F,vfs::FileSystem & FS)698   LookupResult getStat(const char *Path, FileData &Data, bool isFile,
699                        std::unique_ptr<vfs::File> *F,
700                        vfs::FileSystem &FS) override {
701     // Do the lookup for the file's data in the PTH file.
702     CacheTy::iterator I = Cache.find(Path);
703 
704     // If we don't get a hit in the PTH file just forward to 'stat'.
705     if (I == Cache.end())
706       return statChained(Path, Data, isFile, F, FS);
707 
708     const PTHStatData &D = *I;
709 
710     if (!D.HasData)
711       return CacheMissing;
712 
713     Data.Name = Path;
714     Data.Size = D.Size;
715     Data.ModTime = D.ModTime;
716     Data.UniqueID = D.UniqueID;
717     Data.IsDirectory = D.IsDirectory;
718     Data.IsNamedPipe = false;
719     Data.InPCH = true;
720 
721     return CacheExists;
722   }
723 };
724 }
725 
createStatCache()726 std::unique_ptr<FileSystemStatCache> PTHManager::createStatCache() {
727   return llvm::make_unique<PTHStatCache>(*FileLookup);
728 }
729