1 //===--- PTHLexer.cpp - Lex from a token stream ---------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the PTHLexer interface.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Lex/PTHLexer.h"
15 #include "clang/Basic/FileManager.h"
16 #include "clang/Basic/FileSystemStatCache.h"
17 #include "clang/Basic/IdentifierTable.h"
18 #include "clang/Basic/TokenKinds.h"
19 #include "clang/Lex/LexDiagnostic.h"
20 #include "clang/Lex/PTHManager.h"
21 #include "clang/Lex/Preprocessor.h"
22 #include "clang/Lex/Token.h"
23 #include "llvm/ADT/StringExtras.h"
24 #include "llvm/ADT/StringMap.h"
25 #include "llvm/Support/EndianStream.h"
26 #include "llvm/Support/MemoryBuffer.h"
27 #include <memory>
28 #include <system_error>
29 using namespace clang;
30 
// Size in bytes of one serialized token record in the PTH token buffer.
// PTHLexer::Lex() decodes a record as three 32-bit little-endian words:
//   word 0: token kind (1 byte), token flags (1 byte), token length (2 bytes)
//   word 1: persistent identifier ID, or spelling offset for literals (4 bytes)
//   word 2: offset of the token from the start of its file (4 bytes)
static const unsigned StoredTokenSize = 1 + 1 + 2 + 4 + 4;
32 
33 //===----------------------------------------------------------------------===//
34 // PTHLexer methods.
35 //===----------------------------------------------------------------------===//
36 
PTHLexer(Preprocessor & PP,FileID FID,const unsigned char * D,const unsigned char * ppcond,PTHManager & PM)37 PTHLexer::PTHLexer(Preprocessor &PP, FileID FID, const unsigned char *D,
38                    const unsigned char *ppcond, PTHManager &PM)
39   : PreprocessorLexer(&PP, FID), TokBuf(D), CurPtr(D), LastHashTokPtr(nullptr),
40     PPCond(ppcond), CurPPCondPtr(ppcond), PTHMgr(PM) {
41 
42   FileStartLoc = PP.getSourceManager().getLocForStartOfFile(FID);
43 }
44 
Lex(Token & Tok)45 bool PTHLexer::Lex(Token& Tok) {
46   //===--------------------------------------==//
47   // Read the raw token data.
48   //===--------------------------------------==//
49   using namespace llvm::support;
50 
51   // Shadow CurPtr into an automatic variable.
52   const unsigned char *CurPtrShadow = CurPtr;
53 
54   // Read in the data for the token.
55   unsigned Word0 = endian::readNext<uint32_t, little, aligned>(CurPtrShadow);
56   uint32_t IdentifierID =
57       endian::readNext<uint32_t, little, aligned>(CurPtrShadow);
58   uint32_t FileOffset =
59       endian::readNext<uint32_t, little, aligned>(CurPtrShadow);
60 
61   tok::TokenKind TKind = (tok::TokenKind) (Word0 & 0xFF);
62   Token::TokenFlags TFlags = (Token::TokenFlags) ((Word0 >> 8) & 0xFF);
63   uint32_t Len = Word0 >> 16;
64 
65   CurPtr = CurPtrShadow;
66 
67   //===--------------------------------------==//
68   // Construct the token itself.
69   //===--------------------------------------==//
70 
71   Tok.startToken();
72   Tok.setKind(TKind);
73   Tok.setFlag(TFlags);
74   assert(!LexingRawMode);
75   Tok.setLocation(FileStartLoc.getLocWithOffset(FileOffset));
76   Tok.setLength(Len);
77 
78   // Handle identifiers.
79   if (Tok.isLiteral()) {
80     Tok.setLiteralData((const char*) (PTHMgr.SpellingBase + IdentifierID));
81   }
82   else if (IdentifierID) {
83     MIOpt.ReadToken();
84     IdentifierInfo *II = PTHMgr.GetIdentifierInfo(IdentifierID-1);
85 
86     Tok.setIdentifierInfo(II);
87 
88     // Change the kind of this identifier to the appropriate token kind, e.g.
89     // turning "for" into a keyword.
90     Tok.setKind(II->getTokenID());
91 
92     if (II->isHandleIdentifierCase())
93       return PP->HandleIdentifier(Tok);
94 
95     return true;
96   }
97 
98   //===--------------------------------------==//
99   // Process the token.
100   //===--------------------------------------==//
101   if (TKind == tok::eof) {
102     // Save the end-of-file token.
103     EofToken = Tok;
104 
105     assert(!ParsingPreprocessorDirective);
106     assert(!LexingRawMode);
107 
108     return LexEndOfFile(Tok);
109   }
110 
111   if (TKind == tok::hash && Tok.isAtStartOfLine()) {
112     LastHashTokPtr = CurPtr - StoredTokenSize;
113     assert(!LexingRawMode);
114     PP->HandleDirective(Tok);
115 
116     return false;
117   }
118 
119   if (TKind == tok::eod) {
120     assert(ParsingPreprocessorDirective);
121     ParsingPreprocessorDirective = false;
122     return true;
123   }
124 
125   MIOpt.ReadToken();
126   return true;
127 }
128 
LexEndOfFile(Token & Result)129 bool PTHLexer::LexEndOfFile(Token &Result) {
130   // If we hit the end of the file while parsing a preprocessor directive,
131   // end the preprocessor directive first.  The next token returned will
132   // then be the end of file.
133   if (ParsingPreprocessorDirective) {
134     ParsingPreprocessorDirective = false; // Done parsing the "line".
135     return true;  // Have a token.
136   }
137 
138   assert(!LexingRawMode);
139 
140   // If we are in a #if directive, emit an error.
141   while (!ConditionalStack.empty()) {
142     if (PP->getCodeCompletionFileLoc() != FileStartLoc)
143       PP->Diag(ConditionalStack.back().IfLoc,
144                diag::err_pp_unterminated_conditional);
145     ConditionalStack.pop_back();
146   }
147 
148   // Finally, let the preprocessor handle this.
149   return PP->HandleEndOfFile(Result);
150 }
151 
152 // FIXME: We can just grab the last token instead of storing a copy
153 // into EofToken.
getEOF(Token & Tok)154 void PTHLexer::getEOF(Token& Tok) {
155   assert(EofToken.is(tok::eof));
156   Tok = EofToken;
157 }
158 
DiscardToEndOfLine()159 void PTHLexer::DiscardToEndOfLine() {
160   assert(ParsingPreprocessorDirective && ParsingFilename == false &&
161          "Must be in a preprocessing directive!");
162 
163   // We assume that if the preprocessor wishes to discard to the end of
164   // the line that it also means to end the current preprocessor directive.
165   ParsingPreprocessorDirective = false;
166 
167   // Skip tokens by only peeking at their token kind and the flags.
168   // We don't need to actually reconstruct full tokens from the token buffer.
169   // This saves some copies and it also reduces IdentifierInfo* lookup.
170   const unsigned char* p = CurPtr;
171   while (1) {
172     // Read the token kind.  Are we at the end of the file?
173     tok::TokenKind x = (tok::TokenKind) (uint8_t) *p;
174     if (x == tok::eof) break;
175 
176     // Read the token flags.  Are we at the start of the next line?
177     Token::TokenFlags y = (Token::TokenFlags) (uint8_t) p[1];
178     if (y & Token::StartOfLine) break;
179 
180     // Skip to the next token.
181     p += StoredTokenSize;
182   }
183 
184   CurPtr = p;
185 }
186 
/// SkipBlock - Used by Preprocessor to skip the current conditional block.
///
/// Uses the preprocessor-conditional side table instead of lexing: each table
/// entry is a pair of 32-bit words, the offset of a '#' token within the token
/// buffer and the index of the table entry to jump to from there.  The entry
/// matching the last '#' seen by Lex() (LastHashTokPtr) is located, its jump
/// target is followed, and CurPtr is repositioned just past the target '#'.
///
/// \returns true if the jump target's own table index is zero, which by
/// construction marks a #endif; in that case two further token records are
/// consumed as well (NOTE(review): presumably the 'endif' identifier and the
/// end-of-directive token - confirm against the PTH writer).
bool PTHLexer::SkipBlock() {
  using namespace llvm::support;
  assert(CurPPCondPtr && "No cached PP conditional information.");
  assert(LastHashTokPtr && "No known '#' token.");

  const unsigned char *HashEntryI = nullptr;
  uint32_t TableIdx;

  // Walk the side table forward until we reach the entry for LastHashTokPtr.
  do {
    // Read the token offset from the side-table.
    uint32_t Offset = endian::readNext<uint32_t, little, aligned>(CurPPCondPtr);

    // Read the target table index from the side-table.
    TableIdx = endian::readNext<uint32_t, little, aligned>(CurPPCondPtr);

    // Compute the actual memory address of the '#' token data for this entry.
    HashEntryI = TokBuf + Offset;

    // Optimization: "Sibling jumping".  #if...#else...#endif blocks can
    //  contain nested blocks.  In the side-table we can jump over these
    //  nested blocks instead of doing a linear search if the next "sibling"
    //  entry is not at a location greater than LastHashTokPtr.
    if (HashEntryI < LastHashTokPtr && TableIdx) {
      // In the side-table we are still at an entry for a '#' token that
      // is earlier than the last one we saw.  Check if the location we would
      // stride gets us closer.
      const unsigned char* NextPPCondPtr =
        PPCond + TableIdx*(sizeof(uint32_t)*2);
      assert(NextPPCondPtr >= CurPPCondPtr);
      // Read where we should jump to.
      const unsigned char *HashEntryJ =
          TokBuf + endian::readNext<uint32_t, little, aligned>(NextPPCondPtr);

      if (HashEntryJ <= LastHashTokPtr) {
        // Jump directly to the next entry in the side table.
        HashEntryI = HashEntryJ;
        TableIdx = endian::readNext<uint32_t, little, aligned>(NextPPCondPtr);
        // NextPPCondPtr has now been advanced past both words of the sibling
        // entry, so the outer loop resumes with the entry after it.
        CurPPCondPtr = NextPPCondPtr;
      }
    }
  }
  while (HashEntryI < LastHashTokPtr);
  assert(HashEntryI == LastHashTokPtr && "No PP-cond entry found for '#'");
  assert(TableIdx && "No jumping from #endifs.");

  // Update our side-table iterator.
  const unsigned char* NextPPCondPtr = PPCond + TableIdx*(sizeof(uint32_t)*2);
  assert(NextPPCondPtr >= CurPPCondPtr);
  CurPPCondPtr = NextPPCondPtr;

  // Read where we should jump to.
  HashEntryI =
      TokBuf + endian::readNext<uint32_t, little, aligned>(NextPPCondPtr);
  uint32_t NextIdx = endian::readNext<uint32_t, little, aligned>(NextPPCondPtr);

  // By construction NextIdx will be zero if this is a #endif.  This is useful
  // to know to obviate lexing another token.
  bool isEndif = NextIdx == 0;

  // This case can occur when we see something like this:
  //
  //  #if ...
  //   /* a comment or nothing */
  //  #elif
  //
  // If we are skipping the first #if block it will be the case that CurPtr
  // already points 'elif'.  Just return.

  if (CurPtr > HashEntryI) {
    // The target '#' has already been read; CurPtr must sit right after it.
    assert(CurPtr == HashEntryI + StoredTokenSize);
    // Did we reach a #endif?  If so, go ahead and consume that token as well.
    if (isEndif)
      CurPtr += StoredTokenSize * 2;
    else
      LastHashTokPtr = HashEntryI;

    return isEndif;
  }

  // Otherwise, we need to advance.  Update CurPtr to point to the '#' token.
  CurPtr = HashEntryI;

  // Update the location of the last observed '#'.  This is useful if we
  // are skipping multiple blocks.
  LastHashTokPtr = CurPtr;

  // Skip the '#' token.
  assert(((tok::TokenKind)*CurPtr) == tok::hash);
  CurPtr += StoredTokenSize;

  // Did we reach a #endif?  If so, go ahead and consume that token as well.
  if (isEndif) {
    CurPtr += StoredTokenSize * 2;
  }

  return isEndif;
}
285 
getSourceLocation()286 SourceLocation PTHLexer::getSourceLocation() {
287   // getSourceLocation is not on the hot path.  It is used to get the location
288   // of the next token when transitioning back to this lexer when done
289   // handling a #included file.  Just read the necessary data from the token
290   // data buffer to construct the SourceLocation object.
291   // NOTE: This is a virtual function; hence it is defined out-of-line.
292   using namespace llvm::support;
293 
294   const unsigned char *OffsetPtr = CurPtr + (StoredTokenSize - 4);
295   uint32_t Offset = endian::readNext<uint32_t, little, aligned>(OffsetPtr);
296   return FileStartLoc.getLocWithOffset(Offset);
297 }
298 
299 //===----------------------------------------------------------------------===//
300 // PTH file lookup: map from strings to file data.
301 //===----------------------------------------------------------------------===//
302 
/// PTHFileData - This internal data structure records, for one file cached
///  in the PTH file, the offset of its token data and the offset of its
///  preprocessor-conditional table within the PTH buffer.
306 namespace {
/// Immutable pair of offsets describing one cached file: where its token
/// records start and where its preprocessor-conditional table starts.
class PTHFileData {
  const uint32_t TokenDataOffset;
  const uint32_t PPCondTableOffset;

public:
  PTHFileData(uint32_t tokenOff, uint32_t ppCondOff)
      : TokenDataOffset(tokenOff), PPCondTableOffset(ppCondOff) {}

  /// Offset of the file's serialized tokens.
  uint32_t getTokenOffset() const {
    return TokenDataOffset;
  }

  /// Offset of the file's preprocessor-conditional table.
  uint32_t getPPCondOffset() const {
    return PPCondTableOffset;
  }
};
317 
318 
319 class PTHFileLookupCommonTrait {
320 public:
321   typedef std::pair<unsigned char, const char*> internal_key_type;
322   typedef unsigned hash_value_type;
323   typedef unsigned offset_type;
324 
ComputeHash(internal_key_type x)325   static hash_value_type ComputeHash(internal_key_type x) {
326     return llvm::HashString(x.second);
327   }
328 
329   static std::pair<unsigned, unsigned>
ReadKeyDataLength(const unsigned char * & d)330   ReadKeyDataLength(const unsigned char*& d) {
331     using namespace llvm::support;
332     unsigned keyLen =
333         (unsigned)endian::readNext<uint16_t, little, unaligned>(d);
334     unsigned dataLen = (unsigned) *(d++);
335     return std::make_pair(keyLen, dataLen);
336   }
337 
ReadKey(const unsigned char * d,unsigned)338   static internal_key_type ReadKey(const unsigned char* d, unsigned) {
339     unsigned char k = *(d++); // Read the entry kind.
340     return std::make_pair(k, (const char*) d);
341   }
342 };
343 
344 } // end anonymous namespace
345 
346 class PTHManager::PTHFileLookupTrait : public PTHFileLookupCommonTrait {
347 public:
348   typedef const FileEntry* external_key_type;
349   typedef PTHFileData      data_type;
350 
GetInternalKey(const FileEntry * FE)351   static internal_key_type GetInternalKey(const FileEntry* FE) {
352     return std::make_pair((unsigned char) 0x1, FE->getName());
353   }
354 
EqualKey(internal_key_type a,internal_key_type b)355   static bool EqualKey(internal_key_type a, internal_key_type b) {
356     return a.first == b.first && strcmp(a.second, b.second) == 0;
357   }
358 
ReadData(const internal_key_type & k,const unsigned char * d,unsigned)359   static PTHFileData ReadData(const internal_key_type& k,
360                               const unsigned char* d, unsigned) {
361     assert(k.first == 0x1 && "Only file lookups can match!");
362     using namespace llvm::support;
363     uint32_t x = endian::readNext<uint32_t, little, unaligned>(d);
364     uint32_t y = endian::readNext<uint32_t, little, unaligned>(d);
365     return PTHFileData(x, y);
366   }
367 };
368 
369 class PTHManager::PTHStringLookupTrait {
370 public:
371   typedef uint32_t data_type;
372   typedef const std::pair<const char*, unsigned> external_key_type;
373   typedef external_key_type internal_key_type;
374   typedef uint32_t hash_value_type;
375   typedef unsigned offset_type;
376 
EqualKey(const internal_key_type & a,const internal_key_type & b)377   static bool EqualKey(const internal_key_type& a,
378                        const internal_key_type& b) {
379     return (a.second == b.second) ? memcmp(a.first, b.first, a.second) == 0
380                                   : false;
381   }
382 
ComputeHash(const internal_key_type & a)383   static hash_value_type ComputeHash(const internal_key_type& a) {
384     return llvm::HashString(StringRef(a.first, a.second));
385   }
386 
387   // This hopefully will just get inlined and removed by the optimizer.
388   static const internal_key_type&
GetInternalKey(const external_key_type & x)389   GetInternalKey(const external_key_type& x) { return x; }
390 
391   static std::pair<unsigned, unsigned>
ReadKeyDataLength(const unsigned char * & d)392   ReadKeyDataLength(const unsigned char*& d) {
393     using namespace llvm::support;
394     return std::make_pair(
395         (unsigned)endian::readNext<uint16_t, little, unaligned>(d),
396         sizeof(uint32_t));
397   }
398 
399   static std::pair<const char*, unsigned>
ReadKey(const unsigned char * d,unsigned n)400   ReadKey(const unsigned char* d, unsigned n) {
401       assert(n >= 2 && d[n-1] == '\0');
402       return std::make_pair((const char*) d, n-1);
403     }
404 
ReadData(const internal_key_type & k,const unsigned char * d,unsigned)405   static uint32_t ReadData(const internal_key_type& k, const unsigned char* d,
406                            unsigned) {
407     using namespace llvm::support;
408     return endian::readNext<uint32_t, little, unaligned>(d);
409   }
410 };
411 
412 //===----------------------------------------------------------------------===//
413 // PTHManager methods.
414 //===----------------------------------------------------------------------===//
415 
/// Take ownership of the pieces PTHManager::Create() extracted from a PTH
/// file.
///
/// \param buf                The PTH file contents; moved into the manager.
/// \param fileLookup         File lookup table; moved into the manager.
/// \param idDataTable        Pointer to the identifier data table.
/// \param perIDCache         Persistent ID -> IdentifierInfo* cache; moved in.
/// \param stringIdLookup     String -> persistent ID table; moved in.
/// \param numIds             Number of identifiers stored in the PTH file.
/// \param spellingBase       Base address of the spelling cache.
/// \param originalSourceFile Name of the original source file, or nullptr.
///
/// The preprocessor pointer starts out null.
PTHManager::PTHManager(
    std::unique_ptr<const llvm::MemoryBuffer> buf,
    std::unique_ptr<PTHFileLookup> fileLookup, const unsigned char *idDataTable,
    std::unique_ptr<IdentifierInfo *[], llvm::FreeDeleter> perIDCache,
    std::unique_ptr<PTHStringIdLookup> stringIdLookup, unsigned numIds,
    const unsigned char *spellingBase, const char *originalSourceFile)
    : Buf(std::move(buf)), PerIDCache(std::move(perIDCache)),
      FileLookup(std::move(fileLookup)), IdDataTable(idDataTable),
      StringIdLookup(std::move(stringIdLookup)), NumIds(numIds), PP(nullptr),
      SpellingBase(spellingBase), OriginalSourceFile(originalSourceFile) {}
426 
~PTHManager()427 PTHManager::~PTHManager() {
428 }
429 
InvalidPTH(DiagnosticsEngine & Diags,const char * Msg)430 static void InvalidPTH(DiagnosticsEngine &Diags, const char *Msg) {
431   Diags.Report(Diags.getCustomDiagID(DiagnosticsEngine::Error, "%0")) << Msg;
432 }
433 
Create(const std::string & file,DiagnosticsEngine & Diags)434 PTHManager *PTHManager::Create(const std::string &file,
435                                DiagnosticsEngine &Diags) {
436   // Memory map the PTH file.
437   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> FileOrErr =
438       llvm::MemoryBuffer::getFile(file);
439 
440   if (!FileOrErr) {
441     // FIXME: Add ec.message() to this diag.
442     Diags.Report(diag::err_invalid_pth_file) << file;
443     return nullptr;
444   }
445   std::unique_ptr<llvm::MemoryBuffer> File = std::move(FileOrErr.get());
446 
447   using namespace llvm::support;
448 
449   // Get the buffer ranges and check if there are at least three 32-bit
450   // words at the end of the file.
451   const unsigned char *BufBeg = (const unsigned char*)File->getBufferStart();
452   const unsigned char *BufEnd = (const unsigned char*)File->getBufferEnd();
453 
454   // Check the prologue of the file.
455   if ((BufEnd - BufBeg) < (signed)(sizeof("cfe-pth") + 4 + 4) ||
456       memcmp(BufBeg, "cfe-pth", sizeof("cfe-pth")) != 0) {
457     Diags.Report(diag::err_invalid_pth_file) << file;
458     return nullptr;
459   }
460 
461   // Read the PTH version.
462   const unsigned char *p = BufBeg + (sizeof("cfe-pth"));
463   unsigned Version = endian::readNext<uint32_t, little, aligned>(p);
464 
465   if (Version < PTHManager::Version) {
466     InvalidPTH(Diags,
467         Version < PTHManager::Version
468         ? "PTH file uses an older PTH format that is no longer supported"
469         : "PTH file uses a newer PTH format that cannot be read");
470     return nullptr;
471   }
472 
473   // Compute the address of the index table at the end of the PTH file.
474   const unsigned char *PrologueOffset = p;
475 
476   if (PrologueOffset >= BufEnd) {
477     Diags.Report(diag::err_invalid_pth_file) << file;
478     return nullptr;
479   }
480 
481   // Construct the file lookup table.  This will be used for mapping from
482   // FileEntry*'s to cached tokens.
483   const unsigned char* FileTableOffset = PrologueOffset + sizeof(uint32_t)*2;
484   const unsigned char *FileTable =
485       BufBeg + endian::readNext<uint32_t, little, aligned>(FileTableOffset);
486 
487   if (!(FileTable > BufBeg && FileTable < BufEnd)) {
488     Diags.Report(diag::err_invalid_pth_file) << file;
489     return nullptr; // FIXME: Proper error diagnostic?
490   }
491 
492   std::unique_ptr<PTHFileLookup> FL(PTHFileLookup::Create(FileTable, BufBeg));
493 
494   // Warn if the PTH file is empty.  We still want to create a PTHManager
495   // as the PTH could be used with -include-pth.
496   if (FL->isEmpty())
497     InvalidPTH(Diags, "PTH file contains no cached source data");
498 
499   // Get the location of the table mapping from persistent ids to the
500   // data needed to reconstruct identifiers.
501   const unsigned char* IDTableOffset = PrologueOffset + sizeof(uint32_t)*0;
502   const unsigned char *IData =
503       BufBeg + endian::readNext<uint32_t, little, aligned>(IDTableOffset);
504 
505   if (!(IData >= BufBeg && IData < BufEnd)) {
506     Diags.Report(diag::err_invalid_pth_file) << file;
507     return nullptr;
508   }
509 
510   // Get the location of the hashtable mapping between strings and
511   // persistent IDs.
512   const unsigned char* StringIdTableOffset = PrologueOffset + sizeof(uint32_t)*1;
513   const unsigned char *StringIdTable =
514       BufBeg + endian::readNext<uint32_t, little, aligned>(StringIdTableOffset);
515   if (!(StringIdTable >= BufBeg && StringIdTable < BufEnd)) {
516     Diags.Report(diag::err_invalid_pth_file) << file;
517     return nullptr;
518   }
519 
520   std::unique_ptr<PTHStringIdLookup> SL(
521       PTHStringIdLookup::Create(StringIdTable, BufBeg));
522 
523   // Get the location of the spelling cache.
524   const unsigned char* spellingBaseOffset = PrologueOffset + sizeof(uint32_t)*3;
525   const unsigned char *spellingBase =
526       BufBeg + endian::readNext<uint32_t, little, aligned>(spellingBaseOffset);
527   if (!(spellingBase >= BufBeg && spellingBase < BufEnd)) {
528     Diags.Report(diag::err_invalid_pth_file) << file;
529     return nullptr;
530   }
531 
532   // Get the number of IdentifierInfos and pre-allocate the identifier cache.
533   uint32_t NumIds = endian::readNext<uint32_t, little, aligned>(IData);
534 
535   // Pre-allocate the persistent ID -> IdentifierInfo* cache.  We use calloc()
536   // so that we in the best case only zero out memory once when the OS returns
537   // us new pages.
538   std::unique_ptr<IdentifierInfo *[], llvm::FreeDeleter> PerIDCache;
539 
540   if (NumIds) {
541     PerIDCache.reset((IdentifierInfo **)calloc(NumIds, sizeof(PerIDCache[0])));
542     if (!PerIDCache) {
543       InvalidPTH(Diags, "Could not allocate memory for processing PTH file");
544       return nullptr;
545     }
546   }
547 
548   // Compute the address of the original source file.
549   const unsigned char* originalSourceBase = PrologueOffset + sizeof(uint32_t)*4;
550   unsigned len =
551       endian::readNext<uint16_t, little, unaligned>(originalSourceBase);
552   if (!len) originalSourceBase = nullptr;
553 
554   // Create the new PTHManager.
555   return new PTHManager(std::move(File), std::move(FL), IData,
556                         std::move(PerIDCache), std::move(SL), NumIds,
557                         spellingBase, (const char *)originalSourceBase);
558 }
559 
LazilyCreateIdentifierInfo(unsigned PersistentID)560 IdentifierInfo* PTHManager::LazilyCreateIdentifierInfo(unsigned PersistentID) {
561   using namespace llvm::support;
562   // Look in the PTH file for the string data for the IdentifierInfo object.
563   const unsigned char* TableEntry = IdDataTable + sizeof(uint32_t)*PersistentID;
564   const unsigned char *IDData =
565       (const unsigned char *)Buf->getBufferStart() +
566       endian::readNext<uint32_t, little, aligned>(TableEntry);
567   assert(IDData < (const unsigned char*)Buf->getBufferEnd());
568 
569   // Allocate the object.
570   std::pair<IdentifierInfo,const unsigned char*> *Mem =
571     Alloc.Allocate<std::pair<IdentifierInfo,const unsigned char*> >();
572 
573   Mem->second = IDData;
574   assert(IDData[0] != '\0');
575   IdentifierInfo *II = new ((void*) Mem) IdentifierInfo();
576 
577   // Store the new IdentifierInfo in the cache.
578   PerIDCache[PersistentID] = II;
579   assert(II->getNameStart() && II->getNameStart()[0] != '\0');
580   return II;
581 }
582 
get(StringRef Name)583 IdentifierInfo* PTHManager::get(StringRef Name) {
584   // Double check our assumption that the last character isn't '\0'.
585   assert(Name.empty() || Name.back() != '\0');
586   PTHStringIdLookup::iterator I =
587       StringIdLookup->find(std::make_pair(Name.data(), Name.size()));
588   if (I == StringIdLookup->end()) // No identifier found?
589     return nullptr;
590 
591   // Match found.  Return the identifier!
592   assert(*I > 0);
593   return GetIdentifierInfo(*I-1);
594 }
595 
CreateLexer(FileID FID)596 PTHLexer *PTHManager::CreateLexer(FileID FID) {
597   const FileEntry *FE = PP->getSourceManager().getFileEntryForID(FID);
598   if (!FE)
599     return nullptr;
600 
601   using namespace llvm::support;
602 
603   // Lookup the FileEntry object in our file lookup data structure.  It will
604   // return a variant that indicates whether or not there is an offset within
605   // the PTH file that contains cached tokens.
606   PTHFileLookup::iterator I = FileLookup->find(FE);
607 
608   if (I == FileLookup->end()) // No tokens available?
609     return nullptr;
610 
611   const PTHFileData& FileData = *I;
612 
613   const unsigned char *BufStart = (const unsigned char *)Buf->getBufferStart();
614   // Compute the offset of the token data within the buffer.
615   const unsigned char* data = BufStart + FileData.getTokenOffset();
616 
617   // Get the location of pp-conditional table.
618   const unsigned char* ppcond = BufStart + FileData.getPPCondOffset();
619   uint32_t Len = endian::readNext<uint32_t, little, aligned>(ppcond);
620   if (Len == 0) ppcond = nullptr;
621 
622   assert(PP && "No preprocessor set yet!");
623   return new PTHLexer(*PP, FID, data, ppcond, *this);
624 }
625 
626 //===----------------------------------------------------------------------===//
627 // 'stat' caching.
628 //===----------------------------------------------------------------------===//
629 
630 namespace {
631 class PTHStatData {
632 public:
633   const bool HasData;
634   uint64_t Size;
635   time_t ModTime;
636   llvm::sys::fs::UniqueID UniqueID;
637   bool IsDirectory;
638 
PTHStatData(uint64_t Size,time_t ModTime,llvm::sys::fs::UniqueID UniqueID,bool IsDirectory)639   PTHStatData(uint64_t Size, time_t ModTime, llvm::sys::fs::UniqueID UniqueID,
640               bool IsDirectory)
641       : HasData(true), Size(Size), ModTime(ModTime), UniqueID(UniqueID),
642         IsDirectory(IsDirectory) {}
643 
PTHStatData()644   PTHStatData() : HasData(false) {}
645 };
646 
647 class PTHStatLookupTrait : public PTHFileLookupCommonTrait {
648 public:
649   typedef const char* external_key_type;  // const char*
650   typedef PTHStatData data_type;
651 
GetInternalKey(const char * path)652   static internal_key_type GetInternalKey(const char *path) {
653     // The key 'kind' doesn't matter here because it is ignored in EqualKey.
654     return std::make_pair((unsigned char) 0x0, path);
655   }
656 
EqualKey(internal_key_type a,internal_key_type b)657   static bool EqualKey(internal_key_type a, internal_key_type b) {
658     // When doing 'stat' lookups we don't care about the kind of 'a' and 'b',
659     // just the paths.
660     return strcmp(a.second, b.second) == 0;
661   }
662 
ReadData(const internal_key_type & k,const unsigned char * d,unsigned)663   static data_type ReadData(const internal_key_type& k, const unsigned char* d,
664                             unsigned) {
665 
666     if (k.first /* File or Directory */) {
667       bool IsDirectory = true;
668       if (k.first == 0x1 /* File */) {
669         IsDirectory = false;
670         d += 4 * 2; // Skip the first 2 words.
671       }
672 
673       using namespace llvm::support;
674 
675       uint64_t File = endian::readNext<uint64_t, little, unaligned>(d);
676       uint64_t Device = endian::readNext<uint64_t, little, unaligned>(d);
677       llvm::sys::fs::UniqueID UniqueID(Device, File);
678       time_t ModTime = endian::readNext<uint64_t, little, unaligned>(d);
679       uint64_t Size = endian::readNext<uint64_t, little, unaligned>(d);
680       return data_type(Size, ModTime, UniqueID, IsDirectory);
681     }
682 
683     // Negative stat.  Don't read anything.
684     return data_type();
685   }
686 };
687 } // end anonymous namespace
688 
689 namespace clang {
690 class PTHStatCache : public FileSystemStatCache {
691   typedef llvm::OnDiskChainedHashTable<PTHStatLookupTrait> CacheTy;
692   CacheTy Cache;
693 
694 public:
PTHStatCache(PTHManager::PTHFileLookup & FL)695   PTHStatCache(PTHManager::PTHFileLookup &FL)
696       : Cache(FL.getNumBuckets(), FL.getNumEntries(), FL.getBuckets(),
697               FL.getBase()) {}
698 
getStat(const char * Path,FileData & Data,bool isFile,std::unique_ptr<vfs::File> * F,vfs::FileSystem & FS)699   LookupResult getStat(const char *Path, FileData &Data, bool isFile,
700                        std::unique_ptr<vfs::File> *F,
701                        vfs::FileSystem &FS) override {
702     // Do the lookup for the file's data in the PTH file.
703     CacheTy::iterator I = Cache.find(Path);
704 
705     // If we don't get a hit in the PTH file just forward to 'stat'.
706     if (I == Cache.end())
707       return statChained(Path, Data, isFile, F, FS);
708 
709     const PTHStatData &D = *I;
710 
711     if (!D.HasData)
712       return CacheMissing;
713 
714     Data.Name = Path;
715     Data.Size = D.Size;
716     Data.ModTime = D.ModTime;
717     Data.UniqueID = D.UniqueID;
718     Data.IsDirectory = D.IsDirectory;
719     Data.IsNamedPipe = false;
720     Data.InPCH = true;
721 
722     return CacheExists;
723   }
724 };
725 }
726 
createStatCache()727 std::unique_ptr<FileSystemStatCache> PTHManager::createStatCache() {
728   return llvm::make_unique<PTHStatCache>(*FileLookup);
729 }
730