1 //===--- IdentifierTable.h - Hash table for identifier lookup ---*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief Defines the clang::IdentifierInfo, clang::IdentifierTable, and
12 /// clang::Selector interfaces.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #ifndef LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
17 #define LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
18 
19 #include "clang/Basic/LLVM.h"
20 #include "clang/Basic/TokenKinds.h"
21 #include "llvm/ADT/StringMap.h"
22 #include "llvm/ADT/StringRef.h"
23 #include <cassert>
24 #include <string>
25 
// Forward declaration of the LLVM DenseMapInfo traits template, so this header
// can name it without pulling in its defining header.
// NOTE(review): presumably specialized for clang::Selector further down in
// this file (outside the portion visible here) -- confirm.
namespace llvm {
  template <typename T> struct DenseMapInfo;
}
29 
30 namespace clang {
  // Forward declarations.  In this header these types are either defined
  // further down or only used in ways that do not need a complete type
  // (pointers, references, friend declarations, typedefs).
  class LangOptions;
  class IdentifierInfo;
  class IdentifierTable;
  class SourceLocation;
  class MultiKeywordSelector; // private class used by Selector
  class DeclarationName;      // AST class that stores declaration names
37 
38   /// \brief A simple pair of identifier info and location.
39   typedef std::pair<IdentifierInfo*, SourceLocation> IdentifierLocPair;
40 
41 
42 /// One of these records is kept for each identifier that
43 /// is lexed.  This contains information about whether the token was \#define'd,
44 /// is a language keyword, or if it is a front-end token of some sort (e.g. a
45 /// variable or function name).  The preprocessor keeps this information in a
46 /// set, and all tok::identifier tokens have a pointer to one of these.
47 class IdentifierInfo {
48   unsigned TokenID            : 9; // Front-end token ID or tok::identifier.
49   // Objective-C keyword ('protocol' in '@protocol') or builtin (__builtin_inf).
50   // First NUM_OBJC_KEYWORDS values are for Objective-C, the remaining values
51   // are for builtins.
52   unsigned ObjCOrBuiltinID    :13;
53   bool HasMacro               : 1; // True if there is a #define for this.
54   bool HadMacro               : 1; // True if there was a #define for this.
55   bool IsExtension            : 1; // True if identifier is a lang extension.
56   bool IsFutureCompatKeyword  : 1; // True if identifier is a keyword in a
57                                    // newer Standard or proposed Standard.
58   bool IsPoisoned             : 1; // True if identifier is poisoned.
59   bool IsCPPOperatorKeyword   : 1; // True if ident is a C++ operator keyword.
60   bool NeedsHandleIdentifier  : 1; // See "RecomputeNeedsHandleIdentifier".
61   bool IsFromAST              : 1; // True if identifier was loaded (at least
62                                    // partially) from an AST file.
63   bool ChangedAfterLoad       : 1; // True if identifier has changed from the
64                                    // definition loaded from an AST file.
65   bool FEChangedAfterLoad     : 1; // True if identifier's frontend information
66                                    // has changed from the definition loaded
67                                    // from an AST file.
68   bool RevertedTokenID        : 1; // True if revertTokenIDToIdentifier was
69                                    // called.
70   bool OutOfDate              : 1; // True if there may be additional
71                                    // information about this identifier
72                                    // stored externally.
73   bool IsModulesImport        : 1; // True if this is the 'import' contextual
74                                    // keyword.
75   // 29 bit left in 64-bit word.
76 
77   void *FETokenInfo;               // Managed by the language front-end.
78   llvm::StringMapEntry<IdentifierInfo*> *Entry;
79 
80   IdentifierInfo(const IdentifierInfo&) = delete;
81   void operator=(const IdentifierInfo&) = delete;
82 
83   friend class IdentifierTable;
84 
85 public:
86   IdentifierInfo();
87 
88 
89   /// \brief Return true if this is the identifier for the specified string.
90   ///
91   /// This is intended to be used for string literals only: II->isStr("foo").
92   template <std::size_t StrLen>
isStr(const char (& Str)[StrLen])93   bool isStr(const char (&Str)[StrLen]) const {
94     return getLength() == StrLen-1 && !memcmp(getNameStart(), Str, StrLen-1);
95   }
96 
97   /// \brief Return the beginning of the actual null-terminated string for this
98   /// identifier.
99   ///
getNameStart()100   const char *getNameStart() const {
101     if (Entry) return Entry->getKeyData();
102     // FIXME: This is gross. It would be best not to embed specific details
103     // of the PTH file format here.
104     // The 'this' pointer really points to a
105     // std::pair<IdentifierInfo, const char*>, where internal pointer
106     // points to the external string data.
107     typedef std::pair<IdentifierInfo, const char*> actualtype;
108     return ((const actualtype*) this)->second;
109   }
110 
111   /// \brief Efficiently return the length of this identifier info.
112   ///
getLength()113   unsigned getLength() const {
114     if (Entry) return Entry->getKeyLength();
115     // FIXME: This is gross. It would be best not to embed specific details
116     // of the PTH file format here.
117     // The 'this' pointer really points to a
118     // std::pair<IdentifierInfo, const char*>, where internal pointer
119     // points to the external string data.
120     typedef std::pair<IdentifierInfo, const char*> actualtype;
121     const char* p = ((const actualtype*) this)->second - 2;
122     return (((unsigned) p[0]) | (((unsigned) p[1]) << 8)) - 1;
123   }
124 
125   /// \brief Return the actual identifier string.
getName()126   StringRef getName() const {
127     return StringRef(getNameStart(), getLength());
128   }
129 
130   /// \brief Return true if this identifier is \#defined to some other value.
131   /// \note The current definition may be in a module and not currently visible.
hasMacroDefinition()132   bool hasMacroDefinition() const {
133     return HasMacro;
134   }
setHasMacroDefinition(bool Val)135   void setHasMacroDefinition(bool Val) {
136     if (HasMacro == Val) return;
137 
138     HasMacro = Val;
139     if (Val) {
140       NeedsHandleIdentifier = 1;
141       HadMacro = true;
142     } else {
143       RecomputeNeedsHandleIdentifier();
144     }
145   }
146   /// \brief Returns true if this identifier was \#defined to some value at any
147   /// moment. In this case there should be an entry for the identifier in the
148   /// macro history table in Preprocessor.
hadMacroDefinition()149   bool hadMacroDefinition() const {
150     return HadMacro;
151   }
152 
153   /// If this is a source-language token (e.g. 'for'), this API
154   /// can be used to cause the lexer to map identifiers to source-language
155   /// tokens.
getTokenID()156   tok::TokenKind getTokenID() const { return (tok::TokenKind)TokenID; }
157 
158   /// \brief True if revertTokenIDToIdentifier() was called.
hasRevertedTokenIDToIdentifier()159   bool hasRevertedTokenIDToIdentifier() const { return RevertedTokenID; }
160 
161   /// \brief Revert TokenID to tok::identifier; used for GNU libstdc++ 4.2
162   /// compatibility.
163   ///
164   /// TokenID is normally read-only but there are 2 instances where we revert it
165   /// to tok::identifier for libstdc++ 4.2. Keep track of when this happens
166   /// using this method so we can inform serialization about it.
revertTokenIDToIdentifier()167   void revertTokenIDToIdentifier() {
168     assert(TokenID != tok::identifier && "Already at tok::identifier");
169     TokenID = tok::identifier;
170     RevertedTokenID = true;
171   }
revertIdentifierToTokenID(tok::TokenKind TK)172   void revertIdentifierToTokenID(tok::TokenKind TK) {
173     assert(TokenID == tok::identifier && "Should be at tok::identifier");
174     TokenID = TK;
175     RevertedTokenID = false;
176   }
177 
178   /// \brief Return the preprocessor keyword ID for this identifier.
179   ///
180   /// For example, "define" will return tok::pp_define.
181   tok::PPKeywordKind getPPKeywordID() const;
182 
183   /// \brief Return the Objective-C keyword ID for the this identifier.
184   ///
185   /// For example, 'class' will return tok::objc_class if ObjC is enabled.
getObjCKeywordID()186   tok::ObjCKeywordKind getObjCKeywordID() const {
187     if (ObjCOrBuiltinID < tok::NUM_OBJC_KEYWORDS)
188       return tok::ObjCKeywordKind(ObjCOrBuiltinID);
189     else
190       return tok::objc_not_keyword;
191   }
setObjCKeywordID(tok::ObjCKeywordKind ID)192   void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCOrBuiltinID = ID; }
193 
194   /// \brief True if setNotBuiltin() was called.
hasRevertedBuiltin()195   bool hasRevertedBuiltin() const {
196     return ObjCOrBuiltinID == tok::NUM_OBJC_KEYWORDS;
197   }
198 
199   /// \brief Revert the identifier to a non-builtin identifier. We do this if
200   /// the name of a known builtin library function is used to declare that
201   /// function, but an unexpected type is specified.
revertBuiltin()202   void revertBuiltin() {
203     setBuiltinID(0);
204   }
205 
206   /// \brief Return a value indicating whether this is a builtin function.
207   ///
208   /// 0 is not-built-in.  1 is builtin-for-some-nonprimary-target.
209   /// 2+ are specific builtin functions.
getBuiltinID()210   unsigned getBuiltinID() const {
211     if (ObjCOrBuiltinID >= tok::NUM_OBJC_KEYWORDS)
212       return ObjCOrBuiltinID - tok::NUM_OBJC_KEYWORDS;
213     else
214       return 0;
215   }
setBuiltinID(unsigned ID)216   void setBuiltinID(unsigned ID) {
217     ObjCOrBuiltinID = ID + tok::NUM_OBJC_KEYWORDS;
218     assert(ObjCOrBuiltinID - unsigned(tok::NUM_OBJC_KEYWORDS) == ID
219            && "ID too large for field!");
220   }
221 
getObjCOrBuiltinID()222   unsigned getObjCOrBuiltinID() const { return ObjCOrBuiltinID; }
setObjCOrBuiltinID(unsigned ID)223   void setObjCOrBuiltinID(unsigned ID) { ObjCOrBuiltinID = ID; }
224 
225   /// get/setExtension - Initialize information about whether or not this
226   /// language token is an extension.  This controls extension warnings, and is
227   /// only valid if a custom token ID is set.
isExtensionToken()228   bool isExtensionToken() const { return IsExtension; }
setIsExtensionToken(bool Val)229   void setIsExtensionToken(bool Val) {
230     IsExtension = Val;
231     if (Val)
232       NeedsHandleIdentifier = 1;
233     else
234       RecomputeNeedsHandleIdentifier();
235   }
236 
237   /// is/setIsFutureCompatKeyword - Initialize information about whether or not
238   /// this language token is a keyword in a newer or proposed Standard. This
239   /// controls compatibility warnings, and is only true when not parsing the
240   /// corresponding Standard. Once a compatibility problem has been diagnosed
241   /// with this keyword, the flag will be cleared.
isFutureCompatKeyword()242   bool isFutureCompatKeyword() const { return IsFutureCompatKeyword; }
setIsFutureCompatKeyword(bool Val)243   void setIsFutureCompatKeyword(bool Val) {
244     IsFutureCompatKeyword = Val;
245     if (Val)
246       NeedsHandleIdentifier = 1;
247     else
248       RecomputeNeedsHandleIdentifier();
249   }
250 
251   /// setIsPoisoned - Mark this identifier as poisoned.  After poisoning, the
252   /// Preprocessor will emit an error every time this token is used.
253   void setIsPoisoned(bool Value = true) {
254     IsPoisoned = Value;
255     if (Value)
256       NeedsHandleIdentifier = 1;
257     else
258       RecomputeNeedsHandleIdentifier();
259   }
260 
261   /// \brief Return true if this token has been poisoned.
isPoisoned()262   bool isPoisoned() const { return IsPoisoned; }
263 
264   /// isCPlusPlusOperatorKeyword/setIsCPlusPlusOperatorKeyword controls whether
265   /// this identifier is a C++ alternate representation of an operator.
266   void setIsCPlusPlusOperatorKeyword(bool Val = true) {
267     IsCPPOperatorKeyword = Val;
268     if (Val)
269       NeedsHandleIdentifier = 1;
270     else
271       RecomputeNeedsHandleIdentifier();
272   }
isCPlusPlusOperatorKeyword()273   bool isCPlusPlusOperatorKeyword() const { return IsCPPOperatorKeyword; }
274 
275   /// \brief Return true if this token is a keyword in the specified language.
276   bool isKeyword(const LangOptions &LangOpts);
277 
278   /// getFETokenInfo/setFETokenInfo - The language front-end is allowed to
279   /// associate arbitrary metadata with this token.
280   template<typename T>
getFETokenInfo()281   T *getFETokenInfo() const { return static_cast<T*>(FETokenInfo); }
setFETokenInfo(void * T)282   void setFETokenInfo(void *T) { FETokenInfo = T; }
283 
284   /// \brief Return true if the Preprocessor::HandleIdentifier must be called
285   /// on a token of this identifier.
286   ///
287   /// If this returns false, we know that HandleIdentifier will not affect
288   /// the token.
isHandleIdentifierCase()289   bool isHandleIdentifierCase() const { return NeedsHandleIdentifier; }
290 
291   /// \brief Return true if the identifier in its current state was loaded
292   /// from an AST file.
isFromAST()293   bool isFromAST() const { return IsFromAST; }
294 
setIsFromAST()295   void setIsFromAST() { IsFromAST = true; }
296 
297   /// \brief Determine whether this identifier has changed since it was loaded
298   /// from an AST file.
hasChangedSinceDeserialization()299   bool hasChangedSinceDeserialization() const {
300     return ChangedAfterLoad;
301   }
302 
303   /// \brief Note that this identifier has changed since it was loaded from
304   /// an AST file.
setChangedSinceDeserialization()305   void setChangedSinceDeserialization() {
306     ChangedAfterLoad = true;
307   }
308 
309   /// \brief Determine whether the frontend token information for this
310   /// identifier has changed since it was loaded from an AST file.
hasFETokenInfoChangedSinceDeserialization()311   bool hasFETokenInfoChangedSinceDeserialization() const {
312     return FEChangedAfterLoad;
313   }
314 
315   /// \brief Note that the frontend token information for this identifier has
316   /// changed since it was loaded from an AST file.
setFETokenInfoChangedSinceDeserialization()317   void setFETokenInfoChangedSinceDeserialization() {
318     FEChangedAfterLoad = true;
319   }
320 
321   /// \brief Determine whether the information for this identifier is out of
322   /// date with respect to the external source.
isOutOfDate()323   bool isOutOfDate() const { return OutOfDate; }
324 
325   /// \brief Set whether the information for this identifier is out of
326   /// date with respect to the external source.
setOutOfDate(bool OOD)327   void setOutOfDate(bool OOD) {
328     OutOfDate = OOD;
329     if (OOD)
330       NeedsHandleIdentifier = true;
331     else
332       RecomputeNeedsHandleIdentifier();
333   }
334 
335   /// \brief Determine whether this is the contextual keyword \c import.
isModulesImport()336   bool isModulesImport() const { return IsModulesImport; }
337 
338   /// \brief Set whether this identifier is the contextual keyword \c import.
setModulesImport(bool I)339   void setModulesImport(bool I) {
340     IsModulesImport = I;
341     if (I)
342       NeedsHandleIdentifier = true;
343     else
344       RecomputeNeedsHandleIdentifier();
345   }
346 
347   /// \brief Provide less than operator for lexicographical sorting.
348   bool operator<(const IdentifierInfo &RHS) const {
349     return getName() < RHS.getName();
350   }
351 
352 private:
353   /// The Preprocessor::HandleIdentifier does several special (but rare)
354   /// things to identifiers of various sorts.  For example, it changes the
355   /// \c for keyword token from tok::identifier to tok::for.
356   ///
357   /// This method is very tied to the definition of HandleIdentifier.  Any
358   /// change to it should be reflected here.
RecomputeNeedsHandleIdentifier()359   void RecomputeNeedsHandleIdentifier() {
360     NeedsHandleIdentifier =
361       (isPoisoned() | hasMacroDefinition() | isCPlusPlusOperatorKeyword() |
362        isExtensionToken() | isFutureCompatKeyword() || isOutOfDate() ||
363        isModulesImport());
364   }
365 };
366 
367 /// \brief An RAII object for [un]poisoning an identifier within a scope.
368 ///
369 /// \p II is allowed to be null, in which case objects of this type have
370 /// no effect.
371 class PoisonIdentifierRAIIObject {
372   IdentifierInfo *const II;
373   const bool OldValue;
374 public:
PoisonIdentifierRAIIObject(IdentifierInfo * II,bool NewValue)375   PoisonIdentifierRAIIObject(IdentifierInfo *II, bool NewValue)
376     : II(II), OldValue(II ? II->isPoisoned() : false) {
377     if(II)
378       II->setIsPoisoned(NewValue);
379   }
380 
~PoisonIdentifierRAIIObject()381   ~PoisonIdentifierRAIIObject() {
382     if(II)
383       II->setIsPoisoned(OldValue);
384   }
385 };
386 
387 /// \brief An iterator that walks over all of the known identifiers
388 /// in the lookup table.
389 ///
390 /// Since this iterator uses an abstract interface via virtual
391 /// functions, it uses an object-oriented interface rather than the
392 /// more standard C++ STL iterator interface. In this OO-style
393 /// iteration, the single function \c Next() provides dereference,
394 /// advance, and end-of-sequence checking in a single
395 /// operation. Subclasses of this iterator type will provide the
396 /// actual functionality.
397 class IdentifierIterator {
398 private:
399   IdentifierIterator(const IdentifierIterator &) = delete;
400   void operator=(const IdentifierIterator &) = delete;
401 
402 protected:
IdentifierIterator()403   IdentifierIterator() { }
404 
405 public:
406   virtual ~IdentifierIterator();
407 
408   /// \brief Retrieve the next string in the identifier table and
409   /// advances the iterator for the following string.
410   ///
411   /// \returns The next string in the identifier table. If there is
412   /// no such string, returns an empty \c StringRef.
413   virtual StringRef Next() = 0;
414 };
415 
416 /// \brief Provides lookups to, and iteration over, IdentiferInfo objects.
417 class IdentifierInfoLookup {
418 public:
419   virtual ~IdentifierInfoLookup();
420 
421   /// \brief Return the IdentifierInfo for the specified named identifier.
422   ///
423   /// Unlike the version in IdentifierTable, this returns a pointer instead
424   /// of a reference.  If the pointer is null then the IdentifierInfo cannot
425   /// be found.
426   virtual IdentifierInfo* get(StringRef Name) = 0;
427 
428   /// \brief Retrieve an iterator into the set of all identifiers
429   /// known to this identifier lookup source.
430   ///
431   /// This routine provides access to all of the identifiers known to
432   /// the identifier lookup, allowing access to the contents of the
433   /// identifiers without introducing the overhead of constructing
434   /// IdentifierInfo objects for each.
435   ///
436   /// \returns A new iterator into the set of known identifiers. The
437   /// caller is responsible for deleting this iterator.
438   virtual IdentifierIterator *getIdentifiers();
439 };
440 
441 /// \brief Implements an efficient mapping from strings to IdentifierInfo nodes.
442 ///
443 /// This has no other purpose, but this is an extremely performance-critical
444 /// piece of the code, as each occurrence of every identifier goes through
445 /// here when lexed.
446 class IdentifierTable {
447   // Shark shows that using MallocAllocator is *much* slower than using this
448   // BumpPtrAllocator!
449   typedef llvm::StringMap<IdentifierInfo*, llvm::BumpPtrAllocator> HashTableTy;
450   HashTableTy HashTable;
451 
452   IdentifierInfoLookup* ExternalLookup;
453 
454 public:
455   /// \brief Create the identifier table, populating it with info about the
456   /// language keywords for the language specified by \p LangOpts.
457   IdentifierTable(const LangOptions &LangOpts,
458                   IdentifierInfoLookup* externalLookup = nullptr);
459 
460   /// \brief Set the external identifier lookup mechanism.
setExternalIdentifierLookup(IdentifierInfoLookup * IILookup)461   void setExternalIdentifierLookup(IdentifierInfoLookup *IILookup) {
462     ExternalLookup = IILookup;
463   }
464 
465   /// \brief Retrieve the external identifier lookup object, if any.
getExternalIdentifierLookup()466   IdentifierInfoLookup *getExternalIdentifierLookup() const {
467     return ExternalLookup;
468   }
469 
getAllocator()470   llvm::BumpPtrAllocator& getAllocator() {
471     return HashTable.getAllocator();
472   }
473 
474   /// \brief Return the identifier token info for the specified named
475   /// identifier.
get(StringRef Name)476   IdentifierInfo &get(StringRef Name) {
477     auto &Entry = *HashTable.insert(std::make_pair(Name, nullptr)).first;
478 
479     IdentifierInfo *&II = Entry.second;
480     if (II) return *II;
481 
482     // No entry; if we have an external lookup, look there first.
483     if (ExternalLookup) {
484       II = ExternalLookup->get(Name);
485       if (II)
486         return *II;
487     }
488 
489     // Lookups failed, make a new IdentifierInfo.
490     void *Mem = getAllocator().Allocate<IdentifierInfo>();
491     II = new (Mem) IdentifierInfo();
492 
493     // Make sure getName() knows how to find the IdentifierInfo
494     // contents.
495     II->Entry = &Entry;
496 
497     return *II;
498   }
499 
get(StringRef Name,tok::TokenKind TokenCode)500   IdentifierInfo &get(StringRef Name, tok::TokenKind TokenCode) {
501     IdentifierInfo &II = get(Name);
502     II.TokenID = TokenCode;
503     assert(II.TokenID == (unsigned) TokenCode && "TokenCode too large");
504     return II;
505   }
506 
507   /// \brief Gets an IdentifierInfo for the given name without consulting
508   ///        external sources.
509   ///
510   /// This is a version of get() meant for external sources that want to
511   /// introduce or modify an identifier. If they called get(), they would
512   /// likely end up in a recursion.
getOwn(StringRef Name)513   IdentifierInfo &getOwn(StringRef Name) {
514     auto &Entry = *HashTable.insert(std::make_pair(Name, nullptr)).first;
515 
516     IdentifierInfo *&II = Entry.second;
517     if (II)
518       return *II;
519 
520     // Lookups failed, make a new IdentifierInfo.
521     void *Mem = getAllocator().Allocate<IdentifierInfo>();
522     II = new (Mem) IdentifierInfo();
523 
524     // Make sure getName() knows how to find the IdentifierInfo
525     // contents.
526     II->Entry = &Entry;
527 
528     // If this is the 'import' contextual keyword, mark it as such.
529     if (Name.equals("import"))
530       II->setModulesImport(true);
531 
532     return *II;
533   }
534 
535   typedef HashTableTy::const_iterator iterator;
536   typedef HashTableTy::const_iterator const_iterator;
537 
begin()538   iterator begin() const { return HashTable.begin(); }
end()539   iterator end() const   { return HashTable.end(); }
size()540   unsigned size() const { return HashTable.size(); }
541 
542   /// \brief Print some statistics to stderr that indicate how well the
543   /// hashing is doing.
544   void PrintStats() const;
545 
546   void AddKeywords(const LangOptions &LangOpts);
547 };
548 
/// \brief A family of Objective-C methods.
///
/// These families have no inherent meaning in the language, but are
/// nonetheless central enough in the existing implementations to
/// merit direct AST support.  While, in theory, arbitrary methods can
/// be considered to form families, we focus here on the methods
/// involving allocation and retain-count management, as these are the
/// most "core" and the most likely to be useful to diverse clients
/// without extra information.
///
/// Both selectors and actual method declarations may be classified
/// into families.  Method families may impose additional restrictions
/// beyond their selector name; for example, a method called '_init'
/// that returns void is not considered to be in the 'init' family
/// (but would be if it returned 'id').  It is also possible to
/// explicitly change or remove a method's family.  Therefore the
/// method's family should be considered the single source of truth.
///
/// \note The enumerators currently take the values 0 (OMF_None) through 14
/// (OMF_performSelector).  With ObjCMethodFamilyBitWidth at 4 and the value 15
/// taken by InvalidObjCMethodFamily, there is no room left: adding an
/// enumerator requires widening ObjCMethodFamilyBitWidth.
enum ObjCMethodFamily {
  /// \brief No particular method family.
  OMF_None,

  // Selectors in these families may have arbitrary arity, may be
  // written with arbitrary leading underscores, and may have
  // additional CamelCase "words" in their first selector chunk
  // following the family name.
  OMF_alloc,
  OMF_copy,
  OMF_init,
  OMF_mutableCopy,
  OMF_new,

  // These families are singletons consisting only of the nullary
  // selector with the given name.
  OMF_autorelease,
  OMF_dealloc,
  OMF_finalize,
  OMF_release,
  OMF_retain,
  OMF_retainCount,
  OMF_self,
  OMF_initialize,

  // performSelector families
  OMF_performSelector
};
594 
/// \brief Enough bits to store any enumerator in ObjCMethodFamily or
/// InvalidObjCMethodFamily.
enum { ObjCMethodFamilyBitWidth = 4 };

/// \brief An invalid value of ObjCMethodFamily.
///
/// This is the all-ones pattern of ObjCMethodFamilyBitWidth bits (15 for a
/// width of 4), i.e. the largest value a field of that width can hold.
enum { InvalidObjCMethodFamily = (1 << ObjCMethodFamilyBitWidth) - 1 };
601 
/// \brief A family of Objective-C methods.
///
/// These are families of methods whose result type is initially 'id', but
/// which are candidates for having the result type changed to 'instancetype'.
enum ObjCInstanceTypeFamily {
  OIT_None,
  OIT_Array,
  OIT_Dictionary,
  OIT_Singleton,
  OIT_Init,
  OIT_ReturnsSelf
};
614 
/// \brief String-format family of a selector, as returned by
/// Selector::getStringFormatFamily().
///
/// NOTE(review): judging by the names, SFF_NSString and SFF_CFString
/// presumably identify NSString- and CFString-style format-string methods;
/// confirm against the implementation of getStringFormatFamilyImpl().
enum ObjCStringFormatFamily {
  SFF_None,
  SFF_NSString,
  SFF_CFString
};
620 
621 /// \brief Smart pointer class that efficiently represents Objective-C method
622 /// names.
623 ///
624 /// This class will either point to an IdentifierInfo or a
625 /// MultiKeywordSelector (which is private). This enables us to optimize
626 /// selectors that take no arguments and selectors that take 1 argument, which
627 /// accounts for 78% of all selectors in Cocoa.h.
628 class Selector {
629   friend class Diagnostic;
630 
631   enum IdentifierInfoFlag {
632     // Empty selector = 0.
633     ZeroArg  = 0x1,
634     OneArg   = 0x2,
635     MultiArg = 0x3,
636     ArgFlags = ZeroArg|OneArg
637   };
638   uintptr_t InfoPtr; // a pointer to the MultiKeywordSelector or IdentifierInfo.
639 
Selector(IdentifierInfo * II,unsigned nArgs)640   Selector(IdentifierInfo *II, unsigned nArgs) {
641     InfoPtr = reinterpret_cast<uintptr_t>(II);
642     assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
643     assert(nArgs < 2 && "nArgs not equal to 0/1");
644     InfoPtr |= nArgs+1;
645   }
Selector(MultiKeywordSelector * SI)646   Selector(MultiKeywordSelector *SI) {
647     InfoPtr = reinterpret_cast<uintptr_t>(SI);
648     assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
649     InfoPtr |= MultiArg;
650   }
651 
getAsIdentifierInfo()652   IdentifierInfo *getAsIdentifierInfo() const {
653     if (getIdentifierInfoFlag() < MultiArg)
654       return reinterpret_cast<IdentifierInfo *>(InfoPtr & ~ArgFlags);
655     return nullptr;
656   }
getMultiKeywordSelector()657   MultiKeywordSelector *getMultiKeywordSelector() const {
658     return reinterpret_cast<MultiKeywordSelector *>(InfoPtr & ~ArgFlags);
659   }
660 
getIdentifierInfoFlag()661   unsigned getIdentifierInfoFlag() const {
662     return InfoPtr & ArgFlags;
663   }
664 
665   static ObjCMethodFamily getMethodFamilyImpl(Selector sel);
666 
667   static ObjCStringFormatFamily getStringFormatFamilyImpl(Selector sel);
668 
669 public:
670   friend class SelectorTable; // only the SelectorTable can create these
671   friend class DeclarationName; // and the AST's DeclarationName.
672 
673   /// The default ctor should only be used when creating data structures that
674   ///  will contain selectors.
Selector()675   Selector() : InfoPtr(0) {}
Selector(uintptr_t V)676   Selector(uintptr_t V) : InfoPtr(V) {}
677 
678   /// operator==/!= - Indicate whether the specified selectors are identical.
679   bool operator==(Selector RHS) const {
680     return InfoPtr == RHS.InfoPtr;
681   }
682   bool operator!=(Selector RHS) const {
683     return InfoPtr != RHS.InfoPtr;
684   }
getAsOpaquePtr()685   void *getAsOpaquePtr() const {
686     return reinterpret_cast<void*>(InfoPtr);
687   }
688 
689   /// \brief Determine whether this is the empty selector.
isNull()690   bool isNull() const { return InfoPtr == 0; }
691 
692   // Predicates to identify the selector type.
isKeywordSelector()693   bool isKeywordSelector() const {
694     return getIdentifierInfoFlag() != ZeroArg;
695   }
isUnarySelector()696   bool isUnarySelector() const {
697     return getIdentifierInfoFlag() == ZeroArg;
698   }
699   unsigned getNumArgs() const;
700 
701 
702   /// \brief Retrieve the identifier at a given position in the selector.
703   ///
704   /// Note that the identifier pointer returned may be NULL. Clients that only
705   /// care about the text of the identifier string, and not the specific,
706   /// uniqued identifier pointer, should use \c getNameForSlot(), which returns
707   /// an empty string when the identifier pointer would be NULL.
708   ///
709   /// \param argIndex The index for which we want to retrieve the identifier.
710   /// This index shall be less than \c getNumArgs() unless this is a keyword
711   /// selector, in which case 0 is the only permissible value.
712   ///
713   /// \returns the uniqued identifier for this slot, or NULL if this slot has
714   /// no corresponding identifier.
715   IdentifierInfo *getIdentifierInfoForSlot(unsigned argIndex) const;
716 
717   /// \brief Retrieve the name at a given position in the selector.
718   ///
719   /// \param argIndex The index for which we want to retrieve the name.
720   /// This index shall be less than \c getNumArgs() unless this is a keyword
721   /// selector, in which case 0 is the only permissible value.
722   ///
723   /// \returns the name for this slot, which may be the empty string if no
724   /// name was supplied.
725   StringRef getNameForSlot(unsigned argIndex) const;
726 
727   /// \brief Derive the full selector name (e.g. "foo:bar:") and return
728   /// it as an std::string.
729   std::string getAsString() const;
730 
731   /// \brief Prints the full selector name (e.g. "foo:bar:").
732   void print(llvm::raw_ostream &OS) const;
733 
734   /// \brief Derive the conventional family of this method.
getMethodFamily()735   ObjCMethodFamily getMethodFamily() const {
736     return getMethodFamilyImpl(*this);
737   }
738 
getStringFormatFamily()739   ObjCStringFormatFamily getStringFormatFamily() const {
740     return getStringFormatFamilyImpl(*this);
741   }
742 
getEmptyMarker()743   static Selector getEmptyMarker() {
744     return Selector(uintptr_t(-1));
745   }
getTombstoneMarker()746   static Selector getTombstoneMarker() {
747     return Selector(uintptr_t(-2));
748   }
749 
750   static ObjCInstanceTypeFamily getInstTypeMethodFamily(Selector sel);
751 };
752 
753 /// \brief This table allows us to fully hide how we implement
754 /// multi-keyword caching.
755 class SelectorTable {
756   void *Impl;  // Actually a SelectorTableImpl
757   SelectorTable(const SelectorTable &) = delete;
758   void operator=(const SelectorTable &) = delete;
759 public:
760   SelectorTable();
761   ~SelectorTable();
762 
763   /// \brief Can create any sort of selector.
764   ///
765   /// \p NumArgs indicates whether this is a no argument selector "foo", a
766   /// single argument selector "foo:" or multi-argument "foo:bar:".
767   Selector getSelector(unsigned NumArgs, IdentifierInfo **IIV);
768 
getUnarySelector(IdentifierInfo * ID)769   Selector getUnarySelector(IdentifierInfo *ID) {
770     return Selector(ID, 1);
771   }
getNullarySelector(IdentifierInfo * ID)772   Selector getNullarySelector(IdentifierInfo *ID) {
773     return Selector(ID, 0);
774   }
775 
776   /// \brief Return the total amount of memory allocated for managing selectors.
777   size_t getTotalMemory() const;
778 
779   /// \brief Return the default setter name for the given identifier.
780   ///
781   /// This is "set" + \p Name where the initial character of \p Name
782   /// has been capitalized.
783   static SmallString<64> constructSetterName(StringRef Name);
784 
785   /// \brief Return the default setter selector for the given identifier.
786   ///
787   /// This is "set" + \p Name where the initial character of \p Name
788   /// has been capitalized.
789   static Selector constructSetterSelector(IdentifierTable &Idents,
790                                           SelectorTable &SelTable,
791                                           const IdentifierInfo *Name);
792 };
793 
794 /// DeclarationNameExtra - Common base of the MultiKeywordSelector,
795 /// CXXSpecialName, and CXXOperatorIdName classes, all of which are
796 /// private classes that describe different kinds of names.
797 class DeclarationNameExtra {
798 public:
799   /// ExtraKind - The kind of "extra" information stored in the
800   /// DeclarationName. See @c ExtraKindOrNumArgs for an explanation of
801   /// how these enumerator values are used.
802   enum ExtraKind {
803     CXXConstructor = 0,
804     CXXDestructor,
805     CXXConversionFunction,
806 #define OVERLOADED_OPERATOR(Name,Spelling,Token,Unary,Binary,MemberOnly) \
807     CXXOperator##Name,
808 #include "clang/Basic/OperatorKinds.def"
809     CXXLiteralOperator,
810     CXXUsingDirective,
811     NUM_EXTRA_KINDS
812   };
813 
814   /// ExtraKindOrNumArgs - Either the kind of C++ special name or
815   /// operator-id (if the value is one of the CXX* enumerators of
816   /// ExtraKind), in which case the DeclarationNameExtra is also a
817   /// CXXSpecialName, (for CXXConstructor, CXXDestructor, or
818   /// CXXConversionFunction) CXXOperatorIdName, or CXXLiteralOperatorName,
819   /// it may be also name common to C++ using-directives (CXXUsingDirective),
820   /// otherwise it is NUM_EXTRA_KINDS+NumArgs, where NumArgs is the number of
821   /// arguments in the Objective-C selector, in which case the
822   /// DeclarationNameExtra is also a MultiKeywordSelector.
823   unsigned ExtraKindOrNumArgs;
824 };
825 
826 }  // end namespace clang
827 
828 namespace llvm {
829 /// Define DenseMapInfo so that Selectors can be used as keys in DenseMap and
830 /// DenseSets.
831 template <>
832 struct DenseMapInfo<clang::Selector> {
833   static inline clang::Selector getEmptyKey() {
834     return clang::Selector::getEmptyMarker();
835   }
836   static inline clang::Selector getTombstoneKey() {
837     return clang::Selector::getTombstoneMarker();
838   }
839 
840   static unsigned getHashValue(clang::Selector S);
841 
842   static bool isEqual(clang::Selector LHS, clang::Selector RHS) {
843     return LHS == RHS;
844   }
845 };
846 
847 template <>
848 struct isPodLike<clang::Selector> { static const bool value = true; };
849 
// Forward declaration of the primary template, so that it can be
// specialized in this header.
template <typename T>
class PointerLikeTypeTraits;
851 
852 template<>
853 class PointerLikeTypeTraits<clang::Selector> {
854 public:
855   static inline const void *getAsVoidPointer(clang::Selector P) {
856     return P.getAsOpaquePtr();
857   }
858   static inline clang::Selector getFromVoidPointer(const void *P) {
859     return clang::Selector(reinterpret_cast<uintptr_t>(P));
860   }
861   enum { NumLowBitsAvailable = 0 };
862 };
863 
864 // Provide PointerLikeTypeTraits for IdentifierInfo pointers, which
865 // are not guaranteed to be 8-byte aligned.
866 template<>
867 class PointerLikeTypeTraits<clang::IdentifierInfo*> {
868 public:
869   static inline void *getAsVoidPointer(clang::IdentifierInfo* P) {
870     return P;
871   }
872   static inline clang::IdentifierInfo *getFromVoidPointer(void *P) {
873     return static_cast<clang::IdentifierInfo*>(P);
874   }
875   enum { NumLowBitsAvailable = 1 };
876 };
877 
878 template<>
879 class PointerLikeTypeTraits<const clang::IdentifierInfo*> {
880 public:
881   static inline const void *getAsVoidPointer(const clang::IdentifierInfo* P) {
882     return P;
883   }
884   static inline const clang::IdentifierInfo *getFromVoidPointer(const void *P) {
885     return static_cast<const clang::IdentifierInfo*>(P);
886   }
887   enum { NumLowBitsAvailable = 1 };
888 };
889 
890 }  // end namespace llvm
891 #endif
892