1 //===- IdentifierTable.h - Hash table for identifier lookup -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Defines the clang::IdentifierInfo, clang::IdentifierTable, and
11 /// clang::Selector interfaces.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
16 #define LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
17 
18 #include "clang/Basic/LLVM.h"
19 #include "clang/Basic/TokenKinds.h"
20 #include "llvm/ADT/DenseMapInfo.h"
21 #include "llvm/ADT/SmallString.h"
22 #include "llvm/ADT/StringMap.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/Support/Allocator.h"
25 #include "llvm/Support/PointerLikeTypeTraits.h"
26 #include "llvm/Support/type_traits.h"
27 #include <cassert>
28 #include <cstddef>
29 #include <cstdint>
30 #include <cstring>
31 #include <string>
32 #include <utility>
33 
34 namespace clang {
35 
36 class DeclarationName;
37 class DeclarationNameTable;
38 class IdentifierInfo;
39 class LangOptions;
40 class MultiKeywordSelector;
41 class SourceLocation;
42 
/// A simple pair of identifier info and location.
using IdentifierLocPair = std::pair<IdentifierInfo *, SourceLocation>;

/// IdentifierInfo and other related classes are aligned to
/// 8 bytes so that DeclarationName can use the lower 3 bits
/// of a pointer to one of these classes.
enum { IdentifierInfoAlignment = 8 };

/// Width, in bits, of the IdentifierInfo::ObjCOrBuiltinID bit-field.
static constexpr int ObjCOrBuiltinIDBits = 15;
52 
53 /// One of these records is kept for each identifier that
54 /// is lexed.  This contains information about whether the token was \#define'd,
55 /// is a language keyword, or if it is a front-end token of some sort (e.g. a
56 /// variable or function name).  The preprocessor keeps this information in a
57 /// set, and all tok::identifier tokens have a pointer to one of these.
58 /// It is aligned to 8 bytes because DeclarationName needs the lower 3 bits.
59 class alignas(IdentifierInfoAlignment) IdentifierInfo {
60   friend class IdentifierTable;
61 
62   // Front-end token ID or tok::identifier.
63   unsigned TokenID : 9;
64 
65   // ObjC keyword ('protocol' in '@protocol') or builtin (__builtin_inf).
66   // First NUM_OBJC_KEYWORDS values are for Objective-C,
67   // the remaining values are for builtins.
68   unsigned ObjCOrBuiltinID : ObjCOrBuiltinIDBits;
69 
70   // True if there is a #define for this.
71   unsigned HasMacro : 1;
72 
73   // True if there was a #define for this.
74   unsigned HadMacro : 1;
75 
76   // True if the identifier is a language extension.
77   unsigned IsExtension : 1;
78 
79   // True if the identifier is a keyword in a newer or proposed Standard.
80   unsigned IsFutureCompatKeyword : 1;
81 
82   // True if the identifier is poisoned.
83   unsigned IsPoisoned : 1;
84 
85   // True if the identifier is a C++ operator keyword.
86   unsigned IsCPPOperatorKeyword : 1;
87 
88   // Internal bit set by the member function RecomputeNeedsHandleIdentifier.
89   // See comment about RecomputeNeedsHandleIdentifier for more info.
90   unsigned NeedsHandleIdentifier : 1;
91 
92   // True if the identifier was loaded (at least partially) from an AST file.
93   unsigned IsFromAST : 1;
94 
95   // True if the identifier has changed from the definition
96   // loaded from an AST file.
97   unsigned ChangedAfterLoad : 1;
98 
99   // True if the identifier's frontend information has changed from the
100   // definition loaded from an AST file.
101   unsigned FEChangedAfterLoad : 1;
102 
103   // True if revertTokenIDToIdentifier was called.
104   unsigned RevertedTokenID : 1;
105 
106   // True if there may be additional information about
107   // this identifier stored externally.
108   unsigned OutOfDate : 1;
109 
110   // True if this is the 'import' contextual keyword.
111   unsigned IsModulesImport : 1;
112 
113   // True if this is a mangled OpenMP variant name.
114   unsigned IsMangledOpenMPVariantName : 1;
115 
116   // 28 bits left in a 64-bit word.
117 
118   // Managed by the language front-end.
119   void *FETokenInfo = nullptr;
120 
121   llvm::StringMapEntry<IdentifierInfo *> *Entry = nullptr;
122 
IdentifierInfo()123   IdentifierInfo()
124       : TokenID(tok::identifier), ObjCOrBuiltinID(0), HasMacro(false),
125         HadMacro(false), IsExtension(false), IsFutureCompatKeyword(false),
126         IsPoisoned(false), IsCPPOperatorKeyword(false),
127         NeedsHandleIdentifier(false), IsFromAST(false), ChangedAfterLoad(false),
128         FEChangedAfterLoad(false), RevertedTokenID(false), OutOfDate(false),
129         IsModulesImport(false), IsMangledOpenMPVariantName(false) {}
130 
131 public:
132   IdentifierInfo(const IdentifierInfo &) = delete;
133   IdentifierInfo &operator=(const IdentifierInfo &) = delete;
134   IdentifierInfo(IdentifierInfo &&) = delete;
135   IdentifierInfo &operator=(IdentifierInfo &&) = delete;
136 
137   /// Return true if this is the identifier for the specified string.
138   ///
139   /// This is intended to be used for string literals only: II->isStr("foo").
140   template <std::size_t StrLen>
isStr(const char (& Str)[StrLen])141   bool isStr(const char (&Str)[StrLen]) const {
142     return getLength() == StrLen-1 &&
143            memcmp(getNameStart(), Str, StrLen-1) == 0;
144   }
145 
146   /// Return true if this is the identifier for the specified StringRef.
isStr(llvm::StringRef Str)147   bool isStr(llvm::StringRef Str) const {
148     llvm::StringRef ThisStr(getNameStart(), getLength());
149     return ThisStr == Str;
150   }
151 
152   /// Return the beginning of the actual null-terminated string for this
153   /// identifier.
getNameStart()154   const char *getNameStart() const { return Entry->getKeyData(); }
155 
156   /// Efficiently return the length of this identifier info.
getLength()157   unsigned getLength() const { return Entry->getKeyLength(); }
158 
159   /// Return the actual identifier string.
getName()160   StringRef getName() const {
161     return StringRef(getNameStart(), getLength());
162   }
163 
164   /// Return true if this identifier is \#defined to some other value.
165   /// \note The current definition may be in a module and not currently visible.
hasMacroDefinition()166   bool hasMacroDefinition() const {
167     return HasMacro;
168   }
setHasMacroDefinition(bool Val)169   void setHasMacroDefinition(bool Val) {
170     if (HasMacro == Val) return;
171 
172     HasMacro = Val;
173     if (Val) {
174       NeedsHandleIdentifier = true;
175       HadMacro = true;
176     } else {
177       RecomputeNeedsHandleIdentifier();
178     }
179   }
180   /// Returns true if this identifier was \#defined to some value at any
181   /// moment. In this case there should be an entry for the identifier in the
182   /// macro history table in Preprocessor.
hadMacroDefinition()183   bool hadMacroDefinition() const {
184     return HadMacro;
185   }
186 
187   /// If this is a source-language token (e.g. 'for'), this API
188   /// can be used to cause the lexer to map identifiers to source-language
189   /// tokens.
getTokenID()190   tok::TokenKind getTokenID() const { return (tok::TokenKind)TokenID; }
191 
192   /// True if revertTokenIDToIdentifier() was called.
hasRevertedTokenIDToIdentifier()193   bool hasRevertedTokenIDToIdentifier() const { return RevertedTokenID; }
194 
195   /// Revert TokenID to tok::identifier; used for GNU libstdc++ 4.2
196   /// compatibility.
197   ///
198   /// TokenID is normally read-only but there are 2 instances where we revert it
199   /// to tok::identifier for libstdc++ 4.2. Keep track of when this happens
200   /// using this method so we can inform serialization about it.
revertTokenIDToIdentifier()201   void revertTokenIDToIdentifier() {
202     assert(TokenID != tok::identifier && "Already at tok::identifier");
203     TokenID = tok::identifier;
204     RevertedTokenID = true;
205   }
revertIdentifierToTokenID(tok::TokenKind TK)206   void revertIdentifierToTokenID(tok::TokenKind TK) {
207     assert(TokenID == tok::identifier && "Should be at tok::identifier");
208     TokenID = TK;
209     RevertedTokenID = false;
210   }
211 
212   /// Return the preprocessor keyword ID for this identifier.
213   ///
214   /// For example, "define" will return tok::pp_define.
215   tok::PPKeywordKind getPPKeywordID() const;
216 
217   /// Return the Objective-C keyword ID for the this identifier.
218   ///
219   /// For example, 'class' will return tok::objc_class if ObjC is enabled.
getObjCKeywordID()220   tok::ObjCKeywordKind getObjCKeywordID() const {
221     if (ObjCOrBuiltinID < tok::NUM_OBJC_KEYWORDS)
222       return tok::ObjCKeywordKind(ObjCOrBuiltinID);
223     else
224       return tok::objc_not_keyword;
225   }
setObjCKeywordID(tok::ObjCKeywordKind ID)226   void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCOrBuiltinID = ID; }
227 
228   /// Return a value indicating whether this is a builtin function.
229   ///
230   /// 0 is not-built-in. 1+ are specific builtin functions.
getBuiltinID()231   unsigned getBuiltinID() const {
232     if (ObjCOrBuiltinID >= tok::NUM_OBJC_KEYWORDS)
233       return ObjCOrBuiltinID - tok::NUM_OBJC_KEYWORDS;
234     else
235       return 0;
236   }
setBuiltinID(unsigned ID)237   void setBuiltinID(unsigned ID) {
238     ObjCOrBuiltinID = ID + tok::NUM_OBJC_KEYWORDS;
239     assert(ObjCOrBuiltinID - unsigned(tok::NUM_OBJC_KEYWORDS) == ID
240            && "ID too large for field!");
241   }
242 
getObjCOrBuiltinID()243   unsigned getObjCOrBuiltinID() const { return ObjCOrBuiltinID; }
setObjCOrBuiltinID(unsigned ID)244   void setObjCOrBuiltinID(unsigned ID) { ObjCOrBuiltinID = ID; }
245 
246   /// get/setExtension - Initialize information about whether or not this
247   /// language token is an extension.  This controls extension warnings, and is
248   /// only valid if a custom token ID is set.
isExtensionToken()249   bool isExtensionToken() const { return IsExtension; }
setIsExtensionToken(bool Val)250   void setIsExtensionToken(bool Val) {
251     IsExtension = Val;
252     if (Val)
253       NeedsHandleIdentifier = true;
254     else
255       RecomputeNeedsHandleIdentifier();
256   }
257 
258   /// is/setIsFutureCompatKeyword - Initialize information about whether or not
259   /// this language token is a keyword in a newer or proposed Standard. This
260   /// controls compatibility warnings, and is only true when not parsing the
261   /// corresponding Standard. Once a compatibility problem has been diagnosed
262   /// with this keyword, the flag will be cleared.
isFutureCompatKeyword()263   bool isFutureCompatKeyword() const { return IsFutureCompatKeyword; }
setIsFutureCompatKeyword(bool Val)264   void setIsFutureCompatKeyword(bool Val) {
265     IsFutureCompatKeyword = Val;
266     if (Val)
267       NeedsHandleIdentifier = true;
268     else
269       RecomputeNeedsHandleIdentifier();
270   }
271 
272   /// setIsPoisoned - Mark this identifier as poisoned.  After poisoning, the
273   /// Preprocessor will emit an error every time this token is used.
274   void setIsPoisoned(bool Value = true) {
275     IsPoisoned = Value;
276     if (Value)
277       NeedsHandleIdentifier = true;
278     else
279       RecomputeNeedsHandleIdentifier();
280   }
281 
282   /// Return true if this token has been poisoned.
isPoisoned()283   bool isPoisoned() const { return IsPoisoned; }
284 
285   /// isCPlusPlusOperatorKeyword/setIsCPlusPlusOperatorKeyword controls whether
286   /// this identifier is a C++ alternate representation of an operator.
287   void setIsCPlusPlusOperatorKeyword(bool Val = true) {
288     IsCPPOperatorKeyword = Val;
289   }
isCPlusPlusOperatorKeyword()290   bool isCPlusPlusOperatorKeyword() const { return IsCPPOperatorKeyword; }
291 
292   /// Return true if this token is a keyword in the specified language.
293   bool isKeyword(const LangOptions &LangOpts) const;
294 
295   /// Return true if this token is a C++ keyword in the specified
296   /// language.
297   bool isCPlusPlusKeyword(const LangOptions &LangOpts) const;
298 
299   /// Get and set FETokenInfo. The language front-end is allowed to associate
300   /// arbitrary metadata with this token.
getFETokenInfo()301   void *getFETokenInfo() const { return FETokenInfo; }
setFETokenInfo(void * T)302   void setFETokenInfo(void *T) { FETokenInfo = T; }
303 
304   /// Return true if the Preprocessor::HandleIdentifier must be called
305   /// on a token of this identifier.
306   ///
307   /// If this returns false, we know that HandleIdentifier will not affect
308   /// the token.
isHandleIdentifierCase()309   bool isHandleIdentifierCase() const { return NeedsHandleIdentifier; }
310 
311   /// Return true if the identifier in its current state was loaded
312   /// from an AST file.
isFromAST()313   bool isFromAST() const { return IsFromAST; }
314 
setIsFromAST()315   void setIsFromAST() { IsFromAST = true; }
316 
317   /// Determine whether this identifier has changed since it was loaded
318   /// from an AST file.
hasChangedSinceDeserialization()319   bool hasChangedSinceDeserialization() const {
320     return ChangedAfterLoad;
321   }
322 
323   /// Note that this identifier has changed since it was loaded from
324   /// an AST file.
setChangedSinceDeserialization()325   void setChangedSinceDeserialization() {
326     ChangedAfterLoad = true;
327   }
328 
329   /// Determine whether the frontend token information for this
330   /// identifier has changed since it was loaded from an AST file.
hasFETokenInfoChangedSinceDeserialization()331   bool hasFETokenInfoChangedSinceDeserialization() const {
332     return FEChangedAfterLoad;
333   }
334 
335   /// Note that the frontend token information for this identifier has
336   /// changed since it was loaded from an AST file.
setFETokenInfoChangedSinceDeserialization()337   void setFETokenInfoChangedSinceDeserialization() {
338     FEChangedAfterLoad = true;
339   }
340 
341   /// Determine whether the information for this identifier is out of
342   /// date with respect to the external source.
isOutOfDate()343   bool isOutOfDate() const { return OutOfDate; }
344 
345   /// Set whether the information for this identifier is out of
346   /// date with respect to the external source.
setOutOfDate(bool OOD)347   void setOutOfDate(bool OOD) {
348     OutOfDate = OOD;
349     if (OOD)
350       NeedsHandleIdentifier = true;
351     else
352       RecomputeNeedsHandleIdentifier();
353   }
354 
355   /// Determine whether this is the contextual keyword \c import.
isModulesImport()356   bool isModulesImport() const { return IsModulesImport; }
357 
358   /// Set whether this identifier is the contextual keyword \c import.
setModulesImport(bool I)359   void setModulesImport(bool I) {
360     IsModulesImport = I;
361     if (I)
362       NeedsHandleIdentifier = true;
363     else
364       RecomputeNeedsHandleIdentifier();
365   }
366 
367   /// Determine whether this is the mangled name of an OpenMP variant.
isMangledOpenMPVariantName()368   bool isMangledOpenMPVariantName() const { return IsMangledOpenMPVariantName; }
369 
370   /// Set whether this is the mangled name of an OpenMP variant.
setMangledOpenMPVariantName(bool I)371   void setMangledOpenMPVariantName(bool I) { IsMangledOpenMPVariantName = I; }
372 
373   /// Return true if this identifier is an editor placeholder.
374   ///
375   /// Editor placeholders are produced by the code-completion engine and are
376   /// represented as characters between '<#' and '#>' in the source code. An
377   /// example of auto-completed call with a placeholder parameter is shown
378   /// below:
379   /// \code
380   ///   function(<#int x#>);
381   /// \endcode
isEditorPlaceholder()382   bool isEditorPlaceholder() const {
383     return getName().startswith("<#") && getName().endswith("#>");
384   }
385 
386   /// Determine whether \p this is a name reserved for the implementation (C99
387   /// 7.1.3, C++ [lib.global.names]).
388   bool isReservedName(bool doubleUnderscoreOnly = false) const {
389     if (getLength() < 2)
390       return false;
391     const char *Name = getNameStart();
392     return Name[0] == '_' &&
393            (Name[1] == '_' ||
394             (Name[1] >= 'A' && Name[1] <= 'Z' && !doubleUnderscoreOnly));
395   }
396 
397   /// Provide less than operator for lexicographical sorting.
398   bool operator<(const IdentifierInfo &RHS) const {
399     return getName() < RHS.getName();
400   }
401 
402 private:
403   /// The Preprocessor::HandleIdentifier does several special (but rare)
404   /// things to identifiers of various sorts.  For example, it changes the
405   /// \c for keyword token from tok::identifier to tok::for.
406   ///
407   /// This method is very tied to the definition of HandleIdentifier.  Any
408   /// change to it should be reflected here.
RecomputeNeedsHandleIdentifier()409   void RecomputeNeedsHandleIdentifier() {
410     NeedsHandleIdentifier = isPoisoned() || hasMacroDefinition() ||
411                             isExtensionToken() || isFutureCompatKeyword() ||
412                             isOutOfDate() || isModulesImport();
413   }
414 };
415 
416 /// An RAII object for [un]poisoning an identifier within a scope.
417 ///
418 /// \p II is allowed to be null, in which case objects of this type have
419 /// no effect.
420 class PoisonIdentifierRAIIObject {
421   IdentifierInfo *const II;
422   const bool OldValue;
423 
424 public:
PoisonIdentifierRAIIObject(IdentifierInfo * II,bool NewValue)425   PoisonIdentifierRAIIObject(IdentifierInfo *II, bool NewValue)
426     : II(II), OldValue(II ? II->isPoisoned() : false) {
427     if(II)
428       II->setIsPoisoned(NewValue);
429   }
430 
~PoisonIdentifierRAIIObject()431   ~PoisonIdentifierRAIIObject() {
432     if(II)
433       II->setIsPoisoned(OldValue);
434   }
435 };
436 
/// An iterator that walks over all of the known identifiers
/// in the lookup table.
///
/// Since this iterator uses an abstract interface via virtual
/// functions, it uses an object-oriented interface rather than the
/// more standard C++ STL iterator interface. In this OO-style
/// iteration, the single function \c Next() provides dereference,
/// advance, and end-of-sequence checking in a single
/// operation. Subclasses of this iterator type will provide the
/// actual functionality.
class IdentifierIterator {
protected:
  // Only subclasses may construct an iterator.
  IdentifierIterator() = default;

public:
  // Iterators are not copyable.
  IdentifierIterator(const IdentifierIterator &) = delete;
  IdentifierIterator &operator=(const IdentifierIterator &) = delete;

  virtual ~IdentifierIterator();

  /// Retrieve the next string in the identifier table and
  /// advances the iterator for the following string.
  ///
  /// \returns The next string in the identifier table. If there is
  /// no such string, returns an empty \c StringRef.
  virtual StringRef Next() = 0;
};
464 
/// Provides lookups to, and iteration over, IdentifierInfo objects.
///
/// IdentifierTable consults an object of this type (when one is set) before
/// it creates a new IdentifierInfo in IdentifierTable::get().
class IdentifierInfoLookup {
public:
  virtual ~IdentifierInfoLookup();

  /// Return the IdentifierInfo for the specified named identifier.
  ///
  /// Unlike the version in IdentifierTable, this returns a pointer instead
  /// of a reference.  If the pointer is null then the IdentifierInfo cannot
  /// be found.
  virtual IdentifierInfo* get(StringRef Name) = 0;

  /// Retrieve an iterator into the set of all identifiers
  /// known to this identifier lookup source.
  ///
  /// This routine provides access to all of the identifiers known to
  /// the identifier lookup, allowing access to the contents of the
  /// identifiers without introducing the overhead of constructing
  /// IdentifierInfo objects for each.
  ///
  /// \returns A new iterator into the set of known identifiers. The
  /// caller is responsible for deleting this iterator.
  virtual IdentifierIterator *getIdentifiers();
};
489 
490 /// Implements an efficient mapping from strings to IdentifierInfo nodes.
491 ///
492 /// This has no other purpose, but this is an extremely performance-critical
493 /// piece of the code, as each occurrence of every identifier goes through
494 /// here when lexed.
495 class IdentifierTable {
496   // Shark shows that using MallocAllocator is *much* slower than using this
497   // BumpPtrAllocator!
498   using HashTableTy = llvm::StringMap<IdentifierInfo *, llvm::BumpPtrAllocator>;
499   HashTableTy HashTable;
500 
501   IdentifierInfoLookup* ExternalLookup;
502 
503 public:
504   /// Create the identifier table.
505   explicit IdentifierTable(IdentifierInfoLookup *ExternalLookup = nullptr);
506 
507   /// Create the identifier table, populating it with info about the
508   /// language keywords for the language specified by \p LangOpts.
509   explicit IdentifierTable(const LangOptions &LangOpts,
510                            IdentifierInfoLookup *ExternalLookup = nullptr);
511 
512   /// Set the external identifier lookup mechanism.
setExternalIdentifierLookup(IdentifierInfoLookup * IILookup)513   void setExternalIdentifierLookup(IdentifierInfoLookup *IILookup) {
514     ExternalLookup = IILookup;
515   }
516 
517   /// Retrieve the external identifier lookup object, if any.
getExternalIdentifierLookup()518   IdentifierInfoLookup *getExternalIdentifierLookup() const {
519     return ExternalLookup;
520   }
521 
getAllocator()522   llvm::BumpPtrAllocator& getAllocator() {
523     return HashTable.getAllocator();
524   }
525 
526   /// Return the identifier token info for the specified named
527   /// identifier.
get(StringRef Name)528   IdentifierInfo &get(StringRef Name) {
529     auto &Entry = *HashTable.insert(std::make_pair(Name, nullptr)).first;
530 
531     IdentifierInfo *&II = Entry.second;
532     if (II) return *II;
533 
534     // No entry; if we have an external lookup, look there first.
535     if (ExternalLookup) {
536       II = ExternalLookup->get(Name);
537       if (II)
538         return *II;
539     }
540 
541     // Lookups failed, make a new IdentifierInfo.
542     void *Mem = getAllocator().Allocate<IdentifierInfo>();
543     II = new (Mem) IdentifierInfo();
544 
545     // Make sure getName() knows how to find the IdentifierInfo
546     // contents.
547     II->Entry = &Entry;
548 
549     return *II;
550   }
551 
get(StringRef Name,tok::TokenKind TokenCode)552   IdentifierInfo &get(StringRef Name, tok::TokenKind TokenCode) {
553     IdentifierInfo &II = get(Name);
554     II.TokenID = TokenCode;
555     assert(II.TokenID == (unsigned) TokenCode && "TokenCode too large");
556     return II;
557   }
558 
559   /// Gets an IdentifierInfo for the given name without consulting
560   ///        external sources.
561   ///
562   /// This is a version of get() meant for external sources that want to
563   /// introduce or modify an identifier. If they called get(), they would
564   /// likely end up in a recursion.
getOwn(StringRef Name)565   IdentifierInfo &getOwn(StringRef Name) {
566     auto &Entry = *HashTable.insert(std::make_pair(Name, nullptr)).first;
567 
568     IdentifierInfo *&II = Entry.second;
569     if (II)
570       return *II;
571 
572     // Lookups failed, make a new IdentifierInfo.
573     void *Mem = getAllocator().Allocate<IdentifierInfo>();
574     II = new (Mem) IdentifierInfo();
575 
576     // Make sure getName() knows how to find the IdentifierInfo
577     // contents.
578     II->Entry = &Entry;
579 
580     // If this is the 'import' contextual keyword, mark it as such.
581     if (Name.equals("import"))
582       II->setModulesImport(true);
583 
584     return *II;
585   }
586 
587   using iterator = HashTableTy::const_iterator;
588   using const_iterator = HashTableTy::const_iterator;
589 
begin()590   iterator begin() const { return HashTable.begin(); }
end()591   iterator end() const   { return HashTable.end(); }
size()592   unsigned size() const  { return HashTable.size(); }
593 
find(StringRef Name)594   iterator find(StringRef Name) const { return HashTable.find(Name); }
595 
596   /// Print some statistics to stderr that indicate how well the
597   /// hashing is doing.
598   void PrintStats() const;
599 
600   /// Populate the identifier table with info about the language keywords
601   /// for the language specified by \p LangOpts.
602   void AddKeywords(const LangOptions &LangOpts);
603 };
604 
/// A family of Objective-C methods.
///
/// These families have no inherent meaning in the language, but are
/// nonetheless central enough in the existing implementations to
/// merit direct AST support.  While, in theory, arbitrary methods can
/// be considered to form families, we focus here on the methods
/// involving allocation and retain-count management, as these are the
/// most "core" and the most likely to be useful to diverse clients
/// without extra information.
///
/// Both selectors and actual method declarations may be classified
/// into families.  Method families may impose additional restrictions
/// beyond their selector name; for example, a method called '_init'
/// that returns void is not considered to be in the 'init' family
/// (but would be if it returned 'id').  It is also possible to
/// explicitly change or remove a method's family.  Therefore the
/// method's family should be considered the single source of truth.
enum ObjCMethodFamily {
  /// No particular method family.
  OMF_None,

  // Selectors in these families may have arbitrary arity, may be
  // written with arbitrary leading underscores, and may have
  // additional CamelCase "words" in their first selector chunk
  // following the family name.
  OMF_alloc,
  OMF_copy,
  OMF_init,
  OMF_mutableCopy,
  OMF_new,

  // These families are singletons consisting only of the nullary
  // selector with the given name.
  OMF_autorelease,
  OMF_dealloc,
  OMF_finalize,
  OMF_release,
  OMF_retain,
  OMF_retainCount,
  OMF_self,
  OMF_initialize,

  // performSelector families
  //
  // NOTE: this must stay the last enumerator; the static_assert below uses
  // it as the largest value when checking ObjCMethodFamilyBitWidth.
  OMF_performSelector
};

/// Enough bits to store any enumerator in ObjCMethodFamily or
/// InvalidObjCMethodFamily.
enum { ObjCMethodFamilyBitWidth = 4 };

/// An invalid value of ObjCMethodFamily.
///
/// This is the all-ones pattern of an ObjCMethodFamilyBitWidth-bit field.
enum { InvalidObjCMethodFamily = (1 << ObjCMethodFamilyBitWidth) - 1 };

// Every real family must fit in ObjCMethodFamilyBitWidth bits without
// colliding with the invalid marker.
static_assert(static_cast<int>(OMF_performSelector) <
                  static_cast<int>(InvalidObjCMethodFamily),
              "ObjCMethodFamilyBitWidth is too small for ObjCMethodFamily");
657 
/// A family of Objective-C methods.
///
/// These are families of methods whose result type is initially 'id', but
/// which are candidates for having the result type changed to
/// 'instancetype'.
enum ObjCInstanceTypeFamily {
  OIT_None,
  OIT_Array,
  OIT_Dictionary,
  OIT_Singleton,
  OIT_Init,
  OIT_ReturnsSelf
};
670 
/// The string-format family of a selector, as returned by
/// Selector::getStringFormatFamily().
// NOTE(review): the meaning of each enumerator is not shown in this header;
// the names suggest NSString- and CFString-style formatting methods - confirm
// against the implementation of Selector::getStringFormatFamilyImpl().
enum ObjCStringFormatFamily {
  SFF_None,
  SFF_NSString,
  SFF_CFString
};
676 
677 /// Smart pointer class that efficiently represents Objective-C method
678 /// names.
679 ///
680 /// This class will either point to an IdentifierInfo or a
681 /// MultiKeywordSelector (which is private). This enables us to optimize
682 /// selectors that take no arguments and selectors that take 1 argument, which
683 /// accounts for 78% of all selectors in Cocoa.h.
684 class Selector {
685   friend class Diagnostic;
686   friend class SelectorTable; // only the SelectorTable can create these
687   friend class DeclarationName; // and the AST's DeclarationName.
688 
689   enum IdentifierInfoFlag {
690     // Empty selector = 0. Note that these enumeration values must
691     // correspond to the enumeration values of DeclarationName::StoredNameKind
692     ZeroArg  = 0x01,
693     OneArg   = 0x02,
694     MultiArg = 0x07,
695     ArgFlags = 0x07
696   };
697 
698   /// A pointer to the MultiKeywordSelector or IdentifierInfo. We use the low
699   /// three bits of InfoPtr to store an IdentifierInfoFlag. Note that in any
700   /// case IdentifierInfo and MultiKeywordSelector are already aligned to
701   /// 8 bytes even on 32 bits archs because of DeclarationName.
702   uintptr_t InfoPtr = 0;
703 
Selector(IdentifierInfo * II,unsigned nArgs)704   Selector(IdentifierInfo *II, unsigned nArgs) {
705     InfoPtr = reinterpret_cast<uintptr_t>(II);
706     assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
707     assert(nArgs < 2 && "nArgs not equal to 0/1");
708     InfoPtr |= nArgs+1;
709   }
710 
Selector(MultiKeywordSelector * SI)711   Selector(MultiKeywordSelector *SI) {
712     InfoPtr = reinterpret_cast<uintptr_t>(SI);
713     assert((InfoPtr & ArgFlags) == 0 &&"Insufficiently aligned IdentifierInfo");
714     InfoPtr |= MultiArg;
715   }
716 
getAsIdentifierInfo()717   IdentifierInfo *getAsIdentifierInfo() const {
718     if (getIdentifierInfoFlag() < MultiArg)
719       return reinterpret_cast<IdentifierInfo *>(InfoPtr & ~ArgFlags);
720     return nullptr;
721   }
722 
getMultiKeywordSelector()723   MultiKeywordSelector *getMultiKeywordSelector() const {
724     return reinterpret_cast<MultiKeywordSelector *>(InfoPtr & ~ArgFlags);
725   }
726 
getIdentifierInfoFlag()727   unsigned getIdentifierInfoFlag() const {
728     return InfoPtr & ArgFlags;
729   }
730 
731   static ObjCMethodFamily getMethodFamilyImpl(Selector sel);
732 
733   static ObjCStringFormatFamily getStringFormatFamilyImpl(Selector sel);
734 
735 public:
736   /// The default ctor should only be used when creating data structures that
737   ///  will contain selectors.
738   Selector() = default;
Selector(uintptr_t V)739   explicit Selector(uintptr_t V) : InfoPtr(V) {}
740 
741   /// operator==/!= - Indicate whether the specified selectors are identical.
742   bool operator==(Selector RHS) const {
743     return InfoPtr == RHS.InfoPtr;
744   }
745   bool operator!=(Selector RHS) const {
746     return InfoPtr != RHS.InfoPtr;
747   }
748 
getAsOpaquePtr()749   void *getAsOpaquePtr() const {
750     return reinterpret_cast<void*>(InfoPtr);
751   }
752 
753   /// Determine whether this is the empty selector.
isNull()754   bool isNull() const { return InfoPtr == 0; }
755 
756   // Predicates to identify the selector type.
isKeywordSelector()757   bool isKeywordSelector() const {
758     return getIdentifierInfoFlag() != ZeroArg;
759   }
760 
isUnarySelector()761   bool isUnarySelector() const {
762     return getIdentifierInfoFlag() == ZeroArg;
763   }
764 
765   /// If this selector is the specific keyword selector described by Names.
766   bool isKeywordSelector(ArrayRef<StringRef> Names) const;
767 
768   /// If this selector is the specific unary selector described by Name.
769   bool isUnarySelector(StringRef Name) const;
770 
771   unsigned getNumArgs() const;
772 
773   /// Retrieve the identifier at a given position in the selector.
774   ///
775   /// Note that the identifier pointer returned may be NULL. Clients that only
776   /// care about the text of the identifier string, and not the specific,
777   /// uniqued identifier pointer, should use \c getNameForSlot(), which returns
778   /// an empty string when the identifier pointer would be NULL.
779   ///
780   /// \param argIndex The index for which we want to retrieve the identifier.
781   /// This index shall be less than \c getNumArgs() unless this is a keyword
782   /// selector, in which case 0 is the only permissible value.
783   ///
784   /// \returns the uniqued identifier for this slot, or NULL if this slot has
785   /// no corresponding identifier.
786   IdentifierInfo *getIdentifierInfoForSlot(unsigned argIndex) const;
787 
  /// Retrieve the name at a given position in the selector.
  ///
  /// \param argIndex The index for which we want to retrieve the name.
  /// This index shall be less than \c getNumArgs() unless this is a keyword
  /// selector, in which case 0 is the only permissible value.
  /// NOTE(review): the exception presumably targets a zero-argument selector
  /// (see \c isUnarySelector()), for which no index could be below the
  /// argument count; "keyword selector" looks like a slip -- confirm against
  /// the out-of-line definition.
  ///
  /// \returns the name for this slot, which may be the empty string if no
  /// name was supplied.
  StringRef getNameForSlot(unsigned argIndex) const;
797 
  /// Derive the full selector name (e.g. "foo:bar:") and return it as a
  /// std::string.
  std::string getAsString() const;
801 
  /// Print the full selector name (e.g. "foo:bar:") to the stream \p OS.
  void print(llvm::raw_ostream &OS) const;
804 
  /// NOTE(review): no definition is visible in this header; presumably a
  /// debugging aid that prints this selector -- confirm in the implementation.
  void dump() const;
806 
807   /// Derive the conventional family of this method.
getMethodFamily()808   ObjCMethodFamily getMethodFamily() const {
809     return getMethodFamilyImpl(*this);
810   }
811 
getStringFormatFamily()812   ObjCStringFormatFamily getStringFormatFamily() const {
813     return getStringFormatFamilyImpl(*this);
814   }
815 
getEmptyMarker()816   static Selector getEmptyMarker() {
817     return Selector(uintptr_t(-1));
818   }
819 
getTombstoneMarker()820   static Selector getTombstoneMarker() {
821     return Selector(uintptr_t(-2));
822   }
823 
  /// Classify \p sel into an ObjCInstanceTypeFamily. Only declared here; the
  /// classification rules live in the out-of-line definition.
  static ObjCInstanceTypeFamily getInstTypeMethodFamily(Selector sel);
825 };
826 
827 /// This table allows us to fully hide how we implement
828 /// multi-keyword caching.
829 class SelectorTable {
830   // Actually a SelectorTableImpl
831   void *Impl;
832 
833 public:
834   SelectorTable();
835   SelectorTable(const SelectorTable &) = delete;
836   SelectorTable &operator=(const SelectorTable &) = delete;
837   ~SelectorTable();
838 
839   /// Can create any sort of selector.
840   ///
841   /// \p NumArgs indicates whether this is a no argument selector "foo", a
842   /// single argument selector "foo:" or multi-argument "foo:bar:".
843   Selector getSelector(unsigned NumArgs, IdentifierInfo **IIV);
844 
getUnarySelector(IdentifierInfo * ID)845   Selector getUnarySelector(IdentifierInfo *ID) {
846     return Selector(ID, 1);
847   }
848 
getNullarySelector(IdentifierInfo * ID)849   Selector getNullarySelector(IdentifierInfo *ID) {
850     return Selector(ID, 0);
851   }
852 
853   /// Return the total amount of memory allocated for managing selectors.
854   size_t getTotalMemory() const;
855 
856   /// Return the default setter name for the given identifier.
857   ///
858   /// This is "set" + \p Name where the initial character of \p Name
859   /// has been capitalized.
860   static SmallString<64> constructSetterName(StringRef Name);
861 
862   /// Return the default setter selector for the given identifier.
863   ///
864   /// This is "set" + \p Name where the initial character of \p Name
865   /// has been capitalized.
866   static Selector constructSetterSelector(IdentifierTable &Idents,
867                                           SelectorTable &SelTable,
868                                           const IdentifierInfo *Name);
869 
870   /// Return the property name for the given setter selector.
871   static std::string getPropertyNameFromSetterSelector(Selector Sel);
872 };
873 
874 namespace detail {
875 
876 /// DeclarationNameExtra is used as a base of various uncommon special names.
877 /// This class is needed since DeclarationName has not enough space to store
878 /// the kind of every possible names. Therefore the kind of common names is
879 /// stored directly in DeclarationName, and the kind of uncommon names is
880 /// stored in DeclarationNameExtra. It is aligned to 8 bytes because
881 /// DeclarationName needs the lower 3 bits to store the kind of common names.
882 /// DeclarationNameExtra is tightly coupled to DeclarationName and any change
883 /// here is very likely to require changes in DeclarationName(Table).
884 class alignas(IdentifierInfoAlignment) DeclarationNameExtra {
885   friend class clang::DeclarationName;
886   friend class clang::DeclarationNameTable;
887 
888 protected:
889   /// The kind of "extra" information stored in the DeclarationName. See
890   /// @c ExtraKindOrNumArgs for an explanation of how these enumerator values
891   /// are used. Note that DeclarationName depends on the numerical values
892   /// of the enumerators in this enum. See DeclarationName::StoredNameKind
893   /// for more info.
894   enum ExtraKind {
895     CXXDeductionGuideName,
896     CXXLiteralOperatorName,
897     CXXUsingDirective,
898     ObjCMultiArgSelector
899   };
900 
901   /// ExtraKindOrNumArgs has one of the following meaning:
902   ///  * The kind of an uncommon C++ special name. This DeclarationNameExtra
903   ///    is in this case in fact either a CXXDeductionGuideNameExtra or
904   ///    a CXXLiteralOperatorIdName.
905   ///
906   ///  * It may be also name common to C++ using-directives (CXXUsingDirective),
907   ///
908   ///  * Otherwise it is ObjCMultiArgSelector+NumArgs, where NumArgs is
909   ///    the number of arguments in the Objective-C selector, in which
910   ///    case the DeclarationNameExtra is also a MultiKeywordSelector.
911   unsigned ExtraKindOrNumArgs;
912 
DeclarationNameExtra(ExtraKind Kind)913   DeclarationNameExtra(ExtraKind Kind) : ExtraKindOrNumArgs(Kind) {}
DeclarationNameExtra(unsigned NumArgs)914   DeclarationNameExtra(unsigned NumArgs)
915       : ExtraKindOrNumArgs(ObjCMultiArgSelector + NumArgs) {}
916 
917   /// Return the corresponding ExtraKind.
getKind()918   ExtraKind getKind() const {
919     return static_cast<ExtraKind>(ExtraKindOrNumArgs >
920                                           (unsigned)ObjCMultiArgSelector
921                                       ? (unsigned)ObjCMultiArgSelector
922                                       : ExtraKindOrNumArgs);
923   }
924 
925   /// Return the number of arguments in an ObjC selector. Only valid when this
926   /// is indeed an ObjCMultiArgSelector.
getNumArgs()927   unsigned getNumArgs() const {
928     assert(ExtraKindOrNumArgs >= (unsigned)ObjCMultiArgSelector &&
929            "getNumArgs called but this is not an ObjC selector!");
930     return ExtraKindOrNumArgs - (unsigned)ObjCMultiArgSelector;
931   }
932 };
933 
934 } // namespace detail
935 
936 }  // namespace clang
937 
938 namespace llvm {
939 
940 /// Define DenseMapInfo so that Selectors can be used as keys in DenseMap and
941 /// DenseSets.
942 template <>
943 struct DenseMapInfo<clang::Selector> {
944   static clang::Selector getEmptyKey() {
945     return clang::Selector::getEmptyMarker();
946   }
947 
948   static clang::Selector getTombstoneKey() {
949     return clang::Selector::getTombstoneMarker();
950   }
951 
952   static unsigned getHashValue(clang::Selector S);
953 
954   static bool isEqual(clang::Selector LHS, clang::Selector RHS) {
955     return LHS == RHS;
956   }
957 };
958 
959 template<>
960 struct PointerLikeTypeTraits<clang::Selector> {
961   static const void *getAsVoidPointer(clang::Selector P) {
962     return P.getAsOpaquePtr();
963   }
964 
965   static clang::Selector getFromVoidPointer(const void *P) {
966     return clang::Selector(reinterpret_cast<uintptr_t>(P));
967   }
968 
969   static constexpr int NumLowBitsAvailable = 0;
970 };
971 
972 // Provide PointerLikeTypeTraits for IdentifierInfo pointers, which
973 // are not guaranteed to be 8-byte aligned.
974 template<>
975 struct PointerLikeTypeTraits<clang::IdentifierInfo*> {
976   static void *getAsVoidPointer(clang::IdentifierInfo* P) {
977     return P;
978   }
979 
980   static clang::IdentifierInfo *getFromVoidPointer(void *P) {
981     return static_cast<clang::IdentifierInfo*>(P);
982   }
983 
984   static constexpr int NumLowBitsAvailable = 1;
985 };
986 
987 template<>
988 struct PointerLikeTypeTraits<const clang::IdentifierInfo*> {
989   static const void *getAsVoidPointer(const clang::IdentifierInfo* P) {
990     return P;
991   }
992 
993   static const clang::IdentifierInfo *getFromVoidPointer(const void *P) {
994     return static_cast<const clang::IdentifierInfo*>(P);
995   }
996 
997   static constexpr int NumLowBitsAvailable = 1;
998 };
999 
1000 } // namespace llvm
1001 
1002 #endif // LLVM_CLANG_BASIC_IDENTIFIERTABLE_H
1003