1 //===- MicrosoftDemangle.cpp ----------------------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is dual licensed under the MIT and the University of Illinois Open
6 // Source Licenses. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines a demangler for MSVC-style mangled symbols.
11 //
12 // This file has no dependencies on the rest of LLVM so that it can be
13 // easily reused in other programs such as libcxxabi.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "llvm/Demangle/Demangle.h"
18 
19 #include "Compiler.h"
20 #include "StringView.h"
21 #include "Utility.h"
22 
23 #include <cctype>
24 #include <tuple>
25 
26 // This memory allocator is extremely fast, but it doesn't call dtors
27 // for allocated objects. That means you can't use STL containers
28 // (such as std::vector) with this allocator. But it pays off --
29 // the demangler is 3x faster with this allocator compared to one with
30 // STL containers.
31 namespace {
32   constexpr size_t AllocUnit = 4096;
33 
34 class ArenaAllocator {
35   struct AllocatorNode {
36     uint8_t *Buf = nullptr;
37     size_t Used = 0;
38     size_t Capacity = 0;
39     AllocatorNode *Next = nullptr;
40   };
41 
addNode(size_t Capacity)42   void addNode(size_t Capacity) {
43     AllocatorNode *NewHead = new AllocatorNode;
44     NewHead->Buf = new uint8_t[Capacity];
45     NewHead->Next = Head;
46     NewHead->Capacity = Capacity;
47     Head = NewHead;
48     NewHead->Used = 0;
49   }
50 
51 public:
ArenaAllocator()52   ArenaAllocator() { addNode(AllocUnit); }
53 
~ArenaAllocator()54   ~ArenaAllocator() {
55     while (Head) {
56       assert(Head->Buf);
57       delete[] Head->Buf;
58       AllocatorNode *Next = Head->Next;
59       delete Head;
60       Head = Next;
61     }
62   }
63 
allocUnalignedBuffer(size_t Length)64   char *allocUnalignedBuffer(size_t Length) {
65     uint8_t *Buf = Head->Buf + Head->Used;
66 
67     Head->Used += Length;
68     if (Head->Used > Head->Capacity) {
69       // It's possible we need a buffer which is larger than our default unit
70       // size, so we need to be careful to add a node with capacity that is at
71       // least as large as what we need.
72       addNode(std::max(AllocUnit, Length));
73       Head->Used = Length;
74       Buf = Head->Buf;
75     }
76 
77     return reinterpret_cast<char *>(Buf);
78   }
79 
alloc(Args &&...ConstructorArgs)80   template <typename T, typename... Args> T *alloc(Args &&... ConstructorArgs) {
81 
82     size_t Size = sizeof(T);
83     assert(Head && Head->Buf);
84 
85     size_t P = (size_t)Head->Buf + Head->Used;
86     uintptr_t AlignedP =
87         (((size_t)P + alignof(T) - 1) & ~(size_t)(alignof(T) - 1));
88     uint8_t *PP = (uint8_t *)AlignedP;
89     size_t Adjustment = AlignedP - P;
90 
91     Head->Used += Size + Adjustment;
92     if (Head->Used < Head->Capacity)
93       return new (PP) T(std::forward<Args>(ConstructorArgs)...);
94 
95     addNode(AllocUnit);
96     Head->Used = Size;
97     return new (Head->Buf) T(std::forward<Args>(ConstructorArgs)...);
98   }
99 
100 private:
101   AllocatorNode *Head = nullptr;
102 };
103 } // namespace
104 
startsWithDigit(StringView S)105 static bool startsWithDigit(StringView S) {
106   return !S.empty() && std::isdigit(S.front());
107 }
108 
109 // Writes a space if the last token does not end with a punctuation.
outputSpaceIfNecessary(OutputStream & OS)110 static void outputSpaceIfNecessary(OutputStream &OS) {
111   if (OS.empty())
112     return;
113 
114   char C = OS.back();
115   if (isalnum(C) || C == '>')
116     OS << " ";
117 }
118 
119 // Storage classes
120 enum Qualifiers : uint8_t {
121   Q_None = 0,
122   Q_Const = 1 << 0,
123   Q_Volatile = 1 << 1,
124   Q_Far = 1 << 2,
125   Q_Huge = 1 << 3,
126   Q_Unaligned = 1 << 4,
127   Q_Restrict = 1 << 5,
128   Q_Pointer64 = 1 << 6
129 };
130 
131 enum class StorageClass : uint8_t {
132   None,
133   PrivateStatic,
134   ProtectedStatic,
135   PublicStatic,
136   Global,
137   FunctionLocalStatic
138 };
139 
140 enum class QualifierMangleMode { Drop, Mangle, Result };
141 
142 enum class PointerAffinity { Pointer, Reference, RValueReference };
143 
144 // Calling conventions
145 enum class CallingConv : uint8_t {
146   None,
147   Cdecl,
148   Pascal,
149   Thiscall,
150   Stdcall,
151   Fastcall,
152   Clrcall,
153   Eabi,
154   Vectorcall,
155   Regcall,
156 };
157 
158 enum class ReferenceKind : uint8_t { None, LValueRef, RValueRef };
159 
160 // Types
161 enum class PrimTy : uint8_t {
162   Unknown,
163   None,
164   Function,
165   Ptr,
166   MemberPtr,
167   Array,
168 
169   Struct,
170   Union,
171   Class,
172   Enum,
173 
174   Void,
175   Bool,
176   Char,
177   Schar,
178   Uchar,
179   Char16,
180   Char32,
181   Short,
182   Ushort,
183   Int,
184   Uint,
185   Long,
186   Ulong,
187   Int64,
188   Uint64,
189   Wchar,
190   Float,
191   Double,
192   Ldouble,
193   Nullptr
194 };
195 
196 // Function classes
197 enum FuncClass : uint8_t {
198   Public = 1 << 0,
199   Protected = 1 << 1,
200   Private = 1 << 2,
201   Global = 1 << 3,
202   Static = 1 << 4,
203   Virtual = 1 << 5,
204   Far = 1 << 6,
205 };
206 
207 namespace {
208 
209 struct Type;
210 struct Name;
211 
212 struct FunctionParams {
213   bool IsVariadic = false;
214 
215   Type *Current = nullptr;
216 
217   FunctionParams *Next = nullptr;
218 };
219 
220 struct TemplateParams {
221   bool IsTemplateTemplate = false;
222   bool IsAliasTemplate = false;
223 
224   // Type can be null if this is a template template parameter.  In that case
225   // only Name will be valid.
226   Type *ParamType = nullptr;
227 
228   // Name can be valid if this is a template template parameter (see above) or
229   // this is a function declaration (e.g. foo<&SomeFunc>).  In the latter case
230   // Name contains the name of the function and Type contains the signature.
231   Name *ParamName = nullptr;
232 
233   TemplateParams *Next = nullptr;
234 };
235 
236 // The type class. Mangled symbols are first parsed and converted to
237 // this type and then converted to string.
238 struct Type {
~Type__anon4268c9490211::Type239   virtual ~Type() {}
240 
241   virtual Type *clone(ArenaAllocator &Arena) const;
242 
243   // Write the "first half" of a given type.  This is a static functions to
244   // give the code a chance to do processing that is common to a subset of
245   // subclasses
246   static void outputPre(OutputStream &OS, Type &Ty);
247 
248   // Write the "second half" of a given type.  This is a static functions to
249   // give the code a chance to do processing that is common to a subset of
250   // subclasses
251   static void outputPost(OutputStream &OS, Type &Ty);
252 
253   virtual void outputPre(OutputStream &OS);
254   virtual void outputPost(OutputStream &OS);
255 
256   // Primitive type such as Int.
257   PrimTy Prim = PrimTy::Unknown;
258 
259   Qualifiers Quals = Q_None;
260   StorageClass Storage = StorageClass::None; // storage class
261 };
262 
263 // Represents an identifier which may be a template.
264 struct Name {
265   // Name read from an MangledName string.
266   StringView Str;
267 
268   // Overloaded operators are represented as special BackReferences in mangled
269   // symbols. If this is an operator name, "op" has an operator name (e.g.
270   // ">>"). Otherwise, empty.
271   StringView Operator;
272 
273   // Template parameters. Null if not a template.
274   TemplateParams *TParams = nullptr;
275 
276   // Nested BackReferences (e.g. "A::B::C") are represented as a linked list.
277   Name *Next = nullptr;
278 };
279 
280 struct PointerType : public Type {
281   Type *clone(ArenaAllocator &Arena) const override;
282   void outputPre(OutputStream &OS) override;
283   void outputPost(OutputStream &OS) override;
284 
285   PointerAffinity Affinity;
286 
287   // Represents a type X in "a pointer to X", "a reference to X",
288   // "an array of X", or "a function returning X".
289   Type *Pointee = nullptr;
290 };
291 
292 struct MemberPointerType : public Type {
293   Type *clone(ArenaAllocator &Arena) const override;
294   void outputPre(OutputStream &OS) override;
295   void outputPost(OutputStream &OS) override;
296 
297   Name *MemberName = nullptr;
298 
299   // Represents a type X in "a pointer to X", "a reference to X",
300   // "an array of X", or "a function returning X".
301   Type *Pointee = nullptr;
302 };
303 
304 struct FunctionType : public Type {
305   Type *clone(ArenaAllocator &Arena) const override;
306   void outputPre(OutputStream &OS) override;
307   void outputPost(OutputStream &OS) override;
308 
309   // True if this FunctionType instance is the Pointee of a PointerType or
310   // MemberPointerType.
311   bool IsFunctionPointer = false;
312 
313   Type *ReturnType = nullptr;
314   // If this is a reference, the type of reference.
315   ReferenceKind RefKind;
316 
317   CallingConv CallConvention;
318   FuncClass FunctionClass;
319 
320   FunctionParams Params;
321 };
322 
323 struct UdtType : public Type {
324   Type *clone(ArenaAllocator &Arena) const override;
325   void outputPre(OutputStream &OS) override;
326 
327   Name *UdtName = nullptr;
328 };
329 
330 struct ArrayType : public Type {
331   Type *clone(ArenaAllocator &Arena) const override;
332   void outputPre(OutputStream &OS) override;
333   void outputPost(OutputStream &OS) override;
334 
335   // Either NextDimension or ElementType will be valid.
336   ArrayType *NextDimension = nullptr;
337   uint32_t ArrayDimension = 0;
338 
339   Type *ElementType = nullptr;
340 };
341 
342 } // namespace
343 
isMemberPointer(StringView MangledName)344 static bool isMemberPointer(StringView MangledName) {
345   switch (MangledName.popFront()) {
346   case '$':
347     // This is probably an rvalue reference (e.g. $$Q), and you cannot have an
348     // rvalue reference to a member.
349     return false;
350   case 'A':
351     // 'A' indicates a reference, and you cannot have a reference to a member
352     // function or member.
353     return false;
354   case 'P':
355   case 'Q':
356   case 'R':
357   case 'S':
358     // These 4 values indicate some kind of pointer, but we still don't know
359     // what.
360     break;
361   default:
362     assert(false && "Ty is not a pointer type!");
363   }
364 
365   // If it starts with a number, then 6 indicates a non-member function
366   // pointer, and 8 indicates a member function pointer.
367   if (startsWithDigit(MangledName)) {
368     assert(MangledName[0] == '6' || MangledName[0] == '8');
369     return (MangledName[0] == '8');
370   }
371 
372   // Remove ext qualifiers since those can appear on either type and are
373   // therefore not indicative.
374   MangledName.consumeFront('E'); // 64-bit
375   MangledName.consumeFront('I'); // restrict
376   MangledName.consumeFront('F'); // unaligned
377 
378   assert(!MangledName.empty());
379 
380   // The next value should be either ABCD (non-member) or QRST (member).
381   switch (MangledName.front()) {
382   case 'A':
383   case 'B':
384   case 'C':
385   case 'D':
386     return false;
387   case 'Q':
388   case 'R':
389   case 'S':
390   case 'T':
391     return true;
392   default:
393     assert(false);
394   }
395   return false;
396 }
397 
outputCallingConvention(OutputStream & OS,CallingConv CC)398 static void outputCallingConvention(OutputStream &OS, CallingConv CC) {
399   outputSpaceIfNecessary(OS);
400 
401   switch (CC) {
402   case CallingConv::Cdecl:
403     OS << "__cdecl";
404     break;
405   case CallingConv::Fastcall:
406     OS << "__fastcall";
407     break;
408   case CallingConv::Pascal:
409     OS << "__pascal";
410     break;
411   case CallingConv::Regcall:
412     OS << "__regcall";
413     break;
414   case CallingConv::Stdcall:
415     OS << "__stdcall";
416     break;
417   case CallingConv::Thiscall:
418     OS << "__thiscall";
419     break;
420   case CallingConv::Eabi:
421     OS << "__eabi";
422     break;
423   case CallingConv::Vectorcall:
424     OS << "__vectorcall";
425     break;
426   case CallingConv::Clrcall:
427     OS << "__clrcall";
428     break;
429   default:
430     break;
431   }
432 }
433 
startsWithLocalScopePattern(StringView S)434 static bool startsWithLocalScopePattern(StringView S) {
435   if (!S.consumeFront('?'))
436     return false;
437   if (S.size() < 2)
438     return false;
439 
440   size_t End = S.find('?');
441   if (End == StringView::npos)
442     return false;
443   StringView Candidate = S.substr(0, End);
444   if (Candidate.empty())
445     return false;
446 
447   // \?[0-9]\?
448   // ?@? is the discriminator 0.
449   if (Candidate.size() == 1)
450     return Candidate[0] == '@' || (Candidate[0] >= '0' && Candidate[0] <= '9');
451 
452   // If it's not 0-9, then it's an encoded number terminated with an @
453   if (Candidate.back() != '@')
454     return false;
455   Candidate = Candidate.dropBack();
456 
457   // An encoded number starts with B-P and all subsequent digits are in A-P.
458   // Note that the reason the first digit cannot be A is two fold.  First, it
459   // would create an ambiguity with ?A which delimits the beginning of an
460   // anonymous namespace.  Second, A represents 0, and you don't start a multi
461   // digit number with a leading 0.  Presumably the anonymous namespace
462   // ambiguity is also why single digit encoded numbers use 0-9 rather than A-J.
463   if (Candidate[0] < 'B' || Candidate[0] > 'P')
464     return false;
465   Candidate = Candidate.dropFront();
466   while (!Candidate.empty()) {
467     if (Candidate[0] < 'A' || Candidate[0] > 'P')
468       return false;
469     Candidate = Candidate.dropFront();
470   }
471 
472   return true;
473 }
474 
475 static void outputName(OutputStream &OS, const Name *TheName);
476 
477 // Write a function or template parameter list.
outputParameterList(OutputStream & OS,const FunctionParams & Params)478 static void outputParameterList(OutputStream &OS,
479                                 const FunctionParams &Params) {
480   if (!Params.Current) {
481     OS << "void";
482     return;
483   }
484 
485   const FunctionParams *Head = &Params;
486   while (Head) {
487     Type::outputPre(OS, *Head->Current);
488     Type::outputPost(OS, *Head->Current);
489 
490     Head = Head->Next;
491 
492     if (Head)
493       OS << ", ";
494   }
495 }
496 
outputParameterList(OutputStream & OS,const TemplateParams & Params)497 static void outputParameterList(OutputStream &OS,
498                                 const TemplateParams &Params) {
499   if (!Params.ParamType && !Params.ParamName) {
500     OS << "<>";
501     return;
502   }
503 
504   OS << "<";
505   const TemplateParams *Head = &Params;
506   while (Head) {
507     // Type can be null if this is a template template parameter,
508     // and Name can be null if this is a simple type.
509 
510     if (Head->ParamType && Head->ParamName) {
511       // Function pointer.
512       OS << "&";
513       Type::outputPre(OS, *Head->ParamType);
514       outputName(OS, Head->ParamName);
515       Type::outputPost(OS, *Head->ParamType);
516     } else if (Head->ParamType) {
517       // simple type.
518       Type::outputPre(OS, *Head->ParamType);
519       Type::outputPost(OS, *Head->ParamType);
520     } else {
521       // Template alias.
522       outputName(OS, Head->ParamName);
523     }
524 
525     Head = Head->Next;
526 
527     if (Head)
528       OS << ", ";
529   }
530   OS << ">";
531 }
532 
outputName(OutputStream & OS,const Name * TheName)533 static void outputName(OutputStream &OS, const Name *TheName) {
534   if (!TheName)
535     return;
536 
537   outputSpaceIfNecessary(OS);
538 
539   const Name *Previous = nullptr;
540   // Print out namespaces or outer class BackReferences.
541   for (; TheName->Next; TheName = TheName->Next) {
542     Previous = TheName;
543     OS << TheName->Str;
544     if (TheName->TParams)
545       outputParameterList(OS, *TheName->TParams);
546     OS << "::";
547   }
548 
549   // Print out a regular name.
550   if (TheName->Operator.empty()) {
551     OS << TheName->Str;
552     if (TheName->TParams)
553       outputParameterList(OS, *TheName->TParams);
554     return;
555   }
556 
557   // Print out ctor or dtor.
558   if (TheName->Operator == "dtor")
559     OS << "~";
560 
561   if (TheName->Operator == "ctor" || TheName->Operator == "dtor") {
562     OS << Previous->Str;
563     if (Previous->TParams)
564       outputParameterList(OS, *Previous->TParams);
565     return;
566   }
567 
568   // Print out an overloaded operator.
569   if (!TheName->Str.empty())
570     OS << TheName->Str << "::";
571   OS << "operator" << TheName->Operator;
572 }
573 
574 namespace {
575 
clone(ArenaAllocator & Arena) const576 Type *Type::clone(ArenaAllocator &Arena) const {
577   return Arena.alloc<Type>(*this);
578 }
579 
580 // Write the "first half" of a given type.
outputPre(OutputStream & OS,Type & Ty)581 void Type::outputPre(OutputStream &OS, Type &Ty) {
582   // Function types require custom handling of const and static so we
583   // handle them separately.  All other types use the same decoration
584   // for these modifiers, so handle them here in common code.
585   if (Ty.Prim == PrimTy::Function) {
586     Ty.outputPre(OS);
587     return;
588   }
589 
590   switch (Ty.Storage) {
591   case StorageClass::PrivateStatic:
592   case StorageClass::PublicStatic:
593   case StorageClass::ProtectedStatic:
594     OS << "static ";
595   default:
596     break;
597   }
598   Ty.outputPre(OS);
599 
600   if (Ty.Quals & Q_Const) {
601     outputSpaceIfNecessary(OS);
602     OS << "const";
603   }
604 
605   if (Ty.Quals & Q_Volatile) {
606     outputSpaceIfNecessary(OS);
607     OS << "volatile";
608   }
609 
610   if (Ty.Quals & Q_Restrict) {
611     outputSpaceIfNecessary(OS);
612     OS << "__restrict";
613   }
614 }
615 
616 // Write the "second half" of a given type.
outputPost(OutputStream & OS,Type & Ty)617 void Type::outputPost(OutputStream &OS, Type &Ty) { Ty.outputPost(OS); }
618 
outputPre(OutputStream & OS)619 void Type::outputPre(OutputStream &OS) {
620   switch (Prim) {
621   case PrimTy::Void:
622     OS << "void";
623     break;
624   case PrimTy::Bool:
625     OS << "bool";
626     break;
627   case PrimTy::Char:
628     OS << "char";
629     break;
630   case PrimTy::Schar:
631     OS << "signed char";
632     break;
633   case PrimTy::Uchar:
634     OS << "unsigned char";
635     break;
636   case PrimTy::Char16:
637     OS << "char16_t";
638     break;
639   case PrimTy::Char32:
640     OS << "char32_t";
641     break;
642   case PrimTy::Short:
643     OS << "short";
644     break;
645   case PrimTy::Ushort:
646     OS << "unsigned short";
647     break;
648   case PrimTy::Int:
649     OS << "int";
650     break;
651   case PrimTy::Uint:
652     OS << "unsigned int";
653     break;
654   case PrimTy::Long:
655     OS << "long";
656     break;
657   case PrimTy::Ulong:
658     OS << "unsigned long";
659     break;
660   case PrimTy::Int64:
661     OS << "__int64";
662     break;
663   case PrimTy::Uint64:
664     OS << "unsigned __int64";
665     break;
666   case PrimTy::Wchar:
667     OS << "wchar_t";
668     break;
669   case PrimTy::Float:
670     OS << "float";
671     break;
672   case PrimTy::Double:
673     OS << "double";
674     break;
675   case PrimTy::Ldouble:
676     OS << "long double";
677     break;
678   case PrimTy::Nullptr:
679     OS << "std::nullptr_t";
680     break;
681   default:
682     assert(false && "Invalid primitive type!");
683   }
684 }
outputPost(OutputStream & OS)685 void Type::outputPost(OutputStream &OS) {}
686 
clone(ArenaAllocator & Arena) const687 Type *PointerType::clone(ArenaAllocator &Arena) const {
688   return Arena.alloc<PointerType>(*this);
689 }
690 
outputPointerIndicator(OutputStream & OS,PointerAffinity Affinity,const Name * MemberName,const Type * Pointee)691 static void outputPointerIndicator(OutputStream &OS, PointerAffinity Affinity,
692                                    const Name *MemberName,
693                                    const Type *Pointee) {
694   // "[]" and "()" (for function parameters) take precedence over "*",
695   // so "int *x(int)" means "x is a function returning int *". We need
696   // parentheses to supercede the default precedence. (e.g. we want to
697   // emit something like "int (*x)(int)".)
698   if (Pointee->Prim == PrimTy::Function || Pointee->Prim == PrimTy::Array) {
699     OS << "(";
700     if (Pointee->Prim == PrimTy::Function) {
701       const FunctionType *FTy = static_cast<const FunctionType *>(Pointee);
702       assert(FTy->IsFunctionPointer);
703       outputCallingConvention(OS, FTy->CallConvention);
704       OS << " ";
705     }
706   }
707 
708   if (MemberName) {
709     outputName(OS, MemberName);
710     OS << "::";
711   }
712 
713   if (Affinity == PointerAffinity::Pointer)
714     OS << "*";
715   else if (Affinity == PointerAffinity::Reference)
716     OS << "&";
717   else
718     OS << "&&";
719 }
720 
outputPre(OutputStream & OS)721 void PointerType::outputPre(OutputStream &OS) {
722   Type::outputPre(OS, *Pointee);
723 
724   outputSpaceIfNecessary(OS);
725 
726   if (Quals & Q_Unaligned)
727     OS << "__unaligned ";
728 
729   outputPointerIndicator(OS, Affinity, nullptr, Pointee);
730 
731   // FIXME: We should output this, but it requires updating lots of tests.
732   // if (Ty.Quals & Q_Pointer64)
733   //  OS << " __ptr64";
734 }
735 
outputPost(OutputStream & OS)736 void PointerType::outputPost(OutputStream &OS) {
737   if (Pointee->Prim == PrimTy::Function || Pointee->Prim == PrimTy::Array)
738     OS << ")";
739 
740   Type::outputPost(OS, *Pointee);
741 }
742 
clone(ArenaAllocator & Arena) const743 Type *MemberPointerType::clone(ArenaAllocator &Arena) const {
744   return Arena.alloc<MemberPointerType>(*this);
745 }
746 
outputPre(OutputStream & OS)747 void MemberPointerType::outputPre(OutputStream &OS) {
748   Type::outputPre(OS, *Pointee);
749 
750   outputSpaceIfNecessary(OS);
751 
752   outputPointerIndicator(OS, PointerAffinity::Pointer, MemberName, Pointee);
753 
754   // FIXME: We should output this, but it requires updating lots of tests.
755   // if (Ty.Quals & Q_Pointer64)
756   //  OS << " __ptr64";
757   if (Quals & Q_Restrict)
758     OS << " __restrict";
759 }
760 
outputPost(OutputStream & OS)761 void MemberPointerType::outputPost(OutputStream &OS) {
762   if (Pointee->Prim == PrimTy::Function || Pointee->Prim == PrimTy::Array)
763     OS << ")";
764 
765   Type::outputPost(OS, *Pointee);
766 }
767 
clone(ArenaAllocator & Arena) const768 Type *FunctionType::clone(ArenaAllocator &Arena) const {
769   return Arena.alloc<FunctionType>(*this);
770 }
771 
outputPre(OutputStream & OS)772 void FunctionType::outputPre(OutputStream &OS) {
773   if (!(FunctionClass & Global)) {
774     if (FunctionClass & Static)
775       OS << "static ";
776   }
777 
778   if (ReturnType) {
779     Type::outputPre(OS, *ReturnType);
780     OS << " ";
781   }
782 
783   // Function pointers print the calling convention as void (__cdecl *)(params)
784   // rather than void __cdecl (*)(params).  So we need to let the PointerType
785   // class handle this.
786   if (!IsFunctionPointer)
787     outputCallingConvention(OS, CallConvention);
788 }
789 
outputPost(OutputStream & OS)790 void FunctionType::outputPost(OutputStream &OS) {
791   OS << "(";
792   outputParameterList(OS, Params);
793   OS << ")";
794   if (Quals & Q_Const)
795     OS << " const";
796   if (Quals & Q_Volatile)
797     OS << " volatile";
798   if (Quals & Q_Restrict)
799     OS << " __restrict";
800   if (Quals & Q_Unaligned)
801     OS << " __unaligned";
802 
803   if (RefKind == ReferenceKind::LValueRef)
804     OS << " &";
805   else if (RefKind == ReferenceKind::RValueRef)
806     OS << " &&";
807 
808   if (ReturnType)
809     Type::outputPost(OS, *ReturnType);
810   return;
811 }
812 
clone(ArenaAllocator & Arena) const813 Type *UdtType::clone(ArenaAllocator &Arena) const {
814   return Arena.alloc<UdtType>(*this);
815 }
816 
outputPre(OutputStream & OS)817 void UdtType::outputPre(OutputStream &OS) {
818   switch (Prim) {
819   case PrimTy::Class:
820     OS << "class ";
821     break;
822   case PrimTy::Struct:
823     OS << "struct ";
824     break;
825   case PrimTy::Union:
826     OS << "union ";
827     break;
828   case PrimTy::Enum:
829     OS << "enum ";
830     break;
831   default:
832     assert(false && "Not a udt type!");
833   }
834 
835   outputName(OS, UdtName);
836 }
837 
clone(ArenaAllocator & Arena) const838 Type *ArrayType::clone(ArenaAllocator &Arena) const {
839   return Arena.alloc<ArrayType>(*this);
840 }
841 
outputPre(OutputStream & OS)842 void ArrayType::outputPre(OutputStream &OS) {
843   Type::outputPre(OS, *ElementType);
844 }
845 
outputPost(OutputStream & OS)846 void ArrayType::outputPost(OutputStream &OS) {
847   if (ArrayDimension > 0)
848     OS << "[" << ArrayDimension << "]";
849   if (NextDimension)
850     Type::outputPost(OS, *NextDimension);
851   else if (ElementType)
852     Type::outputPost(OS, *ElementType);
853 }
854 
855 struct Symbol {
856   Name *SymbolName = nullptr;
857   Type *SymbolType = nullptr;
858 };
859 
860 } // namespace
861 
862 namespace {
863 
864 // Demangler class takes the main role in demangling symbols.
865 // It has a set of functions to parse mangled symbols into Type instances.
866 // It also has a set of functions to cnovert Type instances to strings.
867 class Demangler {
868 public:
869   Demangler() = default;
870 
871   // You are supposed to call parse() first and then check if error is true.  If
872   // it is false, call output() to write the formatted name to the given stream.
873   Symbol *parse(StringView &MangledName);
874   void output(const Symbol *S, OutputStream &OS);
875 
876   // True if an error occurred.
877   bool Error = false;
878 
879 private:
880   Type *demangleVariableEncoding(StringView &MangledName);
881   Type *demangleFunctionEncoding(StringView &MangledName);
882 
883   Qualifiers demanglePointerExtQualifiers(StringView &MangledName);
884 
885   // Parser functions. This is a recursive-descent parser.
886   Type *demangleType(StringView &MangledName, QualifierMangleMode QMM);
887   Type *demangleBasicType(StringView &MangledName);
888   UdtType *demangleClassType(StringView &MangledName);
889   PointerType *demanglePointerType(StringView &MangledName);
890   MemberPointerType *demangleMemberPointerType(StringView &MangledName);
891   FunctionType *demangleFunctionType(StringView &MangledName, bool HasThisQuals,
892                                      bool IsFunctionPointer);
893 
894   ArrayType *demangleArrayType(StringView &MangledName);
895 
896   TemplateParams *demangleTemplateParameterList(StringView &MangledName);
897   FunctionParams demangleFunctionParameterList(StringView &MangledName);
898 
899   int demangleNumber(StringView &MangledName);
900 
901   void memorizeString(StringView s);
902 
903   /// Allocate a copy of \p Borrowed into memory that we own.
904   StringView copyString(StringView Borrowed);
905 
906   Name *demangleFullyQualifiedTypeName(StringView &MangledName);
907   Name *demangleFullyQualifiedSymbolName(StringView &MangledName);
908 
909   Name *demangleUnqualifiedTypeName(StringView &MangledName);
910   Name *demangleUnqualifiedSymbolName(StringView &MangledName);
911 
912   Name *demangleNameScopeChain(StringView &MangledName, Name *UnqualifiedName);
913   Name *demangleNameScopePiece(StringView &MangledName);
914 
915   Name *demangleBackRefName(StringView &MangledName);
916   Name *demangleClassTemplateName(StringView &MangledName);
917   Name *demangleOperatorName(StringView &MangledName);
918   Name *demangleSimpleName(StringView &MangledName, bool Memorize);
919   Name *demangleAnonymousNamespaceName(StringView &MangledName);
920   Name *demangleLocallyScopedNamePiece(StringView &MangledName);
921 
922   StringView demangleSimpleString(StringView &MangledName, bool Memorize);
923 
924   FuncClass demangleFunctionClass(StringView &MangledName);
925   CallingConv demangleCallingConvention(StringView &MangledName);
926   StorageClass demangleVariableStorageClass(StringView &MangledName);
927   ReferenceKind demangleReferenceKind(StringView &MangledName);
928   void demangleThrowSpecification(StringView &MangledName);
929 
930   std::pair<Qualifiers, bool> demangleQualifiers(StringView &MangledName);
931 
932   // Memory allocator.
933   ArenaAllocator Arena;
934 
935   // A single type uses one global back-ref table for all function params.
936   // This means back-refs can even go "into" other types.  Examples:
937   //
938   //  // Second int* is a back-ref to first.
939   //  void foo(int *, int*);
940   //
941   //  // Second int* is not a back-ref to first (first is not a function param).
942   //  int* foo(int*);
943   //
944   //  // Second int* is a back-ref to first (ALL function types share the same
945   //  // back-ref map.
946   //  using F = void(*)(int*);
947   //  F G(int *);
948   Type *FunctionParamBackRefs[10];
949   size_t FunctionParamBackRefCount = 0;
950 
951   // The first 10 BackReferences in a mangled name can be back-referenced by
952   // special name @[0-9]. This is a storage for the first 10 BackReferences.
953   StringView BackReferences[10];
954   size_t BackRefCount = 0;
955 };
956 } // namespace
957 
copyString(StringView Borrowed)958 StringView Demangler::copyString(StringView Borrowed) {
959   char *Stable = Arena.allocUnalignedBuffer(Borrowed.size() + 1);
960   std::strcpy(Stable, Borrowed.begin());
961 
962   return {Stable, Borrowed.size()};
963 }
964 
965 // Parser entry point.
parse(StringView & MangledName)966 Symbol *Demangler::parse(StringView &MangledName) {
967   Symbol *S = Arena.alloc<Symbol>();
968 
969   // MSVC-style mangled symbols must start with '?'.
970   if (!MangledName.consumeFront("?")) {
971     S->SymbolName = Arena.alloc<Name>();
972     S->SymbolName->Str = MangledName;
973     S->SymbolType = Arena.alloc<Type>();
974     S->SymbolType->Prim = PrimTy::Unknown;
975     return S;
976   }
977 
978   // What follows is a main symbol name. This may include
979   // namespaces or class BackReferences.
980   S->SymbolName = demangleFullyQualifiedSymbolName(MangledName);
981 
982   // Read a variable.
983   S->SymbolType = startsWithDigit(MangledName)
984                       ? demangleVariableEncoding(MangledName)
985                       : demangleFunctionEncoding(MangledName);
986 
987   return S;
988 }
989 
990 // <type-encoding> ::= <storage-class> <variable-type>
991 // <storage-class> ::= 0  # private static member
992 //                 ::= 1  # protected static member
993 //                 ::= 2  # public static member
994 //                 ::= 3  # global
995 //                 ::= 4  # static local
996 
demangleVariableEncoding(StringView & MangledName)997 Type *Demangler::demangleVariableEncoding(StringView &MangledName) {
998   StorageClass SC = demangleVariableStorageClass(MangledName);
999 
1000   Type *Ty = demangleType(MangledName, QualifierMangleMode::Drop);
1001 
1002   Ty->Storage = SC;
1003 
1004   // <variable-type> ::= <type> <cvr-qualifiers>
1005   //                 ::= <type> <pointee-cvr-qualifiers> # pointers, references
1006   switch (Ty->Prim) {
1007   case PrimTy::Ptr:
1008   case PrimTy::MemberPtr: {
1009     Qualifiers ExtraChildQuals = Q_None;
1010     Ty->Quals =
1011         Qualifiers(Ty->Quals | demanglePointerExtQualifiers(MangledName));
1012 
1013     bool IsMember = false;
1014     std::tie(ExtraChildQuals, IsMember) = demangleQualifiers(MangledName);
1015 
1016     if (Ty->Prim == PrimTy::MemberPtr) {
1017       assert(IsMember);
1018       Name *BackRefName = demangleFullyQualifiedTypeName(MangledName);
1019       (void)BackRefName;
1020       MemberPointerType *MPTy = static_cast<MemberPointerType *>(Ty);
1021       MPTy->Pointee->Quals = Qualifiers(MPTy->Pointee->Quals | ExtraChildQuals);
1022     } else {
1023       PointerType *PTy = static_cast<PointerType *>(Ty);
1024       PTy->Pointee->Quals = Qualifiers(PTy->Pointee->Quals | ExtraChildQuals);
1025     }
1026 
1027     break;
1028   }
1029   default:
1030     Ty->Quals = demangleQualifiers(MangledName).first;
1031     break;
1032   }
1033 
1034   return Ty;
1035 }
1036 
1037 // Sometimes numbers are encoded in mangled symbols. For example,
1038 // "int (*x)[20]" is a valid C type (x is a pointer to an array of
1039 // length 20), so we need some way to embed numbers as part of symbols.
1040 // This function parses it.
1041 //
1042 // <number>               ::= [?] <non-negative integer>
1043 //
1044 // <non-negative integer> ::= <decimal digit> # when 1 <= Number <= 10
1045 //                        ::= <hex digit>+ @  # when Numbrer == 0 or >= 10
1046 //
1047 // <hex-digit>            ::= [A-P]           # A = 0, B = 1, ...
demangleNumber(StringView & MangledName)1048 int Demangler::demangleNumber(StringView &MangledName) {
1049   bool neg = MangledName.consumeFront("?");
1050 
1051   if (startsWithDigit(MangledName)) {
1052     int32_t Ret = MangledName[0] - '0' + 1;
1053     MangledName = MangledName.dropFront(1);
1054     return neg ? -Ret : Ret;
1055   }
1056 
1057   int Ret = 0;
1058   for (size_t i = 0; i < MangledName.size(); ++i) {
1059     char C = MangledName[i];
1060     if (C == '@') {
1061       MangledName = MangledName.dropFront(i + 1);
1062       return neg ? -Ret : Ret;
1063     }
1064     if ('A' <= C && C <= 'P') {
1065       Ret = (Ret << 4) + (C - 'A');
1066       continue;
1067     }
1068     break;
1069   }
1070 
1071   Error = true;
1072   return 0;
1073 }
1074 
1075 // First 10 strings can be referenced by special BackReferences ?0, ?1, ..., ?9.
1076 // Memorize it.
memorizeString(StringView S)1077 void Demangler::memorizeString(StringView S) {
1078   if (BackRefCount >= sizeof(BackReferences) / sizeof(*BackReferences))
1079     return;
1080   for (size_t i = 0; i < BackRefCount; ++i)
1081     if (S == BackReferences[i])
1082       return;
1083   BackReferences[BackRefCount++] = S;
1084 }
1085 
demangleBackRefName(StringView & MangledName)1086 Name *Demangler::demangleBackRefName(StringView &MangledName) {
1087   assert(startsWithDigit(MangledName));
1088 
1089   size_t I = MangledName[0] - '0';
1090   if (I >= BackRefCount) {
1091     Error = true;
1092     return nullptr;
1093   }
1094 
1095   MangledName = MangledName.dropFront();
1096   Name *Node = Arena.alloc<Name>();
1097   Node->Str = BackReferences[I];
1098   return Node;
1099 }
1100 
demangleClassTemplateName(StringView & MangledName)1101 Name *Demangler::demangleClassTemplateName(StringView &MangledName) {
1102   assert(MangledName.startsWith("?$"));
1103   MangledName.consumeFront("?$");
1104 
1105   Name *Node = demangleSimpleName(MangledName, false);
1106   Node->TParams = demangleTemplateParameterList(MangledName);
1107 
1108   // Render this class template name into a string buffer so that we can
1109   // memorize it for the purpose of back-referencing.
1110   OutputStream OS = OutputStream::create(nullptr, nullptr, 1024);
1111   outputName(OS, Node);
1112   OS << '\0';
1113   char *Name = OS.getBuffer();
1114 
1115   StringView Owned = copyString(Name);
1116   memorizeString(Owned);
1117   std::free(Name);
1118 
1119   return Node;
1120 }
1121 
demangleOperatorName(StringView & MangledName)1122 Name *Demangler::demangleOperatorName(StringView &MangledName) {
1123   assert(MangledName.startsWith('?'));
1124   MangledName.consumeFront('?');
1125 
1126   auto NameString = [this, &MangledName]() -> StringView {
1127     switch (MangledName.popFront()) {
1128     case '0':
1129       return "ctor";
1130     case '1':
1131       return "dtor";
1132     case '2':
1133       return " new";
1134     case '3':
1135       return " delete";
1136     case '4':
1137       return "=";
1138     case '5':
1139       return ">>";
1140     case '6':
1141       return "<<";
1142     case '7':
1143       return "!";
1144     case '8':
1145       return "==";
1146     case '9':
1147       return "!=";
1148     case 'A':
1149       return "[]";
1150     case 'C':
1151       return "->";
1152     case 'D':
1153       return "*";
1154     case 'E':
1155       return "++";
1156     case 'F':
1157       return "--";
1158     case 'G':
1159       return "-";
1160     case 'H':
1161       return "+";
1162     case 'I':
1163       return "&";
1164     case 'J':
1165       return "->*";
1166     case 'K':
1167       return "/";
1168     case 'L':
1169       return "%";
1170     case 'M':
1171       return "<";
1172     case 'N':
1173       return "<=";
1174     case 'O':
1175       return ">";
1176     case 'P':
1177       return ">=";
1178     case 'Q':
1179       return ",";
1180     case 'R':
1181       return "()";
1182     case 'S':
1183       return "~";
1184     case 'T':
1185       return "^";
1186     case 'U':
1187       return "|";
1188     case 'V':
1189       return "&&";
1190     case 'W':
1191       return "||";
1192     case 'X':
1193       return "*=";
1194     case 'Y':
1195       return "+=";
1196     case 'Z':
1197       return "-=";
1198     case '_': {
1199       if (MangledName.empty())
1200         break;
1201 
1202       switch (MangledName.popFront()) {
1203       case '0':
1204         return "/=";
1205       case '1':
1206         return "%=";
1207       case '2':
1208         return ">>=";
1209       case '3':
1210         return "<<=";
1211       case '4':
1212         return "&=";
1213       case '5':
1214         return "|=";
1215       case '6':
1216         return "^=";
1217       case 'U':
1218         return " new[]";
1219       case 'V':
1220         return " delete[]";
1221       case '_':
1222         if (MangledName.consumeFront("L"))
1223           return " co_await";
1224         if (MangledName.consumeFront("K")) {
1225           size_t EndPos = MangledName.find('@');
1226           if (EndPos == StringView::npos)
1227             break;
1228           StringView OpName = demangleSimpleString(MangledName, false);
1229           size_t FullSize = OpName.size() + 3; // <space>""OpName
1230           char *Buffer = Arena.allocUnalignedBuffer(FullSize);
1231           Buffer[0] = ' ';
1232           Buffer[1] = '"';
1233           Buffer[2] = '"';
1234           std::memcpy(Buffer + 3, OpName.begin(), OpName.size());
1235           return {Buffer, FullSize};
1236         }
1237       }
1238     }
1239     }
1240     Error = true;
1241     return "";
1242   };
1243 
1244   Name *Node = Arena.alloc<Name>();
1245   Node->Operator = NameString();
1246   return Node;
1247 }
1248 
demangleSimpleName(StringView & MangledName,bool Memorize)1249 Name *Demangler::demangleSimpleName(StringView &MangledName, bool Memorize) {
1250   StringView S = demangleSimpleString(MangledName, Memorize);
1251   if (Error)
1252     return nullptr;
1253 
1254   Name *Node = Arena.alloc<Name>();
1255   Node->Str = S;
1256   return Node;
1257 }
1258 
demangleSimpleString(StringView & MangledName,bool Memorize)1259 StringView Demangler::demangleSimpleString(StringView &MangledName,
1260                                            bool Memorize) {
1261   StringView S;
1262   for (size_t i = 0; i < MangledName.size(); ++i) {
1263     if (MangledName[i] != '@')
1264       continue;
1265     S = MangledName.substr(0, i);
1266     MangledName = MangledName.dropFront(i + 1);
1267 
1268     if (Memorize)
1269       memorizeString(S);
1270     return S;
1271   }
1272 
1273   Error = true;
1274   return {};
1275 }
1276 
demangleAnonymousNamespaceName(StringView & MangledName)1277 Name *Demangler::demangleAnonymousNamespaceName(StringView &MangledName) {
1278   assert(MangledName.startsWith("?A"));
1279   MangledName.consumeFront("?A");
1280 
1281   Name *Node = Arena.alloc<Name>();
1282   Node->Str = "`anonymous namespace'";
1283   if (MangledName.consumeFront('@'))
1284     return Node;
1285 
1286   Error = true;
1287   return nullptr;
1288 }
1289 
demangleLocallyScopedNamePiece(StringView & MangledName)1290 Name *Demangler::demangleLocallyScopedNamePiece(StringView &MangledName) {
1291   assert(startsWithLocalScopePattern(MangledName));
1292 
1293   Name *Node = Arena.alloc<Name>();
1294   MangledName.consumeFront('?');
1295   int ScopeIdentifier = demangleNumber(MangledName);
1296 
1297   // One ? to terminate the number
1298   MangledName.consumeFront('?');
1299 
1300   assert(!Error);
1301   Symbol *Scope = parse(MangledName);
1302   if (Error)
1303     return nullptr;
1304 
1305   // Render the parent symbol's name into a buffer.
1306   OutputStream OS = OutputStream::create(nullptr, nullptr, 1024);
1307   OS << '`';
1308   output(Scope, OS);
1309   OS << '\'';
1310   OS << "::`" << ScopeIdentifier << "'";
1311   OS << '\0';
1312   char *Result = OS.getBuffer();
1313   Node->Str = copyString(Result);
1314   std::free(Result);
1315   return Node;
1316 }
1317 
1318 // Parses a type name in the form of A@B@C@@ which represents C::B::A.
demangleFullyQualifiedTypeName(StringView & MangledName)1319 Name *Demangler::demangleFullyQualifiedTypeName(StringView &MangledName) {
1320   Name *TypeName = demangleUnqualifiedTypeName(MangledName);
1321   assert(TypeName);
1322 
1323   Name *QualName = demangleNameScopeChain(MangledName, TypeName);
1324   assert(QualName);
1325   return QualName;
1326 }
1327 
1328 // Parses a symbol name in the form of A@B@C@@ which represents C::B::A.
1329 // Symbol names have slightly different rules regarding what can appear
1330 // so we separate out the implementations for flexibility.
demangleFullyQualifiedSymbolName(StringView & MangledName)1331 Name *Demangler::demangleFullyQualifiedSymbolName(StringView &MangledName) {
1332   Name *SymbolName = demangleUnqualifiedSymbolName(MangledName);
1333   assert(SymbolName);
1334 
1335   Name *QualName = demangleNameScopeChain(MangledName, SymbolName);
1336   assert(QualName);
1337   return QualName;
1338 }
1339 
demangleUnqualifiedTypeName(StringView & MangledName)1340 Name *Demangler::demangleUnqualifiedTypeName(StringView &MangledName) {
1341   // An inner-most name can be a back-reference, because a fully-qualified name
1342   // (e.g. Scope + Inner) can contain other fully qualified names inside of
1343   // them (for example template parameters), and these nested parameters can
1344   // refer to previously mangled types.
1345   if (startsWithDigit(MangledName))
1346     return demangleBackRefName(MangledName);
1347 
1348   if (MangledName.startsWith("?$"))
1349     return demangleClassTemplateName(MangledName);
1350 
1351   return demangleSimpleName(MangledName, true);
1352 }
1353 
demangleUnqualifiedSymbolName(StringView & MangledName)1354 Name *Demangler::demangleUnqualifiedSymbolName(StringView &MangledName) {
1355   if (startsWithDigit(MangledName))
1356     return demangleBackRefName(MangledName);
1357   if (MangledName.startsWith("?$"))
1358     return demangleClassTemplateName(MangledName);
1359   if (MangledName.startsWith('?'))
1360     return demangleOperatorName(MangledName);
1361   return demangleSimpleName(MangledName, true);
1362 }
1363 
demangleNameScopePiece(StringView & MangledName)1364 Name *Demangler::demangleNameScopePiece(StringView &MangledName) {
1365   if (startsWithDigit(MangledName))
1366     return demangleBackRefName(MangledName);
1367 
1368   if (MangledName.startsWith("?$"))
1369     return demangleClassTemplateName(MangledName);
1370 
1371   if (MangledName.startsWith("?A"))
1372     return demangleAnonymousNamespaceName(MangledName);
1373 
1374   if (startsWithLocalScopePattern(MangledName))
1375     return demangleLocallyScopedNamePiece(MangledName);
1376 
1377   return demangleSimpleName(MangledName, true);
1378 }
1379 
demangleNameScopeChain(StringView & MangledName,Name * UnqualifiedName)1380 Name *Demangler::demangleNameScopeChain(StringView &MangledName,
1381                                         Name *UnqualifiedName) {
1382   Name *Head = UnqualifiedName;
1383 
1384   while (!MangledName.consumeFront("@")) {
1385     if (MangledName.empty()) {
1386       Error = true;
1387       return nullptr;
1388     }
1389 
1390     assert(!Error);
1391     Name *Elem = demangleNameScopePiece(MangledName);
1392     if (Error)
1393       return nullptr;
1394 
1395     Elem->Next = Head;
1396     Head = Elem;
1397   }
1398   return Head;
1399 }
1400 
demangleFunctionClass(StringView & MangledName)1401 FuncClass Demangler::demangleFunctionClass(StringView &MangledName) {
1402   SwapAndRestore<StringView> RestoreOnError(MangledName, MangledName);
1403   RestoreOnError.shouldRestore(false);
1404 
1405   switch (MangledName.popFront()) {
1406   case 'A':
1407     return Private;
1408   case 'B':
1409     return FuncClass(Private | Far);
1410   case 'C':
1411     return FuncClass(Private | Static);
1412   case 'D':
1413     return FuncClass(Private | Static);
1414   case 'E':
1415     return FuncClass(Private | Virtual);
1416   case 'F':
1417     return FuncClass(Private | Virtual);
1418   case 'I':
1419     return Protected;
1420   case 'J':
1421     return FuncClass(Protected | Far);
1422   case 'K':
1423     return FuncClass(Protected | Static);
1424   case 'L':
1425     return FuncClass(Protected | Static | Far);
1426   case 'M':
1427     return FuncClass(Protected | Virtual);
1428   case 'N':
1429     return FuncClass(Protected | Virtual | Far);
1430   case 'Q':
1431     return Public;
1432   case 'R':
1433     return FuncClass(Public | Far);
1434   case 'S':
1435     return FuncClass(Public | Static);
1436   case 'T':
1437     return FuncClass(Public | Static | Far);
1438   case 'U':
1439     return FuncClass(Public | Virtual);
1440   case 'V':
1441     return FuncClass(Public | Virtual | Far);
1442   case 'Y':
1443     return Global;
1444   case 'Z':
1445     return FuncClass(Global | Far);
1446   }
1447 
1448   Error = true;
1449   RestoreOnError.shouldRestore(true);
1450   return Public;
1451 }
1452 
demangleCallingConvention(StringView & MangledName)1453 CallingConv Demangler::demangleCallingConvention(StringView &MangledName) {
1454   switch (MangledName.popFront()) {
1455   case 'A':
1456   case 'B':
1457     return CallingConv::Cdecl;
1458   case 'C':
1459   case 'D':
1460     return CallingConv::Pascal;
1461   case 'E':
1462   case 'F':
1463     return CallingConv::Thiscall;
1464   case 'G':
1465   case 'H':
1466     return CallingConv::Stdcall;
1467   case 'I':
1468   case 'J':
1469     return CallingConv::Fastcall;
1470   case 'M':
1471   case 'N':
1472     return CallingConv::Clrcall;
1473   case 'O':
1474   case 'P':
1475     return CallingConv::Eabi;
1476   case 'Q':
1477     return CallingConv::Vectorcall;
1478   }
1479 
1480   return CallingConv::None;
1481 }
1482 
demangleVariableStorageClass(StringView & MangledName)1483 StorageClass Demangler::demangleVariableStorageClass(StringView &MangledName) {
1484   assert(std::isdigit(MangledName.front()));
1485 
1486   switch (MangledName.popFront()) {
1487   case '0':
1488     return StorageClass::PrivateStatic;
1489   case '1':
1490     return StorageClass::ProtectedStatic;
1491   case '2':
1492     return StorageClass::PublicStatic;
1493   case '3':
1494     return StorageClass::Global;
1495   case '4':
1496     return StorageClass::FunctionLocalStatic;
1497   }
1498   Error = true;
1499   return StorageClass::None;
1500 }
1501 
1502 std::pair<Qualifiers, bool>
demangleQualifiers(StringView & MangledName)1503 Demangler::demangleQualifiers(StringView &MangledName) {
1504 
1505   switch (MangledName.popFront()) {
1506   // Member qualifiers
1507   case 'Q':
1508     return std::make_pair(Q_None, true);
1509   case 'R':
1510     return std::make_pair(Q_Const, true);
1511   case 'S':
1512     return std::make_pair(Q_Volatile, true);
1513   case 'T':
1514     return std::make_pair(Qualifiers(Q_Const | Q_Volatile), true);
1515   // Non-Member qualifiers
1516   case 'A':
1517     return std::make_pair(Q_None, false);
1518   case 'B':
1519     return std::make_pair(Q_Const, false);
1520   case 'C':
1521     return std::make_pair(Q_Volatile, false);
1522   case 'D':
1523     return std::make_pair(Qualifiers(Q_Const | Q_Volatile), false);
1524   }
1525   Error = true;
1526   return std::make_pair(Q_None, false);
1527 }
1528 
isTagType(StringView S)1529 static bool isTagType(StringView S) {
1530   switch (S.front()) {
1531   case 'T': // union
1532   case 'U': // struct
1533   case 'V': // class
1534   case 'W': // enum
1535     return true;
1536   }
1537   return false;
1538 }
1539 
isPointerType(StringView S)1540 static bool isPointerType(StringView S) {
1541   if (S.startsWith("$$Q")) // foo &&
1542     return true;
1543 
1544   switch (S.front()) {
1545   case 'A': // foo &
1546   case 'P': // foo *
1547   case 'Q': // foo *const
1548   case 'R': // foo *volatile
1549   case 'S': // foo *const volatile
1550     return true;
1551   }
1552   return false;
1553 }
1554 
isArrayType(StringView S)1555 static bool isArrayType(StringView S) { return S[0] == 'Y'; }
1556 
isFunctionType(StringView S)1557 static bool isFunctionType(StringView S) {
1558   return S.startsWith("$$A8@@") || S.startsWith("$$A6");
1559 }
1560 
1561 // <variable-type> ::= <type> <cvr-qualifiers>
1562 //                 ::= <type> <pointee-cvr-qualifiers> # pointers, references
demangleType(StringView & MangledName,QualifierMangleMode QMM)1563 Type *Demangler::demangleType(StringView &MangledName,
1564                               QualifierMangleMode QMM) {
1565   Qualifiers Quals = Q_None;
1566   bool IsMember = false;
1567   bool IsMemberKnown = false;
1568   if (QMM == QualifierMangleMode::Mangle) {
1569     std::tie(Quals, IsMember) = demangleQualifiers(MangledName);
1570     IsMemberKnown = true;
1571   } else if (QMM == QualifierMangleMode::Result) {
1572     if (MangledName.consumeFront('?')) {
1573       std::tie(Quals, IsMember) = demangleQualifiers(MangledName);
1574       IsMemberKnown = true;
1575     }
1576   }
1577 
1578   Type *Ty = nullptr;
1579   if (isTagType(MangledName))
1580     Ty = demangleClassType(MangledName);
1581   else if (isPointerType(MangledName)) {
1582     if (!IsMemberKnown)
1583       IsMember = isMemberPointer(MangledName);
1584 
1585     if (IsMember)
1586       Ty = demangleMemberPointerType(MangledName);
1587     else
1588       Ty = demanglePointerType(MangledName);
1589   } else if (isArrayType(MangledName))
1590     Ty = demangleArrayType(MangledName);
1591   else if (isFunctionType(MangledName)) {
1592     if (MangledName.consumeFront("$$A8@@"))
1593       Ty = demangleFunctionType(MangledName, true, false);
1594     else {
1595       assert(MangledName.startsWith("$$A6"));
1596       MangledName.consumeFront("$$A6");
1597       Ty = demangleFunctionType(MangledName, false, false);
1598     }
1599   } else {
1600     Ty = demangleBasicType(MangledName);
1601     assert(Ty && !Error);
1602     if (!Ty || Error)
1603       return Ty;
1604   }
1605 
1606   Ty->Quals = Qualifiers(Ty->Quals | Quals);
1607   return Ty;
1608 }
1609 
demangleReferenceKind(StringView & MangledName)1610 ReferenceKind Demangler::demangleReferenceKind(StringView &MangledName) {
1611   if (MangledName.consumeFront('G'))
1612     return ReferenceKind::LValueRef;
1613   else if (MangledName.consumeFront('H'))
1614     return ReferenceKind::RValueRef;
1615   return ReferenceKind::None;
1616 }
1617 
demangleThrowSpecification(StringView & MangledName)1618 void Demangler::demangleThrowSpecification(StringView &MangledName) {
1619   if (MangledName.consumeFront('Z'))
1620     return;
1621 
1622   Error = true;
1623 }
1624 
demangleFunctionType(StringView & MangledName,bool HasThisQuals,bool IsFunctionPointer)1625 FunctionType *Demangler::demangleFunctionType(StringView &MangledName,
1626                                               bool HasThisQuals,
1627                                               bool IsFunctionPointer) {
1628   FunctionType *FTy = Arena.alloc<FunctionType>();
1629   FTy->Prim = PrimTy::Function;
1630   FTy->IsFunctionPointer = IsFunctionPointer;
1631 
1632   if (HasThisQuals) {
1633     FTy->Quals = demanglePointerExtQualifiers(MangledName);
1634     FTy->RefKind = demangleReferenceKind(MangledName);
1635     FTy->Quals = Qualifiers(FTy->Quals | demangleQualifiers(MangledName).first);
1636   }
1637 
1638   // Fields that appear on both member and non-member functions.
1639   FTy->CallConvention = demangleCallingConvention(MangledName);
1640 
1641   // <return-type> ::= <type>
1642   //               ::= @ # structors (they have no declared return type)
1643   bool IsStructor = MangledName.consumeFront('@');
1644   if (!IsStructor)
1645     FTy->ReturnType = demangleType(MangledName, QualifierMangleMode::Result);
1646 
1647   FTy->Params = demangleFunctionParameterList(MangledName);
1648 
1649   demangleThrowSpecification(MangledName);
1650 
1651   return FTy;
1652 }
1653 
demangleFunctionEncoding(StringView & MangledName)1654 Type *Demangler::demangleFunctionEncoding(StringView &MangledName) {
1655   FuncClass FC = demangleFunctionClass(MangledName);
1656 
1657   bool HasThisQuals = !(FC & (Global | Static));
1658   FunctionType *FTy = demangleFunctionType(MangledName, HasThisQuals, false);
1659   FTy->FunctionClass = FC;
1660 
1661   return FTy;
1662 }
1663 
1664 // Reads a primitive type.
demangleBasicType(StringView & MangledName)1665 Type *Demangler::demangleBasicType(StringView &MangledName) {
1666   Type *Ty = Arena.alloc<Type>();
1667 
1668   if (MangledName.consumeFront("$$T")) {
1669     Ty->Prim = PrimTy::Nullptr;
1670     return Ty;
1671   }
1672 
1673   switch (MangledName.popFront()) {
1674   case 'X':
1675     Ty->Prim = PrimTy::Void;
1676     break;
1677   case 'D':
1678     Ty->Prim = PrimTy::Char;
1679     break;
1680   case 'C':
1681     Ty->Prim = PrimTy::Schar;
1682     break;
1683   case 'E':
1684     Ty->Prim = PrimTy::Uchar;
1685     break;
1686   case 'F':
1687     Ty->Prim = PrimTy::Short;
1688     break;
1689   case 'G':
1690     Ty->Prim = PrimTy::Ushort;
1691     break;
1692   case 'H':
1693     Ty->Prim = PrimTy::Int;
1694     break;
1695   case 'I':
1696     Ty->Prim = PrimTy::Uint;
1697     break;
1698   case 'J':
1699     Ty->Prim = PrimTy::Long;
1700     break;
1701   case 'K':
1702     Ty->Prim = PrimTy::Ulong;
1703     break;
1704   case 'M':
1705     Ty->Prim = PrimTy::Float;
1706     break;
1707   case 'N':
1708     Ty->Prim = PrimTy::Double;
1709     break;
1710   case 'O':
1711     Ty->Prim = PrimTy::Ldouble;
1712     break;
1713   case '_': {
1714     if (MangledName.empty()) {
1715       Error = true;
1716       return nullptr;
1717     }
1718     switch (MangledName.popFront()) {
1719     case 'N':
1720       Ty->Prim = PrimTy::Bool;
1721       break;
1722     case 'J':
1723       Ty->Prim = PrimTy::Int64;
1724       break;
1725     case 'K':
1726       Ty->Prim = PrimTy::Uint64;
1727       break;
1728     case 'W':
1729       Ty->Prim = PrimTy::Wchar;
1730       break;
1731     case 'S':
1732       Ty->Prim = PrimTy::Char16;
1733       break;
1734     case 'U':
1735       Ty->Prim = PrimTy::Char32;
1736       break;
1737     default:
1738       Error = true;
1739       return nullptr;
1740     }
1741     break;
1742   }
1743   default:
1744     Error = true;
1745     return nullptr;
1746   }
1747   return Ty;
1748 }
1749 
demangleClassType(StringView & MangledName)1750 UdtType *Demangler::demangleClassType(StringView &MangledName) {
1751   UdtType *UTy = Arena.alloc<UdtType>();
1752 
1753   switch (MangledName.popFront()) {
1754   case 'T':
1755     UTy->Prim = PrimTy::Union;
1756     break;
1757   case 'U':
1758     UTy->Prim = PrimTy::Struct;
1759     break;
1760   case 'V':
1761     UTy->Prim = PrimTy::Class;
1762     break;
1763   case 'W':
1764     if (MangledName.popFront() != '4') {
1765       Error = true;
1766       return nullptr;
1767     }
1768     UTy->Prim = PrimTy::Enum;
1769     break;
1770   default:
1771     assert(false);
1772   }
1773 
1774   UTy->UdtName = demangleFullyQualifiedTypeName(MangledName);
1775   return UTy;
1776 }
1777 
1778 static std::pair<Qualifiers, PointerAffinity>
demanglePointerCVQualifiers(StringView & MangledName)1779 demanglePointerCVQualifiers(StringView &MangledName) {
1780   if (MangledName.consumeFront("$$Q"))
1781     return std::make_pair(Q_None, PointerAffinity::RValueReference);
1782 
1783   switch (MangledName.popFront()) {
1784   case 'A':
1785     return std::make_pair(Q_None, PointerAffinity::Reference);
1786   case 'P':
1787     return std::make_pair(Q_None, PointerAffinity::Pointer);
1788   case 'Q':
1789     return std::make_pair(Q_Const, PointerAffinity::Pointer);
1790   case 'R':
1791     return std::make_pair(Q_Volatile, PointerAffinity::Pointer);
1792   case 'S':
1793     return std::make_pair(Qualifiers(Q_Const | Q_Volatile),
1794                           PointerAffinity::Pointer);
1795   default:
1796     assert(false && "Ty is not a pointer type!");
1797   }
1798   return std::make_pair(Q_None, PointerAffinity::Pointer);
1799 }
1800 
1801 // <pointer-type> ::= E? <pointer-cvr-qualifiers> <ext-qualifiers> <type>
1802 //                       # the E is required for 64-bit non-static pointers
demanglePointerType(StringView & MangledName)1803 PointerType *Demangler::demanglePointerType(StringView &MangledName) {
1804   PointerType *Pointer = Arena.alloc<PointerType>();
1805 
1806   std::tie(Pointer->Quals, Pointer->Affinity) =
1807       demanglePointerCVQualifiers(MangledName);
1808 
1809   Pointer->Prim = PrimTy::Ptr;
1810   if (MangledName.consumeFront("6")) {
1811     Pointer->Pointee = demangleFunctionType(MangledName, false, true);
1812     return Pointer;
1813   }
1814 
1815   Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName);
1816   Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals);
1817 
1818   Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Mangle);
1819   return Pointer;
1820 }
1821 
1822 MemberPointerType *
demangleMemberPointerType(StringView & MangledName)1823 Demangler::demangleMemberPointerType(StringView &MangledName) {
1824   MemberPointerType *Pointer = Arena.alloc<MemberPointerType>();
1825   Pointer->Prim = PrimTy::MemberPtr;
1826 
1827   PointerAffinity Affinity;
1828   std::tie(Pointer->Quals, Affinity) = demanglePointerCVQualifiers(MangledName);
1829   assert(Affinity == PointerAffinity::Pointer);
1830 
1831   Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName);
1832   Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals);
1833 
1834   if (MangledName.consumeFront("8")) {
1835     Pointer->MemberName = demangleFullyQualifiedSymbolName(MangledName);
1836     Pointer->Pointee = demangleFunctionType(MangledName, true, true);
1837   } else {
1838     Qualifiers PointeeQuals = Q_None;
1839     bool IsMember = false;
1840     std::tie(PointeeQuals, IsMember) = demangleQualifiers(MangledName);
1841     assert(IsMember);
1842     Pointer->MemberName = demangleFullyQualifiedSymbolName(MangledName);
1843 
1844     Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Drop);
1845     Pointer->Pointee->Quals = PointeeQuals;
1846   }
1847 
1848   return Pointer;
1849 }
1850 
demanglePointerExtQualifiers(StringView & MangledName)1851 Qualifiers Demangler::demanglePointerExtQualifiers(StringView &MangledName) {
1852   Qualifiers Quals = Q_None;
1853   if (MangledName.consumeFront('E'))
1854     Quals = Qualifiers(Quals | Q_Pointer64);
1855   if (MangledName.consumeFront('I'))
1856     Quals = Qualifiers(Quals | Q_Restrict);
1857   if (MangledName.consumeFront('F'))
1858     Quals = Qualifiers(Quals | Q_Unaligned);
1859 
1860   return Quals;
1861 }
1862 
demangleArrayType(StringView & MangledName)1863 ArrayType *Demangler::demangleArrayType(StringView &MangledName) {
1864   assert(MangledName.front() == 'Y');
1865   MangledName.popFront();
1866 
1867   int Dimension = demangleNumber(MangledName);
1868   if (Dimension <= 0) {
1869     Error = true;
1870     return nullptr;
1871   }
1872 
1873   ArrayType *ATy = Arena.alloc<ArrayType>();
1874   ArrayType *Dim = ATy;
1875   for (int I = 0; I < Dimension; ++I) {
1876     Dim->Prim = PrimTy::Array;
1877     Dim->ArrayDimension = demangleNumber(MangledName);
1878     Dim->NextDimension = Arena.alloc<ArrayType>();
1879     Dim = Dim->NextDimension;
1880   }
1881 
1882   if (MangledName.consumeFront("$$C")) {
1883     if (MangledName.consumeFront("B"))
1884       ATy->Quals = Q_Const;
1885     else if (MangledName.consumeFront("C") || MangledName.consumeFront("D"))
1886       ATy->Quals = Qualifiers(Q_Const | Q_Volatile);
1887     else if (!MangledName.consumeFront("A"))
1888       Error = true;
1889   }
1890 
1891   ATy->ElementType = demangleType(MangledName, QualifierMangleMode::Drop);
1892   Dim->ElementType = ATy->ElementType;
1893   return ATy;
1894 }
1895 
1896 // Reads a function or a template parameters.
1897 FunctionParams
demangleFunctionParameterList(StringView & MangledName)1898 Demangler::demangleFunctionParameterList(StringView &MangledName) {
1899   // Empty parameter list.
1900   if (MangledName.consumeFront('X'))
1901     return {};
1902 
1903   FunctionParams *Head;
1904   FunctionParams **Current = &Head;
1905   while (!Error && !MangledName.startsWith('@') &&
1906          !MangledName.startsWith('Z')) {
1907 
1908     if (startsWithDigit(MangledName)) {
1909       size_t N = MangledName[0] - '0';
1910       if (N >= FunctionParamBackRefCount) {
1911         Error = true;
1912         return {};
1913       }
1914       MangledName = MangledName.dropFront();
1915 
1916       *Current = Arena.alloc<FunctionParams>();
1917       (*Current)->Current = FunctionParamBackRefs[N]->clone(Arena);
1918       Current = &(*Current)->Next;
1919       continue;
1920     }
1921 
1922     size_t OldSize = MangledName.size();
1923 
1924     *Current = Arena.alloc<FunctionParams>();
1925     (*Current)->Current = demangleType(MangledName, QualifierMangleMode::Drop);
1926 
1927     size_t CharsConsumed = OldSize - MangledName.size();
1928     assert(CharsConsumed != 0);
1929 
1930     // Single-letter types are ignored for backreferences because memorizing
1931     // them doesn't save anything.
1932     if (FunctionParamBackRefCount <= 9 && CharsConsumed > 1)
1933       FunctionParamBackRefs[FunctionParamBackRefCount++] = (*Current)->Current;
1934 
1935     Current = &(*Current)->Next;
1936   }
1937 
1938   if (Error)
1939     return {};
1940 
1941   // A non-empty parameter list is terminated by either 'Z' (variadic) parameter
1942   // list or '@' (non variadic).  Careful not to consume "@Z", as in that case
1943   // the following Z could be a throw specifier.
1944   if (MangledName.consumeFront('@'))
1945     return *Head;
1946 
1947   if (MangledName.consumeFront('Z')) {
1948     Head->IsVariadic = true;
1949     return *Head;
1950   }
1951 
1952   Error = true;
1953   return {};
1954 }
1955 
1956 TemplateParams *
demangleTemplateParameterList(StringView & MangledName)1957 Demangler::demangleTemplateParameterList(StringView &MangledName) {
1958   TemplateParams *Head;
1959   TemplateParams **Current = &Head;
1960   while (!Error && !MangledName.startsWith('@')) {
1961     // Template parameter lists don't participate in back-referencing.
1962     *Current = Arena.alloc<TemplateParams>();
1963 
1964     // Empty parameter pack.
1965     if (MangledName.consumeFront("$S") || MangledName.consumeFront("$$V") ||
1966         MangledName.consumeFront("$$$V")) {
1967       if (!MangledName.startsWith('@'))
1968         Error = true;
1969       continue;
1970     }
1971 
1972     if (MangledName.consumeFront("$$Y")) {
1973       (*Current)->IsTemplateTemplate = true;
1974       (*Current)->IsAliasTemplate = true;
1975       (*Current)->ParamName = demangleFullyQualifiedTypeName(MangledName);
1976     } else if (MangledName.consumeFront("$1?")) {
1977       (*Current)->ParamName = demangleFullyQualifiedSymbolName(MangledName);
1978       (*Current)->ParamType = demangleFunctionEncoding(MangledName);
1979     } else {
1980       (*Current)->ParamType =
1981           demangleType(MangledName, QualifierMangleMode::Drop);
1982     }
1983 
1984     Current = &(*Current)->Next;
1985   }
1986 
1987   if (Error)
1988     return {};
1989 
1990   // Template parameter lists cannot be variadic, so it can only be terminated
1991   // by @.
1992   if (MangledName.consumeFront('@'))
1993     return Head;
1994   Error = true;
1995   return {};
1996 }
1997 
output(const Symbol * S,OutputStream & OS)1998 void Demangler::output(const Symbol *S, OutputStream &OS) {
1999   // Converts an AST to a string.
2000   //
2001   // Converting an AST representing a C++ type to a string is tricky due
2002   // to the bad grammar of the C++ declaration inherited from C. You have
2003   // to construct a string from inside to outside. For example, if a type
2004   // X is a pointer to a function returning int, the order you create a
2005   // string becomes something like this:
2006   //
2007   //   (1) X is a pointer: *X
2008   //   (2) (1) is a function returning int: int (*X)()
2009   //
2010   // So you cannot construct a result just by appending strings to a result.
2011   //
2012   // To deal with this, we split the function into two. outputPre() writes
2013   // the "first half" of type declaration, and outputPost() writes the
2014   // "second half". For example, outputPre() writes a return type for a
2015   // function and outputPost() writes an parameter list.
2016   Type::outputPre(OS, *S->SymbolType);
2017   outputName(OS, S->SymbolName);
2018   Type::outputPost(OS, *S->SymbolType);
2019 }
2020 
microsoftDemangle(const char * MangledName,char * Buf,size_t * N,int * Status)2021 char *llvm::microsoftDemangle(const char *MangledName, char *Buf, size_t *N,
2022                               int *Status) {
2023   Demangler D;
2024   StringView Name{MangledName};
2025   Symbol *S = D.parse(Name);
2026 
2027   if (D.Error)
2028     *Status = llvm::demangle_invalid_mangled_name;
2029   else
2030     *Status = llvm::demangle_success;
2031 
2032   OutputStream OS = OutputStream::create(Buf, N, 1024);
2033   D.output(S, OS);
2034   OS << '\0';
2035   return OS.getBuffer();
2036 }
2037