1 //===- MicrosoftDemangle.cpp ----------------------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is dual licensed under the MIT and the University of Illinois Open
6 // Source Licenses. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines a demangler for MSVC-style mangled symbols.
11 //
12 // This file has no dependencies on the rest of LLVM so that it can be
13 // easily reused in other programs such as libcxxabi.
14 //
15 //===----------------------------------------------------------------------===//
16
17 #include "llvm/Demangle/Demangle.h"
18
19 #include "Compiler.h"
20 #include "StringView.h"
21 #include "Utility.h"
22
23 #include <cctype>
24 #include <tuple>
25
26 // This memory allocator is extremely fast, but it doesn't call dtors
27 // for allocated objects. That means you can't use STL containers
28 // (such as std::vector) with this allocator. But it pays off --
29 // the demangler is 3x faster with this allocator compared to one with
30 // STL containers.
31 namespace {
32 constexpr size_t AllocUnit = 4096;
33
34 class ArenaAllocator {
35 struct AllocatorNode {
36 uint8_t *Buf = nullptr;
37 size_t Used = 0;
38 size_t Capacity = 0;
39 AllocatorNode *Next = nullptr;
40 };
41
addNode(size_t Capacity)42 void addNode(size_t Capacity) {
43 AllocatorNode *NewHead = new AllocatorNode;
44 NewHead->Buf = new uint8_t[Capacity];
45 NewHead->Next = Head;
46 NewHead->Capacity = Capacity;
47 Head = NewHead;
48 NewHead->Used = 0;
49 }
50
51 public:
ArenaAllocator()52 ArenaAllocator() { addNode(AllocUnit); }
53
~ArenaAllocator()54 ~ArenaAllocator() {
55 while (Head) {
56 assert(Head->Buf);
57 delete[] Head->Buf;
58 AllocatorNode *Next = Head->Next;
59 delete Head;
60 Head = Next;
61 }
62 }
63
allocUnalignedBuffer(size_t Length)64 char *allocUnalignedBuffer(size_t Length) {
65 uint8_t *Buf = Head->Buf + Head->Used;
66
67 Head->Used += Length;
68 if (Head->Used > Head->Capacity) {
69 // It's possible we need a buffer which is larger than our default unit
70 // size, so we need to be careful to add a node with capacity that is at
71 // least as large as what we need.
72 addNode(std::max(AllocUnit, Length));
73 Head->Used = Length;
74 Buf = Head->Buf;
75 }
76
77 return reinterpret_cast<char *>(Buf);
78 }
79
alloc(Args &&...ConstructorArgs)80 template <typename T, typename... Args> T *alloc(Args &&... ConstructorArgs) {
81
82 size_t Size = sizeof(T);
83 assert(Head && Head->Buf);
84
85 size_t P = (size_t)Head->Buf + Head->Used;
86 uintptr_t AlignedP =
87 (((size_t)P + alignof(T) - 1) & ~(size_t)(alignof(T) - 1));
88 uint8_t *PP = (uint8_t *)AlignedP;
89 size_t Adjustment = AlignedP - P;
90
91 Head->Used += Size + Adjustment;
92 if (Head->Used < Head->Capacity)
93 return new (PP) T(std::forward<Args>(ConstructorArgs)...);
94
95 addNode(AllocUnit);
96 Head->Used = Size;
97 return new (Head->Buf) T(std::forward<Args>(ConstructorArgs)...);
98 }
99
100 private:
101 AllocatorNode *Head = nullptr;
102 };
103 } // namespace
104
startsWithDigit(StringView S)105 static bool startsWithDigit(StringView S) {
106 return !S.empty() && std::isdigit(S.front());
107 }
108
109 // Writes a space if the last token does not end with a punctuation.
outputSpaceIfNecessary(OutputStream & OS)110 static void outputSpaceIfNecessary(OutputStream &OS) {
111 if (OS.empty())
112 return;
113
114 char C = OS.back();
115 if (isalnum(C) || C == '>')
116 OS << " ";
117 }
118
119 // Storage classes
120 enum Qualifiers : uint8_t {
121 Q_None = 0,
122 Q_Const = 1 << 0,
123 Q_Volatile = 1 << 1,
124 Q_Far = 1 << 2,
125 Q_Huge = 1 << 3,
126 Q_Unaligned = 1 << 4,
127 Q_Restrict = 1 << 5,
128 Q_Pointer64 = 1 << 6
129 };
130
131 enum class StorageClass : uint8_t {
132 None,
133 PrivateStatic,
134 ProtectedStatic,
135 PublicStatic,
136 Global,
137 FunctionLocalStatic
138 };
139
140 enum class QualifierMangleMode { Drop, Mangle, Result };
141
142 enum class PointerAffinity { Pointer, Reference, RValueReference };
143
144 // Calling conventions
145 enum class CallingConv : uint8_t {
146 None,
147 Cdecl,
148 Pascal,
149 Thiscall,
150 Stdcall,
151 Fastcall,
152 Clrcall,
153 Eabi,
154 Vectorcall,
155 Regcall,
156 };
157
158 enum class ReferenceKind : uint8_t { None, LValueRef, RValueRef };
159
160 // Types
161 enum class PrimTy : uint8_t {
162 Unknown,
163 None,
164 Function,
165 Ptr,
166 MemberPtr,
167 Array,
168
169 Struct,
170 Union,
171 Class,
172 Enum,
173
174 Void,
175 Bool,
176 Char,
177 Schar,
178 Uchar,
179 Char16,
180 Char32,
181 Short,
182 Ushort,
183 Int,
184 Uint,
185 Long,
186 Ulong,
187 Int64,
188 Uint64,
189 Wchar,
190 Float,
191 Double,
192 Ldouble,
193 Nullptr
194 };
195
196 // Function classes
197 enum FuncClass : uint8_t {
198 Public = 1 << 0,
199 Protected = 1 << 1,
200 Private = 1 << 2,
201 Global = 1 << 3,
202 Static = 1 << 4,
203 Virtual = 1 << 5,
204 Far = 1 << 6,
205 };
206
207 namespace {
208
209 struct Type;
210 struct Name;
211
212 struct FunctionParams {
213 bool IsVariadic = false;
214
215 Type *Current = nullptr;
216
217 FunctionParams *Next = nullptr;
218 };
219
220 struct TemplateParams {
221 bool IsTemplateTemplate = false;
222 bool IsAliasTemplate = false;
223
224 // Type can be null if this is a template template parameter. In that case
225 // only Name will be valid.
226 Type *ParamType = nullptr;
227
228 // Name can be valid if this is a template template parameter (see above) or
229 // this is a function declaration (e.g. foo<&SomeFunc>). In the latter case
230 // Name contains the name of the function and Type contains the signature.
231 Name *ParamName = nullptr;
232
233 TemplateParams *Next = nullptr;
234 };
235
236 // The type class. Mangled symbols are first parsed and converted to
237 // this type and then converted to string.
238 struct Type {
~Type__anon4268c9490211::Type239 virtual ~Type() {}
240
241 virtual Type *clone(ArenaAllocator &Arena) const;
242
243 // Write the "first half" of a given type. This is a static functions to
244 // give the code a chance to do processing that is common to a subset of
245 // subclasses
246 static void outputPre(OutputStream &OS, Type &Ty);
247
248 // Write the "second half" of a given type. This is a static functions to
249 // give the code a chance to do processing that is common to a subset of
250 // subclasses
251 static void outputPost(OutputStream &OS, Type &Ty);
252
253 virtual void outputPre(OutputStream &OS);
254 virtual void outputPost(OutputStream &OS);
255
256 // Primitive type such as Int.
257 PrimTy Prim = PrimTy::Unknown;
258
259 Qualifiers Quals = Q_None;
260 StorageClass Storage = StorageClass::None; // storage class
261 };
262
263 // Represents an identifier which may be a template.
264 struct Name {
265 // Name read from an MangledName string.
266 StringView Str;
267
268 // Overloaded operators are represented as special BackReferences in mangled
269 // symbols. If this is an operator name, "op" has an operator name (e.g.
270 // ">>"). Otherwise, empty.
271 StringView Operator;
272
273 // Template parameters. Null if not a template.
274 TemplateParams *TParams = nullptr;
275
276 // Nested BackReferences (e.g. "A::B::C") are represented as a linked list.
277 Name *Next = nullptr;
278 };
279
280 struct PointerType : public Type {
281 Type *clone(ArenaAllocator &Arena) const override;
282 void outputPre(OutputStream &OS) override;
283 void outputPost(OutputStream &OS) override;
284
285 PointerAffinity Affinity;
286
287 // Represents a type X in "a pointer to X", "a reference to X",
288 // "an array of X", or "a function returning X".
289 Type *Pointee = nullptr;
290 };
291
292 struct MemberPointerType : public Type {
293 Type *clone(ArenaAllocator &Arena) const override;
294 void outputPre(OutputStream &OS) override;
295 void outputPost(OutputStream &OS) override;
296
297 Name *MemberName = nullptr;
298
299 // Represents a type X in "a pointer to X", "a reference to X",
300 // "an array of X", or "a function returning X".
301 Type *Pointee = nullptr;
302 };
303
304 struct FunctionType : public Type {
305 Type *clone(ArenaAllocator &Arena) const override;
306 void outputPre(OutputStream &OS) override;
307 void outputPost(OutputStream &OS) override;
308
309 // True if this FunctionType instance is the Pointee of a PointerType or
310 // MemberPointerType.
311 bool IsFunctionPointer = false;
312
313 Type *ReturnType = nullptr;
314 // If this is a reference, the type of reference.
315 ReferenceKind RefKind;
316
317 CallingConv CallConvention;
318 FuncClass FunctionClass;
319
320 FunctionParams Params;
321 };
322
323 struct UdtType : public Type {
324 Type *clone(ArenaAllocator &Arena) const override;
325 void outputPre(OutputStream &OS) override;
326
327 Name *UdtName = nullptr;
328 };
329
330 struct ArrayType : public Type {
331 Type *clone(ArenaAllocator &Arena) const override;
332 void outputPre(OutputStream &OS) override;
333 void outputPost(OutputStream &OS) override;
334
335 // Either NextDimension or ElementType will be valid.
336 ArrayType *NextDimension = nullptr;
337 uint32_t ArrayDimension = 0;
338
339 Type *ElementType = nullptr;
340 };
341
342 } // namespace
343
isMemberPointer(StringView MangledName)344 static bool isMemberPointer(StringView MangledName) {
345 switch (MangledName.popFront()) {
346 case '$':
347 // This is probably an rvalue reference (e.g. $$Q), and you cannot have an
348 // rvalue reference to a member.
349 return false;
350 case 'A':
351 // 'A' indicates a reference, and you cannot have a reference to a member
352 // function or member.
353 return false;
354 case 'P':
355 case 'Q':
356 case 'R':
357 case 'S':
358 // These 4 values indicate some kind of pointer, but we still don't know
359 // what.
360 break;
361 default:
362 assert(false && "Ty is not a pointer type!");
363 }
364
365 // If it starts with a number, then 6 indicates a non-member function
366 // pointer, and 8 indicates a member function pointer.
367 if (startsWithDigit(MangledName)) {
368 assert(MangledName[0] == '6' || MangledName[0] == '8');
369 return (MangledName[0] == '8');
370 }
371
372 // Remove ext qualifiers since those can appear on either type and are
373 // therefore not indicative.
374 MangledName.consumeFront('E'); // 64-bit
375 MangledName.consumeFront('I'); // restrict
376 MangledName.consumeFront('F'); // unaligned
377
378 assert(!MangledName.empty());
379
380 // The next value should be either ABCD (non-member) or QRST (member).
381 switch (MangledName.front()) {
382 case 'A':
383 case 'B':
384 case 'C':
385 case 'D':
386 return false;
387 case 'Q':
388 case 'R':
389 case 'S':
390 case 'T':
391 return true;
392 default:
393 assert(false);
394 }
395 return false;
396 }
397
outputCallingConvention(OutputStream & OS,CallingConv CC)398 static void outputCallingConvention(OutputStream &OS, CallingConv CC) {
399 outputSpaceIfNecessary(OS);
400
401 switch (CC) {
402 case CallingConv::Cdecl:
403 OS << "__cdecl";
404 break;
405 case CallingConv::Fastcall:
406 OS << "__fastcall";
407 break;
408 case CallingConv::Pascal:
409 OS << "__pascal";
410 break;
411 case CallingConv::Regcall:
412 OS << "__regcall";
413 break;
414 case CallingConv::Stdcall:
415 OS << "__stdcall";
416 break;
417 case CallingConv::Thiscall:
418 OS << "__thiscall";
419 break;
420 case CallingConv::Eabi:
421 OS << "__eabi";
422 break;
423 case CallingConv::Vectorcall:
424 OS << "__vectorcall";
425 break;
426 case CallingConv::Clrcall:
427 OS << "__clrcall";
428 break;
429 default:
430 break;
431 }
432 }
433
startsWithLocalScopePattern(StringView S)434 static bool startsWithLocalScopePattern(StringView S) {
435 if (!S.consumeFront('?'))
436 return false;
437 if (S.size() < 2)
438 return false;
439
440 size_t End = S.find('?');
441 if (End == StringView::npos)
442 return false;
443 StringView Candidate = S.substr(0, End);
444 if (Candidate.empty())
445 return false;
446
447 // \?[0-9]\?
448 // ?@? is the discriminator 0.
449 if (Candidate.size() == 1)
450 return Candidate[0] == '@' || (Candidate[0] >= '0' && Candidate[0] <= '9');
451
452 // If it's not 0-9, then it's an encoded number terminated with an @
453 if (Candidate.back() != '@')
454 return false;
455 Candidate = Candidate.dropBack();
456
457 // An encoded number starts with B-P and all subsequent digits are in A-P.
458 // Note that the reason the first digit cannot be A is two fold. First, it
459 // would create an ambiguity with ?A which delimits the beginning of an
460 // anonymous namespace. Second, A represents 0, and you don't start a multi
461 // digit number with a leading 0. Presumably the anonymous namespace
462 // ambiguity is also why single digit encoded numbers use 0-9 rather than A-J.
463 if (Candidate[0] < 'B' || Candidate[0] > 'P')
464 return false;
465 Candidate = Candidate.dropFront();
466 while (!Candidate.empty()) {
467 if (Candidate[0] < 'A' || Candidate[0] > 'P')
468 return false;
469 Candidate = Candidate.dropFront();
470 }
471
472 return true;
473 }
474
475 static void outputName(OutputStream &OS, const Name *TheName);
476
477 // Write a function or template parameter list.
outputParameterList(OutputStream & OS,const FunctionParams & Params)478 static void outputParameterList(OutputStream &OS,
479 const FunctionParams &Params) {
480 if (!Params.Current) {
481 OS << "void";
482 return;
483 }
484
485 const FunctionParams *Head = &Params;
486 while (Head) {
487 Type::outputPre(OS, *Head->Current);
488 Type::outputPost(OS, *Head->Current);
489
490 Head = Head->Next;
491
492 if (Head)
493 OS << ", ";
494 }
495 }
496
outputParameterList(OutputStream & OS,const TemplateParams & Params)497 static void outputParameterList(OutputStream &OS,
498 const TemplateParams &Params) {
499 if (!Params.ParamType && !Params.ParamName) {
500 OS << "<>";
501 return;
502 }
503
504 OS << "<";
505 const TemplateParams *Head = &Params;
506 while (Head) {
507 // Type can be null if this is a template template parameter,
508 // and Name can be null if this is a simple type.
509
510 if (Head->ParamType && Head->ParamName) {
511 // Function pointer.
512 OS << "&";
513 Type::outputPre(OS, *Head->ParamType);
514 outputName(OS, Head->ParamName);
515 Type::outputPost(OS, *Head->ParamType);
516 } else if (Head->ParamType) {
517 // simple type.
518 Type::outputPre(OS, *Head->ParamType);
519 Type::outputPost(OS, *Head->ParamType);
520 } else {
521 // Template alias.
522 outputName(OS, Head->ParamName);
523 }
524
525 Head = Head->Next;
526
527 if (Head)
528 OS << ", ";
529 }
530 OS << ">";
531 }
532
outputName(OutputStream & OS,const Name * TheName)533 static void outputName(OutputStream &OS, const Name *TheName) {
534 if (!TheName)
535 return;
536
537 outputSpaceIfNecessary(OS);
538
539 const Name *Previous = nullptr;
540 // Print out namespaces or outer class BackReferences.
541 for (; TheName->Next; TheName = TheName->Next) {
542 Previous = TheName;
543 OS << TheName->Str;
544 if (TheName->TParams)
545 outputParameterList(OS, *TheName->TParams);
546 OS << "::";
547 }
548
549 // Print out a regular name.
550 if (TheName->Operator.empty()) {
551 OS << TheName->Str;
552 if (TheName->TParams)
553 outputParameterList(OS, *TheName->TParams);
554 return;
555 }
556
557 // Print out ctor or dtor.
558 if (TheName->Operator == "dtor")
559 OS << "~";
560
561 if (TheName->Operator == "ctor" || TheName->Operator == "dtor") {
562 OS << Previous->Str;
563 if (Previous->TParams)
564 outputParameterList(OS, *Previous->TParams);
565 return;
566 }
567
568 // Print out an overloaded operator.
569 if (!TheName->Str.empty())
570 OS << TheName->Str << "::";
571 OS << "operator" << TheName->Operator;
572 }
573
574 namespace {
575
clone(ArenaAllocator & Arena) const576 Type *Type::clone(ArenaAllocator &Arena) const {
577 return Arena.alloc<Type>(*this);
578 }
579
580 // Write the "first half" of a given type.
outputPre(OutputStream & OS,Type & Ty)581 void Type::outputPre(OutputStream &OS, Type &Ty) {
582 // Function types require custom handling of const and static so we
583 // handle them separately. All other types use the same decoration
584 // for these modifiers, so handle them here in common code.
585 if (Ty.Prim == PrimTy::Function) {
586 Ty.outputPre(OS);
587 return;
588 }
589
590 switch (Ty.Storage) {
591 case StorageClass::PrivateStatic:
592 case StorageClass::PublicStatic:
593 case StorageClass::ProtectedStatic:
594 OS << "static ";
595 default:
596 break;
597 }
598 Ty.outputPre(OS);
599
600 if (Ty.Quals & Q_Const) {
601 outputSpaceIfNecessary(OS);
602 OS << "const";
603 }
604
605 if (Ty.Quals & Q_Volatile) {
606 outputSpaceIfNecessary(OS);
607 OS << "volatile";
608 }
609
610 if (Ty.Quals & Q_Restrict) {
611 outputSpaceIfNecessary(OS);
612 OS << "__restrict";
613 }
614 }
615
616 // Write the "second half" of a given type.
outputPost(OutputStream & OS,Type & Ty)617 void Type::outputPost(OutputStream &OS, Type &Ty) { Ty.outputPost(OS); }
618
outputPre(OutputStream & OS)619 void Type::outputPre(OutputStream &OS) {
620 switch (Prim) {
621 case PrimTy::Void:
622 OS << "void";
623 break;
624 case PrimTy::Bool:
625 OS << "bool";
626 break;
627 case PrimTy::Char:
628 OS << "char";
629 break;
630 case PrimTy::Schar:
631 OS << "signed char";
632 break;
633 case PrimTy::Uchar:
634 OS << "unsigned char";
635 break;
636 case PrimTy::Char16:
637 OS << "char16_t";
638 break;
639 case PrimTy::Char32:
640 OS << "char32_t";
641 break;
642 case PrimTy::Short:
643 OS << "short";
644 break;
645 case PrimTy::Ushort:
646 OS << "unsigned short";
647 break;
648 case PrimTy::Int:
649 OS << "int";
650 break;
651 case PrimTy::Uint:
652 OS << "unsigned int";
653 break;
654 case PrimTy::Long:
655 OS << "long";
656 break;
657 case PrimTy::Ulong:
658 OS << "unsigned long";
659 break;
660 case PrimTy::Int64:
661 OS << "__int64";
662 break;
663 case PrimTy::Uint64:
664 OS << "unsigned __int64";
665 break;
666 case PrimTy::Wchar:
667 OS << "wchar_t";
668 break;
669 case PrimTy::Float:
670 OS << "float";
671 break;
672 case PrimTy::Double:
673 OS << "double";
674 break;
675 case PrimTy::Ldouble:
676 OS << "long double";
677 break;
678 case PrimTy::Nullptr:
679 OS << "std::nullptr_t";
680 break;
681 default:
682 assert(false && "Invalid primitive type!");
683 }
684 }
outputPost(OutputStream & OS)685 void Type::outputPost(OutputStream &OS) {}
686
clone(ArenaAllocator & Arena) const687 Type *PointerType::clone(ArenaAllocator &Arena) const {
688 return Arena.alloc<PointerType>(*this);
689 }
690
outputPointerIndicator(OutputStream & OS,PointerAffinity Affinity,const Name * MemberName,const Type * Pointee)691 static void outputPointerIndicator(OutputStream &OS, PointerAffinity Affinity,
692 const Name *MemberName,
693 const Type *Pointee) {
694 // "[]" and "()" (for function parameters) take precedence over "*",
695 // so "int *x(int)" means "x is a function returning int *". We need
696 // parentheses to supercede the default precedence. (e.g. we want to
697 // emit something like "int (*x)(int)".)
698 if (Pointee->Prim == PrimTy::Function || Pointee->Prim == PrimTy::Array) {
699 OS << "(";
700 if (Pointee->Prim == PrimTy::Function) {
701 const FunctionType *FTy = static_cast<const FunctionType *>(Pointee);
702 assert(FTy->IsFunctionPointer);
703 outputCallingConvention(OS, FTy->CallConvention);
704 OS << " ";
705 }
706 }
707
708 if (MemberName) {
709 outputName(OS, MemberName);
710 OS << "::";
711 }
712
713 if (Affinity == PointerAffinity::Pointer)
714 OS << "*";
715 else if (Affinity == PointerAffinity::Reference)
716 OS << "&";
717 else
718 OS << "&&";
719 }
720
outputPre(OutputStream & OS)721 void PointerType::outputPre(OutputStream &OS) {
722 Type::outputPre(OS, *Pointee);
723
724 outputSpaceIfNecessary(OS);
725
726 if (Quals & Q_Unaligned)
727 OS << "__unaligned ";
728
729 outputPointerIndicator(OS, Affinity, nullptr, Pointee);
730
731 // FIXME: We should output this, but it requires updating lots of tests.
732 // if (Ty.Quals & Q_Pointer64)
733 // OS << " __ptr64";
734 }
735
outputPost(OutputStream & OS)736 void PointerType::outputPost(OutputStream &OS) {
737 if (Pointee->Prim == PrimTy::Function || Pointee->Prim == PrimTy::Array)
738 OS << ")";
739
740 Type::outputPost(OS, *Pointee);
741 }
742
clone(ArenaAllocator & Arena) const743 Type *MemberPointerType::clone(ArenaAllocator &Arena) const {
744 return Arena.alloc<MemberPointerType>(*this);
745 }
746
outputPre(OutputStream & OS)747 void MemberPointerType::outputPre(OutputStream &OS) {
748 Type::outputPre(OS, *Pointee);
749
750 outputSpaceIfNecessary(OS);
751
752 outputPointerIndicator(OS, PointerAffinity::Pointer, MemberName, Pointee);
753
754 // FIXME: We should output this, but it requires updating lots of tests.
755 // if (Ty.Quals & Q_Pointer64)
756 // OS << " __ptr64";
757 if (Quals & Q_Restrict)
758 OS << " __restrict";
759 }
760
outputPost(OutputStream & OS)761 void MemberPointerType::outputPost(OutputStream &OS) {
762 if (Pointee->Prim == PrimTy::Function || Pointee->Prim == PrimTy::Array)
763 OS << ")";
764
765 Type::outputPost(OS, *Pointee);
766 }
767
clone(ArenaAllocator & Arena) const768 Type *FunctionType::clone(ArenaAllocator &Arena) const {
769 return Arena.alloc<FunctionType>(*this);
770 }
771
outputPre(OutputStream & OS)772 void FunctionType::outputPre(OutputStream &OS) {
773 if (!(FunctionClass & Global)) {
774 if (FunctionClass & Static)
775 OS << "static ";
776 }
777
778 if (ReturnType) {
779 Type::outputPre(OS, *ReturnType);
780 OS << " ";
781 }
782
783 // Function pointers print the calling convention as void (__cdecl *)(params)
784 // rather than void __cdecl (*)(params). So we need to let the PointerType
785 // class handle this.
786 if (!IsFunctionPointer)
787 outputCallingConvention(OS, CallConvention);
788 }
789
outputPost(OutputStream & OS)790 void FunctionType::outputPost(OutputStream &OS) {
791 OS << "(";
792 outputParameterList(OS, Params);
793 OS << ")";
794 if (Quals & Q_Const)
795 OS << " const";
796 if (Quals & Q_Volatile)
797 OS << " volatile";
798 if (Quals & Q_Restrict)
799 OS << " __restrict";
800 if (Quals & Q_Unaligned)
801 OS << " __unaligned";
802
803 if (RefKind == ReferenceKind::LValueRef)
804 OS << " &";
805 else if (RefKind == ReferenceKind::RValueRef)
806 OS << " &&";
807
808 if (ReturnType)
809 Type::outputPost(OS, *ReturnType);
810 return;
811 }
812
clone(ArenaAllocator & Arena) const813 Type *UdtType::clone(ArenaAllocator &Arena) const {
814 return Arena.alloc<UdtType>(*this);
815 }
816
outputPre(OutputStream & OS)817 void UdtType::outputPre(OutputStream &OS) {
818 switch (Prim) {
819 case PrimTy::Class:
820 OS << "class ";
821 break;
822 case PrimTy::Struct:
823 OS << "struct ";
824 break;
825 case PrimTy::Union:
826 OS << "union ";
827 break;
828 case PrimTy::Enum:
829 OS << "enum ";
830 break;
831 default:
832 assert(false && "Not a udt type!");
833 }
834
835 outputName(OS, UdtName);
836 }
837
clone(ArenaAllocator & Arena) const838 Type *ArrayType::clone(ArenaAllocator &Arena) const {
839 return Arena.alloc<ArrayType>(*this);
840 }
841
outputPre(OutputStream & OS)842 void ArrayType::outputPre(OutputStream &OS) {
843 Type::outputPre(OS, *ElementType);
844 }
845
outputPost(OutputStream & OS)846 void ArrayType::outputPost(OutputStream &OS) {
847 if (ArrayDimension > 0)
848 OS << "[" << ArrayDimension << "]";
849 if (NextDimension)
850 Type::outputPost(OS, *NextDimension);
851 else if (ElementType)
852 Type::outputPost(OS, *ElementType);
853 }
854
855 struct Symbol {
856 Name *SymbolName = nullptr;
857 Type *SymbolType = nullptr;
858 };
859
860 } // namespace
861
862 namespace {
863
864 // Demangler class takes the main role in demangling symbols.
865 // It has a set of functions to parse mangled symbols into Type instances.
866 // It also has a set of functions to cnovert Type instances to strings.
867 class Demangler {
868 public:
869 Demangler() = default;
870
871 // You are supposed to call parse() first and then check if error is true. If
872 // it is false, call output() to write the formatted name to the given stream.
873 Symbol *parse(StringView &MangledName);
874 void output(const Symbol *S, OutputStream &OS);
875
876 // True if an error occurred.
877 bool Error = false;
878
879 private:
880 Type *demangleVariableEncoding(StringView &MangledName);
881 Type *demangleFunctionEncoding(StringView &MangledName);
882
883 Qualifiers demanglePointerExtQualifiers(StringView &MangledName);
884
885 // Parser functions. This is a recursive-descent parser.
886 Type *demangleType(StringView &MangledName, QualifierMangleMode QMM);
887 Type *demangleBasicType(StringView &MangledName);
888 UdtType *demangleClassType(StringView &MangledName);
889 PointerType *demanglePointerType(StringView &MangledName);
890 MemberPointerType *demangleMemberPointerType(StringView &MangledName);
891 FunctionType *demangleFunctionType(StringView &MangledName, bool HasThisQuals,
892 bool IsFunctionPointer);
893
894 ArrayType *demangleArrayType(StringView &MangledName);
895
896 TemplateParams *demangleTemplateParameterList(StringView &MangledName);
897 FunctionParams demangleFunctionParameterList(StringView &MangledName);
898
899 int demangleNumber(StringView &MangledName);
900
901 void memorizeString(StringView s);
902
903 /// Allocate a copy of \p Borrowed into memory that we own.
904 StringView copyString(StringView Borrowed);
905
906 Name *demangleFullyQualifiedTypeName(StringView &MangledName);
907 Name *demangleFullyQualifiedSymbolName(StringView &MangledName);
908
909 Name *demangleUnqualifiedTypeName(StringView &MangledName);
910 Name *demangleUnqualifiedSymbolName(StringView &MangledName);
911
912 Name *demangleNameScopeChain(StringView &MangledName, Name *UnqualifiedName);
913 Name *demangleNameScopePiece(StringView &MangledName);
914
915 Name *demangleBackRefName(StringView &MangledName);
916 Name *demangleClassTemplateName(StringView &MangledName);
917 Name *demangleOperatorName(StringView &MangledName);
918 Name *demangleSimpleName(StringView &MangledName, bool Memorize);
919 Name *demangleAnonymousNamespaceName(StringView &MangledName);
920 Name *demangleLocallyScopedNamePiece(StringView &MangledName);
921
922 StringView demangleSimpleString(StringView &MangledName, bool Memorize);
923
924 FuncClass demangleFunctionClass(StringView &MangledName);
925 CallingConv demangleCallingConvention(StringView &MangledName);
926 StorageClass demangleVariableStorageClass(StringView &MangledName);
927 ReferenceKind demangleReferenceKind(StringView &MangledName);
928 void demangleThrowSpecification(StringView &MangledName);
929
930 std::pair<Qualifiers, bool> demangleQualifiers(StringView &MangledName);
931
932 // Memory allocator.
933 ArenaAllocator Arena;
934
935 // A single type uses one global back-ref table for all function params.
936 // This means back-refs can even go "into" other types. Examples:
937 //
938 // // Second int* is a back-ref to first.
939 // void foo(int *, int*);
940 //
941 // // Second int* is not a back-ref to first (first is not a function param).
942 // int* foo(int*);
943 //
944 // // Second int* is a back-ref to first (ALL function types share the same
945 // // back-ref map.
946 // using F = void(*)(int*);
947 // F G(int *);
948 Type *FunctionParamBackRefs[10];
949 size_t FunctionParamBackRefCount = 0;
950
951 // The first 10 BackReferences in a mangled name can be back-referenced by
952 // special name @[0-9]. This is a storage for the first 10 BackReferences.
953 StringView BackReferences[10];
954 size_t BackRefCount = 0;
955 };
956 } // namespace
957
copyString(StringView Borrowed)958 StringView Demangler::copyString(StringView Borrowed) {
959 char *Stable = Arena.allocUnalignedBuffer(Borrowed.size() + 1);
960 std::strcpy(Stable, Borrowed.begin());
961
962 return {Stable, Borrowed.size()};
963 }
964
965 // Parser entry point.
parse(StringView & MangledName)966 Symbol *Demangler::parse(StringView &MangledName) {
967 Symbol *S = Arena.alloc<Symbol>();
968
969 // MSVC-style mangled symbols must start with '?'.
970 if (!MangledName.consumeFront("?")) {
971 S->SymbolName = Arena.alloc<Name>();
972 S->SymbolName->Str = MangledName;
973 S->SymbolType = Arena.alloc<Type>();
974 S->SymbolType->Prim = PrimTy::Unknown;
975 return S;
976 }
977
978 // What follows is a main symbol name. This may include
979 // namespaces or class BackReferences.
980 S->SymbolName = demangleFullyQualifiedSymbolName(MangledName);
981
982 // Read a variable.
983 S->SymbolType = startsWithDigit(MangledName)
984 ? demangleVariableEncoding(MangledName)
985 : demangleFunctionEncoding(MangledName);
986
987 return S;
988 }
989
990 // <type-encoding> ::= <storage-class> <variable-type>
991 // <storage-class> ::= 0 # private static member
992 // ::= 1 # protected static member
993 // ::= 2 # public static member
994 // ::= 3 # global
995 // ::= 4 # static local
996
demangleVariableEncoding(StringView & MangledName)997 Type *Demangler::demangleVariableEncoding(StringView &MangledName) {
998 StorageClass SC = demangleVariableStorageClass(MangledName);
999
1000 Type *Ty = demangleType(MangledName, QualifierMangleMode::Drop);
1001
1002 Ty->Storage = SC;
1003
1004 // <variable-type> ::= <type> <cvr-qualifiers>
1005 // ::= <type> <pointee-cvr-qualifiers> # pointers, references
1006 switch (Ty->Prim) {
1007 case PrimTy::Ptr:
1008 case PrimTy::MemberPtr: {
1009 Qualifiers ExtraChildQuals = Q_None;
1010 Ty->Quals =
1011 Qualifiers(Ty->Quals | demanglePointerExtQualifiers(MangledName));
1012
1013 bool IsMember = false;
1014 std::tie(ExtraChildQuals, IsMember) = demangleQualifiers(MangledName);
1015
1016 if (Ty->Prim == PrimTy::MemberPtr) {
1017 assert(IsMember);
1018 Name *BackRefName = demangleFullyQualifiedTypeName(MangledName);
1019 (void)BackRefName;
1020 MemberPointerType *MPTy = static_cast<MemberPointerType *>(Ty);
1021 MPTy->Pointee->Quals = Qualifiers(MPTy->Pointee->Quals | ExtraChildQuals);
1022 } else {
1023 PointerType *PTy = static_cast<PointerType *>(Ty);
1024 PTy->Pointee->Quals = Qualifiers(PTy->Pointee->Quals | ExtraChildQuals);
1025 }
1026
1027 break;
1028 }
1029 default:
1030 Ty->Quals = demangleQualifiers(MangledName).first;
1031 break;
1032 }
1033
1034 return Ty;
1035 }
1036
1037 // Sometimes numbers are encoded in mangled symbols. For example,
1038 // "int (*x)[20]" is a valid C type (x is a pointer to an array of
1039 // length 20), so we need some way to embed numbers as part of symbols.
1040 // This function parses it.
1041 //
1042 // <number> ::= [?] <non-negative integer>
1043 //
1044 // <non-negative integer> ::= <decimal digit> # when 1 <= Number <= 10
1045 // ::= <hex digit>+ @ # when Numbrer == 0 or >= 10
1046 //
1047 // <hex-digit> ::= [A-P] # A = 0, B = 1, ...
demangleNumber(StringView & MangledName)1048 int Demangler::demangleNumber(StringView &MangledName) {
1049 bool neg = MangledName.consumeFront("?");
1050
1051 if (startsWithDigit(MangledName)) {
1052 int32_t Ret = MangledName[0] - '0' + 1;
1053 MangledName = MangledName.dropFront(1);
1054 return neg ? -Ret : Ret;
1055 }
1056
1057 int Ret = 0;
1058 for (size_t i = 0; i < MangledName.size(); ++i) {
1059 char C = MangledName[i];
1060 if (C == '@') {
1061 MangledName = MangledName.dropFront(i + 1);
1062 return neg ? -Ret : Ret;
1063 }
1064 if ('A' <= C && C <= 'P') {
1065 Ret = (Ret << 4) + (C - 'A');
1066 continue;
1067 }
1068 break;
1069 }
1070
1071 Error = true;
1072 return 0;
1073 }
1074
1075 // First 10 strings can be referenced by special BackReferences ?0, ?1, ..., ?9.
1076 // Memorize it.
memorizeString(StringView S)1077 void Demangler::memorizeString(StringView S) {
1078 if (BackRefCount >= sizeof(BackReferences) / sizeof(*BackReferences))
1079 return;
1080 for (size_t i = 0; i < BackRefCount; ++i)
1081 if (S == BackReferences[i])
1082 return;
1083 BackReferences[BackRefCount++] = S;
1084 }
1085
demangleBackRefName(StringView & MangledName)1086 Name *Demangler::demangleBackRefName(StringView &MangledName) {
1087 assert(startsWithDigit(MangledName));
1088
1089 size_t I = MangledName[0] - '0';
1090 if (I >= BackRefCount) {
1091 Error = true;
1092 return nullptr;
1093 }
1094
1095 MangledName = MangledName.dropFront();
1096 Name *Node = Arena.alloc<Name>();
1097 Node->Str = BackReferences[I];
1098 return Node;
1099 }
1100
demangleClassTemplateName(StringView & MangledName)1101 Name *Demangler::demangleClassTemplateName(StringView &MangledName) {
1102 assert(MangledName.startsWith("?$"));
1103 MangledName.consumeFront("?$");
1104
1105 Name *Node = demangleSimpleName(MangledName, false);
1106 Node->TParams = demangleTemplateParameterList(MangledName);
1107
1108 // Render this class template name into a string buffer so that we can
1109 // memorize it for the purpose of back-referencing.
1110 OutputStream OS = OutputStream::create(nullptr, nullptr, 1024);
1111 outputName(OS, Node);
1112 OS << '\0';
1113 char *Name = OS.getBuffer();
1114
1115 StringView Owned = copyString(Name);
1116 memorizeString(Owned);
1117 std::free(Name);
1118
1119 return Node;
1120 }
1121
demangleOperatorName(StringView & MangledName)1122 Name *Demangler::demangleOperatorName(StringView &MangledName) {
1123 assert(MangledName.startsWith('?'));
1124 MangledName.consumeFront('?');
1125
1126 auto NameString = [this, &MangledName]() -> StringView {
1127 switch (MangledName.popFront()) {
1128 case '0':
1129 return "ctor";
1130 case '1':
1131 return "dtor";
1132 case '2':
1133 return " new";
1134 case '3':
1135 return " delete";
1136 case '4':
1137 return "=";
1138 case '5':
1139 return ">>";
1140 case '6':
1141 return "<<";
1142 case '7':
1143 return "!";
1144 case '8':
1145 return "==";
1146 case '9':
1147 return "!=";
1148 case 'A':
1149 return "[]";
1150 case 'C':
1151 return "->";
1152 case 'D':
1153 return "*";
1154 case 'E':
1155 return "++";
1156 case 'F':
1157 return "--";
1158 case 'G':
1159 return "-";
1160 case 'H':
1161 return "+";
1162 case 'I':
1163 return "&";
1164 case 'J':
1165 return "->*";
1166 case 'K':
1167 return "/";
1168 case 'L':
1169 return "%";
1170 case 'M':
1171 return "<";
1172 case 'N':
1173 return "<=";
1174 case 'O':
1175 return ">";
1176 case 'P':
1177 return ">=";
1178 case 'Q':
1179 return ",";
1180 case 'R':
1181 return "()";
1182 case 'S':
1183 return "~";
1184 case 'T':
1185 return "^";
1186 case 'U':
1187 return "|";
1188 case 'V':
1189 return "&&";
1190 case 'W':
1191 return "||";
1192 case 'X':
1193 return "*=";
1194 case 'Y':
1195 return "+=";
1196 case 'Z':
1197 return "-=";
1198 case '_': {
1199 if (MangledName.empty())
1200 break;
1201
1202 switch (MangledName.popFront()) {
1203 case '0':
1204 return "/=";
1205 case '1':
1206 return "%=";
1207 case '2':
1208 return ">>=";
1209 case '3':
1210 return "<<=";
1211 case '4':
1212 return "&=";
1213 case '5':
1214 return "|=";
1215 case '6':
1216 return "^=";
1217 case 'U':
1218 return " new[]";
1219 case 'V':
1220 return " delete[]";
1221 case '_':
1222 if (MangledName.consumeFront("L"))
1223 return " co_await";
1224 if (MangledName.consumeFront("K")) {
1225 size_t EndPos = MangledName.find('@');
1226 if (EndPos == StringView::npos)
1227 break;
1228 StringView OpName = demangleSimpleString(MangledName, false);
1229 size_t FullSize = OpName.size() + 3; // <space>""OpName
1230 char *Buffer = Arena.allocUnalignedBuffer(FullSize);
1231 Buffer[0] = ' ';
1232 Buffer[1] = '"';
1233 Buffer[2] = '"';
1234 std::memcpy(Buffer + 3, OpName.begin(), OpName.size());
1235 return {Buffer, FullSize};
1236 }
1237 }
1238 }
1239 }
1240 Error = true;
1241 return "";
1242 };
1243
1244 Name *Node = Arena.alloc<Name>();
1245 Node->Operator = NameString();
1246 return Node;
1247 }
1248
demangleSimpleName(StringView & MangledName,bool Memorize)1249 Name *Demangler::demangleSimpleName(StringView &MangledName, bool Memorize) {
1250 StringView S = demangleSimpleString(MangledName, Memorize);
1251 if (Error)
1252 return nullptr;
1253
1254 Name *Node = Arena.alloc<Name>();
1255 Node->Str = S;
1256 return Node;
1257 }
1258
demangleSimpleString(StringView & MangledName,bool Memorize)1259 StringView Demangler::demangleSimpleString(StringView &MangledName,
1260 bool Memorize) {
1261 StringView S;
1262 for (size_t i = 0; i < MangledName.size(); ++i) {
1263 if (MangledName[i] != '@')
1264 continue;
1265 S = MangledName.substr(0, i);
1266 MangledName = MangledName.dropFront(i + 1);
1267
1268 if (Memorize)
1269 memorizeString(S);
1270 return S;
1271 }
1272
1273 Error = true;
1274 return {};
1275 }
1276
demangleAnonymousNamespaceName(StringView & MangledName)1277 Name *Demangler::demangleAnonymousNamespaceName(StringView &MangledName) {
1278 assert(MangledName.startsWith("?A"));
1279 MangledName.consumeFront("?A");
1280
1281 Name *Node = Arena.alloc<Name>();
1282 Node->Str = "`anonymous namespace'";
1283 if (MangledName.consumeFront('@'))
1284 return Node;
1285
1286 Error = true;
1287 return nullptr;
1288 }
1289
demangleLocallyScopedNamePiece(StringView & MangledName)1290 Name *Demangler::demangleLocallyScopedNamePiece(StringView &MangledName) {
1291 assert(startsWithLocalScopePattern(MangledName));
1292
1293 Name *Node = Arena.alloc<Name>();
1294 MangledName.consumeFront('?');
1295 int ScopeIdentifier = demangleNumber(MangledName);
1296
1297 // One ? to terminate the number
1298 MangledName.consumeFront('?');
1299
1300 assert(!Error);
1301 Symbol *Scope = parse(MangledName);
1302 if (Error)
1303 return nullptr;
1304
1305 // Render the parent symbol's name into a buffer.
1306 OutputStream OS = OutputStream::create(nullptr, nullptr, 1024);
1307 OS << '`';
1308 output(Scope, OS);
1309 OS << '\'';
1310 OS << "::`" << ScopeIdentifier << "'";
1311 OS << '\0';
1312 char *Result = OS.getBuffer();
1313 Node->Str = copyString(Result);
1314 std::free(Result);
1315 return Node;
1316 }
1317
1318 // Parses a type name in the form of A@B@C@@ which represents C::B::A.
demangleFullyQualifiedTypeName(StringView & MangledName)1319 Name *Demangler::demangleFullyQualifiedTypeName(StringView &MangledName) {
1320 Name *TypeName = demangleUnqualifiedTypeName(MangledName);
1321 assert(TypeName);
1322
1323 Name *QualName = demangleNameScopeChain(MangledName, TypeName);
1324 assert(QualName);
1325 return QualName;
1326 }
1327
1328 // Parses a symbol name in the form of A@B@C@@ which represents C::B::A.
1329 // Symbol names have slightly different rules regarding what can appear
1330 // so we separate out the implementations for flexibility.
demangleFullyQualifiedSymbolName(StringView & MangledName)1331 Name *Demangler::demangleFullyQualifiedSymbolName(StringView &MangledName) {
1332 Name *SymbolName = demangleUnqualifiedSymbolName(MangledName);
1333 assert(SymbolName);
1334
1335 Name *QualName = demangleNameScopeChain(MangledName, SymbolName);
1336 assert(QualName);
1337 return QualName;
1338 }
1339
demangleUnqualifiedTypeName(StringView & MangledName)1340 Name *Demangler::demangleUnqualifiedTypeName(StringView &MangledName) {
1341 // An inner-most name can be a back-reference, because a fully-qualified name
1342 // (e.g. Scope + Inner) can contain other fully qualified names inside of
1343 // them (for example template parameters), and these nested parameters can
1344 // refer to previously mangled types.
1345 if (startsWithDigit(MangledName))
1346 return demangleBackRefName(MangledName);
1347
1348 if (MangledName.startsWith("?$"))
1349 return demangleClassTemplateName(MangledName);
1350
1351 return demangleSimpleName(MangledName, true);
1352 }
1353
demangleUnqualifiedSymbolName(StringView & MangledName)1354 Name *Demangler::demangleUnqualifiedSymbolName(StringView &MangledName) {
1355 if (startsWithDigit(MangledName))
1356 return demangleBackRefName(MangledName);
1357 if (MangledName.startsWith("?$"))
1358 return demangleClassTemplateName(MangledName);
1359 if (MangledName.startsWith('?'))
1360 return demangleOperatorName(MangledName);
1361 return demangleSimpleName(MangledName, true);
1362 }
1363
demangleNameScopePiece(StringView & MangledName)1364 Name *Demangler::demangleNameScopePiece(StringView &MangledName) {
1365 if (startsWithDigit(MangledName))
1366 return demangleBackRefName(MangledName);
1367
1368 if (MangledName.startsWith("?$"))
1369 return demangleClassTemplateName(MangledName);
1370
1371 if (MangledName.startsWith("?A"))
1372 return demangleAnonymousNamespaceName(MangledName);
1373
1374 if (startsWithLocalScopePattern(MangledName))
1375 return demangleLocallyScopedNamePiece(MangledName);
1376
1377 return demangleSimpleName(MangledName, true);
1378 }
1379
demangleNameScopeChain(StringView & MangledName,Name * UnqualifiedName)1380 Name *Demangler::demangleNameScopeChain(StringView &MangledName,
1381 Name *UnqualifiedName) {
1382 Name *Head = UnqualifiedName;
1383
1384 while (!MangledName.consumeFront("@")) {
1385 if (MangledName.empty()) {
1386 Error = true;
1387 return nullptr;
1388 }
1389
1390 assert(!Error);
1391 Name *Elem = demangleNameScopePiece(MangledName);
1392 if (Error)
1393 return nullptr;
1394
1395 Elem->Next = Head;
1396 Head = Elem;
1397 }
1398 return Head;
1399 }
1400
demangleFunctionClass(StringView & MangledName)1401 FuncClass Demangler::demangleFunctionClass(StringView &MangledName) {
1402 SwapAndRestore<StringView> RestoreOnError(MangledName, MangledName);
1403 RestoreOnError.shouldRestore(false);
1404
1405 switch (MangledName.popFront()) {
1406 case 'A':
1407 return Private;
1408 case 'B':
1409 return FuncClass(Private | Far);
1410 case 'C':
1411 return FuncClass(Private | Static);
1412 case 'D':
1413 return FuncClass(Private | Static);
1414 case 'E':
1415 return FuncClass(Private | Virtual);
1416 case 'F':
1417 return FuncClass(Private | Virtual);
1418 case 'I':
1419 return Protected;
1420 case 'J':
1421 return FuncClass(Protected | Far);
1422 case 'K':
1423 return FuncClass(Protected | Static);
1424 case 'L':
1425 return FuncClass(Protected | Static | Far);
1426 case 'M':
1427 return FuncClass(Protected | Virtual);
1428 case 'N':
1429 return FuncClass(Protected | Virtual | Far);
1430 case 'Q':
1431 return Public;
1432 case 'R':
1433 return FuncClass(Public | Far);
1434 case 'S':
1435 return FuncClass(Public | Static);
1436 case 'T':
1437 return FuncClass(Public | Static | Far);
1438 case 'U':
1439 return FuncClass(Public | Virtual);
1440 case 'V':
1441 return FuncClass(Public | Virtual | Far);
1442 case 'Y':
1443 return Global;
1444 case 'Z':
1445 return FuncClass(Global | Far);
1446 }
1447
1448 Error = true;
1449 RestoreOnError.shouldRestore(true);
1450 return Public;
1451 }
1452
demangleCallingConvention(StringView & MangledName)1453 CallingConv Demangler::demangleCallingConvention(StringView &MangledName) {
1454 switch (MangledName.popFront()) {
1455 case 'A':
1456 case 'B':
1457 return CallingConv::Cdecl;
1458 case 'C':
1459 case 'D':
1460 return CallingConv::Pascal;
1461 case 'E':
1462 case 'F':
1463 return CallingConv::Thiscall;
1464 case 'G':
1465 case 'H':
1466 return CallingConv::Stdcall;
1467 case 'I':
1468 case 'J':
1469 return CallingConv::Fastcall;
1470 case 'M':
1471 case 'N':
1472 return CallingConv::Clrcall;
1473 case 'O':
1474 case 'P':
1475 return CallingConv::Eabi;
1476 case 'Q':
1477 return CallingConv::Vectorcall;
1478 }
1479
1480 return CallingConv::None;
1481 }
1482
demangleVariableStorageClass(StringView & MangledName)1483 StorageClass Demangler::demangleVariableStorageClass(StringView &MangledName) {
1484 assert(std::isdigit(MangledName.front()));
1485
1486 switch (MangledName.popFront()) {
1487 case '0':
1488 return StorageClass::PrivateStatic;
1489 case '1':
1490 return StorageClass::ProtectedStatic;
1491 case '2':
1492 return StorageClass::PublicStatic;
1493 case '3':
1494 return StorageClass::Global;
1495 case '4':
1496 return StorageClass::FunctionLocalStatic;
1497 }
1498 Error = true;
1499 return StorageClass::None;
1500 }
1501
1502 std::pair<Qualifiers, bool>
demangleQualifiers(StringView & MangledName)1503 Demangler::demangleQualifiers(StringView &MangledName) {
1504
1505 switch (MangledName.popFront()) {
1506 // Member qualifiers
1507 case 'Q':
1508 return std::make_pair(Q_None, true);
1509 case 'R':
1510 return std::make_pair(Q_Const, true);
1511 case 'S':
1512 return std::make_pair(Q_Volatile, true);
1513 case 'T':
1514 return std::make_pair(Qualifiers(Q_Const | Q_Volatile), true);
1515 // Non-Member qualifiers
1516 case 'A':
1517 return std::make_pair(Q_None, false);
1518 case 'B':
1519 return std::make_pair(Q_Const, false);
1520 case 'C':
1521 return std::make_pair(Q_Volatile, false);
1522 case 'D':
1523 return std::make_pair(Qualifiers(Q_Const | Q_Volatile), false);
1524 }
1525 Error = true;
1526 return std::make_pair(Q_None, false);
1527 }
1528
isTagType(StringView S)1529 static bool isTagType(StringView S) {
1530 switch (S.front()) {
1531 case 'T': // union
1532 case 'U': // struct
1533 case 'V': // class
1534 case 'W': // enum
1535 return true;
1536 }
1537 return false;
1538 }
1539
isPointerType(StringView S)1540 static bool isPointerType(StringView S) {
1541 if (S.startsWith("$$Q")) // foo &&
1542 return true;
1543
1544 switch (S.front()) {
1545 case 'A': // foo &
1546 case 'P': // foo *
1547 case 'Q': // foo *const
1548 case 'R': // foo *volatile
1549 case 'S': // foo *const volatile
1550 return true;
1551 }
1552 return false;
1553 }
1554
isArrayType(StringView S)1555 static bool isArrayType(StringView S) { return S[0] == 'Y'; }
1556
isFunctionType(StringView S)1557 static bool isFunctionType(StringView S) {
1558 return S.startsWith("$$A8@@") || S.startsWith("$$A6");
1559 }
1560
1561 // <variable-type> ::= <type> <cvr-qualifiers>
1562 // ::= <type> <pointee-cvr-qualifiers> # pointers, references
demangleType(StringView & MangledName,QualifierMangleMode QMM)1563 Type *Demangler::demangleType(StringView &MangledName,
1564 QualifierMangleMode QMM) {
1565 Qualifiers Quals = Q_None;
1566 bool IsMember = false;
1567 bool IsMemberKnown = false;
1568 if (QMM == QualifierMangleMode::Mangle) {
1569 std::tie(Quals, IsMember) = demangleQualifiers(MangledName);
1570 IsMemberKnown = true;
1571 } else if (QMM == QualifierMangleMode::Result) {
1572 if (MangledName.consumeFront('?')) {
1573 std::tie(Quals, IsMember) = demangleQualifiers(MangledName);
1574 IsMemberKnown = true;
1575 }
1576 }
1577
1578 Type *Ty = nullptr;
1579 if (isTagType(MangledName))
1580 Ty = demangleClassType(MangledName);
1581 else if (isPointerType(MangledName)) {
1582 if (!IsMemberKnown)
1583 IsMember = isMemberPointer(MangledName);
1584
1585 if (IsMember)
1586 Ty = demangleMemberPointerType(MangledName);
1587 else
1588 Ty = demanglePointerType(MangledName);
1589 } else if (isArrayType(MangledName))
1590 Ty = demangleArrayType(MangledName);
1591 else if (isFunctionType(MangledName)) {
1592 if (MangledName.consumeFront("$$A8@@"))
1593 Ty = demangleFunctionType(MangledName, true, false);
1594 else {
1595 assert(MangledName.startsWith("$$A6"));
1596 MangledName.consumeFront("$$A6");
1597 Ty = demangleFunctionType(MangledName, false, false);
1598 }
1599 } else {
1600 Ty = demangleBasicType(MangledName);
1601 assert(Ty && !Error);
1602 if (!Ty || Error)
1603 return Ty;
1604 }
1605
1606 Ty->Quals = Qualifiers(Ty->Quals | Quals);
1607 return Ty;
1608 }
1609
demangleReferenceKind(StringView & MangledName)1610 ReferenceKind Demangler::demangleReferenceKind(StringView &MangledName) {
1611 if (MangledName.consumeFront('G'))
1612 return ReferenceKind::LValueRef;
1613 else if (MangledName.consumeFront('H'))
1614 return ReferenceKind::RValueRef;
1615 return ReferenceKind::None;
1616 }
1617
demangleThrowSpecification(StringView & MangledName)1618 void Demangler::demangleThrowSpecification(StringView &MangledName) {
1619 if (MangledName.consumeFront('Z'))
1620 return;
1621
1622 Error = true;
1623 }
1624
demangleFunctionType(StringView & MangledName,bool HasThisQuals,bool IsFunctionPointer)1625 FunctionType *Demangler::demangleFunctionType(StringView &MangledName,
1626 bool HasThisQuals,
1627 bool IsFunctionPointer) {
1628 FunctionType *FTy = Arena.alloc<FunctionType>();
1629 FTy->Prim = PrimTy::Function;
1630 FTy->IsFunctionPointer = IsFunctionPointer;
1631
1632 if (HasThisQuals) {
1633 FTy->Quals = demanglePointerExtQualifiers(MangledName);
1634 FTy->RefKind = demangleReferenceKind(MangledName);
1635 FTy->Quals = Qualifiers(FTy->Quals | demangleQualifiers(MangledName).first);
1636 }
1637
1638 // Fields that appear on both member and non-member functions.
1639 FTy->CallConvention = demangleCallingConvention(MangledName);
1640
1641 // <return-type> ::= <type>
1642 // ::= @ # structors (they have no declared return type)
1643 bool IsStructor = MangledName.consumeFront('@');
1644 if (!IsStructor)
1645 FTy->ReturnType = demangleType(MangledName, QualifierMangleMode::Result);
1646
1647 FTy->Params = demangleFunctionParameterList(MangledName);
1648
1649 demangleThrowSpecification(MangledName);
1650
1651 return FTy;
1652 }
1653
demangleFunctionEncoding(StringView & MangledName)1654 Type *Demangler::demangleFunctionEncoding(StringView &MangledName) {
1655 FuncClass FC = demangleFunctionClass(MangledName);
1656
1657 bool HasThisQuals = !(FC & (Global | Static));
1658 FunctionType *FTy = demangleFunctionType(MangledName, HasThisQuals, false);
1659 FTy->FunctionClass = FC;
1660
1661 return FTy;
1662 }
1663
1664 // Reads a primitive type.
demangleBasicType(StringView & MangledName)1665 Type *Demangler::demangleBasicType(StringView &MangledName) {
1666 Type *Ty = Arena.alloc<Type>();
1667
1668 if (MangledName.consumeFront("$$T")) {
1669 Ty->Prim = PrimTy::Nullptr;
1670 return Ty;
1671 }
1672
1673 switch (MangledName.popFront()) {
1674 case 'X':
1675 Ty->Prim = PrimTy::Void;
1676 break;
1677 case 'D':
1678 Ty->Prim = PrimTy::Char;
1679 break;
1680 case 'C':
1681 Ty->Prim = PrimTy::Schar;
1682 break;
1683 case 'E':
1684 Ty->Prim = PrimTy::Uchar;
1685 break;
1686 case 'F':
1687 Ty->Prim = PrimTy::Short;
1688 break;
1689 case 'G':
1690 Ty->Prim = PrimTy::Ushort;
1691 break;
1692 case 'H':
1693 Ty->Prim = PrimTy::Int;
1694 break;
1695 case 'I':
1696 Ty->Prim = PrimTy::Uint;
1697 break;
1698 case 'J':
1699 Ty->Prim = PrimTy::Long;
1700 break;
1701 case 'K':
1702 Ty->Prim = PrimTy::Ulong;
1703 break;
1704 case 'M':
1705 Ty->Prim = PrimTy::Float;
1706 break;
1707 case 'N':
1708 Ty->Prim = PrimTy::Double;
1709 break;
1710 case 'O':
1711 Ty->Prim = PrimTy::Ldouble;
1712 break;
1713 case '_': {
1714 if (MangledName.empty()) {
1715 Error = true;
1716 return nullptr;
1717 }
1718 switch (MangledName.popFront()) {
1719 case 'N':
1720 Ty->Prim = PrimTy::Bool;
1721 break;
1722 case 'J':
1723 Ty->Prim = PrimTy::Int64;
1724 break;
1725 case 'K':
1726 Ty->Prim = PrimTy::Uint64;
1727 break;
1728 case 'W':
1729 Ty->Prim = PrimTy::Wchar;
1730 break;
1731 case 'S':
1732 Ty->Prim = PrimTy::Char16;
1733 break;
1734 case 'U':
1735 Ty->Prim = PrimTy::Char32;
1736 break;
1737 default:
1738 Error = true;
1739 return nullptr;
1740 }
1741 break;
1742 }
1743 default:
1744 Error = true;
1745 return nullptr;
1746 }
1747 return Ty;
1748 }
1749
demangleClassType(StringView & MangledName)1750 UdtType *Demangler::demangleClassType(StringView &MangledName) {
1751 UdtType *UTy = Arena.alloc<UdtType>();
1752
1753 switch (MangledName.popFront()) {
1754 case 'T':
1755 UTy->Prim = PrimTy::Union;
1756 break;
1757 case 'U':
1758 UTy->Prim = PrimTy::Struct;
1759 break;
1760 case 'V':
1761 UTy->Prim = PrimTy::Class;
1762 break;
1763 case 'W':
1764 if (MangledName.popFront() != '4') {
1765 Error = true;
1766 return nullptr;
1767 }
1768 UTy->Prim = PrimTy::Enum;
1769 break;
1770 default:
1771 assert(false);
1772 }
1773
1774 UTy->UdtName = demangleFullyQualifiedTypeName(MangledName);
1775 return UTy;
1776 }
1777
1778 static std::pair<Qualifiers, PointerAffinity>
demanglePointerCVQualifiers(StringView & MangledName)1779 demanglePointerCVQualifiers(StringView &MangledName) {
1780 if (MangledName.consumeFront("$$Q"))
1781 return std::make_pair(Q_None, PointerAffinity::RValueReference);
1782
1783 switch (MangledName.popFront()) {
1784 case 'A':
1785 return std::make_pair(Q_None, PointerAffinity::Reference);
1786 case 'P':
1787 return std::make_pair(Q_None, PointerAffinity::Pointer);
1788 case 'Q':
1789 return std::make_pair(Q_Const, PointerAffinity::Pointer);
1790 case 'R':
1791 return std::make_pair(Q_Volatile, PointerAffinity::Pointer);
1792 case 'S':
1793 return std::make_pair(Qualifiers(Q_Const | Q_Volatile),
1794 PointerAffinity::Pointer);
1795 default:
1796 assert(false && "Ty is not a pointer type!");
1797 }
1798 return std::make_pair(Q_None, PointerAffinity::Pointer);
1799 }
1800
1801 // <pointer-type> ::= E? <pointer-cvr-qualifiers> <ext-qualifiers> <type>
1802 // # the E is required for 64-bit non-static pointers
demanglePointerType(StringView & MangledName)1803 PointerType *Demangler::demanglePointerType(StringView &MangledName) {
1804 PointerType *Pointer = Arena.alloc<PointerType>();
1805
1806 std::tie(Pointer->Quals, Pointer->Affinity) =
1807 demanglePointerCVQualifiers(MangledName);
1808
1809 Pointer->Prim = PrimTy::Ptr;
1810 if (MangledName.consumeFront("6")) {
1811 Pointer->Pointee = demangleFunctionType(MangledName, false, true);
1812 return Pointer;
1813 }
1814
1815 Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName);
1816 Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals);
1817
1818 Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Mangle);
1819 return Pointer;
1820 }
1821
1822 MemberPointerType *
demangleMemberPointerType(StringView & MangledName)1823 Demangler::demangleMemberPointerType(StringView &MangledName) {
1824 MemberPointerType *Pointer = Arena.alloc<MemberPointerType>();
1825 Pointer->Prim = PrimTy::MemberPtr;
1826
1827 PointerAffinity Affinity;
1828 std::tie(Pointer->Quals, Affinity) = demanglePointerCVQualifiers(MangledName);
1829 assert(Affinity == PointerAffinity::Pointer);
1830
1831 Qualifiers ExtQuals = demanglePointerExtQualifiers(MangledName);
1832 Pointer->Quals = Qualifiers(Pointer->Quals | ExtQuals);
1833
1834 if (MangledName.consumeFront("8")) {
1835 Pointer->MemberName = demangleFullyQualifiedSymbolName(MangledName);
1836 Pointer->Pointee = demangleFunctionType(MangledName, true, true);
1837 } else {
1838 Qualifiers PointeeQuals = Q_None;
1839 bool IsMember = false;
1840 std::tie(PointeeQuals, IsMember) = demangleQualifiers(MangledName);
1841 assert(IsMember);
1842 Pointer->MemberName = demangleFullyQualifiedSymbolName(MangledName);
1843
1844 Pointer->Pointee = demangleType(MangledName, QualifierMangleMode::Drop);
1845 Pointer->Pointee->Quals = PointeeQuals;
1846 }
1847
1848 return Pointer;
1849 }
1850
demanglePointerExtQualifiers(StringView & MangledName)1851 Qualifiers Demangler::demanglePointerExtQualifiers(StringView &MangledName) {
1852 Qualifiers Quals = Q_None;
1853 if (MangledName.consumeFront('E'))
1854 Quals = Qualifiers(Quals | Q_Pointer64);
1855 if (MangledName.consumeFront('I'))
1856 Quals = Qualifiers(Quals | Q_Restrict);
1857 if (MangledName.consumeFront('F'))
1858 Quals = Qualifiers(Quals | Q_Unaligned);
1859
1860 return Quals;
1861 }
1862
demangleArrayType(StringView & MangledName)1863 ArrayType *Demangler::demangleArrayType(StringView &MangledName) {
1864 assert(MangledName.front() == 'Y');
1865 MangledName.popFront();
1866
1867 int Dimension = demangleNumber(MangledName);
1868 if (Dimension <= 0) {
1869 Error = true;
1870 return nullptr;
1871 }
1872
1873 ArrayType *ATy = Arena.alloc<ArrayType>();
1874 ArrayType *Dim = ATy;
1875 for (int I = 0; I < Dimension; ++I) {
1876 Dim->Prim = PrimTy::Array;
1877 Dim->ArrayDimension = demangleNumber(MangledName);
1878 Dim->NextDimension = Arena.alloc<ArrayType>();
1879 Dim = Dim->NextDimension;
1880 }
1881
1882 if (MangledName.consumeFront("$$C")) {
1883 if (MangledName.consumeFront("B"))
1884 ATy->Quals = Q_Const;
1885 else if (MangledName.consumeFront("C") || MangledName.consumeFront("D"))
1886 ATy->Quals = Qualifiers(Q_Const | Q_Volatile);
1887 else if (!MangledName.consumeFront("A"))
1888 Error = true;
1889 }
1890
1891 ATy->ElementType = demangleType(MangledName, QualifierMangleMode::Drop);
1892 Dim->ElementType = ATy->ElementType;
1893 return ATy;
1894 }
1895
1896 // Reads a function or a template parameters.
1897 FunctionParams
demangleFunctionParameterList(StringView & MangledName)1898 Demangler::demangleFunctionParameterList(StringView &MangledName) {
1899 // Empty parameter list.
1900 if (MangledName.consumeFront('X'))
1901 return {};
1902
1903 FunctionParams *Head;
1904 FunctionParams **Current = &Head;
1905 while (!Error && !MangledName.startsWith('@') &&
1906 !MangledName.startsWith('Z')) {
1907
1908 if (startsWithDigit(MangledName)) {
1909 size_t N = MangledName[0] - '0';
1910 if (N >= FunctionParamBackRefCount) {
1911 Error = true;
1912 return {};
1913 }
1914 MangledName = MangledName.dropFront();
1915
1916 *Current = Arena.alloc<FunctionParams>();
1917 (*Current)->Current = FunctionParamBackRefs[N]->clone(Arena);
1918 Current = &(*Current)->Next;
1919 continue;
1920 }
1921
1922 size_t OldSize = MangledName.size();
1923
1924 *Current = Arena.alloc<FunctionParams>();
1925 (*Current)->Current = demangleType(MangledName, QualifierMangleMode::Drop);
1926
1927 size_t CharsConsumed = OldSize - MangledName.size();
1928 assert(CharsConsumed != 0);
1929
1930 // Single-letter types are ignored for backreferences because memorizing
1931 // them doesn't save anything.
1932 if (FunctionParamBackRefCount <= 9 && CharsConsumed > 1)
1933 FunctionParamBackRefs[FunctionParamBackRefCount++] = (*Current)->Current;
1934
1935 Current = &(*Current)->Next;
1936 }
1937
1938 if (Error)
1939 return {};
1940
1941 // A non-empty parameter list is terminated by either 'Z' (variadic) parameter
1942 // list or '@' (non variadic). Careful not to consume "@Z", as in that case
1943 // the following Z could be a throw specifier.
1944 if (MangledName.consumeFront('@'))
1945 return *Head;
1946
1947 if (MangledName.consumeFront('Z')) {
1948 Head->IsVariadic = true;
1949 return *Head;
1950 }
1951
1952 Error = true;
1953 return {};
1954 }
1955
1956 TemplateParams *
demangleTemplateParameterList(StringView & MangledName)1957 Demangler::demangleTemplateParameterList(StringView &MangledName) {
1958 TemplateParams *Head;
1959 TemplateParams **Current = &Head;
1960 while (!Error && !MangledName.startsWith('@')) {
1961 // Template parameter lists don't participate in back-referencing.
1962 *Current = Arena.alloc<TemplateParams>();
1963
1964 // Empty parameter pack.
1965 if (MangledName.consumeFront("$S") || MangledName.consumeFront("$$V") ||
1966 MangledName.consumeFront("$$$V")) {
1967 if (!MangledName.startsWith('@'))
1968 Error = true;
1969 continue;
1970 }
1971
1972 if (MangledName.consumeFront("$$Y")) {
1973 (*Current)->IsTemplateTemplate = true;
1974 (*Current)->IsAliasTemplate = true;
1975 (*Current)->ParamName = demangleFullyQualifiedTypeName(MangledName);
1976 } else if (MangledName.consumeFront("$1?")) {
1977 (*Current)->ParamName = demangleFullyQualifiedSymbolName(MangledName);
1978 (*Current)->ParamType = demangleFunctionEncoding(MangledName);
1979 } else {
1980 (*Current)->ParamType =
1981 demangleType(MangledName, QualifierMangleMode::Drop);
1982 }
1983
1984 Current = &(*Current)->Next;
1985 }
1986
1987 if (Error)
1988 return {};
1989
1990 // Template parameter lists cannot be variadic, so it can only be terminated
1991 // by @.
1992 if (MangledName.consumeFront('@'))
1993 return Head;
1994 Error = true;
1995 return {};
1996 }
1997
output(const Symbol * S,OutputStream & OS)1998 void Demangler::output(const Symbol *S, OutputStream &OS) {
1999 // Converts an AST to a string.
2000 //
2001 // Converting an AST representing a C++ type to a string is tricky due
2002 // to the bad grammar of the C++ declaration inherited from C. You have
2003 // to construct a string from inside to outside. For example, if a type
2004 // X is a pointer to a function returning int, the order you create a
2005 // string becomes something like this:
2006 //
2007 // (1) X is a pointer: *X
2008 // (2) (1) is a function returning int: int (*X)()
2009 //
2010 // So you cannot construct a result just by appending strings to a result.
2011 //
2012 // To deal with this, we split the function into two. outputPre() writes
2013 // the "first half" of type declaration, and outputPost() writes the
2014 // "second half". For example, outputPre() writes a return type for a
2015 // function and outputPost() writes an parameter list.
2016 Type::outputPre(OS, *S->SymbolType);
2017 outputName(OS, S->SymbolName);
2018 Type::outputPost(OS, *S->SymbolType);
2019 }
2020
microsoftDemangle(const char * MangledName,char * Buf,size_t * N,int * Status)2021 char *llvm::microsoftDemangle(const char *MangledName, char *Buf, size_t *N,
2022 int *Status) {
2023 Demangler D;
2024 StringView Name{MangledName};
2025 Symbol *S = D.parse(Name);
2026
2027 if (D.Error)
2028 *Status = llvm::demangle_invalid_mangled_name;
2029 else
2030 *Status = llvm::demangle_success;
2031
2032 OutputStream OS = OutputStream::create(Buf, N, 1024);
2033 D.output(S, OS);
2034 OS << '\0';
2035 return OS.getBuffer();
2036 }
2037