1 //===-- X86AsmLexer.cpp - Tokenize X86 assembly to AsmTokens --------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "llvm/MC/MCAsmInfo.h"
12 #include "llvm/MC/MCParser/MCAsmLexer.h"
13 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
14 #include "llvm/MC/MCTargetAsmLexer.h"
15 #include "llvm/Support/TargetRegistry.h"
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/StringExtras.h"
18 
19 using namespace llvm;
20 
21 namespace {
22 
23 class X86AsmLexer : public MCTargetAsmLexer {
24   const MCAsmInfo &AsmInfo;
25 
26   bool tentativeIsValid;
27   AsmToken tentativeToken;
28 
lexTentative()29   const AsmToken &lexTentative() {
30     tentativeToken = getLexer()->Lex();
31     tentativeIsValid = true;
32     return tentativeToken;
33   }
34 
lexDefinite()35   const AsmToken &lexDefinite() {
36     if (tentativeIsValid) {
37       tentativeIsValid = false;
38       return tentativeToken;
39     }
40     return getLexer()->Lex();
41   }
42 
43   AsmToken LexTokenATT();
44   AsmToken LexTokenIntel();
45 protected:
LexToken()46   AsmToken LexToken() {
47     if (!Lexer) {
48       SetError(SMLoc(), "No MCAsmLexer installed");
49       return AsmToken(AsmToken::Error, "", 0);
50     }
51 
52     switch (AsmInfo.getAssemblerDialect()) {
53     default:
54       SetError(SMLoc(), "Unhandled dialect");
55       return AsmToken(AsmToken::Error, "", 0);
56     case 0:
57       return LexTokenATT();
58     case 1:
59       return LexTokenIntel();
60     }
61   }
62 public:
X86AsmLexer(const Target & T,const MCRegisterInfo & MRI,const MCAsmInfo & MAI)63   X86AsmLexer(const Target &T, const MCRegisterInfo &MRI, const MCAsmInfo &MAI)
64     : MCTargetAsmLexer(T), AsmInfo(MAI), tentativeIsValid(false) {
65   }
66 };
67 
68 } // end anonymous namespace
69 
70 #define GET_REGISTER_MATCHER
71 #include "X86GenAsmMatcher.inc"
72 
LexTokenATT()73 AsmToken X86AsmLexer::LexTokenATT() {
74   AsmToken lexedToken = lexDefinite();
75 
76   switch (lexedToken.getKind()) {
77   default:
78     return lexedToken;
79   case AsmToken::Error:
80     SetError(Lexer->getErrLoc(), Lexer->getErr());
81     return lexedToken;
82 
83   case AsmToken::Percent: {
84     const AsmToken &nextToken = lexTentative();
85     if (nextToken.getKind() != AsmToken::Identifier)
86       return lexedToken;
87 
88 
89     if (unsigned regID = MatchRegisterName(nextToken.getString())) {
90       lexDefinite();
91 
92       // FIXME: This is completely wrong when there is a space or other
93       // punctuation between the % and the register name.
94       StringRef regStr(lexedToken.getString().data(),
95                        lexedToken.getString().size() +
96                        nextToken.getString().size());
97 
98       return AsmToken(AsmToken::Register, regStr,
99                       static_cast<int64_t>(regID));
100     }
101 
102     // Match register name failed.  If this is "db[0-7]", match it as an alias
103     // for dr[0-7].
104     if (nextToken.getString().size() == 3 &&
105         nextToken.getString().startswith("db")) {
106       int RegNo = -1;
107       switch (nextToken.getString()[2]) {
108       case '0': RegNo = X86::DR0; break;
109       case '1': RegNo = X86::DR1; break;
110       case '2': RegNo = X86::DR2; break;
111       case '3': RegNo = X86::DR3; break;
112       case '4': RegNo = X86::DR4; break;
113       case '5': RegNo = X86::DR5; break;
114       case '6': RegNo = X86::DR6; break;
115       case '7': RegNo = X86::DR7; break;
116       }
117 
118       if (RegNo != -1) {
119         lexDefinite();
120 
121         // FIXME: This is completely wrong when there is a space or other
122         // punctuation between the % and the register name.
123         StringRef regStr(lexedToken.getString().data(),
124                          lexedToken.getString().size() +
125                          nextToken.getString().size());
126         return AsmToken(AsmToken::Register, regStr,
127                         static_cast<int64_t>(RegNo));
128       }
129     }
130 
131 
132     return lexedToken;
133   }
134   }
135 }
136 
LexTokenIntel()137 AsmToken X86AsmLexer::LexTokenIntel() {
138   const AsmToken &lexedToken = lexDefinite();
139 
140   switch(lexedToken.getKind()) {
141   default:
142     return lexedToken;
143   case AsmToken::Error:
144     SetError(Lexer->getErrLoc(), Lexer->getErr());
145     return lexedToken;
146   case AsmToken::Identifier: {
147     std::string upperCase = lexedToken.getString().str();
148     std::string lowerCase = LowercaseString(upperCase);
149     StringRef lowerRef(lowerCase);
150 
151     unsigned regID = MatchRegisterName(lowerRef);
152 
153     if (regID)
154       return AsmToken(AsmToken::Register,
155                       lexedToken.getString(),
156                       static_cast<int64_t>(regID));
157     return lexedToken;
158   }
159   }
160 }
161 
LLVMInitializeX86AsmLexer()162 extern "C" void LLVMInitializeX86AsmLexer() {
163   RegisterMCAsmLexer<X86AsmLexer> X(TheX86_32Target);
164   RegisterMCAsmLexer<X86AsmLexer> Y(TheX86_64Target);
165 }
166