1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "InstPrinter/X86IntelInstPrinter.h"
11 #include "MCTargetDesc/X86BaseInfo.h"
12 #include "MCTargetDesc/X86MCExpr.h"
13 #include "MCTargetDesc/X86TargetStreamer.h"
14 #include "X86AsmInstrumentation.h"
15 #include "X86AsmParserCommon.h"
16 #include "X86Operand.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SmallString.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/StringSwitch.h"
21 #include "llvm/ADT/Twine.h"
22 #include "llvm/MC/MCContext.h"
23 #include "llvm/MC/MCExpr.h"
24 #include "llvm/MC/MCInst.h"
25 #include "llvm/MC/MCInstrInfo.h"
26 #include "llvm/MC/MCParser/MCAsmLexer.h"
27 #include "llvm/MC/MCParser/MCAsmParser.h"
28 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
29 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
30 #include "llvm/MC/MCRegisterInfo.h"
31 #include "llvm/MC/MCSection.h"
32 #include "llvm/MC/MCStreamer.h"
33 #include "llvm/MC/MCSubtargetInfo.h"
34 #include "llvm/MC/MCSymbol.h"
35 #include "llvm/Support/SourceMgr.h"
36 #include "llvm/Support/TargetRegistry.h"
37 #include "llvm/Support/raw_ostream.h"
38 #include <algorithm>
39 #include <memory>
40 
41 using namespace llvm;
42 
checkScale(unsigned Scale,StringRef & ErrMsg)43 static bool checkScale(unsigned Scale, StringRef &ErrMsg) {
44   if (Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
45     ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
46     return true;
47   }
48   return false;
49 }
50 
51 namespace {
52 
// Precedence of each InfixCalculatorTok, indexed by the enumerator's numeric
// value. InfixCalculator::pushOperator compares these numbers, treating a
// larger value as a higher precedence. The entries must stay in the same order
// as the InfixCalculatorTok enumerators.
static const char OpPrecedence[] = {
    /* IC_OR       */ 0,
    /* IC_XOR      */ 1,
    /* IC_AND      */ 2,
    /* IC_LSHIFT   */ 3,
    /* IC_RSHIFT   */ 3,
    /* IC_PLUS     */ 4,
    /* IC_MINUS    */ 4,
    /* IC_MULTIPLY */ 5,
    /* IC_DIVIDE   */ 5,
    /* IC_MOD      */ 5,
    /* IC_NOT      */ 6,
    /* IC_NEG      */ 7,
    /* IC_RPAREN   */ 8,
    /* IC_LPAREN   */ 9,
    /* IC_IMM      */ 0,
    /* IC_REGISTER */ 0
};
71 
72 class X86AsmParser : public MCTargetAsmParser {
73   ParseInstructionInfo *InstInfo;
74   std::unique_ptr<X86AsmInstrumentation> Instrumentation;
75   bool Code16GCC;
76 
77 private:
consumeToken()78   SMLoc consumeToken() {
79     MCAsmParser &Parser = getParser();
80     SMLoc Result = Parser.getTok().getLoc();
81     Parser.Lex();
82     return Result;
83   }
84 
getTargetStreamer()85   X86TargetStreamer &getTargetStreamer() {
86     assert(getParser().getStreamer().getTargetStreamer() &&
87            "do not have a target streamer");
88     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
89     return static_cast<X86TargetStreamer &>(TS);
90   }
91 
MatchInstruction(const OperandVector & Operands,MCInst & Inst,uint64_t & ErrorInfo,bool matchingInlineAsm,unsigned VariantID=0)92   unsigned MatchInstruction(const OperandVector &Operands, MCInst &Inst,
93                             uint64_t &ErrorInfo, bool matchingInlineAsm,
94                             unsigned VariantID = 0) {
95     // In Code16GCC mode, match as 32-bit.
96     if (Code16GCC)
97       SwitchMode(X86::Mode32Bit);
98     unsigned rv = MatchInstructionImpl(Operands, Inst, ErrorInfo,
99                                        matchingInlineAsm, VariantID);
100     if (Code16GCC)
101       SwitchMode(X86::Mode16Bit);
102     return rv;
103   }
104 
105   enum InfixCalculatorTok {
106     IC_OR = 0,
107     IC_XOR,
108     IC_AND,
109     IC_LSHIFT,
110     IC_RSHIFT,
111     IC_PLUS,
112     IC_MINUS,
113     IC_MULTIPLY,
114     IC_DIVIDE,
115     IC_MOD,
116     IC_NOT,
117     IC_NEG,
118     IC_RPAREN,
119     IC_LPAREN,
120     IC_IMM,
121     IC_REGISTER
122   };
123 
124   enum IntelOperatorKind {
125     IOK_INVALID = 0,
126     IOK_LENGTH,
127     IOK_SIZE,
128     IOK_TYPE,
129     IOK_OFFSET
130   };
131 
132   class InfixCalculator {
133     typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
134     SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
135     SmallVector<ICToken, 4> PostfixStack;
136 
isUnaryOperator(const InfixCalculatorTok Op)137     bool isUnaryOperator(const InfixCalculatorTok Op) {
138       return Op == IC_NEG || Op == IC_NOT;
139     }
140 
141   public:
popOperand()142     int64_t popOperand() {
143       assert (!PostfixStack.empty() && "Poped an empty stack!");
144       ICToken Op = PostfixStack.pop_back_val();
145       if (!(Op.first == IC_IMM || Op.first == IC_REGISTER))
146         return -1; // The invalid Scale value will be caught later by checkScale
147       return Op.second;
148     }
pushOperand(InfixCalculatorTok Op,int64_t Val=0)149     void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
150       assert ((Op == IC_IMM || Op == IC_REGISTER) &&
151               "Unexpected operand!");
152       PostfixStack.push_back(std::make_pair(Op, Val));
153     }
154 
popOperator()155     void popOperator() { InfixOperatorStack.pop_back(); }
pushOperator(InfixCalculatorTok Op)156     void pushOperator(InfixCalculatorTok Op) {
157       // Push the new operator if the stack is empty.
158       if (InfixOperatorStack.empty()) {
159         InfixOperatorStack.push_back(Op);
160         return;
161       }
162 
163       // Push the new operator if it has a higher precedence than the operator
164       // on the top of the stack or the operator on the top of the stack is a
165       // left parentheses.
166       unsigned Idx = InfixOperatorStack.size() - 1;
167       InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
168       if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
169         InfixOperatorStack.push_back(Op);
170         return;
171       }
172 
173       // The operator on the top of the stack has higher precedence than the
174       // new operator.
175       unsigned ParenCount = 0;
176       while (1) {
177         // Nothing to process.
178         if (InfixOperatorStack.empty())
179           break;
180 
181         Idx = InfixOperatorStack.size() - 1;
182         StackOp = InfixOperatorStack[Idx];
183         if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
184           break;
185 
186         // If we have an even parentheses count and we see a left parentheses,
187         // then stop processing.
188         if (!ParenCount && StackOp == IC_LPAREN)
189           break;
190 
191         if (StackOp == IC_RPAREN) {
192           ++ParenCount;
193           InfixOperatorStack.pop_back();
194         } else if (StackOp == IC_LPAREN) {
195           --ParenCount;
196           InfixOperatorStack.pop_back();
197         } else {
198           InfixOperatorStack.pop_back();
199           PostfixStack.push_back(std::make_pair(StackOp, 0));
200         }
201       }
202       // Push the new operator.
203       InfixOperatorStack.push_back(Op);
204     }
205 
execute()206     int64_t execute() {
207       // Push any remaining operators onto the postfix stack.
208       while (!InfixOperatorStack.empty()) {
209         InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
210         if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
211           PostfixStack.push_back(std::make_pair(StackOp, 0));
212       }
213 
214       if (PostfixStack.empty())
215         return 0;
216 
217       SmallVector<ICToken, 16> OperandStack;
218       for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
219         ICToken Op = PostfixStack[i];
220         if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
221           OperandStack.push_back(Op);
222         } else if (isUnaryOperator(Op.first)) {
223           assert (OperandStack.size() > 0 && "Too few operands.");
224           ICToken Operand = OperandStack.pop_back_val();
225           assert (Operand.first == IC_IMM &&
226                   "Unary operation with a register!");
227           switch (Op.first) {
228           default:
229             report_fatal_error("Unexpected operator!");
230             break;
231           case IC_NEG:
232             OperandStack.push_back(std::make_pair(IC_IMM, -Operand.second));
233             break;
234           case IC_NOT:
235             OperandStack.push_back(std::make_pair(IC_IMM, ~Operand.second));
236             break;
237           }
238         } else {
239           assert (OperandStack.size() > 1 && "Too few operands.");
240           int64_t Val;
241           ICToken Op2 = OperandStack.pop_back_val();
242           ICToken Op1 = OperandStack.pop_back_val();
243           switch (Op.first) {
244           default:
245             report_fatal_error("Unexpected operator!");
246             break;
247           case IC_PLUS:
248             Val = Op1.second + Op2.second;
249             OperandStack.push_back(std::make_pair(IC_IMM, Val));
250             break;
251           case IC_MINUS:
252             Val = Op1.second - Op2.second;
253             OperandStack.push_back(std::make_pair(IC_IMM, Val));
254             break;
255           case IC_MULTIPLY:
256             assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
257                     "Multiply operation with an immediate and a register!");
258             Val = Op1.second * Op2.second;
259             OperandStack.push_back(std::make_pair(IC_IMM, Val));
260             break;
261           case IC_DIVIDE:
262             assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
263                     "Divide operation with an immediate and a register!");
264             assert (Op2.second != 0 && "Division by zero!");
265             Val = Op1.second / Op2.second;
266             OperandStack.push_back(std::make_pair(IC_IMM, Val));
267             break;
268           case IC_MOD:
269             assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
270                     "Modulo operation with an immediate and a register!");
271             Val = Op1.second % Op2.second;
272             OperandStack.push_back(std::make_pair(IC_IMM, Val));
273             break;
274           case IC_OR:
275             assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
276                     "Or operation with an immediate and a register!");
277             Val = Op1.second | Op2.second;
278             OperandStack.push_back(std::make_pair(IC_IMM, Val));
279             break;
280           case IC_XOR:
281             assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
282               "Xor operation with an immediate and a register!");
283             Val = Op1.second ^ Op2.second;
284             OperandStack.push_back(std::make_pair(IC_IMM, Val));
285             break;
286           case IC_AND:
287             assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
288                     "And operation with an immediate and a register!");
289             Val = Op1.second & Op2.second;
290             OperandStack.push_back(std::make_pair(IC_IMM, Val));
291             break;
292           case IC_LSHIFT:
293             assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
294                     "Left shift operation with an immediate and a register!");
295             Val = Op1.second << Op2.second;
296             OperandStack.push_back(std::make_pair(IC_IMM, Val));
297             break;
298           case IC_RSHIFT:
299             assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
300                     "Right shift operation with an immediate and a register!");
301             Val = Op1.second >> Op2.second;
302             OperandStack.push_back(std::make_pair(IC_IMM, Val));
303             break;
304           }
305         }
306       }
307       assert (OperandStack.size() == 1 && "Expected a single result.");
308       return OperandStack.pop_back_val().second;
309     }
310   };
311 
312   enum IntelExprState {
313     IES_INIT,
314     IES_OR,
315     IES_XOR,
316     IES_AND,
317     IES_LSHIFT,
318     IES_RSHIFT,
319     IES_PLUS,
320     IES_MINUS,
321     IES_NOT,
322     IES_MULTIPLY,
323     IES_DIVIDE,
324     IES_MOD,
325     IES_LBRAC,
326     IES_RBRAC,
327     IES_LPAREN,
328     IES_RPAREN,
329     IES_REGISTER,
330     IES_INTEGER,
331     IES_IDENTIFIER,
332     IES_ERROR
333   };
334 
335   class IntelExprStateMachine {
336     IntelExprState State, PrevState;
337     unsigned BaseReg, IndexReg, TmpReg, Scale;
338     int64_t Imm;
339     const MCExpr *Sym;
340     StringRef SymName;
341     InfixCalculator IC;
342     InlineAsmIdentifierInfo Info;
343     short BracCount;
344     bool MemExpr;
345 
346   public:
IntelExprStateMachine()347     IntelExprStateMachine()
348         : State(IES_INIT), PrevState(IES_ERROR), BaseReg(0), IndexReg(0),
349           TmpReg(0), Scale(0), Imm(0), Sym(nullptr), BracCount(0),
350           MemExpr(false) {}
351 
addImm(int64_t imm)352     void addImm(int64_t imm) { Imm += imm; }
getBracCount()353     short getBracCount() { return BracCount; }
isMemExpr()354     bool isMemExpr() { return MemExpr; }
getBaseReg()355     unsigned getBaseReg() { return BaseReg; }
getIndexReg()356     unsigned getIndexReg() { return IndexReg; }
getScale()357     unsigned getScale() { return Scale; }
getSym()358     const MCExpr *getSym() { return Sym; }
getSymName()359     StringRef getSymName() { return SymName; }
getImm()360     int64_t getImm() { return Imm + IC.execute(); }
isValidEndState()361     bool isValidEndState() {
362       return State == IES_RBRAC || State == IES_INTEGER;
363     }
hadError()364     bool hadError() { return State == IES_ERROR; }
getIdentifierInfo()365     InlineAsmIdentifierInfo &getIdentifierInfo() { return Info; }
366 
onOr()367     void onOr() {
368       IntelExprState CurrState = State;
369       switch (State) {
370       default:
371         State = IES_ERROR;
372         break;
373       case IES_INTEGER:
374       case IES_RPAREN:
375       case IES_REGISTER:
376         State = IES_OR;
377         IC.pushOperator(IC_OR);
378         break;
379       }
380       PrevState = CurrState;
381     }
onXor()382     void onXor() {
383       IntelExprState CurrState = State;
384       switch (State) {
385       default:
386         State = IES_ERROR;
387         break;
388       case IES_INTEGER:
389       case IES_RPAREN:
390       case IES_REGISTER:
391         State = IES_XOR;
392         IC.pushOperator(IC_XOR);
393         break;
394       }
395       PrevState = CurrState;
396     }
onAnd()397     void onAnd() {
398       IntelExprState CurrState = State;
399       switch (State) {
400       default:
401         State = IES_ERROR;
402         break;
403       case IES_INTEGER:
404       case IES_RPAREN:
405       case IES_REGISTER:
406         State = IES_AND;
407         IC.pushOperator(IC_AND);
408         break;
409       }
410       PrevState = CurrState;
411     }
onLShift()412     void onLShift() {
413       IntelExprState CurrState = State;
414       switch (State) {
415       default:
416         State = IES_ERROR;
417         break;
418       case IES_INTEGER:
419       case IES_RPAREN:
420       case IES_REGISTER:
421         State = IES_LSHIFT;
422         IC.pushOperator(IC_LSHIFT);
423         break;
424       }
425       PrevState = CurrState;
426     }
onRShift()427     void onRShift() {
428       IntelExprState CurrState = State;
429       switch (State) {
430       default:
431         State = IES_ERROR;
432         break;
433       case IES_INTEGER:
434       case IES_RPAREN:
435       case IES_REGISTER:
436         State = IES_RSHIFT;
437         IC.pushOperator(IC_RSHIFT);
438         break;
439       }
440       PrevState = CurrState;
441     }
onPlus(StringRef & ErrMsg)442     bool onPlus(StringRef &ErrMsg) {
443       IntelExprState CurrState = State;
444       switch (State) {
445       default:
446         State = IES_ERROR;
447         break;
448       case IES_INTEGER:
449       case IES_RPAREN:
450       case IES_REGISTER:
451         State = IES_PLUS;
452         IC.pushOperator(IC_PLUS);
453         if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
454           // If we already have a BaseReg, then assume this is the IndexReg with
455           // no explicit scale.
456           if (!BaseReg) {
457             BaseReg = TmpReg;
458           } else {
459             if (IndexReg) {
460               ErrMsg = "BaseReg/IndexReg already set!";
461               return true;
462             }
463             IndexReg = TmpReg;
464             Scale = 0;
465           }
466         }
467         break;
468       }
469       PrevState = CurrState;
470       return false;
471     }
onMinus(StringRef & ErrMsg)472     bool onMinus(StringRef &ErrMsg) {
473       IntelExprState CurrState = State;
474       switch (State) {
475       default:
476         State = IES_ERROR;
477         break;
478       case IES_OR:
479       case IES_XOR:
480       case IES_AND:
481       case IES_LSHIFT:
482       case IES_RSHIFT:
483       case IES_PLUS:
484       case IES_NOT:
485       case IES_MULTIPLY:
486       case IES_DIVIDE:
487       case IES_MOD:
488       case IES_LPAREN:
489       case IES_RPAREN:
490       case IES_LBRAC:
491       case IES_RBRAC:
492       case IES_INTEGER:
493       case IES_REGISTER:
494       case IES_INIT:
495         State = IES_MINUS;
496         // push minus operator if it is not a negate operator
497         if (CurrState == IES_REGISTER || CurrState == IES_RPAREN ||
498             CurrState == IES_INTEGER  || CurrState == IES_RBRAC)
499           IC.pushOperator(IC_MINUS);
500         else if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
501           // We have negate operator for Scale: it's illegal
502           ErrMsg = "Scale can't be negative";
503           return true;
504         } else
505           IC.pushOperator(IC_NEG);
506         if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
507           // If we already have a BaseReg, then assume this is the IndexReg with
508           // no explicit scale.
509           if (!BaseReg) {
510             BaseReg = TmpReg;
511           } else {
512             if (IndexReg) {
513               ErrMsg = "BaseReg/IndexReg already set!";
514               return true;
515             }
516             IndexReg = TmpReg;
517             Scale = 0;
518           }
519         }
520         break;
521       }
522       PrevState = CurrState;
523       return false;
524     }
onNot()525     void onNot() {
526       IntelExprState CurrState = State;
527       switch (State) {
528       default:
529         State = IES_ERROR;
530         break;
531       case IES_OR:
532       case IES_XOR:
533       case IES_AND:
534       case IES_LSHIFT:
535       case IES_RSHIFT:
536       case IES_PLUS:
537       case IES_MINUS:
538       case IES_NOT:
539       case IES_MULTIPLY:
540       case IES_DIVIDE:
541       case IES_MOD:
542       case IES_LPAREN:
543       case IES_LBRAC:
544       case IES_INIT:
545         State = IES_NOT;
546         IC.pushOperator(IC_NOT);
547         break;
548       }
549       PrevState = CurrState;
550     }
551 
onRegister(unsigned Reg,StringRef & ErrMsg)552     bool onRegister(unsigned Reg, StringRef &ErrMsg) {
553       IntelExprState CurrState = State;
554       switch (State) {
555       default:
556         State = IES_ERROR;
557         break;
558       case IES_PLUS:
559       case IES_LPAREN:
560       case IES_LBRAC:
561         State = IES_REGISTER;
562         TmpReg = Reg;
563         IC.pushOperand(IC_REGISTER);
564         break;
565       case IES_MULTIPLY:
566         // Index Register - Scale * Register
567         if (PrevState == IES_INTEGER) {
568           if (IndexReg) {
569             ErrMsg = "BaseReg/IndexReg already set!";
570             return true;
571           }
572           State = IES_REGISTER;
573           IndexReg = Reg;
574           // Get the scale and replace the 'Scale * Register' with '0'.
575           Scale = IC.popOperand();
576           if (checkScale(Scale, ErrMsg))
577             return true;
578           IC.pushOperand(IC_IMM);
579           IC.popOperator();
580         } else {
581           State = IES_ERROR;
582         }
583         break;
584       }
585       PrevState = CurrState;
586       return false;
587     }
onIdentifierExpr(const MCExpr * SymRef,StringRef SymRefName,const InlineAsmIdentifierInfo & IDInfo,bool ParsingInlineAsm,StringRef & ErrMsg)588     bool onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName,
589                           const InlineAsmIdentifierInfo &IDInfo,
590                           bool ParsingInlineAsm, StringRef &ErrMsg) {
591       // InlineAsm: Treat an enum value as an integer
592       if (ParsingInlineAsm)
593         if (IDInfo.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
594           return onInteger(IDInfo.Enum.EnumVal, ErrMsg);
595       // Treat a symbolic constant like an integer
596       if (auto *CE = dyn_cast<MCConstantExpr>(SymRef))
597         return onInteger(CE->getValue(), ErrMsg);
598       PrevState = State;
599       bool HasSymbol = Sym != nullptr;
600       switch (State) {
601       default:
602         State = IES_ERROR;
603         break;
604       case IES_PLUS:
605       case IES_MINUS:
606       case IES_NOT:
607       case IES_INIT:
608       case IES_LBRAC:
609         MemExpr = true;
610         State = IES_INTEGER;
611         Sym = SymRef;
612         SymName = SymRefName;
613         IC.pushOperand(IC_IMM);
614         if (ParsingInlineAsm)
615           Info = IDInfo;
616         break;
617       }
618       if (HasSymbol)
619         ErrMsg = "cannot use more than one symbol in memory operand";
620       return HasSymbol;
621     }
onInteger(int64_t TmpInt,StringRef & ErrMsg)622     bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
623       IntelExprState CurrState = State;
624       switch (State) {
625       default:
626         State = IES_ERROR;
627         break;
628       case IES_PLUS:
629       case IES_MINUS:
630       case IES_NOT:
631       case IES_OR:
632       case IES_XOR:
633       case IES_AND:
634       case IES_LSHIFT:
635       case IES_RSHIFT:
636       case IES_DIVIDE:
637       case IES_MOD:
638       case IES_MULTIPLY:
639       case IES_LPAREN:
640       case IES_INIT:
641       case IES_LBRAC:
642         State = IES_INTEGER;
643         if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
644           // Index Register - Register * Scale
645           if (IndexReg) {
646             ErrMsg = "BaseReg/IndexReg already set!";
647             return true;
648           }
649           IndexReg = TmpReg;
650           Scale = TmpInt;
651           if (checkScale(Scale, ErrMsg))
652             return true;
653           // Get the scale and replace the 'Register * Scale' with '0'.
654           IC.popOperator();
655         } else {
656           IC.pushOperand(IC_IMM, TmpInt);
657         }
658         break;
659       }
660       PrevState = CurrState;
661       return false;
662     }
onStar()663     void onStar() {
664       PrevState = State;
665       switch (State) {
666       default:
667         State = IES_ERROR;
668         break;
669       case IES_INTEGER:
670       case IES_REGISTER:
671       case IES_RPAREN:
672         State = IES_MULTIPLY;
673         IC.pushOperator(IC_MULTIPLY);
674         break;
675       }
676     }
onDivide()677     void onDivide() {
678       PrevState = State;
679       switch (State) {
680       default:
681         State = IES_ERROR;
682         break;
683       case IES_INTEGER:
684       case IES_RPAREN:
685         State = IES_DIVIDE;
686         IC.pushOperator(IC_DIVIDE);
687         break;
688       }
689     }
onMod()690     void onMod() {
691       PrevState = State;
692       switch (State) {
693       default:
694         State = IES_ERROR;
695         break;
696       case IES_INTEGER:
697       case IES_RPAREN:
698         State = IES_MOD;
699         IC.pushOperator(IC_MOD);
700         break;
701       }
702     }
onLBrac()703     bool onLBrac() {
704       if (BracCount)
705         return true;
706       PrevState = State;
707       switch (State) {
708       default:
709         State = IES_ERROR;
710         break;
711       case IES_RBRAC:
712       case IES_INTEGER:
713       case IES_RPAREN:
714         State = IES_PLUS;
715         IC.pushOperator(IC_PLUS);
716         break;
717       case IES_INIT:
718         assert(!BracCount && "BracCount should be zero on parsing's start");
719         State = IES_LBRAC;
720         break;
721       }
722       MemExpr = true;
723       BracCount++;
724       return false;
725     }
onRBrac()726     bool onRBrac() {
727       IntelExprState CurrState = State;
728       switch (State) {
729       default:
730         State = IES_ERROR;
731         break;
732       case IES_INTEGER:
733       case IES_REGISTER:
734       case IES_RPAREN:
735         if (BracCount-- != 1)
736           return true;
737         State = IES_RBRAC;
738         if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
739           // If we already have a BaseReg, then assume this is the IndexReg with
740           // no explicit scale.
741           if (!BaseReg) {
742             BaseReg = TmpReg;
743           } else {
744             assert (!IndexReg && "BaseReg/IndexReg already set!");
745             IndexReg = TmpReg;
746             Scale = 0;
747           }
748         }
749         break;
750       }
751       PrevState = CurrState;
752       return false;
753     }
onLParen()754     void onLParen() {
755       IntelExprState CurrState = State;
756       switch (State) {
757       default:
758         State = IES_ERROR;
759         break;
760       case IES_PLUS:
761       case IES_MINUS:
762       case IES_NOT:
763       case IES_OR:
764       case IES_XOR:
765       case IES_AND:
766       case IES_LSHIFT:
767       case IES_RSHIFT:
768       case IES_MULTIPLY:
769       case IES_DIVIDE:
770       case IES_MOD:
771       case IES_LPAREN:
772       case IES_INIT:
773       case IES_LBRAC:
774         State = IES_LPAREN;
775         IC.pushOperator(IC_LPAREN);
776         break;
777       }
778       PrevState = CurrState;
779     }
onRParen()780     void onRParen() {
781       PrevState = State;
782       switch (State) {
783       default:
784         State = IES_ERROR;
785         break;
786       case IES_INTEGER:
787       case IES_REGISTER:
788       case IES_RPAREN:
789         State = IES_RPAREN;
790         IC.pushOperator(IC_RPAREN);
791         break;
792       }
793     }
794   };
795 
Error(SMLoc L,const Twine & Msg,SMRange Range=None,bool MatchingInlineAsm=false)796   bool Error(SMLoc L, const Twine &Msg, SMRange Range = None,
797              bool MatchingInlineAsm = false) {
798     MCAsmParser &Parser = getParser();
799     if (MatchingInlineAsm) {
800       if (!getLexer().isAtStartOfStatement())
801         Parser.eatToEndOfStatement();
802       return false;
803     }
804     return Parser.Error(L, Msg, Range);
805   }
806 
ErrorOperand(SMLoc Loc,StringRef Msg)807   std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) {
808     Error(Loc, Msg);
809     return nullptr;
810   }
811 
812   std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
813   std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
814   bool IsSIReg(unsigned Reg);
815   unsigned GetSIDIForRegClass(unsigned RegClassID, unsigned Reg, bool IsSIReg);
816   void
817   AddDefaultSrcDestOperands(OperandVector &Operands,
818                             std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
819                             std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
820   bool VerifyAndAdjustOperands(OperandVector &OrigOperands,
821                                OperandVector &FinalOperands);
822   std::unique_ptr<X86Operand> ParseOperand();
823   std::unique_ptr<X86Operand> ParseATTOperand();
824   std::unique_ptr<X86Operand> ParseIntelOperand();
825   std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
826   bool ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End);
827   unsigned IdentifyIntelInlineAsmOperator(StringRef Name);
828   unsigned ParseIntelInlineAsmOperator(unsigned OpKind);
829   std::unique_ptr<X86Operand> ParseRoundingModeOp(SMLoc Start);
830   bool ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM);
831   void RewriteIntelExpression(IntelExprStateMachine &SM, SMLoc Start,
832                               SMLoc End);
833   bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
834   bool ParseIntelInlineAsmIdentifier(const MCExpr *&Val, StringRef &Identifier,
835                                      InlineAsmIdentifierInfo &Info,
836                                      bool IsUnevaluatedOperand, SMLoc &End);
837 
838   std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc MemStart);
839 
840   bool ParseIntelMemoryOperandSize(unsigned &Size);
841   std::unique_ptr<X86Operand>
842   CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
843                         unsigned IndexReg, unsigned Scale, SMLoc Start,
844                         SMLoc End, unsigned Size, StringRef Identifier,
845                         const InlineAsmIdentifierInfo &Info);
846 
847   bool parseDirectiveEven(SMLoc L);
848   bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
849 
850   /// CodeView FPO data directives.
851   bool parseDirectiveFPOProc(SMLoc L);
852   bool parseDirectiveFPOSetFrame(SMLoc L);
853   bool parseDirectiveFPOPushReg(SMLoc L);
854   bool parseDirectiveFPOStackAlloc(SMLoc L);
855   bool parseDirectiveFPOEndPrologue(SMLoc L);
856   bool parseDirectiveFPOEndProc(SMLoc L);
857   bool parseDirectiveFPOData(SMLoc L);
858 
859   bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
860   bool processInstruction(MCInst &Inst, const OperandVector &Ops);
861 
862   /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
863   /// instrumentation around Inst.
864   void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
865 
866   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
867                                OperandVector &Operands, MCStreamer &Out,
868                                uint64_t &ErrorInfo,
869                                bool MatchingInlineAsm) override;
870 
871   void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
872                          MCStreamer &Out, bool MatchingInlineAsm);
873 
874   bool ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
875                            bool MatchingInlineAsm);
876 
877   bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
878                                   OperandVector &Operands, MCStreamer &Out,
879                                   uint64_t &ErrorInfo,
880                                   bool MatchingInlineAsm);
881 
882   bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
883                                     OperandVector &Operands, MCStreamer &Out,
884                                     uint64_t &ErrorInfo,
885                                     bool MatchingInlineAsm);
886 
887   bool OmitRegisterFromClobberLists(unsigned RegNo) override;
888 
  /// Parses AVX512-specific operand primitives: masked registers
  /// ({%k<NUM>}, {z}) and memory broadcasting ({1to<NUM>}), updating the
  /// Operands vector if required.
  /// Returns false if no parsing errors occurred, true otherwise.
892   bool HandleAVX512Operand(OperandVector &Operands,
893                            const MCParsedAsmOperand &Op);
894 
895   bool ParseZ(std::unique_ptr<X86Operand> &Z, const SMLoc &StartLoc);
896 
is64BitMode() const897   bool is64BitMode() const {
898     // FIXME: Can tablegen auto-generate this?
899     return getSTI().getFeatureBits()[X86::Mode64Bit];
900   }
is32BitMode() const901   bool is32BitMode() const {
902     // FIXME: Can tablegen auto-generate this?
903     return getSTI().getFeatureBits()[X86::Mode32Bit];
904   }
is16BitMode() const905   bool is16BitMode() const {
906     // FIXME: Can tablegen auto-generate this?
907     return getSTI().getFeatureBits()[X86::Mode16Bit];
908   }
SwitchMode(unsigned mode)909   void SwitchMode(unsigned mode) {
910     MCSubtargetInfo &STI = copySTI();
911     FeatureBitset AllModes({X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit});
912     FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
913     uint64_t FB = ComputeAvailableFeatures(
914       STI.ToggleFeature(OldMode.flip(mode)));
915     setAvailableFeatures(FB);
916 
917     assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
918   }
919 
getPointerWidth()920   unsigned getPointerWidth() {
921     if (is16BitMode()) return 16;
922     if (is32BitMode()) return 32;
923     if (is64BitMode()) return 64;
924     llvm_unreachable("invalid mode");
925   }
926 
isParsingIntelSyntax()927   bool isParsingIntelSyntax() {
928     return getParser().getAssemblerDialect();
929   }
930 
931   /// @name Auto-generated Matcher Functions
932   /// {
933 
934 #define GET_ASSEMBLER_HEADER
935 #include "X86GenAsmMatcher.inc"
936 
937   /// }
938 
939 public:
940 
X86AsmParser(const MCSubtargetInfo & sti,MCAsmParser & Parser,const MCInstrInfo & mii,const MCTargetOptions & Options)941   X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
942                const MCInstrInfo &mii, const MCTargetOptions &Options)
943       : MCTargetAsmParser(Options, sti, mii),  InstInfo(nullptr),
944         Code16GCC(false) {
945 
946     Parser.addAliasForDirective(".word", ".2byte");
947 
948     // Initialize the set of available features.
949     setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
950     Instrumentation.reset(
951         CreateX86AsmInstrumentation(Options, Parser.getContext(), STI));
952   }
953 
954   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
955 
956   void SetFrameRegister(unsigned RegNo) override;
957 
958   bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
959 
960   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
961                         SMLoc NameLoc, OperandVector &Operands) override;
962 
963   bool ParseDirective(AsmToken DirectiveID) override;
964 };
965 } // end anonymous namespace
966 
967 /// @name Auto-generated Match Functions
968 /// {
969 
970 static unsigned MatchRegisterName(StringRef Name);
971 
972 /// }
973 
CheckBaseRegAndIndexRegAndScale(unsigned BaseReg,unsigned IndexReg,unsigned Scale,bool Is64BitMode,StringRef & ErrMsg)974 static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg, unsigned IndexReg,
975                                             unsigned Scale, bool Is64BitMode,
976                                             StringRef &ErrMsg) {
977   // If we have both a base register and an index register make sure they are
978   // both 64-bit or 32-bit registers.
979   // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
980 
981   if (BaseReg != 0 &&
982       !(BaseReg == X86::RIP || BaseReg == X86::EIP ||
983         X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) ||
984         X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) ||
985         X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg))) {
986     ErrMsg = "invalid base+index expression";
987     return true;
988   }
989 
990   if (IndexReg != 0 &&
991       !(IndexReg == X86::EIZ || IndexReg == X86::RIZ ||
992         X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
993         X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
994         X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
995         X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
996         X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
997         X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg))) {
998     ErrMsg = "invalid base+index expression";
999     return true;
1000   }
1001 
1002   if (((BaseReg == X86::RIP || BaseReg == X86::EIP) && IndexReg != 0) ||
1003       IndexReg == X86::EIP || IndexReg == X86::RIP ||
1004       IndexReg == X86::ESP || IndexReg == X86::RSP) {
1005     ErrMsg = "invalid base+index expression";
1006     return true;
1007   }
1008 
1009   // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
1010   // and then only in non-64-bit modes.
1011   if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1012       (Is64BitMode || (BaseReg != X86::BX && BaseReg != X86::BP &&
1013                        BaseReg != X86::SI && BaseReg != X86::DI)) &&
1014       BaseReg != X86::DX) {
1015     ErrMsg = "invalid 16-bit base register";
1016     return true;
1017   }
1018 
1019   if (BaseReg == 0 &&
1020       X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
1021     ErrMsg = "16-bit memory operand may not include only index register";
1022     return true;
1023   }
1024 
1025   if (BaseReg != 0 && IndexReg != 0) {
1026     if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
1027         (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1028          X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1029          IndexReg == X86::EIZ)) {
1030       ErrMsg = "base register is 64-bit, but index register is not";
1031       return true;
1032     }
1033     if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
1034         (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1035          X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
1036          IndexReg == X86::RIZ)) {
1037       ErrMsg = "base register is 32-bit, but index register is not";
1038       return true;
1039     }
1040     if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
1041       if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1042           X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
1043         ErrMsg = "base register is 16-bit, but index register is not";
1044         return true;
1045       }
1046       if ((BaseReg != X86::BX && BaseReg != X86::BP) ||
1047           (IndexReg != X86::SI && IndexReg != X86::DI)) {
1048         ErrMsg = "invalid 16-bit base/index register combination";
1049         return true;
1050       }
1051     }
1052   }
1053 
1054   // RIP/EIP-relative addressing is only supported in 64-bit mode.
1055   if (!Is64BitMode && BaseReg != 0 &&
1056       (BaseReg == X86::RIP || BaseReg == X86::EIP)) {
1057     ErrMsg = "IP-relative addressing requires 64-bit mode";
1058     return true;
1059   }
1060 
1061   return checkScale(Scale, ErrMsg);
1062 }
1063 
/// Parses a register reference starting at the parser's current token.
///
/// In AT&T syntax a leading '%' is consumed when present. Besides names known
/// to MatchRegisterName() (tried as written, then lowercased), this accepts
/// the multi-token form "st(N)" and the "db<N>" spellings, which are mapped
/// to the DR<N> registers.
///
/// \param RegNo    reset to 0 on entry; receives the matched register.
/// \param StartLoc receives the location of the first token examined.
/// \param EndLoc   receives the end location of the register text.
/// \returns false on success, true on failure. When parsing Intel syntax, a
///          token that is not a register name yields true without a
///          diagnostic being reported here.
bool X86AsmParser::ParseRegister(unsigned &RegNo,
                                 SMLoc &StartLoc, SMLoc &EndLoc) {
  MCAsmParser &Parser = getParser();
  RegNo = 0;
  const AsmToken &PercentTok = Parser.getTok();
  StartLoc = PercentTok.getLoc();

  // If we encounter a %, ignore it. This code handles registers with and
  // without the prefix, unprefixed registers can occur in cfi directives.
  if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
    Parser.Lex(); // Eat percent token.

  const AsmToken &Tok = Parser.getTok();
  EndLoc = Tok.getEndLoc();

  // Anything other than an identifier cannot name a register. Intel syntax
  // fails quietly here; AT&T syntax reports an error.
  if (Tok.isNot(AsmToken::Identifier)) {
    if (isParsingIntelSyntax()) return true;
    return Error(StartLoc, "invalid register name",
                 SMRange(StartLoc, EndLoc));
  }

  RegNo = MatchRegisterName(Tok.getString());

  // If the match failed, try the register name as lowercase.
  if (RegNo == 0)
    RegNo = MatchRegisterName(Tok.getString().lower());

  // The "flags" register cannot be referenced directly.
  // Treat it as an identifier instead.
  if (isParsingInlineAsm() && isParsingIntelSyntax() && RegNo == X86::EFLAGS)
    RegNo = 0;

  if (!is64BitMode()) {
    // FIXME: This should be done using Requires<Not64BitMode> and
    // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
    // checked.
    // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
    // REX prefix.
    if (RegNo == X86::RIZ || RegNo == X86::RIP ||
        X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
        X86II::isX86_64NonExtLowByteReg(RegNo) ||
        X86II::isX86_64ExtendedReg(RegNo))
      return Error(StartLoc, "register %"
                   + Tok.getString() + " is only available in 64-bit mode",
                   SMRange(StartLoc, EndLoc));
  }

  // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
  if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
    // A bare "st" with no parenthesized index means ST0.
    RegNo = X86::ST0;
    Parser.Lex(); // Eat 'st'

    // Check to see if we have '(4)' after %st.
    if (getLexer().isNot(AsmToken::LParen))
      return false;
    // Lex the paren.
    getParser().Lex();

    const AsmToken &IntTok = Parser.getTok();
    if (IntTok.isNot(AsmToken::Integer))
      return Error(IntTok.getLoc(), "expected stack index");
    // Only indices 0 through 7 are accepted.
    switch (IntTok.getIntVal()) {
    case 0: RegNo = X86::ST0; break;
    case 1: RegNo = X86::ST1; break;
    case 2: RegNo = X86::ST2; break;
    case 3: RegNo = X86::ST3; break;
    case 4: RegNo = X86::ST4; break;
    case 5: RegNo = X86::ST5; break;
    case 6: RegNo = X86::ST6; break;
    case 7: RegNo = X86::ST7; break;
    default: return Error(IntTok.getLoc(), "invalid stack index");
    }

    // Consume the index; the token that follows it must be ')'.
    if (getParser().Lex().isNot(AsmToken::RParen))
      return Error(Parser.getTok().getLoc(), "expected ')'");

    EndLoc = Parser.getTok().getEndLoc();
    Parser.Lex(); // Eat ')'
    return false;
  }

  EndLoc = Parser.getTok().getEndLoc();

  // If this is "db[0-15]", match it as an alias
  // for dr[0-15].
  if (RegNo == 0 && Tok.getString().startswith("db")) {
    if (Tok.getString().size() == 3) {
      // Single digit: db0 .. db9.
      switch (Tok.getString()[2]) {
      case '0': RegNo = X86::DR0; break;
      case '1': RegNo = X86::DR1; break;
      case '2': RegNo = X86::DR2; break;
      case '3': RegNo = X86::DR3; break;
      case '4': RegNo = X86::DR4; break;
      case '5': RegNo = X86::DR5; break;
      case '6': RegNo = X86::DR6; break;
      case '7': RegNo = X86::DR7; break;
      case '8': RegNo = X86::DR8; break;
      case '9': RegNo = X86::DR9; break;
      }
    } else if (Tok.getString().size() == 4 && Tok.getString()[2] == '1') {
      // Two digits with a leading '1': db10 .. db15.
      switch (Tok.getString()[3]) {
      case '0': RegNo = X86::DR10; break;
      case '1': RegNo = X86::DR11; break;
      case '2': RegNo = X86::DR12; break;
      case '3': RegNo = X86::DR13; break;
      case '4': RegNo = X86::DR14; break;
      case '5': RegNo = X86::DR15; break;
      }
    }

    if (RegNo != 0) {
      EndLoc = Parser.getTok().getEndLoc();
      Parser.Lex(); // Eat it.
      return false;
    }
  }

  // Nothing matched. The token is left unconsumed; Intel syntax fails quietly
  // while AT&T syntax reports an error.
  if (RegNo == 0) {
    if (isParsingIntelSyntax()) return true;
    return Error(StartLoc, "invalid register name",
                 SMRange(StartLoc, EndLoc));
  }

  Parser.Lex(); // Eat identifier token.
  return false;
}
1190 
SetFrameRegister(unsigned RegNo)1191 void X86AsmParser::SetFrameRegister(unsigned RegNo) {
1192   Instrumentation->SetInitialFrameRegister(RegNo);
1193 }
1194 
DefaultMemSIOperand(SMLoc Loc)1195 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
1196   bool Parse32 = is32BitMode() || Code16GCC;
1197   unsigned Basereg = is64BitMode() ? X86::RSI : (Parse32 ? X86::ESI : X86::SI);
1198   const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1199   return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1200                                /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
1201                                Loc, Loc, 0);
1202 }
1203 
DefaultMemDIOperand(SMLoc Loc)1204 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
1205   bool Parse32 = is32BitMode() || Code16GCC;
1206   unsigned Basereg = is64BitMode() ? X86::RDI : (Parse32 ? X86::EDI : X86::DI);
1207   const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1208   return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1209                                /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
1210                                Loc, Loc, 0);
1211 }
1212 
IsSIReg(unsigned Reg)1213 bool X86AsmParser::IsSIReg(unsigned Reg) {
1214   switch (Reg) {
1215   default: llvm_unreachable("Only (R|E)SI and (R|E)DI are expected!");
1216   case X86::RSI:
1217   case X86::ESI:
1218   case X86::SI:
1219     return true;
1220   case X86::RDI:
1221   case X86::EDI:
1222   case X86::DI:
1223     return false;
1224   }
1225 }
1226 
GetSIDIForRegClass(unsigned RegClassID,unsigned Reg,bool IsSIReg)1227 unsigned X86AsmParser::GetSIDIForRegClass(unsigned RegClassID, unsigned Reg,
1228                                           bool IsSIReg) {
1229   switch (RegClassID) {
1230   default: llvm_unreachable("Unexpected register class");
1231   case X86::GR64RegClassID:
1232     return IsSIReg ? X86::RSI : X86::RDI;
1233   case X86::GR32RegClassID:
1234     return IsSIReg ? X86::ESI : X86::EDI;
1235   case X86::GR16RegClassID:
1236     return IsSIReg ? X86::SI : X86::DI;
1237   }
1238 }
1239 
AddDefaultSrcDestOperands(OperandVector & Operands,std::unique_ptr<llvm::MCParsedAsmOperand> && Src,std::unique_ptr<llvm::MCParsedAsmOperand> && Dst)1240 void X86AsmParser::AddDefaultSrcDestOperands(
1241     OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
1242     std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst) {
1243   if (isParsingIntelSyntax()) {
1244     Operands.push_back(std::move(Dst));
1245     Operands.push_back(std::move(Src));
1246   }
1247   else {
1248     Operands.push_back(std::move(Src));
1249     Operands.push_back(std::move(Dst));
1250   }
1251 }
1252 
VerifyAndAdjustOperands(OperandVector & OrigOperands,OperandVector & FinalOperands)1253 bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands,
1254                                            OperandVector &FinalOperands) {
1255 
1256   if (OrigOperands.size() > 1) {
1257     // Check if sizes match, OrigOperands also contains the instruction name
1258     assert(OrigOperands.size() == FinalOperands.size() + 1 &&
1259            "Operand size mismatch");
1260 
1261     SmallVector<std::pair<SMLoc, std::string>, 2> Warnings;
1262     // Verify types match
1263     int RegClassID = -1;
1264     for (unsigned int i = 0; i < FinalOperands.size(); ++i) {
1265       X86Operand &OrigOp = static_cast<X86Operand &>(*OrigOperands[i + 1]);
1266       X86Operand &FinalOp = static_cast<X86Operand &>(*FinalOperands[i]);
1267 
1268       if (FinalOp.isReg() &&
1269           (!OrigOp.isReg() || FinalOp.getReg() != OrigOp.getReg()))
1270         // Return false and let a normal complaint about bogus operands happen
1271         return false;
1272 
1273       if (FinalOp.isMem()) {
1274 
1275         if (!OrigOp.isMem())
1276           // Return false and let a normal complaint about bogus operands happen
1277           return false;
1278 
1279         unsigned OrigReg = OrigOp.Mem.BaseReg;
1280         unsigned FinalReg = FinalOp.Mem.BaseReg;
1281 
1282         // If we've already encounterd a register class, make sure all register
1283         // bases are of the same register class
1284         if (RegClassID != -1 &&
1285             !X86MCRegisterClasses[RegClassID].contains(OrigReg)) {
1286           return Error(OrigOp.getStartLoc(),
1287                        "mismatching source and destination index registers");
1288         }
1289 
1290         if (X86MCRegisterClasses[X86::GR64RegClassID].contains(OrigReg))
1291           RegClassID = X86::GR64RegClassID;
1292         else if (X86MCRegisterClasses[X86::GR32RegClassID].contains(OrigReg))
1293           RegClassID = X86::GR32RegClassID;
1294         else if (X86MCRegisterClasses[X86::GR16RegClassID].contains(OrigReg))
1295           RegClassID = X86::GR16RegClassID;
1296         else
1297           // Unexpected register class type
1298           // Return false and let a normal complaint about bogus operands happen
1299           return false;
1300 
1301         bool IsSI = IsSIReg(FinalReg);
1302         FinalReg = GetSIDIForRegClass(RegClassID, FinalReg, IsSI);
1303 
1304         if (FinalReg != OrigReg) {
1305           std::string RegName = IsSI ? "ES:(R|E)SI" : "ES:(R|E)DI";
1306           Warnings.push_back(std::make_pair(
1307               OrigOp.getStartLoc(),
1308               "memory operand is only for determining the size, " + RegName +
1309                   " will be used for the location"));
1310         }
1311 
1312         FinalOp.Mem.Size = OrigOp.Mem.Size;
1313         FinalOp.Mem.SegReg = OrigOp.Mem.SegReg;
1314         FinalOp.Mem.BaseReg = FinalReg;
1315       }
1316     }
1317 
1318     // Produce warnings only if all the operands passed the adjustment - prevent
1319     // legal cases like "movsd (%rax), %xmm0" mistakenly produce warnings
1320     for (auto &WarningMsg : Warnings) {
1321       Warning(WarningMsg.first, WarningMsg.second);
1322     }
1323 
1324     // Remove old operands
1325     for (unsigned int i = 0; i < FinalOperands.size(); ++i)
1326       OrigOperands.pop_back();
1327   }
1328   // OrigOperands.append(FinalOperands.begin(), FinalOperands.end());
1329   for (unsigned int i = 0; i < FinalOperands.size(); ++i)
1330     OrigOperands.push_back(std::move(FinalOperands[i]));
1331 
1332   return false;
1333 }
1334 
ParseOperand()1335 std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
1336   if (isParsingIntelSyntax())
1337     return ParseIntelOperand();
1338   return ParseATTOperand();
1339 }
1340 
CreateMemForInlineAsm(unsigned SegReg,const MCExpr * Disp,unsigned BaseReg,unsigned IndexReg,unsigned Scale,SMLoc Start,SMLoc End,unsigned Size,StringRef Identifier,const InlineAsmIdentifierInfo & Info)1341 std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
1342     unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
1343     unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
1344     const InlineAsmIdentifierInfo &Info) {
1345   // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
1346   // some other label reference.
1347   if (Info.isKind(InlineAsmIdentifierInfo::IK_Label)) {
1348     // Insert an explicit size if the user didn't have one.
1349     if (!Size) {
1350       Size = getPointerWidth();
1351       InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start,
1352                                           /*Len=*/0, Size);
1353     }
1354     // Create an absolute memory reference in order to match against
1355     // instructions taking a PC relative operand.
1356     return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size,
1357                                  Identifier, Info.Label.Decl);
1358   }
1359   // We either have a direct symbol reference, or an offset from a symbol.  The
1360   // parser always puts the symbol on the LHS, so look there for size
1361   // calculation purposes.
1362   unsigned FrontendSize = 0;
1363   void *Decl = nullptr;
1364   bool IsGlobalLV = false;
1365   if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
1366     // Size is in terms of bits in this context.
1367     FrontendSize = Info.Var.Type * 8;
1368     Decl = Info.Var.Decl;
1369     IsGlobalLV = Info.Var.IsGlobalLV;
1370   }
1371   // It is widely common for MS InlineAsm to use a global variable and one/two
1372   // registers in a mmory expression, and though unaccessible via rip/eip.
1373   if (IsGlobalLV && (BaseReg || IndexReg)) {
1374     return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End);
1375   // Otherwise, we set the base register to a non-zero value
1376   // if we don't know the actual value at this time.  This is necessary to
1377   // get the matching correct in some cases.
1378   } else {
1379     BaseReg = BaseReg ? BaseReg : 1;
1380     return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1381                                  IndexReg, Scale, Start, End, Size, Identifier,
1382                                  Decl, FrontendSize);
1383   }
1384 }
1385 
1386 // Some binary bitwise operators have a named synonymous
1387 // Query a candidate string for being such a named operator
1388 // and if so - invoke the appropriate handler
ParseIntelNamedOperator(StringRef Name,IntelExprStateMachine & SM)1389 bool X86AsmParser::ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM) {
1390   // A named operator should be either lower or upper case, but not a mix
1391   if (Name.compare(Name.lower()) && Name.compare(Name.upper()))
1392     return false;
1393   if (Name.equals_lower("not"))
1394     SM.onNot();
1395   else if (Name.equals_lower("or"))
1396     SM.onOr();
1397   else if (Name.equals_lower("shl"))
1398     SM.onLShift();
1399   else if (Name.equals_lower("shr"))
1400     SM.onRShift();
1401   else if (Name.equals_lower("xor"))
1402     SM.onXor();
1403   else if (Name.equals_lower("and"))
1404     SM.onAnd();
1405   else if (Name.equals_lower("mod"))
1406     SM.onMod();
1407   else
1408     return false;
1409   return true;
1410 }
1411 
ParseIntelExpression(IntelExprStateMachine & SM,SMLoc & End)1412 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1413   MCAsmParser &Parser = getParser();
1414   const AsmToken &Tok = Parser.getTok();
1415   StringRef ErrMsg;
1416 
1417   AsmToken::TokenKind PrevTK = AsmToken::Error;
1418   bool Done = false;
1419   while (!Done) {
1420     bool UpdateLocLex = true;
1421     AsmToken::TokenKind TK = getLexer().getKind();
1422 
1423     switch (TK) {
1424     default:
1425       if ((Done = SM.isValidEndState()))
1426         break;
1427       return Error(Tok.getLoc(), "unknown token in expression");
1428     case AsmToken::EndOfStatement:
1429       Done = true;
1430       break;
1431     case AsmToken::Real:
1432       // DotOperator: [ebx].0
1433       UpdateLocLex = false;
1434       if (ParseIntelDotOperator(SM, End))
1435         return true;
1436       break;
1437     case AsmToken::At:
1438     case AsmToken::String:
1439     case AsmToken::Identifier: {
1440       SMLoc IdentLoc = Tok.getLoc();
1441       StringRef Identifier = Tok.getString();
1442       UpdateLocLex = false;
1443       // Register
1444       unsigned Reg;
1445       if (Tok.is(AsmToken::Identifier) && !ParseRegister(Reg, IdentLoc, End)) {
1446         if (SM.onRegister(Reg, ErrMsg))
1447           return Error(Tok.getLoc(), ErrMsg);
1448         break;
1449       }
1450       // Operator synonymous ("not", "or" etc.)
1451       if ((UpdateLocLex = ParseIntelNamedOperator(Identifier, SM)))
1452         break;
1453       // Symbol reference, when parsing assembly content
1454       InlineAsmIdentifierInfo Info;
1455       const MCExpr *Val;
1456       if (!isParsingInlineAsm()) {
1457         if (getParser().parsePrimaryExpr(Val, End)) {
1458           return Error(Tok.getLoc(), "Unexpected identifier!");
1459         } else if (SM.onIdentifierExpr(Val, Identifier, Info, false, ErrMsg)) {
1460           return Error(IdentLoc, ErrMsg);
1461         } else
1462           break;
1463       }
1464       // MS InlineAsm operators (TYPE/LENGTH/SIZE)
1465       if (unsigned OpKind = IdentifyIntelInlineAsmOperator(Identifier)) {
1466         if (OpKind == IOK_OFFSET)
1467           return Error(IdentLoc, "Dealing OFFSET operator as part of"
1468             "a compound immediate expression is yet to be supported");
1469         if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) {
1470           if (SM.onInteger(Val, ErrMsg))
1471             return Error(IdentLoc, ErrMsg);
1472         } else
1473           return true;
1474         break;
1475       }
1476       // MS Dot Operator expression
1477       if (Identifier.count('.') && PrevTK == AsmToken::RBrac) {
1478         if (ParseIntelDotOperator(SM, End))
1479           return true;
1480         break;
1481       }
1482       // MS InlineAsm identifier
1483       // Call parseIdentifier() to combine @ with the identifier behind it.
1484       if (TK == AsmToken::At && Parser.parseIdentifier(Identifier))
1485         return Error(IdentLoc, "expected identifier");
1486       if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, false, End))
1487         return true;
1488       else if (SM.onIdentifierExpr(Val, Identifier, Info, true, ErrMsg))
1489         return Error(IdentLoc, ErrMsg);
1490       break;
1491     }
1492     case AsmToken::Integer: {
1493       // Look for 'b' or 'f' following an Integer as a directional label
1494       SMLoc Loc = getTok().getLoc();
1495       int64_t IntVal = getTok().getIntVal();
1496       End = consumeToken();
1497       UpdateLocLex = false;
1498       if (getLexer().getKind() == AsmToken::Identifier) {
1499         StringRef IDVal = getTok().getString();
1500         if (IDVal == "f" || IDVal == "b") {
1501           MCSymbol *Sym =
1502               getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b");
1503           MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1504           const MCExpr *Val =
1505               MCSymbolRefExpr::create(Sym, Variant, getContext());
1506           if (IDVal == "b" && Sym->isUndefined())
1507             return Error(Loc, "invalid reference to undefined symbol");
1508           StringRef Identifier = Sym->getName();
1509           InlineAsmIdentifierInfo Info;
1510           if (SM.onIdentifierExpr(Val, Identifier, Info,
1511               isParsingInlineAsm(), ErrMsg))
1512             return Error(Loc, ErrMsg);
1513           End = consumeToken();
1514         } else {
1515           if (SM.onInteger(IntVal, ErrMsg))
1516             return Error(Loc, ErrMsg);
1517         }
1518       } else {
1519         if (SM.onInteger(IntVal, ErrMsg))
1520           return Error(Loc, ErrMsg);
1521       }
1522       break;
1523     }
1524     case AsmToken::Plus:
1525       if (SM.onPlus(ErrMsg))
1526         return Error(getTok().getLoc(), ErrMsg);
1527       break;
1528     case AsmToken::Minus:
1529       if (SM.onMinus(ErrMsg))
1530         return Error(getTok().getLoc(), ErrMsg);
1531       break;
1532     case AsmToken::Tilde:   SM.onNot(); break;
1533     case AsmToken::Star:    SM.onStar(); break;
1534     case AsmToken::Slash:   SM.onDivide(); break;
1535     case AsmToken::Percent: SM.onMod(); break;
1536     case AsmToken::Pipe:    SM.onOr(); break;
1537     case AsmToken::Caret:   SM.onXor(); break;
1538     case AsmToken::Amp:     SM.onAnd(); break;
1539     case AsmToken::LessLess:
1540                             SM.onLShift(); break;
1541     case AsmToken::GreaterGreater:
1542                             SM.onRShift(); break;
1543     case AsmToken::LBrac:
1544       if (SM.onLBrac())
1545         return Error(Tok.getLoc(), "unexpected bracket encountered");
1546       break;
1547     case AsmToken::RBrac:
1548       if (SM.onRBrac())
1549         return Error(Tok.getLoc(), "unexpected bracket encountered");
1550       break;
1551     case AsmToken::LParen:  SM.onLParen(); break;
1552     case AsmToken::RParen:  SM.onRParen(); break;
1553     }
1554     if (SM.hadError())
1555       return Error(Tok.getLoc(), "unknown token in expression");
1556 
1557     if (!Done && UpdateLocLex)
1558       End = consumeToken();
1559 
1560     PrevTK = TK;
1561   }
1562   return false;
1563 }
1564 
RewriteIntelExpression(IntelExprStateMachine & SM,SMLoc Start,SMLoc End)1565 void X86AsmParser::RewriteIntelExpression(IntelExprStateMachine &SM,
1566                                           SMLoc Start, SMLoc End) {
1567   SMLoc Loc = Start;
1568   unsigned ExprLen = End.getPointer() - Start.getPointer();
1569   // Skip everything before a symbol displacement (if we have one)
1570   if (SM.getSym()) {
1571     StringRef SymName = SM.getSymName();
1572     if (unsigned Len =  SymName.data() - Start.getPointer())
1573       InstInfo->AsmRewrites->emplace_back(AOK_Skip, Start, Len);
1574     Loc = SMLoc::getFromPointer(SymName.data() + SymName.size());
1575     ExprLen = End.getPointer() - (SymName.data() + SymName.size());
1576     // If we have only a symbol than there's no need for complex rewrite,
1577     // simply skip everything after it
1578     if (!(SM.getBaseReg() || SM.getIndexReg() || SM.getImm())) {
1579       if (ExprLen)
1580         InstInfo->AsmRewrites->emplace_back(AOK_Skip, Loc, ExprLen);
1581       return;
1582     }
1583   }
1584   // Build an Intel Expression rewrite
1585   StringRef BaseRegStr;
1586   StringRef IndexRegStr;
1587   if (SM.getBaseReg())
1588     BaseRegStr = X86IntelInstPrinter::getRegisterName(SM.getBaseReg());
1589   if (SM.getIndexReg())
1590     IndexRegStr = X86IntelInstPrinter::getRegisterName(SM.getIndexReg());
1591   // Emit it
1592   IntelExpr Expr(BaseRegStr, IndexRegStr, SM.getScale(), SM.getImm(), SM.isMemExpr());
1593   InstInfo->AsmRewrites->emplace_back(Loc, ExprLen, Expr);
1594 }
1595 
// Resolves an identifier used in inline assembly through the frontend
// callback. Inline assembly may use variable names with namespace alias
// qualifiers, so the extent of the identifier is taken from what the frontend
// lookup reports rather than from a single lexer token.
//
// Val        - set to a symbol reference for the identifier; left null when
//              the lookup classifies the identifier as an enum value.
// Identifier - on entry, points at the start of the identifier text; on exit,
//              holds the text as adjusted by the frontend lookup.
// Info       - filled in by the frontend lookup.
// IsUnevaluatedOperand - forwarded unchanged to the frontend lookup.
// End        - set to the end location of the last token consumed.
//
// Returns false on every path visible here.
bool X86AsmParser::ParseIntelInlineAsmIdentifier(const MCExpr *&Val,
                                                 StringRef &Identifier,
                                                 InlineAsmIdentifierInfo &Info,
                                                 bool IsUnevaluatedOperand,
                                                 SMLoc &End) {
  MCAsmParser &Parser = getParser();
  assert(isParsingInlineAsm() && "Expected to be parsing inline assembly.");
  Val = nullptr;

  // Hand the frontend the text starting at the identifier; LineBuf is passed
  // to the lookup and its size afterwards is used as the claimed extent.
  StringRef LineBuf(Identifier.data());
  SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);

  const AsmToken &Tok = Parser.getTok();
  SMLoc Loc = Tok.getLoc();

  // Advance the token stream until the end of the current token is
  // after the end of what the frontend claimed.
  const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
  do {
    End = Tok.getEndLoc();
    getLexer().Lex();
  } while (End.getPointer() < EndPtr);
  Identifier = LineBuf;

  // The frontend should end parsing on an assembler token boundary, unless it
  // failed parsing.
  assert((End.getPointer() == EndPtr ||
          Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) &&
          "frontend claimed part of a token?");

  // If the identifier lookup was unsuccessful, assume that we are dealing with
  // a label.
  if (Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) {
    StringRef InternalName =
      SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
                                         Loc, false);
    assert(InternalName.size() && "We should have an internal name here.");
    // Push a rewrite for replacing the identifier name with the internal name.
    InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(),
                                        InternalName);
  } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
    // Enum values get no symbol reference; Val stays null.
    return false;
  // Create the symbol reference.
  MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
  MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
  Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext());
  return false;
}
1645 
1646 //ParseRoundingModeOp - Parse AVX-512 rounding mode operand
1647 std::unique_ptr<X86Operand>
ParseRoundingModeOp(SMLoc Start)1648 X86AsmParser::ParseRoundingModeOp(SMLoc Start) {
1649   MCAsmParser &Parser = getParser();
1650   const AsmToken &Tok = Parser.getTok();
1651   // Eat "{" and mark the current place.
1652   const SMLoc consumedToken = consumeToken();
1653   if (Tok.getIdentifier().startswith("r")){
1654     int rndMode = StringSwitch<int>(Tok.getIdentifier())
1655       .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
1656       .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF)
1657       .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF)
1658       .Case("rz", X86::STATIC_ROUNDING::TO_ZERO)
1659       .Default(-1);
1660     if (-1 == rndMode)
1661       return ErrorOperand(Tok.getLoc(), "Invalid rounding mode.");
1662      Parser.Lex();  // Eat "r*" of r*-sae
1663     if (!getLexer().is(AsmToken::Minus))
1664       return ErrorOperand(Tok.getLoc(), "Expected - at this point");
1665     Parser.Lex();  // Eat "-"
1666     Parser.Lex();  // Eat the sae
1667     if (!getLexer().is(AsmToken::RCurly))
1668       return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1669     SMLoc End = Tok.getEndLoc();
1670     Parser.Lex();  // Eat "}"
1671     const MCExpr *RndModeOp =
1672       MCConstantExpr::create(rndMode, Parser.getContext());
1673     return X86Operand::CreateImm(RndModeOp, Start, End);
1674   }
1675   if(Tok.getIdentifier().equals("sae")){
1676     Parser.Lex();  // Eat the sae
1677     if (!getLexer().is(AsmToken::RCurly))
1678       return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1679     Parser.Lex();  // Eat "}"
1680     return X86Operand::CreateToken("{sae}", consumedToken);
1681   }
1682   return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1683 }
1684 
1685 /// Parse the '.' operator.
ParseIntelDotOperator(IntelExprStateMachine & SM,SMLoc & End)1686 bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End) {
1687   const AsmToken &Tok = getTok();
1688   unsigned Offset;
1689 
1690   // Drop the optional '.'.
1691   StringRef DotDispStr = Tok.getString();
1692   if (DotDispStr.startswith("."))
1693     DotDispStr = DotDispStr.drop_front(1);
1694 
1695   // .Imm gets lexed as a real.
1696   if (Tok.is(AsmToken::Real)) {
1697     APInt DotDisp;
1698     DotDispStr.getAsInteger(10, DotDisp);
1699     Offset = DotDisp.getZExtValue();
1700   } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1701     std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1702     if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1703                                            Offset))
1704       return Error(Tok.getLoc(), "Unable to lookup field reference!");
1705   } else
1706     return Error(Tok.getLoc(), "Unexpected token type!");
1707 
1708   // Eat the DotExpression and update End
1709   End = SMLoc::getFromPointer(DotDispStr.data());
1710   const char *DotExprEndLoc = DotDispStr.data() + DotDispStr.size();
1711   while (Tok.getLoc().getPointer() < DotExprEndLoc)
1712     Lex();
1713   SM.addImm(Offset);
1714   return false;
1715 }
1716 
1717 /// Parse the 'offset' operator.  This operator is used to specify the
1718 /// location rather then the content of a variable.
ParseIntelOffsetOfOperator()1719 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
1720   MCAsmParser &Parser = getParser();
1721   const AsmToken &Tok = Parser.getTok();
1722   SMLoc OffsetOfLoc = Tok.getLoc();
1723   Parser.Lex(); // Eat offset.
1724 
1725   const MCExpr *Val;
1726   InlineAsmIdentifierInfo Info;
1727   SMLoc Start = Tok.getLoc(), End;
1728   StringRef Identifier = Tok.getString();
1729   if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info,
1730                                     /*Unevaluated=*/false, End))
1731     return nullptr;
1732 
1733   void *Decl = nullptr;
1734   // FIXME: MS evaluates "offset <Constant>" to the underlying integral
1735   if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
1736     return ErrorOperand(Start, "offset operator cannot yet handle constants");
1737   else if (Info.isKind(InlineAsmIdentifierInfo::IK_Var))
1738     Decl = Info.Var.Decl;
1739   // Don't emit the offset operator.
1740   InstInfo->AsmRewrites->emplace_back(AOK_Skip, OffsetOfLoc, 7);
1741 
1742   // The offset operator will have an 'r' constraint, thus we need to create
1743   // register operand to ensure proper matching.  Just pick a GPR based on
1744   // the size of a pointer.
1745   bool Parse32 = is32BitMode() || Code16GCC;
1746   unsigned RegNo = is64BitMode() ? X86::RBX : (Parse32 ? X86::EBX : X86::BX);
1747 
1748   return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1749                                OffsetOfLoc, Identifier, Decl);
1750 }
1751 
1752 // Query a candidate string for being an Intel assembly operator
1753 // Report back its kind, or IOK_INVALID if does not evaluated as a known one
IdentifyIntelInlineAsmOperator(StringRef Name)1754 unsigned X86AsmParser::IdentifyIntelInlineAsmOperator(StringRef Name) {
1755   return StringSwitch<unsigned>(Name)
1756     .Cases("TYPE","type",IOK_TYPE)
1757     .Cases("SIZE","size",IOK_SIZE)
1758     .Cases("LENGTH","length",IOK_LENGTH)
1759     .Cases("OFFSET","offset",IOK_OFFSET)
1760     .Default(IOK_INVALID);
1761 }
1762 
1763 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators.  The LENGTH operator
1764 /// returns the number of elements in an array.  It returns the value 1 for
1765 /// non-array variables.  The SIZE operator returns the size of a C or C++
1766 /// variable.  A variable's size is the product of its LENGTH and TYPE.  The
1767 /// TYPE operator returns the size of a C or C++ type or variable. If the
1768 /// variable is an array, TYPE returns the size of a single element.
ParseIntelInlineAsmOperator(unsigned OpKind)1769 unsigned X86AsmParser::ParseIntelInlineAsmOperator(unsigned OpKind) {
1770   MCAsmParser &Parser = getParser();
1771   const AsmToken &Tok = Parser.getTok();
1772   Parser.Lex(); // Eat operator.
1773 
1774   const MCExpr *Val = nullptr;
1775   InlineAsmIdentifierInfo Info;
1776   SMLoc Start = Tok.getLoc(), End;
1777   StringRef Identifier = Tok.getString();
1778   if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info,
1779                                     /*Unevaluated=*/true, End))
1780     return 0;
1781 
1782   if (!Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
1783     Error(Start, "unable to lookup expression");
1784     return 0;
1785   }
1786 
1787   unsigned CVal = 0;
1788   switch(OpKind) {
1789   default: llvm_unreachable("Unexpected operand kind!");
1790   case IOK_LENGTH: CVal = Info.Var.Length; break;
1791   case IOK_SIZE: CVal = Info.Var.Size; break;
1792   case IOK_TYPE: CVal = Info.Var.Type; break;
1793   }
1794 
1795   return CVal;
1796 }
1797 
ParseIntelMemoryOperandSize(unsigned & Size)1798 bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) {
1799   Size = StringSwitch<unsigned>(getTok().getString())
1800     .Cases("BYTE", "byte", 8)
1801     .Cases("WORD", "word", 16)
1802     .Cases("DWORD", "dword", 32)
1803     .Cases("FLOAT", "float", 32)
1804     .Cases("LONG", "long", 32)
1805     .Cases("FWORD", "fword", 48)
1806     .Cases("DOUBLE", "double", 64)
1807     .Cases("QWORD", "qword", 64)
1808     .Cases("MMWORD","mmword", 64)
1809     .Cases("XWORD", "xword", 80)
1810     .Cases("TBYTE", "tbyte", 80)
1811     .Cases("XMMWORD", "xmmword", 128)
1812     .Cases("YMMWORD", "ymmword", 256)
1813     .Cases("ZMMWORD", "zmmword", 512)
1814     .Default(0);
1815   if (Size) {
1816     const AsmToken &Tok = Lex(); // Eat operand size (e.g., byte, word).
1817     if (!(Tok.getString().equals("PTR") || Tok.getString().equals("ptr")))
1818       return Error(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
1819     Lex(); // Eat ptr.
1820   }
1821   return false;
1822 }
1823 
ParseIntelOperand()1824 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
1825   MCAsmParser &Parser = getParser();
1826   const AsmToken &Tok = Parser.getTok();
1827   SMLoc Start, End;
1828 
1829   // FIXME: Offset operator
1830   // Should be handled as part of immediate expression, as other operators
1831   // Currently, only supported as a stand-alone operand
1832   if (isParsingInlineAsm())
1833     if (IdentifyIntelInlineAsmOperator(Tok.getString()) == IOK_OFFSET)
1834       return ParseIntelOffsetOfOperator();
1835 
1836   // Parse optional Size directive.
1837   unsigned Size;
1838   if (ParseIntelMemoryOperandSize(Size))
1839     return nullptr;
1840   bool PtrInOperand = bool(Size);
1841 
1842   Start = Tok.getLoc();
1843 
1844   // Rounding mode operand.
1845   if (getLexer().is(AsmToken::LCurly))
1846     return ParseRoundingModeOp(Start);
1847 
1848   // Register operand.
1849   unsigned RegNo = 0;
1850   if (Tok.is(AsmToken::Identifier) && !ParseRegister(RegNo, Start, End)) {
1851     if (RegNo == X86::RIP)
1852       return ErrorOperand(Start, "rip can only be used as a base register");
1853     // A Register followed by ':' is considered a segment override
1854     if (Tok.isNot(AsmToken::Colon))
1855       return !PtrInOperand ? X86Operand::CreateReg(RegNo, Start, End) :
1856         ErrorOperand(Start, "expected memory operand after 'ptr', "
1857                             "found register operand instead");
1858     // An alleged segment override. check if we have a valid segment register
1859     if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
1860       return ErrorOperand(Start, "invalid segment register");
1861     // Eat ':' and update Start location
1862     Start = Lex().getLoc();
1863   }
1864 
1865   // Immediates and Memory
1866   IntelExprStateMachine SM;
1867   if (ParseIntelExpression(SM, End))
1868     return nullptr;
1869 
1870   if (isParsingInlineAsm())
1871     RewriteIntelExpression(SM, Start, Tok.getLoc());
1872 
1873   int64_t Imm = SM.getImm();
1874   const MCExpr *Disp = SM.getSym();
1875   const MCExpr *ImmDisp = MCConstantExpr::create(Imm, getContext());
1876   if (Disp && Imm)
1877     Disp = MCBinaryExpr::createAdd(Disp, ImmDisp, getContext());
1878   if (!Disp)
1879     Disp = ImmDisp;
1880 
1881   // RegNo != 0 specifies a valid segment register,
1882   // and we are parsing a segment override
1883   if (!SM.isMemExpr() && !RegNo)
1884     return X86Operand::CreateImm(Disp, Start, End);
1885 
1886   StringRef ErrMsg;
1887   unsigned BaseReg = SM.getBaseReg();
1888   unsigned IndexReg = SM.getIndexReg();
1889   unsigned Scale = SM.getScale();
1890 
1891   if (Scale == 0 && BaseReg != X86::ESP && BaseReg != X86::RSP &&
1892       (IndexReg == X86::ESP || IndexReg == X86::RSP))
1893     std::swap(BaseReg, IndexReg);
1894 
1895   // If BaseReg is a vector register and IndexReg is not, swap them unless
1896   // Scale was specified in which case it would be an error.
1897   if (Scale == 0 &&
1898       !(X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
1899         X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
1900         X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg)) &&
1901       (X86MCRegisterClasses[X86::VR128XRegClassID].contains(BaseReg) ||
1902        X86MCRegisterClasses[X86::VR256XRegClassID].contains(BaseReg) ||
1903        X86MCRegisterClasses[X86::VR512RegClassID].contains(BaseReg)))
1904     std::swap(BaseReg, IndexReg);
1905 
1906   if (Scale != 0 &&
1907       X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg))
1908     return ErrorOperand(Start, "16-bit addresses cannot have a scale");
1909 
1910   // If there was no explicit scale specified, change it to 1.
1911   if (Scale == 0)
1912     Scale = 1;
1913 
1914   // If this is a 16-bit addressing mode with the base and index in the wrong
1915   // order, swap them so CheckBaseRegAndIndexRegAndScale doesn't fail. It is
1916   // shared with att syntax where order matters.
1917   if ((BaseReg == X86::SI || BaseReg == X86::DI) &&
1918       (IndexReg == X86::BX || IndexReg == X86::BP))
1919     std::swap(BaseReg, IndexReg);
1920 
1921   if ((BaseReg || IndexReg) &&
1922       CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
1923                                       ErrMsg))
1924     return ErrorOperand(Start, ErrMsg);
1925   if (isParsingInlineAsm())
1926     return CreateMemForInlineAsm(RegNo, Disp, BaseReg, IndexReg,
1927                                  Scale, Start, End, Size, SM.getSymName(),
1928                                  SM.getIdentifierInfo());
1929   if (!(BaseReg || IndexReg || RegNo))
1930     return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size);
1931   return X86Operand::CreateMem(getPointerWidth(), RegNo, Disp,
1932                                BaseReg, IndexReg, Scale, Start, End, Size);
1933 }
1934 
ParseATTOperand()1935 std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
1936   MCAsmParser &Parser = getParser();
1937   switch (getLexer().getKind()) {
1938   default:
1939     // Parse a memory operand with no segment register.
1940     return ParseMemOperand(0, Parser.getTok().getLoc());
1941   case AsmToken::Percent: {
1942     // Read the register.
1943     unsigned RegNo;
1944     SMLoc Start, End;
1945     if (ParseRegister(RegNo, Start, End)) return nullptr;
1946     if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1947       Error(Start, "%eiz and %riz can only be used as index registers",
1948             SMRange(Start, End));
1949       return nullptr;
1950     }
1951     if (RegNo == X86::RIP) {
1952       Error(Start, "%rip can only be used as a base register",
1953             SMRange(Start, End));
1954       return nullptr;
1955     }
1956 
1957     // If this is a segment register followed by a ':', then this is the start
1958     // of a memory reference, otherwise this is a normal register reference.
1959     if (getLexer().isNot(AsmToken::Colon))
1960       return X86Operand::CreateReg(RegNo, Start, End);
1961 
1962     if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
1963       return ErrorOperand(Start, "invalid segment register");
1964 
1965     getParser().Lex(); // Eat the colon.
1966     return ParseMemOperand(RegNo, Start);
1967   }
1968   case AsmToken::Dollar: {
1969     // $42 -> immediate.
1970     SMLoc Start = Parser.getTok().getLoc(), End;
1971     Parser.Lex();
1972     const MCExpr *Val;
1973     if (getParser().parseExpression(Val, End))
1974       return nullptr;
1975     return X86Operand::CreateImm(Val, Start, End);
1976   }
1977   case AsmToken::LCurly:{
1978     SMLoc Start = Parser.getTok().getLoc();
1979     return ParseRoundingModeOp(Start);
1980   }
1981   }
1982 }
1983 
1984 // true on failure, false otherwise
1985 // If no {z} mark was found - Parser doesn't advance
ParseZ(std::unique_ptr<X86Operand> & Z,const SMLoc & StartLoc)1986 bool X86AsmParser::ParseZ(std::unique_ptr<X86Operand> &Z,
1987                           const SMLoc &StartLoc) {
1988   MCAsmParser &Parser = getParser();
1989   // Assuming we are just pass the '{' mark, quering the next token
1990   // Searched for {z}, but none was found. Return false, as no parsing error was
1991   // encountered
1992   if (!(getLexer().is(AsmToken::Identifier) &&
1993         (getLexer().getTok().getIdentifier() == "z")))
1994     return false;
1995   Parser.Lex(); // Eat z
1996   // Query and eat the '}' mark
1997   if (!getLexer().is(AsmToken::RCurly))
1998     return Error(getLexer().getLoc(), "Expected } at this point");
1999   Parser.Lex(); // Eat '}'
2000   // Assign Z with the {z} mark opernad
2001   Z = X86Operand::CreateToken("{z}", StartLoc);
2002   return false;
2003 }
2004 
2005 // true on failure, false otherwise
HandleAVX512Operand(OperandVector & Operands,const MCParsedAsmOperand & Op)2006 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
2007                                        const MCParsedAsmOperand &Op) {
2008   MCAsmParser &Parser = getParser();
2009   if (getLexer().is(AsmToken::LCurly)) {
2010     // Eat "{" and mark the current place.
2011     const SMLoc consumedToken = consumeToken();
2012     // Distinguish {1to<NUM>} from {%k<NUM>}.
2013     if(getLexer().is(AsmToken::Integer)) {
2014       // Parse memory broadcasting ({1to<NUM>}).
2015       if (getLexer().getTok().getIntVal() != 1)
2016         return TokError("Expected 1to<NUM> at this point");
2017       Parser.Lex();  // Eat "1" of 1to8
2018       if (!getLexer().is(AsmToken::Identifier) ||
2019           !getLexer().getTok().getIdentifier().startswith("to"))
2020         return TokError("Expected 1to<NUM> at this point");
2021       // Recognize only reasonable suffixes.
2022       const char *BroadcastPrimitive =
2023         StringSwitch<const char*>(getLexer().getTok().getIdentifier())
2024           .Case("to2",  "{1to2}")
2025           .Case("to4",  "{1to4}")
2026           .Case("to8",  "{1to8}")
2027           .Case("to16", "{1to16}")
2028           .Default(nullptr);
2029       if (!BroadcastPrimitive)
2030         return TokError("Invalid memory broadcast primitive.");
2031       Parser.Lex();  // Eat "toN" of 1toN
2032       if (!getLexer().is(AsmToken::RCurly))
2033         return TokError("Expected } at this point");
2034       Parser.Lex();  // Eat "}"
2035       Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
2036                                                  consumedToken));
2037       // No AVX512 specific primitives can pass
2038       // after memory broadcasting, so return.
2039       return false;
2040     } else {
2041       // Parse either {k}{z}, {z}{k}, {k} or {z}
2042       // last one have no meaning, but GCC accepts it
2043       // Currently, we're just pass a '{' mark
2044       std::unique_ptr<X86Operand> Z;
2045       if (ParseZ(Z, consumedToken))
2046         return true;
2047       // Reaching here means that parsing of the allegadly '{z}' mark yielded
2048       // no errors.
2049       // Query for the need of further parsing for a {%k<NUM>} mark
2050       if (!Z || getLexer().is(AsmToken::LCurly)) {
2051         SMLoc StartLoc = Z ? consumeToken() : consumedToken;
2052         // Parse an op-mask register mark ({%k<NUM>}), which is now to be
2053         // expected
2054         unsigned RegNo;
2055         SMLoc RegLoc;
2056         if (!ParseRegister(RegNo, RegLoc, StartLoc) &&
2057             X86MCRegisterClasses[X86::VK1RegClassID].contains(RegNo)) {
2058           if (RegNo == X86::K0)
2059             return Error(RegLoc, "Register k0 can't be used as write mask");
2060           if (!getLexer().is(AsmToken::RCurly))
2061             return Error(getLexer().getLoc(), "Expected } at this point");
2062           Operands.push_back(X86Operand::CreateToken("{", StartLoc));
2063           Operands.push_back(
2064               X86Operand::CreateReg(RegNo, StartLoc, StartLoc));
2065           Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
2066         } else
2067           return Error(getLexer().getLoc(),
2068                         "Expected an op-mask register at this point");
2069         // {%k<NUM>} mark is found, inquire for {z}
2070         if (getLexer().is(AsmToken::LCurly) && !Z) {
2071           // Have we've found a parsing error, or found no (expected) {z} mark
2072           // - report an error
2073           if (ParseZ(Z, consumeToken()) || !Z)
2074             return Error(getLexer().getLoc(),
2075                          "Expected a {z} mark at this point");
2076 
2077         }
2078         // '{z}' on its own is meaningless, hence should be ignored.
2079         // on the contrary - have it been accompanied by a K register,
2080         // allow it.
2081         if (Z)
2082           Operands.push_back(std::move(Z));
2083       }
2084     }
2085   }
2086   return false;
2087 }
2088 
2089 /// ParseMemOperand: segment: disp(basereg, indexreg, scale).  The '%ds:' prefix
2090 /// has already been parsed if present.
ParseMemOperand(unsigned SegReg,SMLoc MemStart)2091 std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
2092                                                           SMLoc MemStart) {
2093 
2094   MCAsmParser &Parser = getParser();
2095   // We have to disambiguate a parenthesized expression "(4+5)" from the start
2096   // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)".  The
2097   // only way to do this without lookahead is to eat the '(' and see what is
2098   // after it.
2099   const MCExpr *Disp = MCConstantExpr::create(0, getParser().getContext());
2100   if (getLexer().isNot(AsmToken::LParen)) {
2101     SMLoc ExprEnd;
2102     if (getParser().parseExpression(Disp, ExprEnd)) return nullptr;
2103     // Disp may be a variable, handle register values.
2104     if (auto *RE = dyn_cast<X86MCExpr>(Disp))
2105       return X86Operand::CreateReg(RE->getRegNo(), MemStart, ExprEnd);
2106 
2107     // After parsing the base expression we could either have a parenthesized
2108     // memory address or not.  If not, return now.  If so, eat the (.
2109     if (getLexer().isNot(AsmToken::LParen)) {
2110       // Unless we have a segment register, treat this as an immediate.
2111       if (SegReg == 0)
2112         return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, ExprEnd);
2113       return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
2114                                    MemStart, ExprEnd);
2115     }
2116 
2117     // Eat the '('.
2118     Parser.Lex();
2119   } else {
2120     // Okay, we have a '('.  We don't know if this is an expression or not, but
2121     // so we have to eat the ( to see beyond it.
2122     SMLoc LParenLoc = Parser.getTok().getLoc();
2123     Parser.Lex(); // Eat the '('.
2124 
2125     if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
2126       // Nothing to do here, fall into the code below with the '(' part of the
2127       // memory operand consumed.
2128     } else {
2129       SMLoc ExprEnd;
2130       getLexer().UnLex(AsmToken(AsmToken::LParen, "("));
2131 
2132       // It must be either an parenthesized expression, or an expression that
2133       // begins from a parenthesized expression, parse it now. Example: (1+2) or
2134       // (1+2)+3
2135       if (getParser().parseExpression(Disp, ExprEnd))
2136         return nullptr;
2137 
2138       // After parsing the base expression we could either have a parenthesized
2139       // memory address or not.  If not, return now.  If so, eat the (.
2140       if (getLexer().isNot(AsmToken::LParen)) {
2141         // Unless we have a segment register, treat this as an immediate.
2142         if (SegReg == 0)
2143           return X86Operand::CreateMem(getPointerWidth(), Disp, LParenLoc,
2144                                        ExprEnd);
2145         return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
2146                                      MemStart, ExprEnd);
2147       }
2148 
2149       // Eat the '('.
2150       Parser.Lex();
2151     }
2152   }
2153 
2154   // If we reached here, then we just ate the ( of the memory operand.  Process
2155   // the rest of the memory operand.
2156   unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
2157   SMLoc IndexLoc, BaseLoc;
2158 
2159   if (getLexer().is(AsmToken::Percent)) {
2160     SMLoc StartLoc, EndLoc;
2161     BaseLoc = Parser.getTok().getLoc();
2162     if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr;
2163     if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
2164       Error(StartLoc, "eiz and riz can only be used as index registers",
2165             SMRange(StartLoc, EndLoc));
2166       return nullptr;
2167     }
2168   }
2169 
2170   if (getLexer().is(AsmToken::Comma)) {
2171     Parser.Lex(); // Eat the comma.
2172     IndexLoc = Parser.getTok().getLoc();
2173 
2174     // Following the comma we should have either an index register, or a scale
2175     // value. We don't support the later form, but we want to parse it
2176     // correctly.
2177     //
2178     // Not that even though it would be completely consistent to support syntax
2179     // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
2180     if (getLexer().is(AsmToken::Percent)) {
2181       SMLoc L;
2182       if (ParseRegister(IndexReg, L, L))
2183         return nullptr;
2184       if (BaseReg == X86::RIP) {
2185         Error(IndexLoc, "%rip as base register can not have an index register");
2186         return nullptr;
2187       }
2188       if (IndexReg == X86::RIP) {
2189         Error(IndexLoc, "%rip is not allowed as an index register");
2190         return nullptr;
2191       }
2192 
2193       if (getLexer().isNot(AsmToken::RParen)) {
2194         // Parse the scale amount:
2195         //  ::= ',' [scale-expression]
2196         if (parseToken(AsmToken::Comma, "expected comma in scale expression"))
2197           return nullptr;
2198 
2199         if (getLexer().isNot(AsmToken::RParen)) {
2200           SMLoc Loc = Parser.getTok().getLoc();
2201 
2202           int64_t ScaleVal;
2203           if (getParser().parseAbsoluteExpression(ScaleVal)){
2204             Error(Loc, "expected scale expression");
2205             return nullptr;
2206           }
2207 
2208           // Validate the scale amount.
2209           if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
2210               ScaleVal != 1) {
2211             Error(Loc, "scale factor in 16-bit address must be 1");
2212             return nullptr;
2213           }
2214           if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 &&
2215               ScaleVal != 8) {
2216             Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
2217             return nullptr;
2218           }
2219           Scale = (unsigned)ScaleVal;
2220         }
2221       }
2222     } else if (getLexer().isNot(AsmToken::RParen)) {
2223       // A scale amount without an index is ignored.
2224       // index.
2225       SMLoc Loc = Parser.getTok().getLoc();
2226 
2227       int64_t Value;
2228       if (getParser().parseAbsoluteExpression(Value))
2229         return nullptr;
2230 
2231       if (Value != 1)
2232         Warning(Loc, "scale factor without index register is ignored");
2233       Scale = 1;
2234     }
2235   }
2236 
2237   // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
2238   SMLoc MemEnd = Parser.getTok().getEndLoc();
2239   if (parseToken(AsmToken::RParen, "unexpected token in memory operand"))
2240     return nullptr;
2241 
2242   // This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" ->
2243   // "outb %al, %dx".  Out doesn't take a memory form, but this is a widely
2244   // documented form in various unofficial manuals, so a lot of code uses it.
2245   if (BaseReg == X86::DX && IndexReg == 0 && Scale == 1 &&
2246       SegReg == 0 && isa<MCConstantExpr>(Disp) &&
2247       cast<MCConstantExpr>(Disp)->getValue() == 0)
2248     return X86Operand::CreateDXReg(BaseLoc, BaseLoc);
2249 
2250   StringRef ErrMsg;
2251   if (CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
2252                                       ErrMsg)) {
2253     Error(BaseLoc, ErrMsg);
2254     return nullptr;
2255   }
2256 
2257   if (SegReg || BaseReg || IndexReg)
2258     return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
2259                                  IndexReg, Scale, MemStart, MemEnd);
2260   return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, MemEnd);
2261 }
2262 
2263 // Parse either a standard primary expression or a register.
parsePrimaryExpr(const MCExpr * & Res,SMLoc & EndLoc)2264 bool X86AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
2265   MCAsmParser &Parser = getParser();
2266   if (Parser.parsePrimaryExpr(Res, EndLoc)) {
2267     SMLoc StartLoc = Parser.getTok().getLoc();
2268     // Normal Expression parse fails, check if it could be a register.
2269     unsigned RegNo;
2270     bool TryRegParse =
2271         getTok().is(AsmToken::Percent) ||
2272         (isParsingIntelSyntax() && getTok().is(AsmToken::Identifier));
2273     if (!TryRegParse || ParseRegister(RegNo, StartLoc, EndLoc))
2274       return true;
2275     // Clear previous parse error and return correct expression.
2276     Parser.clearPendingErrors();
2277     Res = X86MCExpr::create(RegNo, Parser.getContext());
2278     return false;
2279   }
2280 
2281   return false;
2282 }
2283 
ParseInstruction(ParseInstructionInfo & Info,StringRef Name,SMLoc NameLoc,OperandVector & Operands)2284 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
2285                                     SMLoc NameLoc, OperandVector &Operands) {
2286   MCAsmParser &Parser = getParser();
2287   InstInfo = &Info;
2288   StringRef PatchedName = Name;
2289 
2290   if ((Name.equals("jmp") || Name.equals("jc") || Name.equals("jz")) &&
2291       isParsingIntelSyntax() && isParsingInlineAsm()) {
2292     StringRef NextTok = Parser.getTok().getString();
2293     if (NextTok == "short") {
2294       SMLoc NameEndLoc =
2295           NameLoc.getFromPointer(NameLoc.getPointer() + Name.size());
2296       // Eat the short keyword
2297       Parser.Lex();
2298       // MS ignores the short keyword, it determines the jmp type based
2299       // on the distance of the label
2300       InstInfo->AsmRewrites->emplace_back(AOK_Skip, NameEndLoc,
2301                                           NextTok.size() + 1);
2302     }
2303   }
2304 
2305   // FIXME: Hack to recognize setneb as setne.
2306   if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
2307       PatchedName != "setb" && PatchedName != "setnb")
2308     PatchedName = PatchedName.substr(0, Name.size()-1);
2309 
2310   // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
2311   if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
2312       (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
2313        PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
2314     bool IsVCMP = PatchedName[0] == 'v';
2315     unsigned CCIdx = IsVCMP ? 4 : 3;
2316     unsigned ComparisonCode = StringSwitch<unsigned>(
2317       PatchedName.slice(CCIdx, PatchedName.size() - 2))
2318       .Case("eq",       0x00)
2319       .Case("eq_oq",    0x00)
2320       .Case("lt",       0x01)
2321       .Case("lt_os",    0x01)
2322       .Case("le",       0x02)
2323       .Case("le_os",    0x02)
2324       .Case("unord",    0x03)
2325       .Case("unord_q",  0x03)
2326       .Case("neq",      0x04)
2327       .Case("neq_uq",   0x04)
2328       .Case("nlt",      0x05)
2329       .Case("nlt_us",   0x05)
2330       .Case("nle",      0x06)
2331       .Case("nle_us",   0x06)
2332       .Case("ord",      0x07)
2333       .Case("ord_q",    0x07)
2334       /* AVX only from here */
2335       .Case("eq_uq",    0x08)
2336       .Case("nge",      0x09)
2337       .Case("nge_us",   0x09)
2338       .Case("ngt",      0x0A)
2339       .Case("ngt_us",   0x0A)
2340       .Case("false",    0x0B)
2341       .Case("false_oq", 0x0B)
2342       .Case("neq_oq",   0x0C)
2343       .Case("ge",       0x0D)
2344       .Case("ge_os",    0x0D)
2345       .Case("gt",       0x0E)
2346       .Case("gt_os",    0x0E)
2347       .Case("true",     0x0F)
2348       .Case("true_uq",  0x0F)
2349       .Case("eq_os",    0x10)
2350       .Case("lt_oq",    0x11)
2351       .Case("le_oq",    0x12)
2352       .Case("unord_s",  0x13)
2353       .Case("neq_us",   0x14)
2354       .Case("nlt_uq",   0x15)
2355       .Case("nle_uq",   0x16)
2356       .Case("ord_s",    0x17)
2357       .Case("eq_us",    0x18)
2358       .Case("nge_uq",   0x19)
2359       .Case("ngt_uq",   0x1A)
2360       .Case("false_os", 0x1B)
2361       .Case("neq_os",   0x1C)
2362       .Case("ge_oq",    0x1D)
2363       .Case("gt_oq",    0x1E)
2364       .Case("true_us",  0x1F)
2365       .Default(~0U);
2366     if (ComparisonCode != ~0U && (IsVCMP || ComparisonCode < 8)) {
2367 
2368       Operands.push_back(X86Operand::CreateToken(PatchedName.slice(0, CCIdx),
2369                                                  NameLoc));
2370 
2371       const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2372                                                    getParser().getContext());
2373       Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2374 
2375       PatchedName = PatchedName.substr(PatchedName.size() - 2);
2376     }
2377   }
2378 
2379   // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2380   if (PatchedName.startswith("vpcmp") &&
2381       (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2382        PatchedName.endswith("d") || PatchedName.endswith("q"))) {
2383     unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2384     unsigned ComparisonCode = StringSwitch<unsigned>(
2385       PatchedName.slice(5, PatchedName.size() - CCIdx))
2386       .Case("eq",    0x0) // Only allowed on unsigned. Checked below.
2387       .Case("lt",    0x1)
2388       .Case("le",    0x2)
2389       //.Case("false", 0x3) // Not a documented alias.
2390       .Case("neq",   0x4)
2391       .Case("nlt",   0x5)
2392       .Case("nle",   0x6)
2393       //.Case("true",  0x7) // Not a documented alias.
2394       .Default(~0U);
2395     if (ComparisonCode != ~0U && (ComparisonCode != 0 || CCIdx == 2)) {
2396       Operands.push_back(X86Operand::CreateToken("vpcmp", NameLoc));
2397 
2398       const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2399                                                    getParser().getContext());
2400       Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2401 
2402       PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
2403     }
2404   }
2405 
2406   // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2407   if (PatchedName.startswith("vpcom") &&
2408       (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2409        PatchedName.endswith("d") || PatchedName.endswith("q"))) {
2410     unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2411     unsigned ComparisonCode = StringSwitch<unsigned>(
2412       PatchedName.slice(5, PatchedName.size() - CCIdx))
2413       .Case("lt",    0x0)
2414       .Case("le",    0x1)
2415       .Case("gt",    0x2)
2416       .Case("ge",    0x3)
2417       .Case("eq",    0x4)
2418       .Case("neq",   0x5)
2419       .Case("false", 0x6)
2420       .Case("true",  0x7)
2421       .Default(~0U);
2422     if (ComparisonCode != ~0U) {
2423       Operands.push_back(X86Operand::CreateToken("vpcom", NameLoc));
2424 
2425       const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2426                                                    getParser().getContext());
2427       Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2428 
2429       PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
2430     }
2431   }
2432 
2433 
2434   // Determine whether this is an instruction prefix.
2435   // FIXME:
2436   // Enhance prefixes integrity robustness. for example, following forms
2437   // are currently tolerated:
2438   // repz repnz <insn>    ; GAS errors for the use of two similar prefixes
2439   // lock addq %rax, %rbx ; Destination operand must be of memory type
2440   // xacquire <insn>      ; xacquire must be accompanied by 'lock'
2441   bool isPrefix = StringSwitch<bool>(Name)
2442                       .Cases("rex64", "data32", "data16", true)
2443                       .Cases("xacquire", "xrelease", true)
2444                       .Cases("acquire", "release", isParsingIntelSyntax())
2445                       .Default(false);
2446 
2447   auto isLockRepeatNtPrefix = [](StringRef N) {
2448     return StringSwitch<bool>(N)
2449         .Cases("lock", "rep", "repe", "repz", "repne", "repnz", "notrack", true)
2450         .Default(false);
2451   };
2452 
2453   bool CurlyAsEndOfStatement = false;
2454 
2455   unsigned Flags = X86::IP_NO_PREFIX;
2456   while (isLockRepeatNtPrefix(Name.lower())) {
2457     unsigned Prefix =
2458         StringSwitch<unsigned>(Name)
2459             .Cases("lock", "lock", X86::IP_HAS_LOCK)
2460             .Cases("rep", "repe", "repz", X86::IP_HAS_REPEAT)
2461             .Cases("repne", "repnz", X86::IP_HAS_REPEAT_NE)
2462             .Cases("notrack", "notrack", X86::IP_HAS_NOTRACK)
2463             .Default(X86::IP_NO_PREFIX); // Invalid prefix (impossible)
2464     Flags |= Prefix;
2465     if (getLexer().is(AsmToken::EndOfStatement)) {
2466       // We don't have real instr with the given prefix
2467       //  let's use the prefix as the instr.
2468       // TODO: there could be several prefixes one after another
2469       Flags = X86::IP_NO_PREFIX;
2470       break;
2471     }
2472     Name = Parser.getTok().getString();
2473     Parser.Lex(); // eat the prefix
2474     // Hack: we could have something like "rep # some comment" or
2475     //    "lock; cmpxchg16b $1" or "lock\0A\09incl" or "lock/incl"
2476     while (Name.startswith(";") || Name.startswith("\n") ||
2477            Name.startswith("#") || Name.startswith("\t") ||
2478            Name.startswith("/")) {
2479       Name = Parser.getTok().getString();
2480       Parser.Lex(); // go to next prefix or instr
2481     }
2482   }
2483 
2484   if (Flags)
2485     PatchedName = Name;
2486 
2487   // Hacks to handle 'data16' and 'data32'
2488   if (PatchedName == "data16" && is16BitMode()) {
2489     return Error(NameLoc, "redundant data16 prefix");
2490   }
2491   if (PatchedName == "data32") {
2492     if (is32BitMode())
2493       return Error(NameLoc, "redundant data32 prefix");
2494     if (is64BitMode())
2495       return Error(NameLoc, "'data32' is not supported in 64-bit mode");
2496     // Hack to 'data16' for the table lookup.
2497     PatchedName = "data16";
2498   }
2499 
2500   Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
2501 
2502   // This does the actual operand parsing.  Don't parse any more if we have a
2503   // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
2504   // just want to parse the "lock" as the first instruction and the "incl" as
2505   // the next one.
2506   if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
2507     // Parse '*' modifier.
2508     if (getLexer().is(AsmToken::Star))
2509       Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
2510 
2511     // Read the operands.
2512     while(1) {
2513       if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
2514         Operands.push_back(std::move(Op));
2515         if (HandleAVX512Operand(Operands, *Operands.back()))
2516           return true;
2517       } else {
2518          return true;
2519       }
2520       // check for comma and eat it
2521       if (getLexer().is(AsmToken::Comma))
2522         Parser.Lex();
2523       else
2524         break;
2525      }
2526 
2527     // In MS inline asm curly braces mark the beginning/end of a block,
2528     // therefore they should be interepreted as end of statement
2529     CurlyAsEndOfStatement =
2530         isParsingIntelSyntax() && isParsingInlineAsm() &&
2531         (getLexer().is(AsmToken::LCurly) || getLexer().is(AsmToken::RCurly));
2532     if (getLexer().isNot(AsmToken::EndOfStatement) && !CurlyAsEndOfStatement)
2533       return TokError("unexpected token in argument list");
2534   }
2535 
2536   // Consume the EndOfStatement or the prefix separator Slash
2537   if (getLexer().is(AsmToken::EndOfStatement) ||
2538       (isPrefix && getLexer().is(AsmToken::Slash)))
2539     Parser.Lex();
2540   else if (CurlyAsEndOfStatement)
2541     // Add an actual EndOfStatement before the curly brace
2542     Info.AsmRewrites->emplace_back(AOK_EndOfStatement,
2543                                    getLexer().getTok().getLoc(), 0);
2544 
2545   // This is for gas compatibility and cannot be done in td.
2546   // Adding "p" for some floating point with no argument.
2547   // For example: fsub --> fsubp
2548   bool IsFp =
2549     Name == "fsub" || Name == "fdiv" || Name == "fsubr" || Name == "fdivr";
2550   if (IsFp && Operands.size() == 1) {
2551     const char *Repl = StringSwitch<const char *>(Name)
2552       .Case("fsub", "fsubp")
2553       .Case("fdiv", "fdivp")
2554       .Case("fsubr", "fsubrp")
2555       .Case("fdivr", "fdivrp");
2556     static_cast<X86Operand &>(*Operands[0]).setTokenValue(Repl);
2557   }
2558 
2559   // Moving a 32 or 16 bit value into a segment register has the same
2560   // behavior. Modify such instructions to always take shorter form.
2561   if ((Name == "mov" || Name == "movw" || Name == "movl") &&
2562       (Operands.size() == 3)) {
2563     X86Operand &Op1 = (X86Operand &)*Operands[1];
2564     X86Operand &Op2 = (X86Operand &)*Operands[2];
2565     SMLoc Loc = Op1.getEndLoc();
2566     if (Op1.isReg() && Op2.isReg() &&
2567         X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(
2568             Op2.getReg()) &&
2569         (X86MCRegisterClasses[X86::GR16RegClassID].contains(Op1.getReg()) ||
2570          X86MCRegisterClasses[X86::GR32RegClassID].contains(Op1.getReg()))) {
2571       // Change instruction name to match new instruction.
2572       if (Name != "mov" && Name[3] == (is16BitMode() ? 'l' : 'w')) {
2573         Name = is16BitMode() ? "movw" : "movl";
2574         Operands[0] = X86Operand::CreateToken(Name, NameLoc);
2575       }
2576       // Select the correct equivalent 16-/32-bit source register.
2577       unsigned Reg =
2578           getX86SubSuperRegisterOrZero(Op1.getReg(), is16BitMode() ? 16 : 32);
2579       Operands[1] = X86Operand::CreateReg(Reg, Loc, Loc);
2580     }
2581   }
2582 
2583   // This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" ->
2584   // "outb %al, %dx".  Out doesn't take a memory form, but this is a widely
2585   // documented form in various unofficial manuals, so a lot of code uses it.
2586   if ((Name == "outb" || Name == "outsb" || Name == "outw" || Name == "outsw" ||
2587        Name == "outl" || Name == "outsl" || Name == "out" || Name == "outs") &&
2588       Operands.size() == 3) {
2589     X86Operand &Op = (X86Operand &)*Operands.back();
2590     if (Op.isDXReg())
2591       Operands.back() = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
2592                                               Op.getEndLoc());
2593   }
2594   // Same hack for "in[s]?[bwl]? (%dx), %al" -> "inb %dx, %al".
2595   if ((Name == "inb" || Name == "insb" || Name == "inw" || Name == "insw" ||
2596        Name == "inl" || Name == "insl" || Name == "in" || Name == "ins") &&
2597       Operands.size() == 3) {
2598     X86Operand &Op = (X86Operand &)*Operands[1];
2599     if (Op.isDXReg())
2600       Operands[1] = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
2601                                           Op.getEndLoc());
2602   }
2603 
2604   SmallVector<std::unique_ptr<MCParsedAsmOperand>, 2> TmpOperands;
2605   bool HadVerifyError = false;
2606 
2607   // Append default arguments to "ins[bwld]"
2608   if (Name.startswith("ins") &&
2609       (Operands.size() == 1 || Operands.size() == 3) &&
2610       (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd" ||
2611        Name == "ins")) {
2612 
2613     AddDefaultSrcDestOperands(TmpOperands,
2614                               X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
2615                               DefaultMemDIOperand(NameLoc));
2616     HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2617   }
2618 
2619   // Append default arguments to "outs[bwld]"
2620   if (Name.startswith("outs") &&
2621       (Operands.size() == 1 || Operands.size() == 3) &&
2622       (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2623        Name == "outsd" || Name == "outs")) {
2624     AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
2625                               X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2626     HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2627   }
2628 
2629   // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2630   // values of $SIREG according to the mode. It would be nice if this
2631   // could be achieved with InstAlias in the tables.
2632   if (Name.startswith("lods") &&
2633       (Operands.size() == 1 || Operands.size() == 2) &&
2634       (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2635        Name == "lodsl" || Name == "lodsd" || Name == "lodsq")) {
2636     TmpOperands.push_back(DefaultMemSIOperand(NameLoc));
2637     HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2638   }
2639 
2640   // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2641   // values of $DIREG according to the mode. It would be nice if this
2642   // could be achieved with InstAlias in the tables.
2643   if (Name.startswith("stos") &&
2644       (Operands.size() == 1 || Operands.size() == 2) &&
2645       (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2646        Name == "stosl" || Name == "stosd" || Name == "stosq")) {
2647     TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
2648     HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2649   }
2650 
2651   // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2652   // values of $DIREG according to the mode. It would be nice if this
2653   // could be achieved with InstAlias in the tables.
2654   if (Name.startswith("scas") &&
2655       (Operands.size() == 1 || Operands.size() == 2) &&
2656       (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2657        Name == "scasl" || Name == "scasd" || Name == "scasq")) {
2658     TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
2659     HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2660   }
2661 
2662   // Add default SI and DI operands to "cmps[bwlq]".
2663   if (Name.startswith("cmps") &&
2664       (Operands.size() == 1 || Operands.size() == 3) &&
2665       (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2666        Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
2667     AddDefaultSrcDestOperands(TmpOperands, DefaultMemDIOperand(NameLoc),
2668                               DefaultMemSIOperand(NameLoc));
2669     HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2670   }
2671 
2672   // Add default SI and DI operands to "movs[bwlq]".
2673   if (((Name.startswith("movs") &&
2674         (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2675          Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2676        (Name.startswith("smov") &&
2677         (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2678          Name == "smovl" || Name == "smovd" || Name == "smovq"))) &&
2679       (Operands.size() == 1 || Operands.size() == 3)) {
2680     if (Name == "movsd" && Operands.size() == 1 && !isParsingIntelSyntax())
2681       Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
2682     AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
2683                               DefaultMemDIOperand(NameLoc));
2684     HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2685   }
2686 
2687   // Check if we encountered an error for one the string insturctions
2688   if (HadVerifyError) {
2689     return HadVerifyError;
2690   }
2691 
2692   // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>.  Canonicalize to
2693   // "shift <op>".
2694   if ((Name.startswith("shr") || Name.startswith("sar") ||
2695        Name.startswith("shl") || Name.startswith("sal") ||
2696        Name.startswith("rcl") || Name.startswith("rcr") ||
2697        Name.startswith("rol") || Name.startswith("ror")) &&
2698       Operands.size() == 3) {
2699     if (isParsingIntelSyntax()) {
2700       // Intel syntax
2701       X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
2702       if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2703           cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2704         Operands.pop_back();
2705     } else {
2706       X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2707       if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2708           cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2709         Operands.erase(Operands.begin() + 1);
2710     }
2711   }
2712 
2713   // Transforms "int $3" into "int3" as a size optimization.  We can't write an
2714   // instalias with an immediate operand yet.
2715   if (Name == "int" && Operands.size() == 2) {
2716     X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2717     if (Op1.isImm())
2718       if (auto *CE = dyn_cast<MCConstantExpr>(Op1.getImm()))
2719         if (CE->getValue() == 3) {
2720           Operands.erase(Operands.begin() + 1);
2721           static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
2722         }
2723   }
2724 
2725   // Transforms "xlat mem8" into "xlatb"
2726   if ((Name == "xlat" || Name == "xlatb") && Operands.size() == 2) {
2727     X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2728     if (Op1.isMem8()) {
2729       Warning(Op1.getStartLoc(), "memory operand is only for determining the "
2730                                  "size, (R|E)BX will be used for the location");
2731       Operands.pop_back();
2732       static_cast<X86Operand &>(*Operands[0]).setTokenValue("xlatb");
2733     }
2734   }
2735 
2736   if (Flags)
2737     Operands.push_back(X86Operand::CreatePrefix(Flags, NameLoc, NameLoc));
2738   return false;
2739 }
2740 
processInstruction(MCInst & Inst,const OperandVector & Ops)2741 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
2742   return false;
2743 }
2744 
validateInstruction(MCInst & Inst,const OperandVector & Ops)2745 bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
2746   const MCRegisterInfo *MRI = getContext().getRegisterInfo();
2747 
2748   switch (Inst.getOpcode()) {
2749   case X86::VGATHERDPDYrm:
2750   case X86::VGATHERDPDrm:
2751   case X86::VGATHERDPSYrm:
2752   case X86::VGATHERDPSrm:
2753   case X86::VGATHERQPDYrm:
2754   case X86::VGATHERQPDrm:
2755   case X86::VGATHERQPSYrm:
2756   case X86::VGATHERQPSrm:
2757   case X86::VPGATHERDDYrm:
2758   case X86::VPGATHERDDrm:
2759   case X86::VPGATHERDQYrm:
2760   case X86::VPGATHERDQrm:
2761   case X86::VPGATHERQDYrm:
2762   case X86::VPGATHERQDrm:
2763   case X86::VPGATHERQQYrm:
2764   case X86::VPGATHERQQrm: {
2765     unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
2766     unsigned Mask = MRI->getEncodingValue(Inst.getOperand(1).getReg());
2767     unsigned Index =
2768       MRI->getEncodingValue(Inst.getOperand(3 + X86::AddrIndexReg).getReg());
2769     if (Dest == Mask || Dest == Index || Mask == Index)
2770       return Warning(Ops[0]->getStartLoc(), "mask, index, and destination "
2771                                             "registers should be distinct");
2772     break;
2773   }
2774   case X86::VGATHERDPDZ128rm:
2775   case X86::VGATHERDPDZ256rm:
2776   case X86::VGATHERDPDZrm:
2777   case X86::VGATHERDPSZ128rm:
2778   case X86::VGATHERDPSZ256rm:
2779   case X86::VGATHERDPSZrm:
2780   case X86::VGATHERQPDZ128rm:
2781   case X86::VGATHERQPDZ256rm:
2782   case X86::VGATHERQPDZrm:
2783   case X86::VGATHERQPSZ128rm:
2784   case X86::VGATHERQPSZ256rm:
2785   case X86::VGATHERQPSZrm:
2786   case X86::VPGATHERDDZ128rm:
2787   case X86::VPGATHERDDZ256rm:
2788   case X86::VPGATHERDDZrm:
2789   case X86::VPGATHERDQZ128rm:
2790   case X86::VPGATHERDQZ256rm:
2791   case X86::VPGATHERDQZrm:
2792   case X86::VPGATHERQDZ128rm:
2793   case X86::VPGATHERQDZ256rm:
2794   case X86::VPGATHERQDZrm:
2795   case X86::VPGATHERQQZ128rm:
2796   case X86::VPGATHERQQZ256rm:
2797   case X86::VPGATHERQQZrm: {
2798     unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
2799     unsigned Index =
2800       MRI->getEncodingValue(Inst.getOperand(4 + X86::AddrIndexReg).getReg());
2801     if (Dest == Index)
2802       return Warning(Ops[0]->getStartLoc(), "index and destination registers "
2803                                             "should be distinct");
2804     break;
2805   }
2806   case X86::V4FMADDPSrm:
2807   case X86::V4FMADDPSrmk:
2808   case X86::V4FMADDPSrmkz:
2809   case X86::V4FMADDSSrm:
2810   case X86::V4FMADDSSrmk:
2811   case X86::V4FMADDSSrmkz:
2812   case X86::V4FNMADDPSrm:
2813   case X86::V4FNMADDPSrmk:
2814   case X86::V4FNMADDPSrmkz:
2815   case X86::V4FNMADDSSrm:
2816   case X86::V4FNMADDSSrmk:
2817   case X86::V4FNMADDSSrmkz:
2818   case X86::VP4DPWSSDSrm:
2819   case X86::VP4DPWSSDSrmk:
2820   case X86::VP4DPWSSDSrmkz:
2821   case X86::VP4DPWSSDrm:
2822   case X86::VP4DPWSSDrmk:
2823   case X86::VP4DPWSSDrmkz: {
2824     unsigned Src2 = Inst.getOperand(Inst.getNumOperands() -
2825                                     X86::AddrNumOperands - 1).getReg();
2826     unsigned Src2Enc = MRI->getEncodingValue(Src2);
2827     if (Src2Enc % 4 != 0) {
2828       StringRef RegName = X86IntelInstPrinter::getRegisterName(Src2);
2829       unsigned GroupStart = (Src2Enc / 4) * 4;
2830       unsigned GroupEnd = GroupStart + 3;
2831       return Warning(Ops[0]->getStartLoc(),
2832                      "source register '" + RegName + "' implicitly denotes '" +
2833                      RegName.take_front(3) + Twine(GroupStart) + "' to '" +
2834                      RegName.take_front(3) + Twine(GroupEnd) +
2835                      "' source group");
2836     }
2837     break;
2838   }
2839   }
2840 
2841   return false;
2842 }
2843 
2844 static const char *getSubtargetFeatureName(uint64_t Val);
2845 
EmitInstruction(MCInst & Inst,OperandVector & Operands,MCStreamer & Out)2846 void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
2847                                    MCStreamer &Out) {
2848   Instrumentation->InstrumentAndEmitInstruction(
2849       Inst, Operands, getContext(), MII, Out,
2850       getParser().shouldPrintSchedInfo());
2851 }
2852 
MatchAndEmitInstruction(SMLoc IDLoc,unsigned & Opcode,OperandVector & Operands,MCStreamer & Out,uint64_t & ErrorInfo,bool MatchingInlineAsm)2853 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2854                                            OperandVector &Operands,
2855                                            MCStreamer &Out, uint64_t &ErrorInfo,
2856                                            bool MatchingInlineAsm) {
2857   if (isParsingIntelSyntax())
2858     return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
2859                                         MatchingInlineAsm);
2860   return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
2861                                     MatchingInlineAsm);
2862 }
2863 
MatchFPUWaitAlias(SMLoc IDLoc,X86Operand & Op,OperandVector & Operands,MCStreamer & Out,bool MatchingInlineAsm)2864 void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
2865                                      OperandVector &Operands, MCStreamer &Out,
2866                                      bool MatchingInlineAsm) {
2867   // FIXME: This should be replaced with a real .td file alias mechanism.
2868   // Also, MatchInstructionImpl should actually *do* the EmitInstruction
2869   // call.
2870   const char *Repl = StringSwitch<const char *>(Op.getToken())
2871                          .Case("finit", "fninit")
2872                          .Case("fsave", "fnsave")
2873                          .Case("fstcw", "fnstcw")
2874                          .Case("fstcww", "fnstcw")
2875                          .Case("fstenv", "fnstenv")
2876                          .Case("fstsw", "fnstsw")
2877                          .Case("fstsww", "fnstsw")
2878                          .Case("fclex", "fnclex")
2879                          .Default(nullptr);
2880   if (Repl) {
2881     MCInst Inst;
2882     Inst.setOpcode(X86::WAIT);
2883     Inst.setLoc(IDLoc);
2884     if (!MatchingInlineAsm)
2885       EmitInstruction(Inst, Operands, Out);
2886     Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
2887   }
2888 }
2889 
ErrorMissingFeature(SMLoc IDLoc,uint64_t ErrorInfo,bool MatchingInlineAsm)2890 bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
2891                                        bool MatchingInlineAsm) {
2892   assert(ErrorInfo && "Unknown missing feature!");
2893   SmallString<126> Msg;
2894   raw_svector_ostream OS(Msg);
2895   OS << "instruction requires:";
2896   uint64_t Mask = 1;
2897   for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2898     if (ErrorInfo & Mask)
2899       OS << ' ' << getSubtargetFeatureName(ErrorInfo & Mask);
2900     Mask <<= 1;
2901   }
2902   return Error(IDLoc, OS.str(), SMRange(), MatchingInlineAsm);
2903 }
2904 
getPrefixes(OperandVector & Operands)2905 static unsigned getPrefixes(OperandVector &Operands) {
2906   unsigned Result = 0;
2907   X86Operand &Prefix = static_cast<X86Operand &>(*Operands.back());
2908   if (Prefix.isPrefix()) {
2909     Result = Prefix.getPrefix();
2910     Operands.pop_back();
2911   }
2912   return Result;
2913 }
2914 
MatchAndEmitATTInstruction(SMLoc IDLoc,unsigned & Opcode,OperandVector & Operands,MCStreamer & Out,uint64_t & ErrorInfo,bool MatchingInlineAsm)2915 bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
2916                                               OperandVector &Operands,
2917                                               MCStreamer &Out,
2918                                               uint64_t &ErrorInfo,
2919                                               bool MatchingInlineAsm) {
2920   assert(!Operands.empty() && "Unexpect empty operand list!");
2921   X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2922   assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2923   SMRange EmptyRange = None;
2924 
2925   // First, handle aliases that expand to multiple instructions.
2926   MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
2927 
2928   bool WasOriginallyInvalidOperand = false;
2929   unsigned Prefixes = getPrefixes(Operands);
2930 
2931   MCInst Inst;
2932 
2933   if (Prefixes)
2934     Inst.setFlags(Prefixes);
2935 
2936   // First, try a direct match.
2937   switch (MatchInstruction(Operands, Inst, ErrorInfo, MatchingInlineAsm,
2938                            isParsingIntelSyntax())) {
2939   default: llvm_unreachable("Unexpected match result!");
2940   case Match_Success:
2941     if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
2942       return true;
2943     // Some instructions need post-processing to, for example, tweak which
2944     // encoding is selected. Loop on it while changes happen so the
2945     // individual transformations can chain off each other.
2946     if (!MatchingInlineAsm)
2947       while (processInstruction(Inst, Operands))
2948         ;
2949 
2950     Inst.setLoc(IDLoc);
2951     if (!MatchingInlineAsm)
2952       EmitInstruction(Inst, Operands, Out);
2953     Opcode = Inst.getOpcode();
2954     return false;
2955   case Match_MissingFeature:
2956     return ErrorMissingFeature(IDLoc, ErrorInfo, MatchingInlineAsm);
2957   case Match_InvalidOperand:
2958     WasOriginallyInvalidOperand = true;
2959     break;
2960   case Match_MnemonicFail:
2961     break;
2962   }
2963 
2964   // FIXME: Ideally, we would only attempt suffix matches for things which are
2965   // valid prefixes, and we could just infer the right unambiguous
2966   // type. However, that requires substantially more matcher support than the
2967   // following hack.
2968 
2969   // Change the operand to point to a temporary token.
2970   StringRef Base = Op.getToken();
2971   SmallString<16> Tmp;
2972   Tmp += Base;
2973   Tmp += ' ';
2974   Op.setTokenValue(Tmp);
2975 
2976   // If this instruction starts with an 'f', then it is a floating point stack
2977   // instruction.  These come in up to three forms for 32-bit, 64-bit, and
2978   // 80-bit floating point, which use the suffixes s,l,t respectively.
2979   //
2980   // Otherwise, we assume that this may be an integer instruction, which comes
2981   // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
2982   const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2983 
2984   // Check for the various suffix matches.
2985   uint64_t ErrorInfoIgnore;
2986   uint64_t ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
2987   unsigned Match[4];
2988 
2989   for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
2990     Tmp.back() = Suffixes[I];
2991     Match[I] = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
2992                                 MatchingInlineAsm, isParsingIntelSyntax());
2993     // If this returned as a missing feature failure, remember that.
2994     if (Match[I] == Match_MissingFeature)
2995       ErrorInfoMissingFeature = ErrorInfoIgnore;
2996   }
2997 
2998   // Restore the old token.
2999   Op.setTokenValue(Base);
3000 
3001   // If exactly one matched, then we treat that as a successful match (and the
3002   // instruction will already have been filled in correctly, since the failing
3003   // matches won't have modified it).
3004   unsigned NumSuccessfulMatches =
3005       std::count(std::begin(Match), std::end(Match), Match_Success);
3006   if (NumSuccessfulMatches == 1) {
3007     Inst.setLoc(IDLoc);
3008     if (!MatchingInlineAsm)
3009       EmitInstruction(Inst, Operands, Out);
3010     Opcode = Inst.getOpcode();
3011     return false;
3012   }
3013 
3014   // Otherwise, the match failed, try to produce a decent error message.
3015 
3016   // If we had multiple suffix matches, then identify this as an ambiguous
3017   // match.
3018   if (NumSuccessfulMatches > 1) {
3019     char MatchChars[4];
3020     unsigned NumMatches = 0;
3021     for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I)
3022       if (Match[I] == Match_Success)
3023         MatchChars[NumMatches++] = Suffixes[I];
3024 
3025     SmallString<126> Msg;
3026     raw_svector_ostream OS(Msg);
3027     OS << "ambiguous instructions require an explicit suffix (could be ";
3028     for (unsigned i = 0; i != NumMatches; ++i) {
3029       if (i != 0)
3030         OS << ", ";
3031       if (i + 1 == NumMatches)
3032         OS << "or ";
3033       OS << "'" << Base << MatchChars[i] << "'";
3034     }
3035     OS << ")";
3036     Error(IDLoc, OS.str(), EmptyRange, MatchingInlineAsm);
3037     return true;
3038   }
3039 
3040   // Okay, we know that none of the variants matched successfully.
3041 
3042   // If all of the instructions reported an invalid mnemonic, then the original
3043   // mnemonic was invalid.
3044   if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
3045     if (!WasOriginallyInvalidOperand) {
3046       return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
3047                    Op.getLocRange(), MatchingInlineAsm);
3048     }
3049 
3050     // Recover location info for the operand if we know which was the problem.
3051     if (ErrorInfo != ~0ULL) {
3052       if (ErrorInfo >= Operands.size())
3053         return Error(IDLoc, "too few operands for instruction", EmptyRange,
3054                      MatchingInlineAsm);
3055 
3056       X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
3057       if (Operand.getStartLoc().isValid()) {
3058         SMRange OperandRange = Operand.getLocRange();
3059         return Error(Operand.getStartLoc(), "invalid operand for instruction",
3060                      OperandRange, MatchingInlineAsm);
3061       }
3062     }
3063 
3064     return Error(IDLoc, "invalid operand for instruction", EmptyRange,
3065                  MatchingInlineAsm);
3066   }
3067 
3068   // If one instruction matched with a missing feature, report this as a
3069   // missing feature.
3070   if (std::count(std::begin(Match), std::end(Match),
3071                  Match_MissingFeature) == 1) {
3072     ErrorInfo = ErrorInfoMissingFeature;
3073     return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
3074                                MatchingInlineAsm);
3075   }
3076 
3077   // If one instruction matched with an invalid operand, report this as an
3078   // operand failure.
3079   if (std::count(std::begin(Match), std::end(Match),
3080                  Match_InvalidOperand) == 1) {
3081     return Error(IDLoc, "invalid operand for instruction", EmptyRange,
3082                  MatchingInlineAsm);
3083   }
3084 
3085   // If all of these were an outright failure, report it in a useless way.
3086   Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
3087         EmptyRange, MatchingInlineAsm);
3088   return true;
3089 }
3090 
/// Match an Intel-syntax instruction and, when exactly one match succeeds,
/// emit it.
///
/// Intel syntax does not carry the operand size in the mnemonic, so when a
/// memory operand has no size this routine retries the match with each
/// candidate size and accepts the result only if exactly one recorded match
/// is a success.
///
/// \param IDLoc  Location stamped on the matched instruction and used for most
///               diagnostics.
/// \param Opcode  Receives the opcode of the matched instruction on success.
/// \param Operands  Parsed operands; Operands[0] must be the mnemonic token.
///                  The size field of an unsized memory operand is rewritten
///                  while matching.
/// \param Out  Streamer handed to MatchFPUWaitAlias and EmitInstruction.
/// \param ErrorInfo  Passed through to MatchInstruction; overwritten with the
///                   remembered missing-feature info when that error is
///                   reported.
/// \param MatchingInlineAsm  Forwarded to MatchInstruction and to most error
///                           helpers; when set, validation, post-processing
///                           and emission are skipped.
/// \returns false when exactly one match succeeded; true when
///          validateInstruction returns true; otherwise the value returned by
///          Error / ErrorMissingFeature.
bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
                                                OperandVector &Operands,
                                                MCStreamer &Out,
                                                uint64_t &ErrorInfo,
                                                bool MatchingInlineAsm) {
  assert(!Operands.empty() && "Unexpect empty operand list!");
  X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
  assert(Op.isToken() && "Leading operand should always be a mnemonic!");
  // Mnemonic and Base both capture the mnemonic token as written. Mnemonic is
  // used for comparisons and diagnostics; Base is used to build the suffixed
  // push mnemonic and to restore the token afterwards.
  StringRef Mnemonic = Op.getToken();
  SMRange EmptyRange = None;
  StringRef Base = Op.getToken();
  unsigned Prefixes = getPrefixes(Operands);

  // First, handle aliases that expand to multiple instructions.
  MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);

  MCInst Inst;

  // Only set flags on the instruction when getPrefixes reported any.
  if (Prefixes)
    Inst.setFlags(Prefixes);

  // Find one unsized memory operand, if present.
  X86Operand *UnsizedMemOp = nullptr;
  // Note: the loop variable below shadows the outer mnemonic operand `Op`.
  for (const auto &Op : Operands) {
    X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
    if (X86Op->isMemUnsized()) {
      UnsizedMemOp = X86Op;
      // Stop at the first unsized memory operand: IA allows only one memory
      // operand.
      break;
    }
  }

  // Allow some instructions to have implicitly pointer-sized operands.  This is
  // compatible with gas.
  if (UnsizedMemOp) {
    static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
    for (const char *Instr : PtrSizedInstrs) {
      if (Mnemonic == Instr) {
        // Presumably this makes isMemUnsized() false afterwards, so the
        // size-probing loop further down is skipped -- TODO confirm.
        UnsizedMemOp->Mem.Size = getPointerWidth();
        break;
      }
    }
  }

  // Results of every recorded match attempt, in the order they were made.
  SmallVector<unsigned, 8> Match;
  uint64_t ErrorInfoMissingFeature = 0;

  // If an unsized push has a single immediate operand, default its size to
  // the pointer width by matching a size-suffixed mnemonic.
  if (Mnemonic == "push" && Operands.size() == 2) {
    auto *X86Op = static_cast<X86Operand *>(Operands[1].get());
    if (X86Op->isImm()) {
      // If it's not a constant fall through and let remainder take care of it.
      const auto *CE = dyn_cast<MCConstantExpr>(X86Op->getImm());
      unsigned Size = getPointerWidth();
      if (CE &&
          (isIntN(Size, CE->getValue()) || isUIntN(Size, CE->getValue()))) {
        // Append the size suffix for the current mode: 'q' in 64-bit, 'l' in
        // 32-bit, 'w' in 16-bit mode, and a blank otherwise.
        SmallString<16> Tmp;
        Tmp += Base;
        Tmp += (is64BitMode())
                   ? "q"
                   : (is32BitMode()) ? "l" : (is16BitMode()) ? "w" : " ";
        Op.setTokenValue(Tmp);
        // Do match in ATT mode to allow explicit suffix usage.
        Match.push_back(MatchInstruction(Operands, Inst, ErrorInfo,
                                         MatchingInlineAsm,
                                         false /*isParsingIntelSyntax()*/));
        // Put the unsuffixed mnemonic back before any further matching.
        Op.setTokenValue(Base);
      }
    }
  }

  // If an unsized memory operand is present, try to match with each memory
  // operand size.  In Intel assembly, the size is not part of the instruction
  // mnemonic.
  if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
    static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
    for (unsigned Size : MopSizes) {
      UnsizedMemOp->Mem.Size = Size;
      uint64_t ErrorInfoIgnore;
      unsigned LastOpcode = Inst.getOpcode();
      unsigned M = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
                                    MatchingInlineAsm, isParsingIntelSyntax());
      // Record the result only for the first attempt, or when this attempt
      // left Inst with a different opcode than it had before the call, so
      // attempts that do not change the opcode are not counted again.
      if (Match.empty() || LastOpcode != Inst.getOpcode())
        Match.push_back(M);

      // If this returned as a missing feature failure, remember that.
      // NOTE(review): Match.back() is the most recently *recorded* result,
      // which is not M when M was not pushed above -- confirm this is
      // intended.
      if (Match.back() == Match_MissingFeature)
        ErrorInfoMissingFeature = ErrorInfoIgnore;
    }

    // Restore the size of the unsized memory operand if we modified it.
    UnsizedMemOp->Mem.Size = 0;
  }

  // If we haven't matched anything yet, this is not a basic integer or FPU
  // operation.  There shouldn't be any ambiguity in our mnemonic table, so try
  // matching with the unsized operand.
  if (Match.empty()) {
    Match.push_back(MatchInstruction(
        Operands, Inst, ErrorInfo, MatchingInlineAsm, isParsingIntelSyntax()));
    // If this returned as a missing feature failure, remember that.
    if (Match.back() == Match_MissingFeature)
      ErrorInfoMissingFeature = ErrorInfo;
  }

  // Restore the size of the unsized memory operand if we modified it.  This
  // also undoes the pointer-width default applied for call/jmp/push above.
  if (UnsizedMemOp)
    UnsizedMemOp->Mem.Size = 0;

  // If it's a bad mnemonic, all results will be the same.  Match is never
  // empty here because the block above records a result when nothing else
  // did.
  if (Match.back() == Match_MnemonicFail) {
    return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
                 Op.getLocRange(), MatchingInlineAsm);
  }

  unsigned NumSuccessfulMatches =
      std::count(std::begin(Match), std::end(Match), Match_Success);

  // If matching was ambiguous and we had size information from the frontend,
  // try again with that. This handles cases like "movzx eax, m8/m16".
  if (UnsizedMemOp && NumSuccessfulMatches > 1 &&
      UnsizedMemOp->getMemFrontendSize()) {
    UnsizedMemOp->Mem.Size = UnsizedMemOp->getMemFrontendSize();
    unsigned M = MatchInstruction(
        Operands, Inst, ErrorInfo, MatchingInlineAsm, isParsingIntelSyntax());
    if (M == Match_Success)
      NumSuccessfulMatches = 1;

    // Add a rewrite that encodes the size information we used from the
    // frontend.  The rewrite is added whether or not the retry succeeded.
    // NOTE(review): InstInfo is dereferenced without a null check; presumably
    // a frontend size is only available when InstInfo is set -- confirm.
    InstInfo->AsmRewrites->emplace_back(
        AOK_SizeDirective, UnsizedMemOp->getStartLoc(),
        /*Len=*/0, UnsizedMemOp->getMemFrontendSize());
  }

  // If exactly one matched, then we treat that as a successful match (and the
  // instruction will already have been filled in correctly, since the failing
  // matches won't have modified it).
  if (NumSuccessfulMatches == 1) {
    if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
      return true;
    // Some instructions need post-processing to, for example, tweak which
    // encoding is selected. Loop on it while changes happen so the individual
    // transformations can chain off each other.
    if (!MatchingInlineAsm)
      while (processInstruction(Inst, Operands))
        ;
    Inst.setLoc(IDLoc);
    if (!MatchingInlineAsm)
      EmitInstruction(Inst, Operands, Out);
    Opcode = Inst.getOpcode();
    return false;
  } else if (NumSuccessfulMatches > 1) {
    assert(UnsizedMemOp &&
           "multiple matches only possible with unsized memory operands");
    // NOTE(review): unlike the other Error calls in this function,
    // MatchingInlineAsm is not forwarded here -- confirm this is intended.
    return Error(UnsizedMemOp->getStartLoc(),
                 "ambiguous operand size for instruction '" + Mnemonic + "\'",
                 UnsizedMemOp->getLocRange());
  }

  // If one instruction matched with a missing feature, report this as a
  // missing feature.
  if (std::count(std::begin(Match), std::end(Match),
                 Match_MissingFeature) == 1) {
    ErrorInfo = ErrorInfoMissingFeature;
    return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
                               MatchingInlineAsm);
  }

  // If one instruction matched with an invalid operand, report this as an
  // operand failure.
  if (std::count(std::begin(Match), std::end(Match),
                 Match_InvalidOperand) == 1) {
    return Error(IDLoc, "invalid operand for instruction", EmptyRange,
                 MatchingInlineAsm);
  }

  // Nothing more specific can be said about the failure; fall back to a
  // generic diagnostic.
  return Error(IDLoc, "unknown instruction mnemonic", EmptyRange,
               MatchingInlineAsm);
}
3274 
OmitRegisterFromClobberLists(unsigned RegNo)3275 bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
3276   return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
3277 }
3278 
ParseDirective(AsmToken DirectiveID)3279 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
3280   MCAsmParser &Parser = getParser();
3281   StringRef IDVal = DirectiveID.getIdentifier();
3282   if (IDVal.startswith(".code"))
3283     return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
3284   else if (IDVal.startswith(".att_syntax")) {
3285     getParser().setParsingInlineAsm(false);
3286     if (getLexer().isNot(AsmToken::EndOfStatement)) {
3287       if (Parser.getTok().getString() == "prefix")
3288         Parser.Lex();
3289       else if (Parser.getTok().getString() == "noprefix")
3290         return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
3291                                            "supported: registers must have a "
3292                                            "'%' prefix in .att_syntax");
3293     }
3294     getParser().setAssemblerDialect(0);
3295     return false;
3296   } else if (IDVal.startswith(".intel_syntax")) {
3297     getParser().setAssemblerDialect(1);
3298     getParser().setParsingInlineAsm(true);
3299     if (getLexer().isNot(AsmToken::EndOfStatement)) {
3300       if (Parser.getTok().getString() == "noprefix")
3301         Parser.Lex();
3302       else if (Parser.getTok().getString() == "prefix")
3303         return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
3304                                            "supported: registers must not have "
3305                                            "a '%' prefix in .intel_syntax");
3306     }
3307     return false;
3308   } else if (IDVal == ".even")
3309     return parseDirectiveEven(DirectiveID.getLoc());
3310   else if (IDVal == ".cv_fpo_proc")
3311     return parseDirectiveFPOProc(DirectiveID.getLoc());
3312   else if (IDVal == ".cv_fpo_setframe")
3313     return parseDirectiveFPOSetFrame(DirectiveID.getLoc());
3314   else if (IDVal == ".cv_fpo_pushreg")
3315     return parseDirectiveFPOPushReg(DirectiveID.getLoc());
3316   else if (IDVal == ".cv_fpo_stackalloc")
3317     return parseDirectiveFPOStackAlloc(DirectiveID.getLoc());
3318   else if (IDVal == ".cv_fpo_endprologue")
3319     return parseDirectiveFPOEndPrologue(DirectiveID.getLoc());
3320   else if (IDVal == ".cv_fpo_endproc")
3321     return parseDirectiveFPOEndProc(DirectiveID.getLoc());
3322 
3323   return true;
3324 }
3325 
3326 /// parseDirectiveEven
3327 ///  ::= .even
parseDirectiveEven(SMLoc L)3328 bool X86AsmParser::parseDirectiveEven(SMLoc L) {
3329   if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
3330     return false;
3331 
3332   const MCSection *Section = getStreamer().getCurrentSectionOnly();
3333   if (!Section) {
3334     getStreamer().InitSections(false);
3335     Section = getStreamer().getCurrentSectionOnly();
3336   }
3337   if (Section->UseCodeAlign())
3338     getStreamer().EmitCodeAlignment(2, 0);
3339   else
3340     getStreamer().EmitValueToAlignment(2, 0, 1, 0);
3341   return false;
3342 }
3343 
3344 /// ParseDirectiveCode
3345 ///  ::= .code16 | .code32 | .code64
ParseDirectiveCode(StringRef IDVal,SMLoc L)3346 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
3347   MCAsmParser &Parser = getParser();
3348   Code16GCC = false;
3349   if (IDVal == ".code16") {
3350     Parser.Lex();
3351     if (!is16BitMode()) {
3352       SwitchMode(X86::Mode16Bit);
3353       getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
3354     }
3355   } else if (IDVal == ".code16gcc") {
3356     // .code16gcc parses as if in 32-bit mode, but emits code in 16-bit mode.
3357     Parser.Lex();
3358     Code16GCC = true;
3359     if (!is16BitMode()) {
3360       SwitchMode(X86::Mode16Bit);
3361       getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
3362     }
3363   } else if (IDVal == ".code32") {
3364     Parser.Lex();
3365     if (!is32BitMode()) {
3366       SwitchMode(X86::Mode32Bit);
3367       getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
3368     }
3369   } else if (IDVal == ".code64") {
3370     Parser.Lex();
3371     if (!is64BitMode()) {
3372       SwitchMode(X86::Mode64Bit);
3373       getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
3374     }
3375   } else {
3376     Error(L, "unknown directive " + IDVal);
3377     return false;
3378   }
3379 
3380   return false;
3381 }
3382 
3383 // .cv_fpo_proc foo
parseDirectiveFPOProc(SMLoc L)3384 bool X86AsmParser::parseDirectiveFPOProc(SMLoc L) {
3385   MCAsmParser &Parser = getParser();
3386   StringRef ProcName;
3387   int64_t ParamsSize;
3388   if (Parser.parseIdentifier(ProcName))
3389     return Parser.TokError("expected symbol name");
3390   if (Parser.parseIntToken(ParamsSize, "expected parameter byte count"))
3391     return true;
3392   if (!isUIntN(32, ParamsSize))
3393     return Parser.TokError("parameters size out of range");
3394   if (Parser.parseEOL("unexpected tokens"))
3395     return addErrorSuffix(" in '.cv_fpo_proc' directive");
3396   MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName);
3397   return getTargetStreamer().emitFPOProc(ProcSym, ParamsSize, L);
3398 }
3399 
3400 // .cv_fpo_setframe ebp
parseDirectiveFPOSetFrame(SMLoc L)3401 bool X86AsmParser::parseDirectiveFPOSetFrame(SMLoc L) {
3402   MCAsmParser &Parser = getParser();
3403   unsigned Reg;
3404   SMLoc DummyLoc;
3405   if (ParseRegister(Reg, DummyLoc, DummyLoc) ||
3406       Parser.parseEOL("unexpected tokens"))
3407     return addErrorSuffix(" in '.cv_fpo_setframe' directive");
3408   return getTargetStreamer().emitFPOSetFrame(Reg, L);
3409 }
3410 
3411 // .cv_fpo_pushreg ebx
parseDirectiveFPOPushReg(SMLoc L)3412 bool X86AsmParser::parseDirectiveFPOPushReg(SMLoc L) {
3413   MCAsmParser &Parser = getParser();
3414   unsigned Reg;
3415   SMLoc DummyLoc;
3416   if (ParseRegister(Reg, DummyLoc, DummyLoc) ||
3417       Parser.parseEOL("unexpected tokens"))
3418     return addErrorSuffix(" in '.cv_fpo_pushreg' directive");
3419   return getTargetStreamer().emitFPOPushReg(Reg, L);
3420 }
3421 
3422 // .cv_fpo_stackalloc 20
parseDirectiveFPOStackAlloc(SMLoc L)3423 bool X86AsmParser::parseDirectiveFPOStackAlloc(SMLoc L) {
3424   MCAsmParser &Parser = getParser();
3425   int64_t Offset;
3426   if (Parser.parseIntToken(Offset, "expected offset") ||
3427       Parser.parseEOL("unexpected tokens"))
3428     return addErrorSuffix(" in '.cv_fpo_stackalloc' directive");
3429   return getTargetStreamer().emitFPOStackAlloc(Offset, L);
3430 }
3431 
3432 // .cv_fpo_endprologue
parseDirectiveFPOEndPrologue(SMLoc L)3433 bool X86AsmParser::parseDirectiveFPOEndPrologue(SMLoc L) {
3434   MCAsmParser &Parser = getParser();
3435   if (Parser.parseEOL("unexpected tokens"))
3436     return addErrorSuffix(" in '.cv_fpo_endprologue' directive");
3437   return getTargetStreamer().emitFPOEndPrologue(L);
3438 }
3439 
3440 // .cv_fpo_endproc
parseDirectiveFPOEndProc(SMLoc L)3441 bool X86AsmParser::parseDirectiveFPOEndProc(SMLoc L) {
3442   MCAsmParser &Parser = getParser();
3443   if (Parser.parseEOL("unexpected tokens"))
3444     return addErrorSuffix(" in '.cv_fpo_endproc' directive");
3445   return getTargetStreamer().emitFPOEndProc(L);
3446 }
3447 
3448 // Force static initialization.
LLVMInitializeX86AsmParser()3449 extern "C" void LLVMInitializeX86AsmParser() {
3450   RegisterMCAsmParser<X86AsmParser> X(getTheX86_32Target());
3451   RegisterMCAsmParser<X86AsmParser> Y(getTheX86_64Target());
3452 }
3453 
3454 #define GET_REGISTER_MATCHER
3455 #define GET_MATCHER_IMPLEMENTATION
3456 #define GET_SUBTARGET_FEATURE_NAME
3457 #include "X86GenAsmMatcher.inc"
3458