1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "X86AsmInstrumentation.h"
12 #include "X86AsmParserCommon.h"
13 #include "X86Operand.h"
14 #include "X86ISelLowering.h"
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallString.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/ADT/Twine.h"
21 #include "llvm/MC/MCContext.h"
22 #include "llvm/MC/MCExpr.h"
23 #include "llvm/MC/MCInst.h"
24 #include "llvm/MC/MCInstrInfo.h"
25 #include "llvm/MC/MCParser/MCAsmLexer.h"
26 #include "llvm/MC/MCParser/MCAsmParser.h"
27 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
28 #include "llvm/MC/MCRegisterInfo.h"
29 #include "llvm/MC/MCStreamer.h"
30 #include "llvm/MC/MCSubtargetInfo.h"
31 #include "llvm/MC/MCSymbol.h"
32 #include "llvm/MC/MCTargetAsmParser.h"
33 #include "llvm/Support/SourceMgr.h"
34 #include "llvm/Support/TargetRegistry.h"
35 #include "llvm/Support/raw_ostream.h"
36 #include <algorithm>
37 #include <memory>
38 
39 using namespace llvm;
40 
41 namespace {
42 
/// Relative binding strength of every infix-calculator token.
///
/// The table is indexed with an InfixCalculatorTok value
/// (InfixCalculator::pushOperator evaluates OpPrecedence[Op]), so the entries
/// must stay in exactly the same order as that enum.  pushOperator pushes a new
/// operator directly when its entry is strictly greater than the entry of the
/// operator on top of the stack; otherwise it first moves operators whose entry
/// is greater than or equal to the new one over to the postfix stack.
static const char OpPrecedence[] = {
  0, // IC_OR
  1, // IC_AND
  2, // IC_LSHIFT
  2, // IC_RSHIFT
  3, // IC_PLUS
  3, // IC_MINUS
  4, // IC_MULTIPLY
  4, // IC_DIVIDE
  5, // IC_RPAREN
  6, // IC_LPAREN
  0, // IC_IMM
  0  // IC_REGISTER
};
57 
58 class X86AsmParser : public MCTargetAsmParser {
59   MCSubtargetInfo &STI;
60   const MCInstrInfo &MII;
61   ParseInstructionInfo *InstInfo;
62   std::unique_ptr<X86AsmInstrumentation> Instrumentation;
63 private:
consumeToken()64   SMLoc consumeToken() {
65     MCAsmParser &Parser = getParser();
66     SMLoc Result = Parser.getTok().getLoc();
67     Parser.Lex();
68     return Result;
69   }
70 
  /// Token kinds understood by InfixCalculator: binary operators, the two
  /// parentheses, and the two operand kinds.
  ///
  /// The numeric values are used as indices into OpPrecedence (see
  /// InfixCalculator::pushOperator), so the enumerator order must be kept in
  /// step with that table.
  enum InfixCalculatorTok {
    IC_OR = 0,
    IC_AND,
    IC_LSHIFT,
    IC_RSHIFT,
    IC_PLUS,
    IC_MINUS,
    IC_MULTIPLY,
    IC_DIVIDE,
    IC_RPAREN,
    IC_LPAREN,
    IC_IMM,      // Operand: immediate value.
    IC_REGISTER  // Operand: register placeholder.
  };
85 
86   class InfixCalculator {
87     typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
88     SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
89     SmallVector<ICToken, 4> PostfixStack;
90 
91   public:
popOperand()92     int64_t popOperand() {
93       assert (!PostfixStack.empty() && "Poped an empty stack!");
94       ICToken Op = PostfixStack.pop_back_val();
95       assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
96               && "Expected and immediate or register!");
97       return Op.second;
98     }
pushOperand(InfixCalculatorTok Op,int64_t Val=0)99     void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
100       assert ((Op == IC_IMM || Op == IC_REGISTER) &&
101               "Unexpected operand!");
102       PostfixStack.push_back(std::make_pair(Op, Val));
103     }
104 
popOperator()105     void popOperator() { InfixOperatorStack.pop_back(); }
pushOperator(InfixCalculatorTok Op)106     void pushOperator(InfixCalculatorTok Op) {
107       // Push the new operator if the stack is empty.
108       if (InfixOperatorStack.empty()) {
109         InfixOperatorStack.push_back(Op);
110         return;
111       }
112 
113       // Push the new operator if it has a higher precedence than the operator
114       // on the top of the stack or the operator on the top of the stack is a
115       // left parentheses.
116       unsigned Idx = InfixOperatorStack.size() - 1;
117       InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
118       if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
119         InfixOperatorStack.push_back(Op);
120         return;
121       }
122 
123       // The operator on the top of the stack has higher precedence than the
124       // new operator.
125       unsigned ParenCount = 0;
126       while (1) {
127         // Nothing to process.
128         if (InfixOperatorStack.empty())
129           break;
130 
131         Idx = InfixOperatorStack.size() - 1;
132         StackOp = InfixOperatorStack[Idx];
133         if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
134           break;
135 
136         // If we have an even parentheses count and we see a left parentheses,
137         // then stop processing.
138         if (!ParenCount && StackOp == IC_LPAREN)
139           break;
140 
141         if (StackOp == IC_RPAREN) {
142           ++ParenCount;
143           InfixOperatorStack.pop_back();
144         } else if (StackOp == IC_LPAREN) {
145           --ParenCount;
146           InfixOperatorStack.pop_back();
147         } else {
148           InfixOperatorStack.pop_back();
149           PostfixStack.push_back(std::make_pair(StackOp, 0));
150         }
151       }
152       // Push the new operator.
153       InfixOperatorStack.push_back(Op);
154     }
execute()155     int64_t execute() {
156       // Push any remaining operators onto the postfix stack.
157       while (!InfixOperatorStack.empty()) {
158         InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
159         if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
160           PostfixStack.push_back(std::make_pair(StackOp, 0));
161       }
162 
163       if (PostfixStack.empty())
164         return 0;
165 
166       SmallVector<ICToken, 16> OperandStack;
167       for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
168         ICToken Op = PostfixStack[i];
169         if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
170           OperandStack.push_back(Op);
171         } else {
172           assert (OperandStack.size() > 1 && "Too few operands.");
173           int64_t Val;
174           ICToken Op2 = OperandStack.pop_back_val();
175           ICToken Op1 = OperandStack.pop_back_val();
176           switch (Op.first) {
177           default:
178             report_fatal_error("Unexpected operator!");
179             break;
180           case IC_PLUS:
181             Val = Op1.second + Op2.second;
182             OperandStack.push_back(std::make_pair(IC_IMM, Val));
183             break;
184           case IC_MINUS:
185             Val = Op1.second - Op2.second;
186             OperandStack.push_back(std::make_pair(IC_IMM, Val));
187             break;
188           case IC_MULTIPLY:
189             assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
190                     "Multiply operation with an immediate and a register!");
191             Val = Op1.second * Op2.second;
192             OperandStack.push_back(std::make_pair(IC_IMM, Val));
193             break;
194           case IC_DIVIDE:
195             assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
196                     "Divide operation with an immediate and a register!");
197             assert (Op2.second != 0 && "Division by zero!");
198             Val = Op1.second / Op2.second;
199             OperandStack.push_back(std::make_pair(IC_IMM, Val));
200             break;
201           case IC_OR:
202             assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
203                     "Or operation with an immediate and a register!");
204             Val = Op1.second | Op2.second;
205             OperandStack.push_back(std::make_pair(IC_IMM, Val));
206             break;
207           case IC_AND:
208             assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
209                     "And operation with an immediate and a register!");
210             Val = Op1.second & Op2.second;
211             OperandStack.push_back(std::make_pair(IC_IMM, Val));
212             break;
213           case IC_LSHIFT:
214             assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
215                     "Left shift operation with an immediate and a register!");
216             Val = Op1.second << Op2.second;
217             OperandStack.push_back(std::make_pair(IC_IMM, Val));
218             break;
219           case IC_RSHIFT:
220             assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
221                     "Right shift operation with an immediate and a register!");
222             Val = Op1.second >> Op2.second;
223             OperandStack.push_back(std::make_pair(IC_IMM, Val));
224             break;
225           }
226         }
227       }
228       assert (OperandStack.size() == 1 && "Expected a single result.");
229       return OperandStack.pop_back_val().second;
230     }
231   };
232 
  /// States of IntelExprStateMachine.  Each state is named after the kind of
  /// token that was consumed last; IES_ERROR is entered by any handler that is
  /// called in a state it does not accept.
  enum IntelExprState {
    IES_OR,
    IES_AND,
    IES_LSHIFT,
    IES_RSHIFT,
    IES_PLUS,
    IES_MINUS,
    IES_NOT,
    IES_MULTIPLY,
    IES_DIVIDE,
    IES_LBRAC,
    IES_RBRAC,
    IES_LPAREN,
    IES_RPAREN,
    IES_REGISTER,
    IES_INTEGER,
    IES_IDENTIFIER,
    IES_ERROR
  };
252 
253   class IntelExprStateMachine {
254     IntelExprState State, PrevState;
255     unsigned BaseReg, IndexReg, TmpReg, Scale;
256     int64_t Imm;
257     const MCExpr *Sym;
258     StringRef SymName;
259     bool StopOnLBrac, AddImmPrefix;
260     InfixCalculator IC;
261     InlineAsmIdentifierInfo Info;
262   public:
IntelExprStateMachine(int64_t imm,bool stoponlbrac,bool addimmprefix)263     IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
264       State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
265       Scale(1), Imm(imm), Sym(nullptr), StopOnLBrac(stoponlbrac),
266       AddImmPrefix(addimmprefix) { Info.clear(); }
267 
getBaseReg()268     unsigned getBaseReg() { return BaseReg; }
getIndexReg()269     unsigned getIndexReg() { return IndexReg; }
getScale()270     unsigned getScale() { return Scale; }
getSym()271     const MCExpr *getSym() { return Sym; }
getSymName()272     StringRef getSymName() { return SymName; }
getImm()273     int64_t getImm() { return Imm + IC.execute(); }
isValidEndState()274     bool isValidEndState() {
275       return State == IES_RBRAC || State == IES_INTEGER;
276     }
getStopOnLBrac()277     bool getStopOnLBrac() { return StopOnLBrac; }
getAddImmPrefix()278     bool getAddImmPrefix() { return AddImmPrefix; }
hadError()279     bool hadError() { return State == IES_ERROR; }
280 
getIdentifierInfo()281     InlineAsmIdentifierInfo &getIdentifierInfo() {
282       return Info;
283     }
284 
onOr()285     void onOr() {
286       IntelExprState CurrState = State;
287       switch (State) {
288       default:
289         State = IES_ERROR;
290         break;
291       case IES_INTEGER:
292       case IES_RPAREN:
293       case IES_REGISTER:
294         State = IES_OR;
295         IC.pushOperator(IC_OR);
296         break;
297       }
298       PrevState = CurrState;
299     }
onAnd()300     void onAnd() {
301       IntelExprState CurrState = State;
302       switch (State) {
303       default:
304         State = IES_ERROR;
305         break;
306       case IES_INTEGER:
307       case IES_RPAREN:
308       case IES_REGISTER:
309         State = IES_AND;
310         IC.pushOperator(IC_AND);
311         break;
312       }
313       PrevState = CurrState;
314     }
onLShift()315     void onLShift() {
316       IntelExprState CurrState = State;
317       switch (State) {
318       default:
319         State = IES_ERROR;
320         break;
321       case IES_INTEGER:
322       case IES_RPAREN:
323       case IES_REGISTER:
324         State = IES_LSHIFT;
325         IC.pushOperator(IC_LSHIFT);
326         break;
327       }
328       PrevState = CurrState;
329     }
onRShift()330     void onRShift() {
331       IntelExprState CurrState = State;
332       switch (State) {
333       default:
334         State = IES_ERROR;
335         break;
336       case IES_INTEGER:
337       case IES_RPAREN:
338       case IES_REGISTER:
339         State = IES_RSHIFT;
340         IC.pushOperator(IC_RSHIFT);
341         break;
342       }
343       PrevState = CurrState;
344     }
onPlus()345     void onPlus() {
346       IntelExprState CurrState = State;
347       switch (State) {
348       default:
349         State = IES_ERROR;
350         break;
351       case IES_INTEGER:
352       case IES_RPAREN:
353       case IES_REGISTER:
354         State = IES_PLUS;
355         IC.pushOperator(IC_PLUS);
356         if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
357           // If we already have a BaseReg, then assume this is the IndexReg with
358           // a scale of 1.
359           if (!BaseReg) {
360             BaseReg = TmpReg;
361           } else {
362             assert (!IndexReg && "BaseReg/IndexReg already set!");
363             IndexReg = TmpReg;
364             Scale = 1;
365           }
366         }
367         break;
368       }
369       PrevState = CurrState;
370     }
onMinus()371     void onMinus() {
372       IntelExprState CurrState = State;
373       switch (State) {
374       default:
375         State = IES_ERROR;
376         break;
377       case IES_PLUS:
378       case IES_NOT:
379       case IES_MULTIPLY:
380       case IES_DIVIDE:
381       case IES_LPAREN:
382       case IES_RPAREN:
383       case IES_LBRAC:
384       case IES_RBRAC:
385       case IES_INTEGER:
386       case IES_REGISTER:
387         State = IES_MINUS;
388         // Only push the minus operator if it is not a unary operator.
389         if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
390               CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
391               CurrState == IES_LPAREN || CurrState == IES_LBRAC))
392           IC.pushOperator(IC_MINUS);
393         if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
394           // If we already have a BaseReg, then assume this is the IndexReg with
395           // a scale of 1.
396           if (!BaseReg) {
397             BaseReg = TmpReg;
398           } else {
399             assert (!IndexReg && "BaseReg/IndexReg already set!");
400             IndexReg = TmpReg;
401             Scale = 1;
402           }
403         }
404         break;
405       }
406       PrevState = CurrState;
407     }
onNot()408     void onNot() {
409       IntelExprState CurrState = State;
410       switch (State) {
411       default:
412         State = IES_ERROR;
413         break;
414       case IES_PLUS:
415       case IES_NOT:
416         State = IES_NOT;
417         break;
418       }
419       PrevState = CurrState;
420     }
onRegister(unsigned Reg)421     void onRegister(unsigned Reg) {
422       IntelExprState CurrState = State;
423       switch (State) {
424       default:
425         State = IES_ERROR;
426         break;
427       case IES_PLUS:
428       case IES_LPAREN:
429         State = IES_REGISTER;
430         TmpReg = Reg;
431         IC.pushOperand(IC_REGISTER);
432         break;
433       case IES_MULTIPLY:
434         // Index Register - Scale * Register
435         if (PrevState == IES_INTEGER) {
436           assert (!IndexReg && "IndexReg already set!");
437           State = IES_REGISTER;
438           IndexReg = Reg;
439           // Get the scale and replace the 'Scale * Register' with '0'.
440           Scale = IC.popOperand();
441           IC.pushOperand(IC_IMM);
442           IC.popOperator();
443         } else {
444           State = IES_ERROR;
445         }
446         break;
447       }
448       PrevState = CurrState;
449     }
onIdentifierExpr(const MCExpr * SymRef,StringRef SymRefName)450     void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
451       PrevState = State;
452       switch (State) {
453       default:
454         State = IES_ERROR;
455         break;
456       case IES_PLUS:
457       case IES_MINUS:
458       case IES_NOT:
459         State = IES_INTEGER;
460         Sym = SymRef;
461         SymName = SymRefName;
462         IC.pushOperand(IC_IMM);
463         break;
464       }
465     }
onInteger(int64_t TmpInt,StringRef & ErrMsg)466     bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
467       IntelExprState CurrState = State;
468       switch (State) {
469       default:
470         State = IES_ERROR;
471         break;
472       case IES_PLUS:
473       case IES_MINUS:
474       case IES_NOT:
475       case IES_OR:
476       case IES_AND:
477       case IES_LSHIFT:
478       case IES_RSHIFT:
479       case IES_DIVIDE:
480       case IES_MULTIPLY:
481       case IES_LPAREN:
482         State = IES_INTEGER;
483         if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
484           // Index Register - Register * Scale
485           assert (!IndexReg && "IndexReg already set!");
486           IndexReg = TmpReg;
487           Scale = TmpInt;
488           if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
489             ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
490             return true;
491           }
492           // Get the scale and replace the 'Register * Scale' with '0'.
493           IC.popOperator();
494         } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
495                     PrevState == IES_OR || PrevState == IES_AND ||
496                     PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
497                     PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
498                     PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
499                     PrevState == IES_NOT) &&
500                    CurrState == IES_MINUS) {
501           // Unary minus.  No need to pop the minus operand because it was never
502           // pushed.
503           IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
504         } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
505                     PrevState == IES_OR || PrevState == IES_AND ||
506                     PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
507                     PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
508                     PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
509                     PrevState == IES_NOT) &&
510                    CurrState == IES_NOT) {
511           // Unary not.  No need to pop the not operand because it was never
512           // pushed.
513           IC.pushOperand(IC_IMM, ~TmpInt); // Push ~Imm.
514         } else {
515           IC.pushOperand(IC_IMM, TmpInt);
516         }
517         break;
518       }
519       PrevState = CurrState;
520       return false;
521     }
onStar()522     void onStar() {
523       PrevState = State;
524       switch (State) {
525       default:
526         State = IES_ERROR;
527         break;
528       case IES_INTEGER:
529       case IES_REGISTER:
530       case IES_RPAREN:
531         State = IES_MULTIPLY;
532         IC.pushOperator(IC_MULTIPLY);
533         break;
534       }
535     }
onDivide()536     void onDivide() {
537       PrevState = State;
538       switch (State) {
539       default:
540         State = IES_ERROR;
541         break;
542       case IES_INTEGER:
543       case IES_RPAREN:
544         State = IES_DIVIDE;
545         IC.pushOperator(IC_DIVIDE);
546         break;
547       }
548     }
onLBrac()549     void onLBrac() {
550       PrevState = State;
551       switch (State) {
552       default:
553         State = IES_ERROR;
554         break;
555       case IES_RBRAC:
556         State = IES_PLUS;
557         IC.pushOperator(IC_PLUS);
558         break;
559       }
560     }
onRBrac()561     void onRBrac() {
562       IntelExprState CurrState = State;
563       switch (State) {
564       default:
565         State = IES_ERROR;
566         break;
567       case IES_INTEGER:
568       case IES_REGISTER:
569       case IES_RPAREN:
570         State = IES_RBRAC;
571         if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
572           // If we already have a BaseReg, then assume this is the IndexReg with
573           // a scale of 1.
574           if (!BaseReg) {
575             BaseReg = TmpReg;
576           } else {
577             assert (!IndexReg && "BaseReg/IndexReg already set!");
578             IndexReg = TmpReg;
579             Scale = 1;
580           }
581         }
582         break;
583       }
584       PrevState = CurrState;
585     }
onLParen()586     void onLParen() {
587       IntelExprState CurrState = State;
588       switch (State) {
589       default:
590         State = IES_ERROR;
591         break;
592       case IES_PLUS:
593       case IES_MINUS:
594       case IES_NOT:
595       case IES_OR:
596       case IES_AND:
597       case IES_LSHIFT:
598       case IES_RSHIFT:
599       case IES_MULTIPLY:
600       case IES_DIVIDE:
601       case IES_LPAREN:
602         // FIXME: We don't handle this type of unary minus or not, yet.
603         if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
604             PrevState == IES_OR || PrevState == IES_AND ||
605             PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
606             PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
607             PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
608             PrevState == IES_NOT) &&
609             (CurrState == IES_MINUS || CurrState == IES_NOT)) {
610           State = IES_ERROR;
611           break;
612         }
613         State = IES_LPAREN;
614         IC.pushOperator(IC_LPAREN);
615         break;
616       }
617       PrevState = CurrState;
618     }
onRParen()619     void onRParen() {
620       PrevState = State;
621       switch (State) {
622       default:
623         State = IES_ERROR;
624         break;
625       case IES_INTEGER:
626       case IES_REGISTER:
627       case IES_RPAREN:
628         State = IES_RPAREN;
629         IC.pushOperator(IC_RPAREN);
630         break;
631       }
632     }
633   };
634 
Error(SMLoc L,const Twine & Msg,ArrayRef<SMRange> Ranges=None,bool MatchingInlineAsm=false)635   bool Error(SMLoc L, const Twine &Msg,
636              ArrayRef<SMRange> Ranges = None,
637              bool MatchingInlineAsm = false) {
638     MCAsmParser &Parser = getParser();
639     if (MatchingInlineAsm) return true;
640     return Parser.Error(L, Msg, Ranges);
641   }
642 
ErrorAndEatStatement(SMLoc L,const Twine & Msg,ArrayRef<SMRange> Ranges=None,bool MatchingInlineAsm=false)643   bool ErrorAndEatStatement(SMLoc L, const Twine &Msg,
644           ArrayRef<SMRange> Ranges = None,
645           bool MatchingInlineAsm = false) {
646     MCAsmParser &Parser = getParser();
647     Parser.eatToEndOfStatement();
648     return Error(L, Msg, Ranges, MatchingInlineAsm);
649   }
650 
ErrorOperand(SMLoc Loc,StringRef Msg)651   std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) {
652     Error(Loc, Msg);
653     return nullptr;
654   }
655 
656   std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
657   std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
658   std::unique_ptr<X86Operand> ParseOperand();
659   std::unique_ptr<X86Operand> ParseATTOperand();
660   std::unique_ptr<X86Operand> ParseIntelOperand();
661   std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
662   bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
663   std::unique_ptr<X86Operand> ParseIntelOperator(unsigned OpKind);
664   std::unique_ptr<X86Operand>
665   ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
666   std::unique_ptr<X86Operand>
667   ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc, unsigned Size);
668   std::unique_ptr<X86Operand> ParseRoundingModeOp(SMLoc Start, SMLoc End);
669   bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
670   std::unique_ptr<X86Operand> ParseIntelBracExpression(unsigned SegReg,
671                                                        SMLoc Start,
672                                                        int64_t ImmDisp,
673                                                        unsigned Size);
674   bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
675                             InlineAsmIdentifierInfo &Info,
676                             bool IsUnevaluatedOperand, SMLoc &End);
677 
678   std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
679 
680   std::unique_ptr<X86Operand>
681   CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
682                         unsigned IndexReg, unsigned Scale, SMLoc Start,
683                         SMLoc End, unsigned Size, StringRef Identifier,
684                         InlineAsmIdentifierInfo &Info);
685 
686   bool ParseDirectiveWord(unsigned Size, SMLoc L);
687   bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
688 
689   bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
690   bool processInstruction(MCInst &Inst, const OperandVector &Ops);
691 
692   /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
693   /// instrumentation around Inst.
694   void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
695 
696   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
697                                OperandVector &Operands, MCStreamer &Out,
698                                uint64_t &ErrorInfo,
699                                bool MatchingInlineAsm) override;
700 
701   void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
702                          MCStreamer &Out, bool MatchingInlineAsm);
703 
704   bool ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
705                            bool MatchingInlineAsm);
706 
707   bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
708                                   OperandVector &Operands, MCStreamer &Out,
709                                   uint64_t &ErrorInfo,
710                                   bool MatchingInlineAsm);
711 
712   bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
713                                     OperandVector &Operands, MCStreamer &Out,
714                                     uint64_t &ErrorInfo,
715                                     bool MatchingInlineAsm);
716 
717   bool OmitRegisterFromClobberLists(unsigned RegNo) override;
718 
719   /// doSrcDstMatch - Returns true if operands are matching in their
720   /// word size (%si and %di, %esi and %edi, etc.). Order depends on
721   /// the parsing mode (Intel vs. AT&T).
722   bool doSrcDstMatch(X86Operand &Op1, X86Operand &Op2);
723 
724   /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
725   /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
726   /// \return \c true if no parsing errors occurred, \c false otherwise.
727   bool HandleAVX512Operand(OperandVector &Operands,
728                            const MCParsedAsmOperand &Op);
729 
is64BitMode() const730   bool is64BitMode() const {
731     // FIXME: Can tablegen auto-generate this?
732     return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
733   }
is32BitMode() const734   bool is32BitMode() const {
735     // FIXME: Can tablegen auto-generate this?
736     return (STI.getFeatureBits() & X86::Mode32Bit) != 0;
737   }
is16BitMode() const738   bool is16BitMode() const {
739     // FIXME: Can tablegen auto-generate this?
740     return (STI.getFeatureBits() & X86::Mode16Bit) != 0;
741   }
SwitchMode(uint64_t mode)742   void SwitchMode(uint64_t mode) {
743     uint64_t oldMode = STI.getFeatureBits() &
744         (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit);
745     unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(oldMode | mode));
746     setAvailableFeatures(FB);
747     assert(mode == (STI.getFeatureBits() &
748                     (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit)));
749   }
750 
getPointerWidth()751   unsigned getPointerWidth() {
752     if (is16BitMode()) return 16;
753     if (is32BitMode()) return 32;
754     if (is64BitMode()) return 64;
755     llvm_unreachable("invalid mode");
756   }
757 
isParsingIntelSyntax()758   bool isParsingIntelSyntax() {
759     return getParser().getAssemblerDialect();
760   }
761 
762   /// @name Auto-generated Matcher Functions
763   /// {
764 
765 #define GET_ASSEMBLER_HEADER
766 #include "X86GenAsmMatcher.inc"
767 
768   /// }
769 
770 public:
X86AsmParser(MCSubtargetInfo & sti,MCAsmParser & Parser,const MCInstrInfo & mii,const MCTargetOptions & Options)771   X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &Parser,
772                const MCInstrInfo &mii, const MCTargetOptions &Options)
773       : MCTargetAsmParser(), STI(sti), MII(mii), InstInfo(nullptr) {
774 
775     // Initialize the set of available features.
776     setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
777     Instrumentation.reset(
778         CreateX86AsmInstrumentation(Options, Parser.getContext(), STI));
779   }
780 
781   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
782 
783   void SetFrameRegister(unsigned RegNo) override;
784 
785   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
786                         SMLoc NameLoc, OperandVector &Operands) override;
787 
788   bool ParseDirective(AsmToken DirectiveID) override;
789 };
790 } // end anonymous namespace
791 
792 /// @name Auto-generated Match Functions
793 /// {
794 
795 static unsigned MatchRegisterName(StringRef Name);
796 
797 /// }
798 
CheckBaseRegAndIndexReg(unsigned BaseReg,unsigned IndexReg,StringRef & ErrMsg)799 static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg,
800                                     StringRef &ErrMsg) {
801   // If we have both a base register and an index register make sure they are
802   // both 64-bit or 32-bit registers.
803   // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
804   if (BaseReg != 0 && IndexReg != 0) {
805     if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
806         (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
807          X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
808         IndexReg != X86::RIZ) {
809       ErrMsg = "base register is 64-bit, but index register is not";
810       return true;
811     }
812     if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
813         (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
814          X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
815         IndexReg != X86::EIZ){
816       ErrMsg = "base register is 32-bit, but index register is not";
817       return true;
818     }
819     if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
820       if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
821           X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
822         ErrMsg = "base register is 16-bit, but index register is not";
823         return true;
824       }
825       if (((BaseReg == X86::BX || BaseReg == X86::BP) &&
826            IndexReg != X86::SI && IndexReg != X86::DI) ||
827           ((BaseReg == X86::SI || BaseReg == X86::DI) &&
828            IndexReg != X86::BX && IndexReg != X86::BP)) {
829         ErrMsg = "invalid 16-bit base/index register combination";
830         return true;
831       }
832     }
833   }
834   return false;
835 }
836 
doSrcDstMatch(X86Operand & Op1,X86Operand & Op2)837 bool X86AsmParser::doSrcDstMatch(X86Operand &Op1, X86Operand &Op2)
838 {
839   // Return true and let a normal complaint about bogus operands happen.
840   if (!Op1.isMem() || !Op2.isMem())
841     return true;
842 
843   // Actually these might be the other way round if Intel syntax is
844   // being used. It doesn't matter.
845   unsigned diReg = Op1.Mem.BaseReg;
846   unsigned siReg = Op2.Mem.BaseReg;
847 
848   if (X86MCRegisterClasses[X86::GR16RegClassID].contains(siReg))
849     return X86MCRegisterClasses[X86::GR16RegClassID].contains(diReg);
850   if (X86MCRegisterClasses[X86::GR32RegClassID].contains(siReg))
851     return X86MCRegisterClasses[X86::GR32RegClassID].contains(diReg);
852   if (X86MCRegisterClasses[X86::GR64RegClassID].contains(siReg))
853     return X86MCRegisterClasses[X86::GR64RegClassID].contains(diReg);
854   // Again, return true and let another error happen.
855   return true;
856 }
857 
ParseRegister(unsigned & RegNo,SMLoc & StartLoc,SMLoc & EndLoc)858 bool X86AsmParser::ParseRegister(unsigned &RegNo,
859                                  SMLoc &StartLoc, SMLoc &EndLoc) {
860   MCAsmParser &Parser = getParser();
861   RegNo = 0;
862   const AsmToken &PercentTok = Parser.getTok();
863   StartLoc = PercentTok.getLoc();
864 
865   // If we encounter a %, ignore it. This code handles registers with and
866   // without the prefix, unprefixed registers can occur in cfi directives.
867   if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
868     Parser.Lex(); // Eat percent token.
869 
870   const AsmToken &Tok = Parser.getTok();
871   EndLoc = Tok.getEndLoc();
872 
873   if (Tok.isNot(AsmToken::Identifier)) {
874     if (isParsingIntelSyntax()) return true;
875     return Error(StartLoc, "invalid register name",
876                  SMRange(StartLoc, EndLoc));
877   }
878 
879   RegNo = MatchRegisterName(Tok.getString());
880 
881   // If the match failed, try the register name as lowercase.
882   if (RegNo == 0)
883     RegNo = MatchRegisterName(Tok.getString().lower());
884 
885   if (!is64BitMode()) {
886     // FIXME: This should be done using Requires<Not64BitMode> and
887     // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
888     // checked.
889     // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
890     // REX prefix.
891     if (RegNo == X86::RIZ ||
892         X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
893         X86II::isX86_64NonExtLowByteReg(RegNo) ||
894         X86II::isX86_64ExtendedReg(RegNo))
895       return Error(StartLoc, "register %"
896                    + Tok.getString() + " is only available in 64-bit mode",
897                    SMRange(StartLoc, EndLoc));
898   }
899 
900   // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
901   if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
902     RegNo = X86::ST0;
903     Parser.Lex(); // Eat 'st'
904 
905     // Check to see if we have '(4)' after %st.
906     if (getLexer().isNot(AsmToken::LParen))
907       return false;
908     // Lex the paren.
909     getParser().Lex();
910 
911     const AsmToken &IntTok = Parser.getTok();
912     if (IntTok.isNot(AsmToken::Integer))
913       return Error(IntTok.getLoc(), "expected stack index");
914     switch (IntTok.getIntVal()) {
915     case 0: RegNo = X86::ST0; break;
916     case 1: RegNo = X86::ST1; break;
917     case 2: RegNo = X86::ST2; break;
918     case 3: RegNo = X86::ST3; break;
919     case 4: RegNo = X86::ST4; break;
920     case 5: RegNo = X86::ST5; break;
921     case 6: RegNo = X86::ST6; break;
922     case 7: RegNo = X86::ST7; break;
923     default: return Error(IntTok.getLoc(), "invalid stack index");
924     }
925 
926     if (getParser().Lex().isNot(AsmToken::RParen))
927       return Error(Parser.getTok().getLoc(), "expected ')'");
928 
929     EndLoc = Parser.getTok().getEndLoc();
930     Parser.Lex(); // Eat ')'
931     return false;
932   }
933 
934   EndLoc = Parser.getTok().getEndLoc();
935 
936   // If this is "db[0-7]", match it as an alias
937   // for dr[0-7].
938   if (RegNo == 0 && Tok.getString().size() == 3 &&
939       Tok.getString().startswith("db")) {
940     switch (Tok.getString()[2]) {
941     case '0': RegNo = X86::DR0; break;
942     case '1': RegNo = X86::DR1; break;
943     case '2': RegNo = X86::DR2; break;
944     case '3': RegNo = X86::DR3; break;
945     case '4': RegNo = X86::DR4; break;
946     case '5': RegNo = X86::DR5; break;
947     case '6': RegNo = X86::DR6; break;
948     case '7': RegNo = X86::DR7; break;
949     }
950 
951     if (RegNo != 0) {
952       EndLoc = Parser.getTok().getEndLoc();
953       Parser.Lex(); // Eat it.
954       return false;
955     }
956   }
957 
958   if (RegNo == 0) {
959     if (isParsingIntelSyntax()) return true;
960     return Error(StartLoc, "invalid register name",
961                  SMRange(StartLoc, EndLoc));
962   }
963 
964   Parser.Lex(); // Eat identifier token.
965   return false;
966 }
967 
/// Forward \p RegNo to the instrumentation object as its initial frame
/// register.
void X86AsmParser::SetFrameRegister(unsigned RegNo) {
  Instrumentation->SetInitialFrameRegister(RegNo);
}
971 
DefaultMemSIOperand(SMLoc Loc)972 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
973   unsigned basereg =
974     is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI);
975   const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
976   return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
977                                /*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1,
978                                Loc, Loc, 0);
979 }
980 
DefaultMemDIOperand(SMLoc Loc)981 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
982   unsigned basereg =
983     is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI);
984   const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
985   return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
986                                /*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1,
987                                Loc, Loc, 0);
988 }
989 
ParseOperand()990 std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
991   if (isParsingIntelSyntax())
992     return ParseIntelOperand();
993   return ParseATTOperand();
994 }
995 
996 /// getIntelMemOperandSize - Return intel memory operand size.
getIntelMemOperandSize(StringRef OpStr)997 static unsigned getIntelMemOperandSize(StringRef OpStr) {
998   unsigned Size = StringSwitch<unsigned>(OpStr)
999     .Cases("BYTE", "byte", 8)
1000     .Cases("WORD", "word", 16)
1001     .Cases("DWORD", "dword", 32)
1002     .Cases("QWORD", "qword", 64)
1003     .Cases("XWORD", "xword", 80)
1004     .Cases("XMMWORD", "xmmword", 128)
1005     .Cases("YMMWORD", "ymmword", 256)
1006     .Cases("ZMMWORD", "zmmword", 512)
1007     .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter
1008     .Default(0);
1009   return Size;
1010 }
1011 
CreateMemForInlineAsm(unsigned SegReg,const MCExpr * Disp,unsigned BaseReg,unsigned IndexReg,unsigned Scale,SMLoc Start,SMLoc End,unsigned Size,StringRef Identifier,InlineAsmIdentifierInfo & Info)1012 std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
1013     unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
1014     unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
1015     InlineAsmIdentifierInfo &Info) {
1016   // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
1017   // some other label reference.
1018   if (isa<MCSymbolRefExpr>(Disp) && Info.OpDecl && !Info.IsVarDecl) {
1019     // Insert an explicit size if the user didn't have one.
1020     if (!Size) {
1021       Size = getPointerWidth();
1022       InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1023                                                   /*Len=*/0, Size));
1024     }
1025 
1026     // Create an absolute memory reference in order to match against
1027     // instructions taking a PC relative operand.
1028     return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size,
1029                                  Identifier, Info.OpDecl);
1030   }
1031 
1032   // We either have a direct symbol reference, or an offset from a symbol.  The
1033   // parser always puts the symbol on the LHS, so look there for size
1034   // calculation purposes.
1035   const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
1036   bool IsSymRef =
1037       isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
1038   if (IsSymRef) {
1039     if (!Size) {
1040       Size = Info.Type * 8; // Size is in terms of bits in this context.
1041       if (Size)
1042         InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1043                                                     /*Len=*/0, Size));
1044     }
1045   }
1046 
1047   // When parsing inline assembly we set the base register to a non-zero value
1048   // if we don't know the actual value at this time.  This is necessary to
1049   // get the matching correct in some cases.
1050   BaseReg = BaseReg ? BaseReg : 1;
1051   return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1052                                IndexReg, Scale, Start, End, Size, Identifier,
1053                                Info.OpDecl);
1054 }
1055 
1056 static void
RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> * AsmRewrites,StringRef SymName,int64_t ImmDisp,int64_t FinalImmDisp,SMLoc & BracLoc,SMLoc & StartInBrac,SMLoc & End)1057 RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
1058                            StringRef SymName, int64_t ImmDisp,
1059                            int64_t FinalImmDisp, SMLoc &BracLoc,
1060                            SMLoc &StartInBrac, SMLoc &End) {
1061   // Remove the '[' and ']' from the IR string.
1062   AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1));
1063   AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));
1064 
1065   // If ImmDisp is non-zero, then we parsed a displacement before the
1066   // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
1067   // If ImmDisp doesn't match the displacement computed by the state machine
1068   // then we have an additional displacement in the bracketed expression.
1069   if (ImmDisp != FinalImmDisp) {
1070     if (ImmDisp) {
1071       // We have an immediate displacement before the bracketed expression.
1072       // Adjust this to match the final immediate displacement.
1073       bool Found = false;
1074       for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1075              E = AsmRewrites->end(); I != E; ++I) {
1076         if ((*I).Loc.getPointer() > BracLoc.getPointer())
1077           continue;
1078         if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) {
1079           assert (!Found && "ImmDisp already rewritten.");
1080           (*I).Kind = AOK_Imm;
1081           (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer();
1082           (*I).Val = FinalImmDisp;
1083           Found = true;
1084           break;
1085         }
1086       }
1087       assert (Found && "Unable to rewrite ImmDisp.");
1088       (void)Found;
1089     } else {
1090       // We have a symbolic and an immediate displacement, but no displacement
1091       // before the bracketed expression.  Put the immediate displacement
1092       // before the bracketed expression.
1093       AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp));
1094     }
1095   }
1096   // Remove all the ImmPrefix rewrites within the brackets.
1097   for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1098          E = AsmRewrites->end(); I != E; ++I) {
1099     if ((*I).Loc.getPointer() < StartInBrac.getPointer())
1100       continue;
1101     if ((*I).Kind == AOK_ImmPrefix)
1102       (*I).Kind = AOK_Delete;
1103   }
1104   const char *SymLocPtr = SymName.data();
1105   // Skip everything before the symbol.
1106   if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
1107     assert(Len > 0 && "Expected a non-negative length.");
1108     AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
1109   }
1110   // Skip everything after the symbol.
1111   if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
1112     SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
1113     assert(Len > 0 && "Expected a non-negative length.");
1114     AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
1115   }
1116 }
1117 
ParseIntelExpression(IntelExprStateMachine & SM,SMLoc & End)1118 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1119   MCAsmParser &Parser = getParser();
1120   const AsmToken &Tok = Parser.getTok();
1121 
1122   bool Done = false;
1123   while (!Done) {
1124     bool UpdateLocLex = true;
1125 
1126     // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1127     // identifier.  Don't try an parse it as a register.
1128     if (Tok.getString().startswith("."))
1129       break;
1130 
1131     // If we're parsing an immediate expression, we don't expect a '['.
1132     if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
1133       break;
1134 
1135     AsmToken::TokenKind TK = getLexer().getKind();
1136     switch (TK) {
1137     default: {
1138       if (SM.isValidEndState()) {
1139         Done = true;
1140         break;
1141       }
1142       return Error(Tok.getLoc(), "unknown token in expression");
1143     }
1144     case AsmToken::EndOfStatement: {
1145       Done = true;
1146       break;
1147     }
1148     case AsmToken::String:
1149     case AsmToken::Identifier: {
1150       // This could be a register or a symbolic displacement.
1151       unsigned TmpReg;
1152       const MCExpr *Val;
1153       SMLoc IdentLoc = Tok.getLoc();
1154       StringRef Identifier = Tok.getString();
1155       if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) {
1156         SM.onRegister(TmpReg);
1157         UpdateLocLex = false;
1158         break;
1159       } else {
1160         if (!isParsingInlineAsm()) {
1161           if (getParser().parsePrimaryExpr(Val, End))
1162             return Error(Tok.getLoc(), "Unexpected identifier!");
1163         } else {
1164           // This is a dot operator, not an adjacent identifier.
1165           if (Identifier.find('.') != StringRef::npos) {
1166             return false;
1167           } else {
1168             InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1169             if (ParseIntelIdentifier(Val, Identifier, Info,
1170                                      /*Unevaluated=*/false, End))
1171               return true;
1172           }
1173         }
1174         SM.onIdentifierExpr(Val, Identifier);
1175         UpdateLocLex = false;
1176         break;
1177       }
1178       return Error(Tok.getLoc(), "Unexpected identifier!");
1179     }
1180     case AsmToken::Integer: {
1181       StringRef ErrMsg;
1182       if (isParsingInlineAsm() && SM.getAddImmPrefix())
1183         InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
1184                                                     Tok.getLoc()));
1185       // Look for 'b' or 'f' following an Integer as a directional label
1186       SMLoc Loc = getTok().getLoc();
1187       int64_t IntVal = getTok().getIntVal();
1188       End = consumeToken();
1189       UpdateLocLex = false;
1190       if (getLexer().getKind() == AsmToken::Identifier) {
1191         StringRef IDVal = getTok().getString();
1192         if (IDVal == "f" || IDVal == "b") {
1193           MCSymbol *Sym =
1194               getContext().GetDirectionalLocalSymbol(IntVal, IDVal == "b");
1195           MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1196           const MCExpr *Val =
1197 	    MCSymbolRefExpr::Create(Sym, Variant, getContext());
1198           if (IDVal == "b" && Sym->isUndefined())
1199             return Error(Loc, "invalid reference to undefined symbol");
1200           StringRef Identifier = Sym->getName();
1201           SM.onIdentifierExpr(Val, Identifier);
1202           End = consumeToken();
1203         } else {
1204           if (SM.onInteger(IntVal, ErrMsg))
1205             return Error(Loc, ErrMsg);
1206         }
1207       } else {
1208         if (SM.onInteger(IntVal, ErrMsg))
1209           return Error(Loc, ErrMsg);
1210       }
1211       break;
1212     }
1213     case AsmToken::Plus:    SM.onPlus(); break;
1214     case AsmToken::Minus:   SM.onMinus(); break;
1215     case AsmToken::Tilde:   SM.onNot(); break;
1216     case AsmToken::Star:    SM.onStar(); break;
1217     case AsmToken::Slash:   SM.onDivide(); break;
1218     case AsmToken::Pipe:    SM.onOr(); break;
1219     case AsmToken::Amp:     SM.onAnd(); break;
1220     case AsmToken::LessLess:
1221                             SM.onLShift(); break;
1222     case AsmToken::GreaterGreater:
1223                             SM.onRShift(); break;
1224     case AsmToken::LBrac:   SM.onLBrac(); break;
1225     case AsmToken::RBrac:   SM.onRBrac(); break;
1226     case AsmToken::LParen:  SM.onLParen(); break;
1227     case AsmToken::RParen:  SM.onRParen(); break;
1228     }
1229     if (SM.hadError())
1230       return Error(Tok.getLoc(), "unknown token in expression");
1231 
1232     if (!Done && UpdateLocLex)
1233       End = consumeToken();
1234   }
1235   return false;
1236 }
1237 
1238 std::unique_ptr<X86Operand>
ParseIntelBracExpression(unsigned SegReg,SMLoc Start,int64_t ImmDisp,unsigned Size)1239 X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
1240                                        int64_t ImmDisp, unsigned Size) {
1241   MCAsmParser &Parser = getParser();
1242   const AsmToken &Tok = Parser.getTok();
1243   SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
1244   if (getLexer().isNot(AsmToken::LBrac))
1245     return ErrorOperand(BracLoc, "Expected '[' token!");
1246   Parser.Lex(); // Eat '['
1247 
1248   SMLoc StartInBrac = Tok.getLoc();
1249   // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ].  We
1250   // may have already parsed an immediate displacement before the bracketed
1251   // expression.
1252   IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
1253   if (ParseIntelExpression(SM, End))
1254     return nullptr;
1255 
1256   const MCExpr *Disp = nullptr;
1257   if (const MCExpr *Sym = SM.getSym()) {
1258     // A symbolic displacement.
1259     Disp = Sym;
1260     if (isParsingInlineAsm())
1261       RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
1262                                  ImmDisp, SM.getImm(), BracLoc, StartInBrac,
1263                                  End);
1264   }
1265 
1266   if (SM.getImm() || !Disp) {
1267     const MCExpr *Imm = MCConstantExpr::Create(SM.getImm(), getContext());
1268     if (Disp)
1269       Disp = MCBinaryExpr::CreateAdd(Disp, Imm, getContext());
1270     else
1271       Disp = Imm;  // An immediate displacement only.
1272   }
1273 
1274   // Parse struct field access.  Intel requires a dot, but MSVC doesn't.  MSVC
1275   // will in fact do global lookup the field name inside all global typedefs,
1276   // but we don't emulate that.
1277   if (Tok.getString().find('.') != StringRef::npos) {
1278     const MCExpr *NewDisp;
1279     if (ParseIntelDotOperator(Disp, NewDisp))
1280       return nullptr;
1281 
1282     End = Tok.getEndLoc();
1283     Parser.Lex();  // Eat the field.
1284     Disp = NewDisp;
1285   }
1286 
1287   int BaseReg = SM.getBaseReg();
1288   int IndexReg = SM.getIndexReg();
1289   int Scale = SM.getScale();
1290   if (!isParsingInlineAsm()) {
1291     // handle [-42]
1292     if (!BaseReg && !IndexReg) {
1293       if (!SegReg)
1294         return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size);
1295       return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
1296                                    Start, End, Size);
1297     }
1298     StringRef ErrMsg;
1299     if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1300       Error(StartInBrac, ErrMsg);
1301       return nullptr;
1302     }
1303     return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1304                                  IndexReg, Scale, Start, End, Size);
1305   }
1306 
1307   InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1308   return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1309                                End, Size, SM.getSymName(), Info);
1310 }
1311 
1312 // Inline assembly may use variable names with namespace alias qualifiers.
ParseIntelIdentifier(const MCExpr * & Val,StringRef & Identifier,InlineAsmIdentifierInfo & Info,bool IsUnevaluatedOperand,SMLoc & End)1313 bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
1314                                         StringRef &Identifier,
1315                                         InlineAsmIdentifierInfo &Info,
1316                                         bool IsUnevaluatedOperand, SMLoc &End) {
1317   MCAsmParser &Parser = getParser();
1318   assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
1319   Val = nullptr;
1320 
1321   StringRef LineBuf(Identifier.data());
1322   void *Result =
1323     SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1324 
1325   const AsmToken &Tok = Parser.getTok();
1326   SMLoc Loc = Tok.getLoc();
1327 
1328   // Advance the token stream until the end of the current token is
1329   // after the end of what the frontend claimed.
1330   const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
1331   while (true) {
1332     End = Tok.getEndLoc();
1333     getLexer().Lex();
1334 
1335     assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?");
1336     if (End.getPointer() == EndPtr) break;
1337   }
1338   Identifier = LineBuf;
1339 
1340   // If the identifier lookup was unsuccessful, assume that we are dealing with
1341   // a label.
1342   if (!Result) {
1343     StringRef InternalName =
1344       SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
1345                                          Loc, false);
1346     assert(InternalName.size() && "We should have an internal name here.");
1347     // Push a rewrite for replacing the identifier name with the internal name.
1348     InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Label, Loc,
1349                                                 Identifier.size(),
1350                                                 InternalName));
1351   }
1352 
1353   // Create the symbol reference.
1354   MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
1355   MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1356   Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
1357   return false;
1358 }
1359 
1360 /// \brief Parse intel style segment override.
1361 std::unique_ptr<X86Operand>
ParseIntelSegmentOverride(unsigned SegReg,SMLoc Start,unsigned Size)1362 X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
1363                                         unsigned Size) {
1364   MCAsmParser &Parser = getParser();
1365   assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
1366   const AsmToken &Tok = Parser.getTok(); // Eat colon.
1367   if (Tok.isNot(AsmToken::Colon))
1368     return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1369   Parser.Lex(); // Eat ':'
1370 
1371   int64_t ImmDisp = 0;
1372   if (getLexer().is(AsmToken::Integer)) {
1373     ImmDisp = Tok.getIntVal();
1374     AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
1375 
1376     if (isParsingInlineAsm())
1377       InstInfo->AsmRewrites->push_back(
1378           AsmRewrite(AOK_ImmPrefix, ImmDispToken.getLoc()));
1379 
1380     if (getLexer().isNot(AsmToken::LBrac)) {
1381       // An immediate following a 'segment register', 'colon' token sequence can
1382       // be followed by a bracketed expression.  If it isn't we know we have our
1383       // final segment override.
1384       const MCExpr *Disp = MCConstantExpr::Create(ImmDisp, getContext());
1385       return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
1386                                    /*BaseReg=*/0, /*IndexReg=*/0, /*Scale=*/1,
1387                                    Start, ImmDispToken.getEndLoc(), Size);
1388     }
1389   }
1390 
1391   if (getLexer().is(AsmToken::LBrac))
1392     return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
1393 
1394   const MCExpr *Val;
1395   SMLoc End;
1396   if (!isParsingInlineAsm()) {
1397     if (getParser().parsePrimaryExpr(Val, End))
1398       return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1399 
1400     return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size);
1401   }
1402 
1403   InlineAsmIdentifierInfo Info;
1404   StringRef Identifier = Tok.getString();
1405   if (ParseIntelIdentifier(Val, Identifier, Info,
1406                            /*Unevaluated=*/false, End))
1407     return nullptr;
1408   return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
1409                                /*Scale=*/1, Start, End, Size, Identifier, Info);
1410 }
1411 
1412 //ParseRoundingModeOp - Parse AVX-512 rounding mode operand
1413 std::unique_ptr<X86Operand>
ParseRoundingModeOp(SMLoc Start,SMLoc End)1414 X86AsmParser::ParseRoundingModeOp(SMLoc Start, SMLoc End) {
1415   MCAsmParser &Parser = getParser();
1416   const AsmToken &Tok = Parser.getTok();
1417   consumeToken(); // Eat "{"
1418   if (Tok.getIdentifier().startswith("r")){
1419     int rndMode = StringSwitch<int>(Tok.getIdentifier())
1420       .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
1421       .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF)
1422       .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF)
1423       .Case("rz", X86::STATIC_ROUNDING::TO_ZERO)
1424       .Default(-1);
1425     if (-1 == rndMode)
1426       return ErrorOperand(Tok.getLoc(), "Invalid rounding mode.");
1427      Parser.Lex();  // Eat "r*" of r*-sae
1428     if (!getLexer().is(AsmToken::Minus))
1429       return ErrorOperand(Tok.getLoc(), "Expected - at this point");
1430     Parser.Lex();  // Eat "-"
1431     Parser.Lex();  // Eat the sae
1432     if (!getLexer().is(AsmToken::RCurly))
1433       return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1434     Parser.Lex();  // Eat "}"
1435     const MCExpr *RndModeOp =
1436       MCConstantExpr::Create(rndMode, Parser.getContext());
1437     return X86Operand::CreateImm(RndModeOp, Start, End);
1438   }
1439   return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1440 }
1441 /// ParseIntelMemOperand - Parse intel style memory operand.
ParseIntelMemOperand(int64_t ImmDisp,SMLoc Start,unsigned Size)1442 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp,
1443                                                                SMLoc Start,
1444                                                                unsigned Size) {
1445   MCAsmParser &Parser = getParser();
1446   const AsmToken &Tok = Parser.getTok();
1447   SMLoc End;
1448 
1449   // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1450   if (getLexer().is(AsmToken::LBrac))
1451     return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size);
1452   assert(ImmDisp == 0);
1453 
1454   const MCExpr *Val;
1455   if (!isParsingInlineAsm()) {
1456     if (getParser().parsePrimaryExpr(Val, End))
1457       return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1458 
1459     return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size);
1460   }
1461 
1462   InlineAsmIdentifierInfo Info;
1463   StringRef Identifier = Tok.getString();
1464   if (ParseIntelIdentifier(Val, Identifier, Info,
1465                            /*Unevaluated=*/false, End))
1466     return nullptr;
1467 
1468   if (!getLexer().is(AsmToken::LBrac))
1469     return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
1470                                  /*Scale=*/1, Start, End, Size, Identifier, Info);
1471 
1472   Parser.Lex(); // Eat '['
1473 
1474   // Parse Identifier [ ImmDisp ]
1475   IntelExprStateMachine SM(/*ImmDisp=*/0, /*StopOnLBrac=*/true,
1476                            /*AddImmPrefix=*/false);
1477   if (ParseIntelExpression(SM, End))
1478     return nullptr;
1479 
1480   if (SM.getSym()) {
1481     Error(Start, "cannot use more than one symbol in memory operand");
1482     return nullptr;
1483   }
1484   if (SM.getBaseReg()) {
1485     Error(Start, "cannot use base register with variable reference");
1486     return nullptr;
1487   }
1488   if (SM.getIndexReg()) {
1489     Error(Start, "cannot use index register with variable reference");
1490     return nullptr;
1491   }
1492 
1493   const MCExpr *Disp = MCConstantExpr::Create(SM.getImm(), getContext());
1494   // BaseReg is non-zero to avoid assertions.  In the context of inline asm,
1495   // we're pointing to a local variable in memory, so the base register is
1496   // really the frame or stack pointer.
1497   return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1498                                /*BaseReg=*/1, /*IndexReg=*/0, /*Scale=*/1,
1499                                Start, End, Size, Identifier, Info.OpDecl);
1500 }
1501 
1502 /// Parse the '.' operator.
ParseIntelDotOperator(const MCExpr * Disp,const MCExpr * & NewDisp)1503 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1504                                                 const MCExpr *&NewDisp) {
1505   MCAsmParser &Parser = getParser();
1506   const AsmToken &Tok = Parser.getTok();
1507   int64_t OrigDispVal, DotDispVal;
1508 
1509   // FIXME: Handle non-constant expressions.
1510   if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
1511     OrigDispVal = OrigDisp->getValue();
1512   else
1513     return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
1514 
1515   // Drop the optional '.'.
1516   StringRef DotDispStr = Tok.getString();
1517   if (DotDispStr.startswith("."))
1518     DotDispStr = DotDispStr.drop_front(1);
1519 
1520   // .Imm gets lexed as a real.
1521   if (Tok.is(AsmToken::Real)) {
1522     APInt DotDisp;
1523     DotDispStr.getAsInteger(10, DotDisp);
1524     DotDispVal = DotDisp.getZExtValue();
1525   } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1526     unsigned DotDisp;
1527     std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1528     if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1529                                            DotDisp))
1530       return Error(Tok.getLoc(), "Unable to lookup field reference!");
1531     DotDispVal = DotDisp;
1532   } else
1533     return Error(Tok.getLoc(), "Unexpected token type!");
1534 
1535   if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1536     SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1537     unsigned Len = DotDispStr.size();
1538     unsigned Val = OrigDispVal + DotDispVal;
1539     InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
1540                                                 Val));
1541   }
1542 
1543   NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
1544   return false;
1545 }
1546 
1547 /// Parse the 'offset' operator.  This operator is used to specify the
1548 /// location rather then the content of a variable.
ParseIntelOffsetOfOperator()1549 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
1550   MCAsmParser &Parser = getParser();
1551   const AsmToken &Tok = Parser.getTok();
1552   SMLoc OffsetOfLoc = Tok.getLoc();
1553   Parser.Lex(); // Eat offset.
1554 
1555   const MCExpr *Val;
1556   InlineAsmIdentifierInfo Info;
1557   SMLoc Start = Tok.getLoc(), End;
1558   StringRef Identifier = Tok.getString();
1559   if (ParseIntelIdentifier(Val, Identifier, Info,
1560                            /*Unevaluated=*/false, End))
1561     return nullptr;
1562 
1563   // Don't emit the offset operator.
1564   InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
1565 
1566   // The offset operator will have an 'r' constraint, thus we need to create
1567   // register operand to ensure proper matching.  Just pick a GPR based on
1568   // the size of a pointer.
1569   unsigned RegNo =
1570       is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
1571   return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1572                                OffsetOfLoc, Identifier, Info.OpDecl);
1573 }
1574 
/// Identifies which inline-asm operator ParseIntelOperator() should evaluate.
enum IntelOperatorKind {
  IOK_LENGTH = 0, ///< The 'LENGTH' operator.
  IOK_SIZE = 1,   ///< The 'SIZE' operator.
  IOK_TYPE = 2    ///< The 'TYPE' operator.
};
1580 
1581 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators.  The LENGTH operator
1582 /// returns the number of elements in an array.  It returns the value 1 for
1583 /// non-array variables.  The SIZE operator returns the size of a C or C++
1584 /// variable.  A variable's size is the product of its LENGTH and TYPE.  The
1585 /// TYPE operator returns the size of a C or C++ type or variable. If the
1586 /// variable is an array, TYPE returns the size of a single element.
ParseIntelOperator(unsigned OpKind)1587 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1588   MCAsmParser &Parser = getParser();
1589   const AsmToken &Tok = Parser.getTok();
1590   SMLoc TypeLoc = Tok.getLoc();
1591   Parser.Lex(); // Eat operator.
1592 
1593   const MCExpr *Val = nullptr;
1594   InlineAsmIdentifierInfo Info;
1595   SMLoc Start = Tok.getLoc(), End;
1596   StringRef Identifier = Tok.getString();
1597   if (ParseIntelIdentifier(Val, Identifier, Info,
1598                            /*Unevaluated=*/true, End))
1599     return nullptr;
1600 
1601   if (!Info.OpDecl)
1602     return ErrorOperand(Start, "unable to lookup expression");
1603 
1604   unsigned CVal = 0;
1605   switch(OpKind) {
1606   default: llvm_unreachable("Unexpected operand kind!");
1607   case IOK_LENGTH: CVal = Info.Length; break;
1608   case IOK_SIZE: CVal = Info.Size; break;
1609   case IOK_TYPE: CVal = Info.Type; break;
1610   }
1611 
1612   // Rewrite the type operator and the C or C++ type or variable in terms of an
1613   // immediate.  E.g. TYPE foo -> $$4
1614   unsigned Len = End.getPointer() - TypeLoc.getPointer();
1615   InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
1616 
1617   const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
1618   return X86Operand::CreateImm(Imm, Start, End);
1619 }
1620 
ParseIntelOperand()1621 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
1622   MCAsmParser &Parser = getParser();
1623   const AsmToken &Tok = Parser.getTok();
1624   SMLoc Start, End;
1625 
1626   // Offset, length, type and size operators.
1627   if (isParsingInlineAsm()) {
1628     StringRef AsmTokStr = Tok.getString();
1629     if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
1630       return ParseIntelOffsetOfOperator();
1631     if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
1632       return ParseIntelOperator(IOK_LENGTH);
1633     if (AsmTokStr == "size" || AsmTokStr == "SIZE")
1634       return ParseIntelOperator(IOK_SIZE);
1635     if (AsmTokStr == "type" || AsmTokStr == "TYPE")
1636       return ParseIntelOperator(IOK_TYPE);
1637   }
1638 
1639   unsigned Size = getIntelMemOperandSize(Tok.getString());
1640   if (Size) {
1641     Parser.Lex(); // Eat operand size (e.g., byte, word).
1642     if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
1643       return ErrorOperand(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
1644     Parser.Lex(); // Eat ptr.
1645   }
1646   Start = Tok.getLoc();
1647 
1648   // Immediate.
1649   if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
1650       getLexer().is(AsmToken::Tilde) || getLexer().is(AsmToken::LParen)) {
1651     AsmToken StartTok = Tok;
1652     IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1653                              /*AddImmPrefix=*/false);
1654     if (ParseIntelExpression(SM, End))
1655       return nullptr;
1656 
1657     int64_t Imm = SM.getImm();
1658     if (isParsingInlineAsm()) {
1659       unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1660       if (StartTok.getString().size() == Len)
1661         // Just add a prefix if this wasn't a complex immediate expression.
1662         InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
1663       else
1664         // Otherwise, rewrite the complex expression as a single immediate.
1665         InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm));
1666     }
1667 
1668     if (getLexer().isNot(AsmToken::LBrac)) {
1669       // If a directional label (ie. 1f or 2b) was parsed above from
1670       // ParseIntelExpression() then SM.getSym() was set to a pointer to
1671       // to the MCExpr with the directional local symbol and this is a
1672       // memory operand not an immediate operand.
1673       if (SM.getSym())
1674         return X86Operand::CreateMem(getPointerWidth(), SM.getSym(), Start, End,
1675                                      Size);
1676 
1677       const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext());
1678       return X86Operand::CreateImm(ImmExpr, Start, End);
1679     }
1680 
1681     // Only positive immediates are valid.
1682     if (Imm < 0)
1683       return ErrorOperand(Start, "expected a positive immediate displacement "
1684                           "before bracketed expr.");
1685 
1686     // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1687     return ParseIntelMemOperand(Imm, Start, Size);
1688   }
1689 
1690   // rounding mode token
1691   if (STI.getFeatureBits() & X86::FeatureAVX512 &&
1692       getLexer().is(AsmToken::LCurly))
1693     return ParseRoundingModeOp(Start, End);
1694 
1695   // Register.
1696   unsigned RegNo = 0;
1697   if (!ParseRegister(RegNo, Start, End)) {
1698     // If this is a segment register followed by a ':', then this is the start
1699     // of a segment override, otherwise this is a normal register reference.
1700     if (getLexer().isNot(AsmToken::Colon))
1701       return X86Operand::CreateReg(RegNo, Start, End);
1702 
1703     return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
1704   }
1705 
1706   // Memory operand.
1707   return ParseIntelMemOperand(/*Disp=*/0, Start, Size);
1708 }
1709 
ParseATTOperand()1710 std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
1711   MCAsmParser &Parser = getParser();
1712   switch (getLexer().getKind()) {
1713   default:
1714     // Parse a memory operand with no segment register.
1715     return ParseMemOperand(0, Parser.getTok().getLoc());
1716   case AsmToken::Percent: {
1717     // Read the register.
1718     unsigned RegNo;
1719     SMLoc Start, End;
1720     if (ParseRegister(RegNo, Start, End)) return nullptr;
1721     if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1722       Error(Start, "%eiz and %riz can only be used as index registers",
1723             SMRange(Start, End));
1724       return nullptr;
1725     }
1726 
1727     // If this is a segment register followed by a ':', then this is the start
1728     // of a memory reference, otherwise this is a normal register reference.
1729     if (getLexer().isNot(AsmToken::Colon))
1730       return X86Operand::CreateReg(RegNo, Start, End);
1731 
1732     if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
1733       return ErrorOperand(Start, "invalid segment register");
1734 
1735     getParser().Lex(); // Eat the colon.
1736     return ParseMemOperand(RegNo, Start);
1737   }
1738   case AsmToken::Dollar: {
1739     // $42 -> immediate.
1740     SMLoc Start = Parser.getTok().getLoc(), End;
1741     Parser.Lex();
1742     const MCExpr *Val;
1743     if (getParser().parseExpression(Val, End))
1744       return nullptr;
1745     return X86Operand::CreateImm(Val, Start, End);
1746   }
1747   case AsmToken::LCurly:{
1748     SMLoc Start = Parser.getTok().getLoc(), End;
1749     if (STI.getFeatureBits() & X86::FeatureAVX512)
1750       return ParseRoundingModeOp(Start, End);
1751     return ErrorOperand(Start, "unknown token in expression");
1752   }
1753   }
1754 }
1755 
HandleAVX512Operand(OperandVector & Operands,const MCParsedAsmOperand & Op)1756 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
1757                                        const MCParsedAsmOperand &Op) {
1758   MCAsmParser &Parser = getParser();
1759   if(STI.getFeatureBits() & X86::FeatureAVX512) {
1760     if (getLexer().is(AsmToken::LCurly)) {
1761       // Eat "{" and mark the current place.
1762       const SMLoc consumedToken = consumeToken();
1763       // Distinguish {1to<NUM>} from {%k<NUM>}.
1764       if(getLexer().is(AsmToken::Integer)) {
1765         // Parse memory broadcasting ({1to<NUM>}).
1766         if (getLexer().getTok().getIntVal() != 1)
1767           return !ErrorAndEatStatement(getLexer().getLoc(),
1768                                        "Expected 1to<NUM> at this point");
1769         Parser.Lex();  // Eat "1" of 1to8
1770         if (!getLexer().is(AsmToken::Identifier) ||
1771             !getLexer().getTok().getIdentifier().startswith("to"))
1772           return !ErrorAndEatStatement(getLexer().getLoc(),
1773                                        "Expected 1to<NUM> at this point");
1774         // Recognize only reasonable suffixes.
1775         const char *BroadcastPrimitive =
1776           StringSwitch<const char*>(getLexer().getTok().getIdentifier())
1777             .Case("to2",  "{1to2}")
1778             .Case("to4",  "{1to4}")
1779             .Case("to8",  "{1to8}")
1780             .Case("to16", "{1to16}")
1781             .Default(nullptr);
1782         if (!BroadcastPrimitive)
1783           return !ErrorAndEatStatement(getLexer().getLoc(),
1784                                        "Invalid memory broadcast primitive.");
1785         Parser.Lex();  // Eat "toN" of 1toN
1786         if (!getLexer().is(AsmToken::RCurly))
1787           return !ErrorAndEatStatement(getLexer().getLoc(),
1788                                        "Expected } at this point");
1789         Parser.Lex();  // Eat "}"
1790         Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
1791                                                    consumedToken));
1792         // No AVX512 specific primitives can pass
1793         // after memory broadcasting, so return.
1794         return true;
1795       } else {
1796         // Parse mask register {%k1}
1797         Operands.push_back(X86Operand::CreateToken("{", consumedToken));
1798         if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
1799           Operands.push_back(std::move(Op));
1800           if (!getLexer().is(AsmToken::RCurly))
1801             return !ErrorAndEatStatement(getLexer().getLoc(),
1802                                          "Expected } at this point");
1803           Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
1804 
1805           // Parse "zeroing non-masked" semantic {z}
1806           if (getLexer().is(AsmToken::LCurly)) {
1807             Operands.push_back(X86Operand::CreateToken("{z}", consumeToken()));
1808             if (!getLexer().is(AsmToken::Identifier) ||
1809                 getLexer().getTok().getIdentifier() != "z")
1810               return !ErrorAndEatStatement(getLexer().getLoc(),
1811                                            "Expected z at this point");
1812             Parser.Lex();  // Eat the z
1813             if (!getLexer().is(AsmToken::RCurly))
1814               return !ErrorAndEatStatement(getLexer().getLoc(),
1815                                            "Expected } at this point");
1816             Parser.Lex();  // Eat the }
1817           }
1818         }
1819       }
1820     }
1821   }
1822   return true;
1823 }
1824 
1825 /// ParseMemOperand: segment: disp(basereg, indexreg, scale).  The '%ds:' prefix
1826 /// has already been parsed if present.
ParseMemOperand(unsigned SegReg,SMLoc MemStart)1827 std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
1828                                                           SMLoc MemStart) {
1829 
1830   MCAsmParser &Parser = getParser();
1831   // We have to disambiguate a parenthesized expression "(4+5)" from the start
1832   // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)".  The
1833   // only way to do this without lookahead is to eat the '(' and see what is
1834   // after it.
1835   const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
1836   if (getLexer().isNot(AsmToken::LParen)) {
1837     SMLoc ExprEnd;
1838     if (getParser().parseExpression(Disp, ExprEnd)) return nullptr;
1839 
1840     // After parsing the base expression we could either have a parenthesized
1841     // memory address or not.  If not, return now.  If so, eat the (.
1842     if (getLexer().isNot(AsmToken::LParen)) {
1843       // Unless we have a segment register, treat this as an immediate.
1844       if (SegReg == 0)
1845         return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, ExprEnd);
1846       return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
1847                                    MemStart, ExprEnd);
1848     }
1849 
1850     // Eat the '('.
1851     Parser.Lex();
1852   } else {
1853     // Okay, we have a '('.  We don't know if this is an expression or not, but
1854     // so we have to eat the ( to see beyond it.
1855     SMLoc LParenLoc = Parser.getTok().getLoc();
1856     Parser.Lex(); // Eat the '('.
1857 
1858     if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
1859       // Nothing to do here, fall into the code below with the '(' part of the
1860       // memory operand consumed.
1861     } else {
1862       SMLoc ExprEnd;
1863 
1864       // It must be an parenthesized expression, parse it now.
1865       if (getParser().parseParenExpression(Disp, ExprEnd))
1866         return nullptr;
1867 
1868       // After parsing the base expression we could either have a parenthesized
1869       // memory address or not.  If not, return now.  If so, eat the (.
1870       if (getLexer().isNot(AsmToken::LParen)) {
1871         // Unless we have a segment register, treat this as an immediate.
1872         if (SegReg == 0)
1873           return X86Operand::CreateMem(getPointerWidth(), Disp, LParenLoc,
1874                                        ExprEnd);
1875         return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
1876                                      MemStart, ExprEnd);
1877       }
1878 
1879       // Eat the '('.
1880       Parser.Lex();
1881     }
1882   }
1883 
1884   // If we reached here, then we just ate the ( of the memory operand.  Process
1885   // the rest of the memory operand.
1886   unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
1887   SMLoc IndexLoc, BaseLoc;
1888 
1889   if (getLexer().is(AsmToken::Percent)) {
1890     SMLoc StartLoc, EndLoc;
1891     BaseLoc = Parser.getTok().getLoc();
1892     if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr;
1893     if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
1894       Error(StartLoc, "eiz and riz can only be used as index registers",
1895             SMRange(StartLoc, EndLoc));
1896       return nullptr;
1897     }
1898   }
1899 
1900   if (getLexer().is(AsmToken::Comma)) {
1901     Parser.Lex(); // Eat the comma.
1902     IndexLoc = Parser.getTok().getLoc();
1903 
1904     // Following the comma we should have either an index register, or a scale
1905     // value. We don't support the later form, but we want to parse it
1906     // correctly.
1907     //
1908     // Not that even though it would be completely consistent to support syntax
1909     // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
1910     if (getLexer().is(AsmToken::Percent)) {
1911       SMLoc L;
1912       if (ParseRegister(IndexReg, L, L)) return nullptr;
1913 
1914       if (getLexer().isNot(AsmToken::RParen)) {
1915         // Parse the scale amount:
1916         //  ::= ',' [scale-expression]
1917         if (getLexer().isNot(AsmToken::Comma)) {
1918           Error(Parser.getTok().getLoc(),
1919                 "expected comma in scale expression");
1920           return nullptr;
1921         }
1922         Parser.Lex(); // Eat the comma.
1923 
1924         if (getLexer().isNot(AsmToken::RParen)) {
1925           SMLoc Loc = Parser.getTok().getLoc();
1926 
1927           int64_t ScaleVal;
1928           if (getParser().parseAbsoluteExpression(ScaleVal)){
1929             Error(Loc, "expected scale expression");
1930             return nullptr;
1931           }
1932 
1933           // Validate the scale amount.
1934 	  if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1935               ScaleVal != 1) {
1936             Error(Loc, "scale factor in 16-bit address must be 1");
1937             return nullptr;
1938 	  }
1939           if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
1940             Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
1941             return nullptr;
1942           }
1943           Scale = (unsigned)ScaleVal;
1944         }
1945       }
1946     } else if (getLexer().isNot(AsmToken::RParen)) {
1947       // A scale amount without an index is ignored.
1948       // index.
1949       SMLoc Loc = Parser.getTok().getLoc();
1950 
1951       int64_t Value;
1952       if (getParser().parseAbsoluteExpression(Value))
1953         return nullptr;
1954 
1955       if (Value != 1)
1956         Warning(Loc, "scale factor without index register is ignored");
1957       Scale = 1;
1958     }
1959   }
1960 
1961   // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
1962   if (getLexer().isNot(AsmToken::RParen)) {
1963     Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
1964     return nullptr;
1965   }
1966   SMLoc MemEnd = Parser.getTok().getEndLoc();
1967   Parser.Lex(); // Eat the ')'.
1968 
1969   // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
1970   // and then only in non-64-bit modes. Except for DX, which is a special case
1971   // because an unofficial form of in/out instructions uses it.
1972   if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1973       (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP &&
1974                          BaseReg != X86::SI && BaseReg != X86::DI)) &&
1975       BaseReg != X86::DX) {
1976     Error(BaseLoc, "invalid 16-bit base register");
1977     return nullptr;
1978   }
1979   if (BaseReg == 0 &&
1980       X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
1981     Error(IndexLoc, "16-bit memory operand may not include only index register");
1982     return nullptr;
1983   }
1984 
1985   StringRef ErrMsg;
1986   if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1987     Error(BaseLoc, ErrMsg);
1988     return nullptr;
1989   }
1990 
1991   if (SegReg || BaseReg || IndexReg)
1992     return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1993                                  IndexReg, Scale, MemStart, MemEnd);
1994   return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, MemEnd);
1995 }
1996 
ParseInstruction(ParseInstructionInfo & Info,StringRef Name,SMLoc NameLoc,OperandVector & Operands)1997 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1998                                     SMLoc NameLoc, OperandVector &Operands) {
1999   MCAsmParser &Parser = getParser();
2000   InstInfo = &Info;
2001   StringRef PatchedName = Name;
2002 
2003   // FIXME: Hack to recognize setneb as setne.
2004   if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
2005       PatchedName != "setb" && PatchedName != "setnb")
2006     PatchedName = PatchedName.substr(0, Name.size()-1);
2007 
2008   // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
2009   if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
2010       (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
2011        PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
2012     bool IsVCMP = PatchedName[0] == 'v';
2013     unsigned CCIdx = IsVCMP ? 4 : 3;
2014     unsigned ComparisonCode = StringSwitch<unsigned>(
2015       PatchedName.slice(CCIdx, PatchedName.size() - 2))
2016       .Case("eq",       0x00)
2017       .Case("lt",       0x01)
2018       .Case("le",       0x02)
2019       .Case("unord",    0x03)
2020       .Case("neq",      0x04)
2021       .Case("nlt",      0x05)
2022       .Case("nle",      0x06)
2023       .Case("ord",      0x07)
2024       /* AVX only from here */
2025       .Case("eq_uq",    0x08)
2026       .Case("nge",      0x09)
2027       .Case("ngt",      0x0A)
2028       .Case("false",    0x0B)
2029       .Case("neq_oq",   0x0C)
2030       .Case("ge",       0x0D)
2031       .Case("gt",       0x0E)
2032       .Case("true",     0x0F)
2033       .Case("eq_os",    0x10)
2034       .Case("lt_oq",    0x11)
2035       .Case("le_oq",    0x12)
2036       .Case("unord_s",  0x13)
2037       .Case("neq_us",   0x14)
2038       .Case("nlt_uq",   0x15)
2039       .Case("nle_uq",   0x16)
2040       .Case("ord_s",    0x17)
2041       .Case("eq_us",    0x18)
2042       .Case("nge_uq",   0x19)
2043       .Case("ngt_uq",   0x1A)
2044       .Case("false_os", 0x1B)
2045       .Case("neq_os",   0x1C)
2046       .Case("ge_oq",    0x1D)
2047       .Case("gt_oq",    0x1E)
2048       .Case("true_us",  0x1F)
2049       .Default(~0U);
2050     if (ComparisonCode != ~0U && (IsVCMP || ComparisonCode < 8)) {
2051 
2052       Operands.push_back(X86Operand::CreateToken(PatchedName.slice(0, CCIdx),
2053                                                  NameLoc));
2054 
2055       const MCExpr *ImmOp = MCConstantExpr::Create(ComparisonCode,
2056                                                    getParser().getContext());
2057       Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2058 
2059       PatchedName = PatchedName.substr(PatchedName.size() - 2);
2060     }
2061   }
2062 
2063   // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2064   if (PatchedName.startswith("vpcmp") &&
2065       (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2066        PatchedName.endswith("d") || PatchedName.endswith("q"))) {
2067     unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2068     unsigned ComparisonCode = StringSwitch<unsigned>(
2069       PatchedName.slice(5, PatchedName.size() - CCIdx))
2070       .Case("eq",    0x0) // Only allowed on unsigned. Checked below.
2071       .Case("lt",    0x1)
2072       .Case("le",    0x2)
2073       //.Case("false", 0x3) // Not a documented alias.
2074       .Case("neq",   0x4)
2075       .Case("nlt",   0x5)
2076       .Case("nle",   0x6)
2077       //.Case("true",  0x7) // Not a documented alias.
2078       .Default(~0U);
2079     if (ComparisonCode != ~0U && (ComparisonCode != 0 || CCIdx == 2)) {
2080       Operands.push_back(X86Operand::CreateToken("vpcmp", NameLoc));
2081 
2082       const MCExpr *ImmOp = MCConstantExpr::Create(ComparisonCode,
2083                                                    getParser().getContext());
2084       Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2085 
2086       PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
2087     }
2088   }
2089 
2090   // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2091   if (PatchedName.startswith("vpcom") &&
2092       (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2093        PatchedName.endswith("d") || PatchedName.endswith("q"))) {
2094     unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2095     unsigned ComparisonCode = StringSwitch<unsigned>(
2096       PatchedName.slice(5, PatchedName.size() - CCIdx))
2097       .Case("lt",    0x0)
2098       .Case("le",    0x1)
2099       .Case("gt",    0x2)
2100       .Case("ge",    0x3)
2101       .Case("eq",    0x4)
2102       .Case("neq",   0x5)
2103       .Case("false", 0x6)
2104       .Case("true",  0x7)
2105       .Default(~0U);
2106     if (ComparisonCode != ~0U) {
2107       Operands.push_back(X86Operand::CreateToken("vpcom", NameLoc));
2108 
2109       const MCExpr *ImmOp = MCConstantExpr::Create(ComparisonCode,
2110                                                    getParser().getContext());
2111       Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2112 
2113       PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
2114     }
2115   }
2116 
2117   Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
2118 
2119   // Determine whether this is an instruction prefix.
2120   bool isPrefix =
2121     Name == "lock" || Name == "rep" ||
2122     Name == "repe" || Name == "repz" ||
2123     Name == "repne" || Name == "repnz" ||
2124     Name == "rex64" || Name == "data16";
2125 
2126 
2127   // This does the actual operand parsing.  Don't parse any more if we have a
2128   // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
2129   // just want to parse the "lock" as the first instruction and the "incl" as
2130   // the next one.
2131   if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
2132 
2133     // Parse '*' modifier.
2134     if (getLexer().is(AsmToken::Star))
2135       Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
2136 
2137     // Read the operands.
2138     while(1) {
2139       if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
2140         Operands.push_back(std::move(Op));
2141         if (!HandleAVX512Operand(Operands, *Operands.back()))
2142           return true;
2143       } else {
2144          Parser.eatToEndOfStatement();
2145          return true;
2146       }
2147       // check for comma and eat it
2148       if (getLexer().is(AsmToken::Comma))
2149         Parser.Lex();
2150       else
2151         break;
2152      }
2153 
2154     if (getLexer().isNot(AsmToken::EndOfStatement))
2155       return ErrorAndEatStatement(getLexer().getLoc(),
2156                                   "unexpected token in argument list");
2157    }
2158 
2159   // Consume the EndOfStatement or the prefix separator Slash
2160   if (getLexer().is(AsmToken::EndOfStatement) ||
2161       (isPrefix && getLexer().is(AsmToken::Slash)))
2162     Parser.Lex();
2163 
2164   // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
2165   // "outb %al, %dx".  Out doesn't take a memory form, but this is a widely
2166   // documented form in various unofficial manuals, so a lot of code uses it.
2167   if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
2168       Operands.size() == 3) {
2169     X86Operand &Op = (X86Operand &)*Operands.back();
2170     if (Op.isMem() && Op.Mem.SegReg == 0 &&
2171         isa<MCConstantExpr>(Op.Mem.Disp) &&
2172         cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2173         Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2174       SMLoc Loc = Op.getEndLoc();
2175       Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2176     }
2177   }
2178   // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
2179   if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
2180       Operands.size() == 3) {
2181     X86Operand &Op = (X86Operand &)*Operands[1];
2182     if (Op.isMem() && Op.Mem.SegReg == 0 &&
2183         isa<MCConstantExpr>(Op.Mem.Disp) &&
2184         cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2185         Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2186       SMLoc Loc = Op.getEndLoc();
2187       Operands[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2188     }
2189   }
2190 
2191   // Append default arguments to "ins[bwld]"
2192   if (Name.startswith("ins") && Operands.size() == 1 &&
2193       (Name == "insb" || Name == "insw" || Name == "insl" ||
2194        Name == "insd" )) {
2195     if (isParsingIntelSyntax()) {
2196       Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2197       Operands.push_back(DefaultMemDIOperand(NameLoc));
2198     } else {
2199       Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2200       Operands.push_back(DefaultMemDIOperand(NameLoc));
2201     }
2202   }
2203 
2204   // Append default arguments to "outs[bwld]"
2205   if (Name.startswith("outs") && Operands.size() == 1 &&
2206       (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2207        Name == "outsd" )) {
2208     if (isParsingIntelSyntax()) {
2209       Operands.push_back(DefaultMemSIOperand(NameLoc));
2210       Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2211     } else {
2212       Operands.push_back(DefaultMemSIOperand(NameLoc));
2213       Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2214     }
2215   }
2216 
2217   // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2218   // values of $SIREG according to the mode. It would be nice if this
2219   // could be achieved with InstAlias in the tables.
2220   if (Name.startswith("lods") && Operands.size() == 1 &&
2221       (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2222        Name == "lodsl" || Name == "lodsd" || Name == "lodsq"))
2223     Operands.push_back(DefaultMemSIOperand(NameLoc));
2224 
2225   // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2226   // values of $DIREG according to the mode. It would be nice if this
2227   // could be achieved with InstAlias in the tables.
2228   if (Name.startswith("stos") && Operands.size() == 1 &&
2229       (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2230        Name == "stosl" || Name == "stosd" || Name == "stosq"))
2231     Operands.push_back(DefaultMemDIOperand(NameLoc));
2232 
2233   // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2234   // values of $DIREG according to the mode. It would be nice if this
2235   // could be achieved with InstAlias in the tables.
2236   if (Name.startswith("scas") && Operands.size() == 1 &&
2237       (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2238        Name == "scasl" || Name == "scasd" || Name == "scasq"))
2239     Operands.push_back(DefaultMemDIOperand(NameLoc));
2240 
2241   // Add default SI and DI operands to "cmps[bwlq]".
2242   if (Name.startswith("cmps") &&
2243       (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2244        Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
2245     if (Operands.size() == 1) {
2246       if (isParsingIntelSyntax()) {
2247         Operands.push_back(DefaultMemSIOperand(NameLoc));
2248         Operands.push_back(DefaultMemDIOperand(NameLoc));
2249       } else {
2250         Operands.push_back(DefaultMemDIOperand(NameLoc));
2251         Operands.push_back(DefaultMemSIOperand(NameLoc));
2252       }
2253     } else if (Operands.size() == 3) {
2254       X86Operand &Op = (X86Operand &)*Operands[1];
2255       X86Operand &Op2 = (X86Operand &)*Operands[2];
2256       if (!doSrcDstMatch(Op, Op2))
2257         return Error(Op.getStartLoc(),
2258                      "mismatching source and destination index registers");
2259     }
2260   }
2261 
2262   // Add default SI and DI operands to "movs[bwlq]".
2263   if ((Name.startswith("movs") &&
2264       (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2265        Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2266       (Name.startswith("smov") &&
2267       (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2268        Name == "smovl" || Name == "smovd" || Name == "smovq"))) {
2269     if (Operands.size() == 1) {
2270       if (Name == "movsd")
2271         Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
2272       if (isParsingIntelSyntax()) {
2273         Operands.push_back(DefaultMemDIOperand(NameLoc));
2274         Operands.push_back(DefaultMemSIOperand(NameLoc));
2275       } else {
2276         Operands.push_back(DefaultMemSIOperand(NameLoc));
2277         Operands.push_back(DefaultMemDIOperand(NameLoc));
2278       }
2279     } else if (Operands.size() == 3) {
2280       X86Operand &Op = (X86Operand &)*Operands[1];
2281       X86Operand &Op2 = (X86Operand &)*Operands[2];
2282       if (!doSrcDstMatch(Op, Op2))
2283         return Error(Op.getStartLoc(),
2284                      "mismatching source and destination index registers");
2285     }
2286   }
2287 
2288   // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>.  Canonicalize to
2289   // "shift <op>".
2290   if ((Name.startswith("shr") || Name.startswith("sar") ||
2291        Name.startswith("shl") || Name.startswith("sal") ||
2292        Name.startswith("rcl") || Name.startswith("rcr") ||
2293        Name.startswith("rol") || Name.startswith("ror")) &&
2294       Operands.size() == 3) {
2295     if (isParsingIntelSyntax()) {
2296       // Intel syntax
2297       X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
2298       if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2299           cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2300         Operands.pop_back();
2301     } else {
2302       X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2303       if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2304           cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2305         Operands.erase(Operands.begin() + 1);
2306     }
2307   }
2308 
2309   // Transforms "int $3" into "int3" as a size optimization.  We can't write an
2310   // instalias with an immediate operand yet.
2311   if (Name == "int" && Operands.size() == 2) {
2312     X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2313     if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2314         cast<MCConstantExpr>(Op1.getImm())->getValue() == 3) {
2315       Operands.erase(Operands.begin() + 1);
2316       static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
2317     }
2318   }
2319 
2320   return false;
2321 }
2322 
convertToSExti8(MCInst & Inst,unsigned Opcode,unsigned Reg,bool isCmp)2323 static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
2324                             bool isCmp) {
2325   MCInst TmpInst;
2326   TmpInst.setOpcode(Opcode);
2327   if (!isCmp)
2328     TmpInst.addOperand(MCOperand::CreateReg(Reg));
2329   TmpInst.addOperand(MCOperand::CreateReg(Reg));
2330   TmpInst.addOperand(Inst.getOperand(0));
2331   Inst = TmpInst;
2332   return true;
2333 }
2334 
convert16i16to16ri8(MCInst & Inst,unsigned Opcode,bool isCmp=false)2335 static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
2336                                 bool isCmp = false) {
2337   if (!Inst.getOperand(0).isImm() ||
2338       !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
2339     return false;
2340 
2341   return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
2342 }
2343 
convert32i32to32ri8(MCInst & Inst,unsigned Opcode,bool isCmp=false)2344 static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
2345                                 bool isCmp = false) {
2346   if (!Inst.getOperand(0).isImm() ||
2347       !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
2348     return false;
2349 
2350   return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
2351 }
2352 
convert64i32to64ri8(MCInst & Inst,unsigned Opcode,bool isCmp=false)2353 static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
2354                                 bool isCmp = false) {
2355   if (!Inst.getOperand(0).isImm() ||
2356       !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
2357     return false;
2358 
2359   return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
2360 }
2361 
validateInstruction(MCInst & Inst,const OperandVector & Ops)2362 bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
2363   switch (Inst.getOpcode()) {
2364   default: return true;
2365   case X86::INT:
2366     X86Operand &Op = static_cast<X86Operand &>(*Ops[1]);
2367     assert(Op.isImm() && "expected immediate");
2368     int64_t Res;
2369     if (!Op.getImm()->EvaluateAsAbsolute(Res) || Res > 255) {
2370       Error(Op.getStartLoc(), "interrupt vector must be in range [0-255]");
2371       return false;
2372     }
2373     return true;
2374   }
2375   llvm_unreachable("handle the instruction appropriately");
2376 }
2377 
processInstruction(MCInst & Inst,const OperandVector & Ops)2378 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
2379   switch (Inst.getOpcode()) {
2380   default: return false;
2381   case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
2382   case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
2383   case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
2384   case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
2385   case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
2386   case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
2387   case X86::OR16i16:  return convert16i16to16ri8(Inst, X86::OR16ri8);
2388   case X86::OR32i32:  return convert32i32to32ri8(Inst, X86::OR32ri8);
2389   case X86::OR64i32:  return convert64i32to64ri8(Inst, X86::OR64ri8);
2390   case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
2391   case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
2392   case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
2393   case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
2394   case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
2395   case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
2396   case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
2397   case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
2398   case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
2399   case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
2400   case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
2401   case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
2402   case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
2403   case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
2404   case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
2405   case X86::VMOVAPDrr:
2406   case X86::VMOVAPDYrr:
2407   case X86::VMOVAPSrr:
2408   case X86::VMOVAPSYrr:
2409   case X86::VMOVDQArr:
2410   case X86::VMOVDQAYrr:
2411   case X86::VMOVDQUrr:
2412   case X86::VMOVDQUYrr:
2413   case X86::VMOVUPDrr:
2414   case X86::VMOVUPDYrr:
2415   case X86::VMOVUPSrr:
2416   case X86::VMOVUPSYrr: {
2417     if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2418         !X86II::isX86_64ExtendedReg(Inst.getOperand(1).getReg()))
2419       return false;
2420 
2421     unsigned NewOpc;
2422     switch (Inst.getOpcode()) {
2423     default: llvm_unreachable("Invalid opcode");
2424     case X86::VMOVAPDrr:  NewOpc = X86::VMOVAPDrr_REV;  break;
2425     case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
2426     case X86::VMOVAPSrr:  NewOpc = X86::VMOVAPSrr_REV;  break;
2427     case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
2428     case X86::VMOVDQArr:  NewOpc = X86::VMOVDQArr_REV;  break;
2429     case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
2430     case X86::VMOVDQUrr:  NewOpc = X86::VMOVDQUrr_REV;  break;
2431     case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
2432     case X86::VMOVUPDrr:  NewOpc = X86::VMOVUPDrr_REV;  break;
2433     case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
2434     case X86::VMOVUPSrr:  NewOpc = X86::VMOVUPSrr_REV;  break;
2435     case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
2436     }
2437     Inst.setOpcode(NewOpc);
2438     return true;
2439   }
2440   case X86::VMOVSDrr:
2441   case X86::VMOVSSrr: {
2442     if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2443         !X86II::isX86_64ExtendedReg(Inst.getOperand(2).getReg()))
2444       return false;
2445     unsigned NewOpc;
2446     switch (Inst.getOpcode()) {
2447     default: llvm_unreachable("Invalid opcode");
2448     case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV;   break;
2449     case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV;   break;
2450     }
2451     Inst.setOpcode(NewOpc);
2452     return true;
2453   }
2454   }
2455 }
2456 
2457 static const char *getSubtargetFeatureName(uint64_t Val);
2458 
EmitInstruction(MCInst & Inst,OperandVector & Operands,MCStreamer & Out)2459 void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
2460                                    MCStreamer &Out) {
2461   Instrumentation->InstrumentAndEmitInstruction(Inst, Operands, getContext(),
2462                                                 MII, Out);
2463 }
2464 
MatchAndEmitInstruction(SMLoc IDLoc,unsigned & Opcode,OperandVector & Operands,MCStreamer & Out,uint64_t & ErrorInfo,bool MatchingInlineAsm)2465 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2466                                            OperandVector &Operands,
2467                                            MCStreamer &Out, uint64_t &ErrorInfo,
2468                                            bool MatchingInlineAsm) {
2469   if (isParsingIntelSyntax())
2470     return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
2471                                         MatchingInlineAsm);
2472   return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
2473                                     MatchingInlineAsm);
2474 }
2475 
MatchFPUWaitAlias(SMLoc IDLoc,X86Operand & Op,OperandVector & Operands,MCStreamer & Out,bool MatchingInlineAsm)2476 void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
2477                                      OperandVector &Operands, MCStreamer &Out,
2478                                      bool MatchingInlineAsm) {
2479   // FIXME: This should be replaced with a real .td file alias mechanism.
2480   // Also, MatchInstructionImpl should actually *do* the EmitInstruction
2481   // call.
2482   const char *Repl = StringSwitch<const char *>(Op.getToken())
2483                          .Case("finit", "fninit")
2484                          .Case("fsave", "fnsave")
2485                          .Case("fstcw", "fnstcw")
2486                          .Case("fstcww", "fnstcw")
2487                          .Case("fstenv", "fnstenv")
2488                          .Case("fstsw", "fnstsw")
2489                          .Case("fstsww", "fnstsw")
2490                          .Case("fclex", "fnclex")
2491                          .Default(nullptr);
2492   if (Repl) {
2493     MCInst Inst;
2494     Inst.setOpcode(X86::WAIT);
2495     Inst.setLoc(IDLoc);
2496     if (!MatchingInlineAsm)
2497       EmitInstruction(Inst, Operands, Out);
2498     Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
2499   }
2500 }
2501 
ErrorMissingFeature(SMLoc IDLoc,uint64_t ErrorInfo,bool MatchingInlineAsm)2502 bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
2503                                        bool MatchingInlineAsm) {
2504   assert(ErrorInfo && "Unknown missing feature!");
2505   ArrayRef<SMRange> EmptyRanges = None;
2506   SmallString<126> Msg;
2507   raw_svector_ostream OS(Msg);
2508   OS << "instruction requires:";
2509   uint64_t Mask = 1;
2510   for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2511     if (ErrorInfo & Mask)
2512       OS << ' ' << getSubtargetFeatureName(ErrorInfo & Mask);
2513     Mask <<= 1;
2514   }
2515   return Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2516 }
2517 
MatchAndEmitATTInstruction(SMLoc IDLoc,unsigned & Opcode,OperandVector & Operands,MCStreamer & Out,uint64_t & ErrorInfo,bool MatchingInlineAsm)2518 bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
2519                                               OperandVector &Operands,
2520                                               MCStreamer &Out,
2521                                               uint64_t &ErrorInfo,
2522                                               bool MatchingInlineAsm) {
2523   assert(!Operands.empty() && "Unexpect empty operand list!");
2524   X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2525   assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2526   ArrayRef<SMRange> EmptyRanges = None;
2527 
2528   // First, handle aliases that expand to multiple instructions.
2529   MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
2530 
2531   bool WasOriginallyInvalidOperand = false;
2532   MCInst Inst;
2533 
2534   // First, try a direct match.
2535   switch (MatchInstructionImpl(Operands, Inst,
2536                                ErrorInfo, MatchingInlineAsm,
2537                                isParsingIntelSyntax())) {
2538   default: llvm_unreachable("Unexpected match result!");
2539   case Match_Success:
2540     if (!validateInstruction(Inst, Operands))
2541       return true;
2542 
2543     // Some instructions need post-processing to, for example, tweak which
2544     // encoding is selected. Loop on it while changes happen so the
2545     // individual transformations can chain off each other.
2546     if (!MatchingInlineAsm)
2547       while (processInstruction(Inst, Operands))
2548         ;
2549 
2550     Inst.setLoc(IDLoc);
2551     if (!MatchingInlineAsm)
2552       EmitInstruction(Inst, Operands, Out);
2553     Opcode = Inst.getOpcode();
2554     return false;
2555   case Match_MissingFeature:
2556     return ErrorMissingFeature(IDLoc, ErrorInfo, MatchingInlineAsm);
2557   case Match_InvalidOperand:
2558     WasOriginallyInvalidOperand = true;
2559     break;
2560   case Match_MnemonicFail:
2561     break;
2562   }
2563 
2564   // FIXME: Ideally, we would only attempt suffix matches for things which are
2565   // valid prefixes, and we could just infer the right unambiguous
2566   // type. However, that requires substantially more matcher support than the
2567   // following hack.
2568 
2569   // Change the operand to point to a temporary token.
2570   StringRef Base = Op.getToken();
2571   SmallString<16> Tmp;
2572   Tmp += Base;
2573   Tmp += ' ';
2574   Op.setTokenValue(Tmp);
2575 
2576   // If this instruction starts with an 'f', then it is a floating point stack
2577   // instruction.  These come in up to three forms for 32-bit, 64-bit, and
2578   // 80-bit floating point, which use the suffixes s,l,t respectively.
2579   //
2580   // Otherwise, we assume that this may be an integer instruction, which comes
2581   // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
2582   const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2583 
2584   // Check for the various suffix matches.
2585   uint64_t ErrorInfoIgnore;
2586   uint64_t ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
2587   unsigned Match[4];
2588 
2589   for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
2590     Tmp.back() = Suffixes[I];
2591     Match[I] = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2592                                   MatchingInlineAsm, isParsingIntelSyntax());
2593     // If this returned as a missing feature failure, remember that.
2594     if (Match[I] == Match_MissingFeature)
2595       ErrorInfoMissingFeature = ErrorInfoIgnore;
2596   }
2597 
2598   // Restore the old token.
2599   Op.setTokenValue(Base);
2600 
2601   // If exactly one matched, then we treat that as a successful match (and the
2602   // instruction will already have been filled in correctly, since the failing
2603   // matches won't have modified it).
2604   unsigned NumSuccessfulMatches =
2605       std::count(std::begin(Match), std::end(Match), Match_Success);
2606   if (NumSuccessfulMatches == 1) {
2607     Inst.setLoc(IDLoc);
2608     if (!MatchingInlineAsm)
2609       EmitInstruction(Inst, Operands, Out);
2610     Opcode = Inst.getOpcode();
2611     return false;
2612   }
2613 
2614   // Otherwise, the match failed, try to produce a decent error message.
2615 
2616   // If we had multiple suffix matches, then identify this as an ambiguous
2617   // match.
2618   if (NumSuccessfulMatches > 1) {
2619     char MatchChars[4];
2620     unsigned NumMatches = 0;
2621     for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I)
2622       if (Match[I] == Match_Success)
2623         MatchChars[NumMatches++] = Suffixes[I];
2624 
2625     SmallString<126> Msg;
2626     raw_svector_ostream OS(Msg);
2627     OS << "ambiguous instructions require an explicit suffix (could be ";
2628     for (unsigned i = 0; i != NumMatches; ++i) {
2629       if (i != 0)
2630         OS << ", ";
2631       if (i + 1 == NumMatches)
2632         OS << "or ";
2633       OS << "'" << Base << MatchChars[i] << "'";
2634     }
2635     OS << ")";
2636     Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2637     return true;
2638   }
2639 
2640   // Okay, we know that none of the variants matched successfully.
2641 
2642   // If all of the instructions reported an invalid mnemonic, then the original
2643   // mnemonic was invalid.
2644   if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
2645     if (!WasOriginallyInvalidOperand) {
2646       ArrayRef<SMRange> Ranges =
2647           MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
2648       return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2649                    Ranges, MatchingInlineAsm);
2650     }
2651 
2652     // Recover location info for the operand if we know which was the problem.
2653     if (ErrorInfo != ~0ULL) {
2654       if (ErrorInfo >= Operands.size())
2655         return Error(IDLoc, "too few operands for instruction",
2656                      EmptyRanges, MatchingInlineAsm);
2657 
2658       X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
2659       if (Operand.getStartLoc().isValid()) {
2660         SMRange OperandRange = Operand.getLocRange();
2661         return Error(Operand.getStartLoc(), "invalid operand for instruction",
2662                      OperandRange, MatchingInlineAsm);
2663       }
2664     }
2665 
2666     return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2667                  MatchingInlineAsm);
2668   }
2669 
2670   // If one instruction matched with a missing feature, report this as a
2671   // missing feature.
2672   if (std::count(std::begin(Match), std::end(Match),
2673                  Match_MissingFeature) == 1) {
2674     ErrorInfo = ErrorInfoMissingFeature;
2675     return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
2676                                MatchingInlineAsm);
2677   }
2678 
2679   // If one instruction matched with an invalid operand, report this as an
2680   // operand failure.
2681   if (std::count(std::begin(Match), std::end(Match),
2682                  Match_InvalidOperand) == 1) {
2683     return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2684                  MatchingInlineAsm);
2685   }
2686 
2687   // If all of these were an outright failure, report it in a useless way.
2688   Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2689         EmptyRanges, MatchingInlineAsm);
2690   return true;
2691 }
2692 
MatchAndEmitIntelInstruction(SMLoc IDLoc,unsigned & Opcode,OperandVector & Operands,MCStreamer & Out,uint64_t & ErrorInfo,bool MatchingInlineAsm)2693 bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
2694                                                 OperandVector &Operands,
2695                                                 MCStreamer &Out,
2696                                                 uint64_t &ErrorInfo,
2697                                                 bool MatchingInlineAsm) {
2698   assert(!Operands.empty() && "Unexpect empty operand list!");
2699   X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2700   assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2701   StringRef Mnemonic = Op.getToken();
2702   ArrayRef<SMRange> EmptyRanges = None;
2703 
2704   // First, handle aliases that expand to multiple instructions.
2705   MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
2706 
2707   MCInst Inst;
2708 
2709   // Find one unsized memory operand, if present.
2710   X86Operand *UnsizedMemOp = nullptr;
2711   for (const auto &Op : Operands) {
2712     X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
2713     if (X86Op->isMemUnsized())
2714       UnsizedMemOp = X86Op;
2715   }
2716 
2717   // Allow some instructions to have implicitly pointer-sized operands.  This is
2718   // compatible with gas.
2719   if (UnsizedMemOp) {
2720     static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
2721     for (const char *Instr : PtrSizedInstrs) {
2722       if (Mnemonic == Instr) {
2723         UnsizedMemOp->Mem.Size = getPointerWidth();
2724         break;
2725       }
2726     }
2727   }
2728 
2729   // If an unsized memory operand is present, try to match with each memory
2730   // operand size.  In Intel assembly, the size is not part of the instruction
2731   // mnemonic.
2732   SmallVector<unsigned, 8> Match;
2733   uint64_t ErrorInfoMissingFeature = 0;
2734   if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
2735     static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
2736     for (unsigned Size : MopSizes) {
2737       UnsizedMemOp->Mem.Size = Size;
2738       uint64_t ErrorInfoIgnore;
2739       unsigned LastOpcode = Inst.getOpcode();
2740       unsigned M =
2741           MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2742                                MatchingInlineAsm, isParsingIntelSyntax());
2743       if (Match.empty() || LastOpcode != Inst.getOpcode())
2744         Match.push_back(M);
2745 
2746       // If this returned as a missing feature failure, remember that.
2747       if (Match.back() == Match_MissingFeature)
2748         ErrorInfoMissingFeature = ErrorInfoIgnore;
2749     }
2750 
2751     // Restore the size of the unsized memory operand if we modified it.
2752     if (UnsizedMemOp)
2753       UnsizedMemOp->Mem.Size = 0;
2754   }
2755 
2756   // If we haven't matched anything yet, this is not a basic integer or FPU
2757   // operation.  There shouldn't be any ambiguity in our mnemonic table, so try
2758   // matching with the unsized operand.
2759   if (Match.empty()) {
2760     Match.push_back(MatchInstructionImpl(Operands, Inst, ErrorInfo,
2761                                          MatchingInlineAsm,
2762                                          isParsingIntelSyntax()));
2763     // If this returned as a missing feature failure, remember that.
2764     if (Match.back() == Match_MissingFeature)
2765       ErrorInfoMissingFeature = ErrorInfo;
2766   }
2767 
2768   // Restore the size of the unsized memory operand if we modified it.
2769   if (UnsizedMemOp)
2770     UnsizedMemOp->Mem.Size = 0;
2771 
2772   // If it's a bad mnemonic, all results will be the same.
2773   if (Match.back() == Match_MnemonicFail) {
2774     ArrayRef<SMRange> Ranges =
2775         MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
2776     return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
2777                  Ranges, MatchingInlineAsm);
2778   }
2779 
2780   // If exactly one matched, then we treat that as a successful match (and the
2781   // instruction will already have been filled in correctly, since the failing
2782   // matches won't have modified it).
2783   unsigned NumSuccessfulMatches =
2784       std::count(std::begin(Match), std::end(Match), Match_Success);
2785   if (NumSuccessfulMatches == 1) {
2786     if (!validateInstruction(Inst, Operands))
2787       return true;
2788 
2789     // Some instructions need post-processing to, for example, tweak which
2790     // encoding is selected. Loop on it while changes happen so the individual
2791     // transformations can chain off each other.
2792     if (!MatchingInlineAsm)
2793       while (processInstruction(Inst, Operands))
2794         ;
2795     Inst.setLoc(IDLoc);
2796     if (!MatchingInlineAsm)
2797       EmitInstruction(Inst, Operands, Out);
2798     Opcode = Inst.getOpcode();
2799     return false;
2800   } else if (NumSuccessfulMatches > 1) {
2801     assert(UnsizedMemOp &&
2802            "multiple matches only possible with unsized memory operands");
2803     ArrayRef<SMRange> Ranges =
2804         MatchingInlineAsm ? EmptyRanges : UnsizedMemOp->getLocRange();
2805     return Error(UnsizedMemOp->getStartLoc(),
2806                  "ambiguous operand size for instruction '" + Mnemonic + "\'",
2807                  Ranges, MatchingInlineAsm);
2808   }
2809 
2810   // If one instruction matched with a missing feature, report this as a
2811   // missing feature.
2812   if (std::count(std::begin(Match), std::end(Match),
2813                  Match_MissingFeature) == 1) {
2814     ErrorInfo = ErrorInfoMissingFeature;
2815     return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
2816                                MatchingInlineAsm);
2817   }
2818 
2819   // If one instruction matched with an invalid operand, report this as an
2820   // operand failure.
2821   if (std::count(std::begin(Match), std::end(Match),
2822                  Match_InvalidOperand) == 1) {
2823     return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2824                  MatchingInlineAsm);
2825   }
2826 
2827   // If all of these were an outright failure, report it in a useless way.
2828   return Error(IDLoc, "unknown instruction mnemonic", EmptyRanges,
2829                MatchingInlineAsm);
2830 }
2831 
OmitRegisterFromClobberLists(unsigned RegNo)2832 bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
2833   return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
2834 }
2835 
ParseDirective(AsmToken DirectiveID)2836 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
2837   MCAsmParser &Parser = getParser();
2838   StringRef IDVal = DirectiveID.getIdentifier();
2839   if (IDVal == ".word")
2840     return ParseDirectiveWord(2, DirectiveID.getLoc());
2841   else if (IDVal.startswith(".code"))
2842     return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
2843   else if (IDVal.startswith(".att_syntax")) {
2844     if (getLexer().isNot(AsmToken::EndOfStatement)) {
2845       if (Parser.getTok().getString() == "prefix")
2846         Parser.Lex();
2847       else if (Parser.getTok().getString() == "noprefix")
2848         return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
2849                                            "supported: registers must have a "
2850                                            "'%' prefix in .att_syntax");
2851     }
2852     getParser().setAssemblerDialect(0);
2853     return false;
2854   } else if (IDVal.startswith(".intel_syntax")) {
2855     getParser().setAssemblerDialect(1);
2856     if (getLexer().isNot(AsmToken::EndOfStatement)) {
2857       if (Parser.getTok().getString() == "noprefix")
2858         Parser.Lex();
2859       else if (Parser.getTok().getString() == "prefix")
2860         return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
2861                                            "supported: registers must not have "
2862                                            "a '%' prefix in .intel_syntax");
2863     }
2864     return false;
2865   }
2866   return true;
2867 }
2868 
2869 /// ParseDirectiveWord
2870 ///  ::= .word [ expression (, expression)* ]
ParseDirectiveWord(unsigned Size,SMLoc L)2871 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
2872   MCAsmParser &Parser = getParser();
2873   if (getLexer().isNot(AsmToken::EndOfStatement)) {
2874     for (;;) {
2875       const MCExpr *Value;
2876       if (getParser().parseExpression(Value))
2877         return false;
2878 
2879       getParser().getStreamer().EmitValue(Value, Size);
2880 
2881       if (getLexer().is(AsmToken::EndOfStatement))
2882         break;
2883 
2884       // FIXME: Improve diagnostic.
2885       if (getLexer().isNot(AsmToken::Comma)) {
2886         Error(L, "unexpected token in directive");
2887         return false;
2888       }
2889       Parser.Lex();
2890     }
2891   }
2892 
2893   Parser.Lex();
2894   return false;
2895 }
2896 
2897 /// ParseDirectiveCode
2898 ///  ::= .code16 | .code32 | .code64
ParseDirectiveCode(StringRef IDVal,SMLoc L)2899 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
2900   MCAsmParser &Parser = getParser();
2901   if (IDVal == ".code16") {
2902     Parser.Lex();
2903     if (!is16BitMode()) {
2904       SwitchMode(X86::Mode16Bit);
2905       getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
2906     }
2907   } else if (IDVal == ".code32") {
2908     Parser.Lex();
2909     if (!is32BitMode()) {
2910       SwitchMode(X86::Mode32Bit);
2911       getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
2912     }
2913   } else if (IDVal == ".code64") {
2914     Parser.Lex();
2915     if (!is64BitMode()) {
2916       SwitchMode(X86::Mode64Bit);
2917       getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
2918     }
2919   } else {
2920     Error(L, "unknown directive " + IDVal);
2921     return false;
2922   }
2923 
2924   return false;
2925 }
2926 
2927 // Force static initialization.
LLVMInitializeX86AsmParser()2928 extern "C" void LLVMInitializeX86AsmParser() {
2929   RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
2930   RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
2931 }
2932 
2933 #define GET_REGISTER_MATCHER
2934 #define GET_MATCHER_IMPLEMENTATION
2935 #define GET_SUBTARGET_FEATURE_NAME
2936 #include "X86GenAsmMatcher.inc"
2937