1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9
10 #include "InstPrinter/X86IntelInstPrinter.h"
11 #include "MCTargetDesc/X86BaseInfo.h"
12 #include "MCTargetDesc/X86MCExpr.h"
13 #include "MCTargetDesc/X86TargetStreamer.h"
14 #include "X86AsmInstrumentation.h"
15 #include "X86AsmParserCommon.h"
16 #include "X86Operand.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SmallString.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/StringSwitch.h"
21 #include "llvm/ADT/Twine.h"
22 #include "llvm/MC/MCContext.h"
23 #include "llvm/MC/MCExpr.h"
24 #include "llvm/MC/MCInst.h"
25 #include "llvm/MC/MCInstrInfo.h"
26 #include "llvm/MC/MCParser/MCAsmLexer.h"
27 #include "llvm/MC/MCParser/MCAsmParser.h"
28 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
29 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
30 #include "llvm/MC/MCRegisterInfo.h"
31 #include "llvm/MC/MCSection.h"
32 #include "llvm/MC/MCStreamer.h"
33 #include "llvm/MC/MCSubtargetInfo.h"
34 #include "llvm/MC/MCSymbol.h"
35 #include "llvm/Support/SourceMgr.h"
36 #include "llvm/Support/TargetRegistry.h"
37 #include "llvm/Support/raw_ostream.h"
38 #include <algorithm>
39 #include <memory>
40
41 using namespace llvm;
42
checkScale(unsigned Scale,StringRef & ErrMsg)43 static bool checkScale(unsigned Scale, StringRef &ErrMsg) {
44 if (Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
45 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
46 return true;
47 }
48 return false;
49 }
50
51 namespace {
52
53 static const char OpPrecedence[] = {
54 0, // IC_OR
55 1, // IC_XOR
56 2, // IC_AND
57 3, // IC_LSHIFT
58 3, // IC_RSHIFT
59 4, // IC_PLUS
60 4, // IC_MINUS
61 5, // IC_MULTIPLY
62 5, // IC_DIVIDE
63 5, // IC_MOD
64 6, // IC_NOT
65 7, // IC_NEG
66 8, // IC_RPAREN
67 9, // IC_LPAREN
68 0, // IC_IMM
69 0 // IC_REGISTER
70 };
71
72 class X86AsmParser : public MCTargetAsmParser {
73 ParseInstructionInfo *InstInfo;
74 std::unique_ptr<X86AsmInstrumentation> Instrumentation;
75 bool Code16GCC;
76
77 private:
consumeToken()78 SMLoc consumeToken() {
79 MCAsmParser &Parser = getParser();
80 SMLoc Result = Parser.getTok().getLoc();
81 Parser.Lex();
82 return Result;
83 }
84
getTargetStreamer()85 X86TargetStreamer &getTargetStreamer() {
86 assert(getParser().getStreamer().getTargetStreamer() &&
87 "do not have a target streamer");
88 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
89 return static_cast<X86TargetStreamer &>(TS);
90 }
91
MatchInstruction(const OperandVector & Operands,MCInst & Inst,uint64_t & ErrorInfo,bool matchingInlineAsm,unsigned VariantID=0)92 unsigned MatchInstruction(const OperandVector &Operands, MCInst &Inst,
93 uint64_t &ErrorInfo, bool matchingInlineAsm,
94 unsigned VariantID = 0) {
95 // In Code16GCC mode, match as 32-bit.
96 if (Code16GCC)
97 SwitchMode(X86::Mode32Bit);
98 unsigned rv = MatchInstructionImpl(Operands, Inst, ErrorInfo,
99 matchingInlineAsm, VariantID);
100 if (Code16GCC)
101 SwitchMode(X86::Mode16Bit);
102 return rv;
103 }
104
105 enum InfixCalculatorTok {
106 IC_OR = 0,
107 IC_XOR,
108 IC_AND,
109 IC_LSHIFT,
110 IC_RSHIFT,
111 IC_PLUS,
112 IC_MINUS,
113 IC_MULTIPLY,
114 IC_DIVIDE,
115 IC_MOD,
116 IC_NOT,
117 IC_NEG,
118 IC_RPAREN,
119 IC_LPAREN,
120 IC_IMM,
121 IC_REGISTER
122 };
123
124 enum IntelOperatorKind {
125 IOK_INVALID = 0,
126 IOK_LENGTH,
127 IOK_SIZE,
128 IOK_TYPE,
129 IOK_OFFSET
130 };
131
132 class InfixCalculator {
133 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
134 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
135 SmallVector<ICToken, 4> PostfixStack;
136
isUnaryOperator(const InfixCalculatorTok Op)137 bool isUnaryOperator(const InfixCalculatorTok Op) {
138 return Op == IC_NEG || Op == IC_NOT;
139 }
140
141 public:
popOperand()142 int64_t popOperand() {
143 assert (!PostfixStack.empty() && "Poped an empty stack!");
144 ICToken Op = PostfixStack.pop_back_val();
145 if (!(Op.first == IC_IMM || Op.first == IC_REGISTER))
146 return -1; // The invalid Scale value will be caught later by checkScale
147 return Op.second;
148 }
pushOperand(InfixCalculatorTok Op,int64_t Val=0)149 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
150 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
151 "Unexpected operand!");
152 PostfixStack.push_back(std::make_pair(Op, Val));
153 }
154
popOperator()155 void popOperator() { InfixOperatorStack.pop_back(); }
pushOperator(InfixCalculatorTok Op)156 void pushOperator(InfixCalculatorTok Op) {
157 // Push the new operator if the stack is empty.
158 if (InfixOperatorStack.empty()) {
159 InfixOperatorStack.push_back(Op);
160 return;
161 }
162
163 // Push the new operator if it has a higher precedence than the operator
164 // on the top of the stack or the operator on the top of the stack is a
165 // left parentheses.
166 unsigned Idx = InfixOperatorStack.size() - 1;
167 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
168 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
169 InfixOperatorStack.push_back(Op);
170 return;
171 }
172
173 // The operator on the top of the stack has higher precedence than the
174 // new operator.
175 unsigned ParenCount = 0;
176 while (1) {
177 // Nothing to process.
178 if (InfixOperatorStack.empty())
179 break;
180
181 Idx = InfixOperatorStack.size() - 1;
182 StackOp = InfixOperatorStack[Idx];
183 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
184 break;
185
186 // If we have an even parentheses count and we see a left parentheses,
187 // then stop processing.
188 if (!ParenCount && StackOp == IC_LPAREN)
189 break;
190
191 if (StackOp == IC_RPAREN) {
192 ++ParenCount;
193 InfixOperatorStack.pop_back();
194 } else if (StackOp == IC_LPAREN) {
195 --ParenCount;
196 InfixOperatorStack.pop_back();
197 } else {
198 InfixOperatorStack.pop_back();
199 PostfixStack.push_back(std::make_pair(StackOp, 0));
200 }
201 }
202 // Push the new operator.
203 InfixOperatorStack.push_back(Op);
204 }
205
execute()206 int64_t execute() {
207 // Push any remaining operators onto the postfix stack.
208 while (!InfixOperatorStack.empty()) {
209 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
210 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
211 PostfixStack.push_back(std::make_pair(StackOp, 0));
212 }
213
214 if (PostfixStack.empty())
215 return 0;
216
217 SmallVector<ICToken, 16> OperandStack;
218 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
219 ICToken Op = PostfixStack[i];
220 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
221 OperandStack.push_back(Op);
222 } else if (isUnaryOperator(Op.first)) {
223 assert (OperandStack.size() > 0 && "Too few operands.");
224 ICToken Operand = OperandStack.pop_back_val();
225 assert (Operand.first == IC_IMM &&
226 "Unary operation with a register!");
227 switch (Op.first) {
228 default:
229 report_fatal_error("Unexpected operator!");
230 break;
231 case IC_NEG:
232 OperandStack.push_back(std::make_pair(IC_IMM, -Operand.second));
233 break;
234 case IC_NOT:
235 OperandStack.push_back(std::make_pair(IC_IMM, ~Operand.second));
236 break;
237 }
238 } else {
239 assert (OperandStack.size() > 1 && "Too few operands.");
240 int64_t Val;
241 ICToken Op2 = OperandStack.pop_back_val();
242 ICToken Op1 = OperandStack.pop_back_val();
243 switch (Op.first) {
244 default:
245 report_fatal_error("Unexpected operator!");
246 break;
247 case IC_PLUS:
248 Val = Op1.second + Op2.second;
249 OperandStack.push_back(std::make_pair(IC_IMM, Val));
250 break;
251 case IC_MINUS:
252 Val = Op1.second - Op2.second;
253 OperandStack.push_back(std::make_pair(IC_IMM, Val));
254 break;
255 case IC_MULTIPLY:
256 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
257 "Multiply operation with an immediate and a register!");
258 Val = Op1.second * Op2.second;
259 OperandStack.push_back(std::make_pair(IC_IMM, Val));
260 break;
261 case IC_DIVIDE:
262 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
263 "Divide operation with an immediate and a register!");
264 assert (Op2.second != 0 && "Division by zero!");
265 Val = Op1.second / Op2.second;
266 OperandStack.push_back(std::make_pair(IC_IMM, Val));
267 break;
268 case IC_MOD:
269 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
270 "Modulo operation with an immediate and a register!");
271 Val = Op1.second % Op2.second;
272 OperandStack.push_back(std::make_pair(IC_IMM, Val));
273 break;
274 case IC_OR:
275 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
276 "Or operation with an immediate and a register!");
277 Val = Op1.second | Op2.second;
278 OperandStack.push_back(std::make_pair(IC_IMM, Val));
279 break;
280 case IC_XOR:
281 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
282 "Xor operation with an immediate and a register!");
283 Val = Op1.second ^ Op2.second;
284 OperandStack.push_back(std::make_pair(IC_IMM, Val));
285 break;
286 case IC_AND:
287 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
288 "And operation with an immediate and a register!");
289 Val = Op1.second & Op2.second;
290 OperandStack.push_back(std::make_pair(IC_IMM, Val));
291 break;
292 case IC_LSHIFT:
293 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
294 "Left shift operation with an immediate and a register!");
295 Val = Op1.second << Op2.second;
296 OperandStack.push_back(std::make_pair(IC_IMM, Val));
297 break;
298 case IC_RSHIFT:
299 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
300 "Right shift operation with an immediate and a register!");
301 Val = Op1.second >> Op2.second;
302 OperandStack.push_back(std::make_pair(IC_IMM, Val));
303 break;
304 }
305 }
306 }
307 assert (OperandStack.size() == 1 && "Expected a single result.");
308 return OperandStack.pop_back_val().second;
309 }
310 };
311
312 enum IntelExprState {
313 IES_INIT,
314 IES_OR,
315 IES_XOR,
316 IES_AND,
317 IES_LSHIFT,
318 IES_RSHIFT,
319 IES_PLUS,
320 IES_MINUS,
321 IES_NOT,
322 IES_MULTIPLY,
323 IES_DIVIDE,
324 IES_MOD,
325 IES_LBRAC,
326 IES_RBRAC,
327 IES_LPAREN,
328 IES_RPAREN,
329 IES_REGISTER,
330 IES_INTEGER,
331 IES_IDENTIFIER,
332 IES_ERROR
333 };
334
335 class IntelExprStateMachine {
336 IntelExprState State, PrevState;
337 unsigned BaseReg, IndexReg, TmpReg, Scale;
338 int64_t Imm;
339 const MCExpr *Sym;
340 StringRef SymName;
341 InfixCalculator IC;
342 InlineAsmIdentifierInfo Info;
343 short BracCount;
344 bool MemExpr;
345
346 public:
IntelExprStateMachine()347 IntelExprStateMachine()
348 : State(IES_INIT), PrevState(IES_ERROR), BaseReg(0), IndexReg(0),
349 TmpReg(0), Scale(0), Imm(0), Sym(nullptr), BracCount(0),
350 MemExpr(false) {}
351
addImm(int64_t imm)352 void addImm(int64_t imm) { Imm += imm; }
getBracCount()353 short getBracCount() { return BracCount; }
isMemExpr()354 bool isMemExpr() { return MemExpr; }
getBaseReg()355 unsigned getBaseReg() { return BaseReg; }
getIndexReg()356 unsigned getIndexReg() { return IndexReg; }
getScale()357 unsigned getScale() { return Scale; }
getSym()358 const MCExpr *getSym() { return Sym; }
getSymName()359 StringRef getSymName() { return SymName; }
getImm()360 int64_t getImm() { return Imm + IC.execute(); }
isValidEndState()361 bool isValidEndState() {
362 return State == IES_RBRAC || State == IES_INTEGER;
363 }
hadError()364 bool hadError() { return State == IES_ERROR; }
getIdentifierInfo()365 InlineAsmIdentifierInfo &getIdentifierInfo() { return Info; }
366
onOr()367 void onOr() {
368 IntelExprState CurrState = State;
369 switch (State) {
370 default:
371 State = IES_ERROR;
372 break;
373 case IES_INTEGER:
374 case IES_RPAREN:
375 case IES_REGISTER:
376 State = IES_OR;
377 IC.pushOperator(IC_OR);
378 break;
379 }
380 PrevState = CurrState;
381 }
onXor()382 void onXor() {
383 IntelExprState CurrState = State;
384 switch (State) {
385 default:
386 State = IES_ERROR;
387 break;
388 case IES_INTEGER:
389 case IES_RPAREN:
390 case IES_REGISTER:
391 State = IES_XOR;
392 IC.pushOperator(IC_XOR);
393 break;
394 }
395 PrevState = CurrState;
396 }
onAnd()397 void onAnd() {
398 IntelExprState CurrState = State;
399 switch (State) {
400 default:
401 State = IES_ERROR;
402 break;
403 case IES_INTEGER:
404 case IES_RPAREN:
405 case IES_REGISTER:
406 State = IES_AND;
407 IC.pushOperator(IC_AND);
408 break;
409 }
410 PrevState = CurrState;
411 }
onLShift()412 void onLShift() {
413 IntelExprState CurrState = State;
414 switch (State) {
415 default:
416 State = IES_ERROR;
417 break;
418 case IES_INTEGER:
419 case IES_RPAREN:
420 case IES_REGISTER:
421 State = IES_LSHIFT;
422 IC.pushOperator(IC_LSHIFT);
423 break;
424 }
425 PrevState = CurrState;
426 }
onRShift()427 void onRShift() {
428 IntelExprState CurrState = State;
429 switch (State) {
430 default:
431 State = IES_ERROR;
432 break;
433 case IES_INTEGER:
434 case IES_RPAREN:
435 case IES_REGISTER:
436 State = IES_RSHIFT;
437 IC.pushOperator(IC_RSHIFT);
438 break;
439 }
440 PrevState = CurrState;
441 }
onPlus(StringRef & ErrMsg)442 bool onPlus(StringRef &ErrMsg) {
443 IntelExprState CurrState = State;
444 switch (State) {
445 default:
446 State = IES_ERROR;
447 break;
448 case IES_INTEGER:
449 case IES_RPAREN:
450 case IES_REGISTER:
451 State = IES_PLUS;
452 IC.pushOperator(IC_PLUS);
453 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
454 // If we already have a BaseReg, then assume this is the IndexReg with
455 // no explicit scale.
456 if (!BaseReg) {
457 BaseReg = TmpReg;
458 } else {
459 if (IndexReg) {
460 ErrMsg = "BaseReg/IndexReg already set!";
461 return true;
462 }
463 IndexReg = TmpReg;
464 Scale = 0;
465 }
466 }
467 break;
468 }
469 PrevState = CurrState;
470 return false;
471 }
onMinus(StringRef & ErrMsg)472 bool onMinus(StringRef &ErrMsg) {
473 IntelExprState CurrState = State;
474 switch (State) {
475 default:
476 State = IES_ERROR;
477 break;
478 case IES_OR:
479 case IES_XOR:
480 case IES_AND:
481 case IES_LSHIFT:
482 case IES_RSHIFT:
483 case IES_PLUS:
484 case IES_NOT:
485 case IES_MULTIPLY:
486 case IES_DIVIDE:
487 case IES_MOD:
488 case IES_LPAREN:
489 case IES_RPAREN:
490 case IES_LBRAC:
491 case IES_RBRAC:
492 case IES_INTEGER:
493 case IES_REGISTER:
494 case IES_INIT:
495 State = IES_MINUS;
496 // push minus operator if it is not a negate operator
497 if (CurrState == IES_REGISTER || CurrState == IES_RPAREN ||
498 CurrState == IES_INTEGER || CurrState == IES_RBRAC)
499 IC.pushOperator(IC_MINUS);
500 else if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
501 // We have negate operator for Scale: it's illegal
502 ErrMsg = "Scale can't be negative";
503 return true;
504 } else
505 IC.pushOperator(IC_NEG);
506 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
507 // If we already have a BaseReg, then assume this is the IndexReg with
508 // no explicit scale.
509 if (!BaseReg) {
510 BaseReg = TmpReg;
511 } else {
512 if (IndexReg) {
513 ErrMsg = "BaseReg/IndexReg already set!";
514 return true;
515 }
516 IndexReg = TmpReg;
517 Scale = 0;
518 }
519 }
520 break;
521 }
522 PrevState = CurrState;
523 return false;
524 }
onNot()525 void onNot() {
526 IntelExprState CurrState = State;
527 switch (State) {
528 default:
529 State = IES_ERROR;
530 break;
531 case IES_OR:
532 case IES_XOR:
533 case IES_AND:
534 case IES_LSHIFT:
535 case IES_RSHIFT:
536 case IES_PLUS:
537 case IES_MINUS:
538 case IES_NOT:
539 case IES_MULTIPLY:
540 case IES_DIVIDE:
541 case IES_MOD:
542 case IES_LPAREN:
543 case IES_LBRAC:
544 case IES_INIT:
545 State = IES_NOT;
546 IC.pushOperator(IC_NOT);
547 break;
548 }
549 PrevState = CurrState;
550 }
551
onRegister(unsigned Reg,StringRef & ErrMsg)552 bool onRegister(unsigned Reg, StringRef &ErrMsg) {
553 IntelExprState CurrState = State;
554 switch (State) {
555 default:
556 State = IES_ERROR;
557 break;
558 case IES_PLUS:
559 case IES_LPAREN:
560 case IES_LBRAC:
561 State = IES_REGISTER;
562 TmpReg = Reg;
563 IC.pushOperand(IC_REGISTER);
564 break;
565 case IES_MULTIPLY:
566 // Index Register - Scale * Register
567 if (PrevState == IES_INTEGER) {
568 if (IndexReg) {
569 ErrMsg = "BaseReg/IndexReg already set!";
570 return true;
571 }
572 State = IES_REGISTER;
573 IndexReg = Reg;
574 // Get the scale and replace the 'Scale * Register' with '0'.
575 Scale = IC.popOperand();
576 if (checkScale(Scale, ErrMsg))
577 return true;
578 IC.pushOperand(IC_IMM);
579 IC.popOperator();
580 } else {
581 State = IES_ERROR;
582 }
583 break;
584 }
585 PrevState = CurrState;
586 return false;
587 }
onIdentifierExpr(const MCExpr * SymRef,StringRef SymRefName,const InlineAsmIdentifierInfo & IDInfo,bool ParsingInlineAsm,StringRef & ErrMsg)588 bool onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName,
589 const InlineAsmIdentifierInfo &IDInfo,
590 bool ParsingInlineAsm, StringRef &ErrMsg) {
591 // InlineAsm: Treat an enum value as an integer
592 if (ParsingInlineAsm)
593 if (IDInfo.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
594 return onInteger(IDInfo.Enum.EnumVal, ErrMsg);
595 // Treat a symbolic constant like an integer
596 if (auto *CE = dyn_cast<MCConstantExpr>(SymRef))
597 return onInteger(CE->getValue(), ErrMsg);
598 PrevState = State;
599 bool HasSymbol = Sym != nullptr;
600 switch (State) {
601 default:
602 State = IES_ERROR;
603 break;
604 case IES_PLUS:
605 case IES_MINUS:
606 case IES_NOT:
607 case IES_INIT:
608 case IES_LBRAC:
609 MemExpr = true;
610 State = IES_INTEGER;
611 Sym = SymRef;
612 SymName = SymRefName;
613 IC.pushOperand(IC_IMM);
614 if (ParsingInlineAsm)
615 Info = IDInfo;
616 break;
617 }
618 if (HasSymbol)
619 ErrMsg = "cannot use more than one symbol in memory operand";
620 return HasSymbol;
621 }
onInteger(int64_t TmpInt,StringRef & ErrMsg)622 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
623 IntelExprState CurrState = State;
624 switch (State) {
625 default:
626 State = IES_ERROR;
627 break;
628 case IES_PLUS:
629 case IES_MINUS:
630 case IES_NOT:
631 case IES_OR:
632 case IES_XOR:
633 case IES_AND:
634 case IES_LSHIFT:
635 case IES_RSHIFT:
636 case IES_DIVIDE:
637 case IES_MOD:
638 case IES_MULTIPLY:
639 case IES_LPAREN:
640 case IES_INIT:
641 case IES_LBRAC:
642 State = IES_INTEGER;
643 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
644 // Index Register - Register * Scale
645 if (IndexReg) {
646 ErrMsg = "BaseReg/IndexReg already set!";
647 return true;
648 }
649 IndexReg = TmpReg;
650 Scale = TmpInt;
651 if (checkScale(Scale, ErrMsg))
652 return true;
653 // Get the scale and replace the 'Register * Scale' with '0'.
654 IC.popOperator();
655 } else {
656 IC.pushOperand(IC_IMM, TmpInt);
657 }
658 break;
659 }
660 PrevState = CurrState;
661 return false;
662 }
onStar()663 void onStar() {
664 PrevState = State;
665 switch (State) {
666 default:
667 State = IES_ERROR;
668 break;
669 case IES_INTEGER:
670 case IES_REGISTER:
671 case IES_RPAREN:
672 State = IES_MULTIPLY;
673 IC.pushOperator(IC_MULTIPLY);
674 break;
675 }
676 }
onDivide()677 void onDivide() {
678 PrevState = State;
679 switch (State) {
680 default:
681 State = IES_ERROR;
682 break;
683 case IES_INTEGER:
684 case IES_RPAREN:
685 State = IES_DIVIDE;
686 IC.pushOperator(IC_DIVIDE);
687 break;
688 }
689 }
onMod()690 void onMod() {
691 PrevState = State;
692 switch (State) {
693 default:
694 State = IES_ERROR;
695 break;
696 case IES_INTEGER:
697 case IES_RPAREN:
698 State = IES_MOD;
699 IC.pushOperator(IC_MOD);
700 break;
701 }
702 }
onLBrac()703 bool onLBrac() {
704 if (BracCount)
705 return true;
706 PrevState = State;
707 switch (State) {
708 default:
709 State = IES_ERROR;
710 break;
711 case IES_RBRAC:
712 case IES_INTEGER:
713 case IES_RPAREN:
714 State = IES_PLUS;
715 IC.pushOperator(IC_PLUS);
716 break;
717 case IES_INIT:
718 assert(!BracCount && "BracCount should be zero on parsing's start");
719 State = IES_LBRAC;
720 break;
721 }
722 MemExpr = true;
723 BracCount++;
724 return false;
725 }
onRBrac()726 bool onRBrac() {
727 IntelExprState CurrState = State;
728 switch (State) {
729 default:
730 State = IES_ERROR;
731 break;
732 case IES_INTEGER:
733 case IES_REGISTER:
734 case IES_RPAREN:
735 if (BracCount-- != 1)
736 return true;
737 State = IES_RBRAC;
738 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
739 // If we already have a BaseReg, then assume this is the IndexReg with
740 // no explicit scale.
741 if (!BaseReg) {
742 BaseReg = TmpReg;
743 } else {
744 assert (!IndexReg && "BaseReg/IndexReg already set!");
745 IndexReg = TmpReg;
746 Scale = 0;
747 }
748 }
749 break;
750 }
751 PrevState = CurrState;
752 return false;
753 }
onLParen()754 void onLParen() {
755 IntelExprState CurrState = State;
756 switch (State) {
757 default:
758 State = IES_ERROR;
759 break;
760 case IES_PLUS:
761 case IES_MINUS:
762 case IES_NOT:
763 case IES_OR:
764 case IES_XOR:
765 case IES_AND:
766 case IES_LSHIFT:
767 case IES_RSHIFT:
768 case IES_MULTIPLY:
769 case IES_DIVIDE:
770 case IES_MOD:
771 case IES_LPAREN:
772 case IES_INIT:
773 case IES_LBRAC:
774 State = IES_LPAREN;
775 IC.pushOperator(IC_LPAREN);
776 break;
777 }
778 PrevState = CurrState;
779 }
onRParen()780 void onRParen() {
781 PrevState = State;
782 switch (State) {
783 default:
784 State = IES_ERROR;
785 break;
786 case IES_INTEGER:
787 case IES_REGISTER:
788 case IES_RPAREN:
789 State = IES_RPAREN;
790 IC.pushOperator(IC_RPAREN);
791 break;
792 }
793 }
794 };
795
Error(SMLoc L,const Twine & Msg,SMRange Range=None,bool MatchingInlineAsm=false)796 bool Error(SMLoc L, const Twine &Msg, SMRange Range = None,
797 bool MatchingInlineAsm = false) {
798 MCAsmParser &Parser = getParser();
799 if (MatchingInlineAsm) {
800 if (!getLexer().isAtStartOfStatement())
801 Parser.eatToEndOfStatement();
802 return false;
803 }
804 return Parser.Error(L, Msg, Range);
805 }
806
ErrorOperand(SMLoc Loc,StringRef Msg)807 std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) {
808 Error(Loc, Msg);
809 return nullptr;
810 }
811
812 std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
813 std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
814 bool IsSIReg(unsigned Reg);
815 unsigned GetSIDIForRegClass(unsigned RegClassID, unsigned Reg, bool IsSIReg);
816 void
817 AddDefaultSrcDestOperands(OperandVector &Operands,
818 std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
819 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
820 bool VerifyAndAdjustOperands(OperandVector &OrigOperands,
821 OperandVector &FinalOperands);
822 std::unique_ptr<X86Operand> ParseOperand();
823 std::unique_ptr<X86Operand> ParseATTOperand();
824 std::unique_ptr<X86Operand> ParseIntelOperand();
825 std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
826 bool ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End);
827 unsigned IdentifyIntelInlineAsmOperator(StringRef Name);
828 unsigned ParseIntelInlineAsmOperator(unsigned OpKind);
829 std::unique_ptr<X86Operand> ParseRoundingModeOp(SMLoc Start);
830 bool ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM);
831 void RewriteIntelExpression(IntelExprStateMachine &SM, SMLoc Start,
832 SMLoc End);
833 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
834 bool ParseIntelInlineAsmIdentifier(const MCExpr *&Val, StringRef &Identifier,
835 InlineAsmIdentifierInfo &Info,
836 bool IsUnevaluatedOperand, SMLoc &End);
837
838 std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc MemStart);
839
840 bool ParseIntelMemoryOperandSize(unsigned &Size);
841 std::unique_ptr<X86Operand>
842 CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
843 unsigned IndexReg, unsigned Scale, SMLoc Start,
844 SMLoc End, unsigned Size, StringRef Identifier,
845 const InlineAsmIdentifierInfo &Info);
846
847 bool parseDirectiveEven(SMLoc L);
848 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
849
850 /// CodeView FPO data directives.
851 bool parseDirectiveFPOProc(SMLoc L);
852 bool parseDirectiveFPOSetFrame(SMLoc L);
853 bool parseDirectiveFPOPushReg(SMLoc L);
854 bool parseDirectiveFPOStackAlloc(SMLoc L);
855 bool parseDirectiveFPOEndPrologue(SMLoc L);
856 bool parseDirectiveFPOEndProc(SMLoc L);
857 bool parseDirectiveFPOData(SMLoc L);
858
859 bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
860 bool processInstruction(MCInst &Inst, const OperandVector &Ops);
861
862 /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
863 /// instrumentation around Inst.
864 void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
865
866 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
867 OperandVector &Operands, MCStreamer &Out,
868 uint64_t &ErrorInfo,
869 bool MatchingInlineAsm) override;
870
871 void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
872 MCStreamer &Out, bool MatchingInlineAsm);
873
874 bool ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
875 bool MatchingInlineAsm);
876
877 bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
878 OperandVector &Operands, MCStreamer &Out,
879 uint64_t &ErrorInfo,
880 bool MatchingInlineAsm);
881
882 bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
883 OperandVector &Operands, MCStreamer &Out,
884 uint64_t &ErrorInfo,
885 bool MatchingInlineAsm);
886
887 bool OmitRegisterFromClobberLists(unsigned RegNo) override;
888
889 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
890 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
891 /// return false if no parsing errors occurred, true otherwise.
892 bool HandleAVX512Operand(OperandVector &Operands,
893 const MCParsedAsmOperand &Op);
894
895 bool ParseZ(std::unique_ptr<X86Operand> &Z, const SMLoc &StartLoc);
896
is64BitMode() const897 bool is64BitMode() const {
898 // FIXME: Can tablegen auto-generate this?
899 return getSTI().getFeatureBits()[X86::Mode64Bit];
900 }
is32BitMode() const901 bool is32BitMode() const {
902 // FIXME: Can tablegen auto-generate this?
903 return getSTI().getFeatureBits()[X86::Mode32Bit];
904 }
is16BitMode() const905 bool is16BitMode() const {
906 // FIXME: Can tablegen auto-generate this?
907 return getSTI().getFeatureBits()[X86::Mode16Bit];
908 }
SwitchMode(unsigned mode)909 void SwitchMode(unsigned mode) {
910 MCSubtargetInfo &STI = copySTI();
911 FeatureBitset AllModes({X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit});
912 FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
913 uint64_t FB = ComputeAvailableFeatures(
914 STI.ToggleFeature(OldMode.flip(mode)));
915 setAvailableFeatures(FB);
916
917 assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
918 }
919
getPointerWidth()920 unsigned getPointerWidth() {
921 if (is16BitMode()) return 16;
922 if (is32BitMode()) return 32;
923 if (is64BitMode()) return 64;
924 llvm_unreachable("invalid mode");
925 }
926
isParsingIntelSyntax()927 bool isParsingIntelSyntax() {
928 return getParser().getAssemblerDialect();
929 }
930
931 /// @name Auto-generated Matcher Functions
932 /// {
933
934 #define GET_ASSEMBLER_HEADER
935 #include "X86GenAsmMatcher.inc"
936
937 /// }
938
939 public:
940
X86AsmParser(const MCSubtargetInfo & sti,MCAsmParser & Parser,const MCInstrInfo & mii,const MCTargetOptions & Options)941 X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
942 const MCInstrInfo &mii, const MCTargetOptions &Options)
943 : MCTargetAsmParser(Options, sti, mii), InstInfo(nullptr),
944 Code16GCC(false) {
945
946 Parser.addAliasForDirective(".word", ".2byte");
947
948 // Initialize the set of available features.
949 setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
950 Instrumentation.reset(
951 CreateX86AsmInstrumentation(Options, Parser.getContext(), STI));
952 }
953
954 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
955
956 void SetFrameRegister(unsigned RegNo) override;
957
958 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
959
960 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
961 SMLoc NameLoc, OperandVector &Operands) override;
962
963 bool ParseDirective(AsmToken DirectiveID) override;
964 };
965 } // end anonymous namespace
966
967 /// @name Auto-generated Match Functions
968 /// {
969
970 static unsigned MatchRegisterName(StringRef Name);
971
972 /// }
973
CheckBaseRegAndIndexRegAndScale(unsigned BaseReg,unsigned IndexReg,unsigned Scale,bool Is64BitMode,StringRef & ErrMsg)974 static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg, unsigned IndexReg,
975 unsigned Scale, bool Is64BitMode,
976 StringRef &ErrMsg) {
977 // If we have both a base register and an index register make sure they are
978 // both 64-bit or 32-bit registers.
979 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
980
981 if (BaseReg != 0 &&
982 !(BaseReg == X86::RIP || BaseReg == X86::EIP ||
983 X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) ||
984 X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) ||
985 X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg))) {
986 ErrMsg = "invalid base+index expression";
987 return true;
988 }
989
990 if (IndexReg != 0 &&
991 !(IndexReg == X86::EIZ || IndexReg == X86::RIZ ||
992 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
993 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
994 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
995 X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
996 X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
997 X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg))) {
998 ErrMsg = "invalid base+index expression";
999 return true;
1000 }
1001
1002 if (((BaseReg == X86::RIP || BaseReg == X86::EIP) && IndexReg != 0) ||
1003 IndexReg == X86::EIP || IndexReg == X86::RIP ||
1004 IndexReg == X86::ESP || IndexReg == X86::RSP) {
1005 ErrMsg = "invalid base+index expression";
1006 return true;
1007 }
1008
1009 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
1010 // and then only in non-64-bit modes.
1011 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1012 (Is64BitMode || (BaseReg != X86::BX && BaseReg != X86::BP &&
1013 BaseReg != X86::SI && BaseReg != X86::DI)) &&
1014 BaseReg != X86::DX) {
1015 ErrMsg = "invalid 16-bit base register";
1016 return true;
1017 }
1018
1019 if (BaseReg == 0 &&
1020 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
1021 ErrMsg = "16-bit memory operand may not include only index register";
1022 return true;
1023 }
1024
1025 if (BaseReg != 0 && IndexReg != 0) {
1026 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
1027 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1028 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1029 IndexReg == X86::EIZ)) {
1030 ErrMsg = "base register is 64-bit, but index register is not";
1031 return true;
1032 }
1033 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
1034 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1035 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
1036 IndexReg == X86::RIZ)) {
1037 ErrMsg = "base register is 32-bit, but index register is not";
1038 return true;
1039 }
1040 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
1041 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1042 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
1043 ErrMsg = "base register is 16-bit, but index register is not";
1044 return true;
1045 }
1046 if ((BaseReg != X86::BX && BaseReg != X86::BP) ||
1047 (IndexReg != X86::SI && IndexReg != X86::DI)) {
1048 ErrMsg = "invalid 16-bit base/index register combination";
1049 return true;
1050 }
1051 }
1052 }
1053
1054 // RIP/EIP-relative addressing is only supported in 64-bit mode.
1055 if (!Is64BitMode && BaseReg != 0 &&
1056 (BaseReg == X86::RIP || BaseReg == X86::EIP)) {
1057 ErrMsg = "IP-relative addressing requires 64-bit mode";
1058 return true;
1059 }
1060
1061 return checkScale(Scale, ErrMsg);
1062 }
1063
ParseRegister(unsigned & RegNo,SMLoc & StartLoc,SMLoc & EndLoc)1064 bool X86AsmParser::ParseRegister(unsigned &RegNo,
1065 SMLoc &StartLoc, SMLoc &EndLoc) {
1066 MCAsmParser &Parser = getParser();
1067 RegNo = 0;
1068 const AsmToken &PercentTok = Parser.getTok();
1069 StartLoc = PercentTok.getLoc();
1070
1071 // If we encounter a %, ignore it. This code handles registers with and
1072 // without the prefix, unprefixed registers can occur in cfi directives.
1073 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
1074 Parser.Lex(); // Eat percent token.
1075
1076 const AsmToken &Tok = Parser.getTok();
1077 EndLoc = Tok.getEndLoc();
1078
1079 if (Tok.isNot(AsmToken::Identifier)) {
1080 if (isParsingIntelSyntax()) return true;
1081 return Error(StartLoc, "invalid register name",
1082 SMRange(StartLoc, EndLoc));
1083 }
1084
1085 RegNo = MatchRegisterName(Tok.getString());
1086
1087 // If the match failed, try the register name as lowercase.
1088 if (RegNo == 0)
1089 RegNo = MatchRegisterName(Tok.getString().lower());
1090
1091 // The "flags" register cannot be referenced directly.
1092 // Treat it as an identifier instead.
1093 if (isParsingInlineAsm() && isParsingIntelSyntax() && RegNo == X86::EFLAGS)
1094 RegNo = 0;
1095
1096 if (!is64BitMode()) {
1097 // FIXME: This should be done using Requires<Not64BitMode> and
1098 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
1099 // checked.
1100 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
1101 // REX prefix.
1102 if (RegNo == X86::RIZ || RegNo == X86::RIP ||
1103 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
1104 X86II::isX86_64NonExtLowByteReg(RegNo) ||
1105 X86II::isX86_64ExtendedReg(RegNo))
1106 return Error(StartLoc, "register %"
1107 + Tok.getString() + " is only available in 64-bit mode",
1108 SMRange(StartLoc, EndLoc));
1109 }
1110
1111 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
1112 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
1113 RegNo = X86::ST0;
1114 Parser.Lex(); // Eat 'st'
1115
1116 // Check to see if we have '(4)' after %st.
1117 if (getLexer().isNot(AsmToken::LParen))
1118 return false;
1119 // Lex the paren.
1120 getParser().Lex();
1121
1122 const AsmToken &IntTok = Parser.getTok();
1123 if (IntTok.isNot(AsmToken::Integer))
1124 return Error(IntTok.getLoc(), "expected stack index");
1125 switch (IntTok.getIntVal()) {
1126 case 0: RegNo = X86::ST0; break;
1127 case 1: RegNo = X86::ST1; break;
1128 case 2: RegNo = X86::ST2; break;
1129 case 3: RegNo = X86::ST3; break;
1130 case 4: RegNo = X86::ST4; break;
1131 case 5: RegNo = X86::ST5; break;
1132 case 6: RegNo = X86::ST6; break;
1133 case 7: RegNo = X86::ST7; break;
1134 default: return Error(IntTok.getLoc(), "invalid stack index");
1135 }
1136
1137 if (getParser().Lex().isNot(AsmToken::RParen))
1138 return Error(Parser.getTok().getLoc(), "expected ')'");
1139
1140 EndLoc = Parser.getTok().getEndLoc();
1141 Parser.Lex(); // Eat ')'
1142 return false;
1143 }
1144
1145 EndLoc = Parser.getTok().getEndLoc();
1146
1147 // If this is "db[0-15]", match it as an alias
1148 // for dr[0-15].
1149 if (RegNo == 0 && Tok.getString().startswith("db")) {
1150 if (Tok.getString().size() == 3) {
1151 switch (Tok.getString()[2]) {
1152 case '0': RegNo = X86::DR0; break;
1153 case '1': RegNo = X86::DR1; break;
1154 case '2': RegNo = X86::DR2; break;
1155 case '3': RegNo = X86::DR3; break;
1156 case '4': RegNo = X86::DR4; break;
1157 case '5': RegNo = X86::DR5; break;
1158 case '6': RegNo = X86::DR6; break;
1159 case '7': RegNo = X86::DR7; break;
1160 case '8': RegNo = X86::DR8; break;
1161 case '9': RegNo = X86::DR9; break;
1162 }
1163 } else if (Tok.getString().size() == 4 && Tok.getString()[2] == '1') {
1164 switch (Tok.getString()[3]) {
1165 case '0': RegNo = X86::DR10; break;
1166 case '1': RegNo = X86::DR11; break;
1167 case '2': RegNo = X86::DR12; break;
1168 case '3': RegNo = X86::DR13; break;
1169 case '4': RegNo = X86::DR14; break;
1170 case '5': RegNo = X86::DR15; break;
1171 }
1172 }
1173
1174 if (RegNo != 0) {
1175 EndLoc = Parser.getTok().getEndLoc();
1176 Parser.Lex(); // Eat it.
1177 return false;
1178 }
1179 }
1180
1181 if (RegNo == 0) {
1182 if (isParsingIntelSyntax()) return true;
1183 return Error(StartLoc, "invalid register name",
1184 SMRange(StartLoc, EndLoc));
1185 }
1186
1187 Parser.Lex(); // Eat identifier token.
1188 return false;
1189 }
1190
SetFrameRegister(unsigned RegNo)1191 void X86AsmParser::SetFrameRegister(unsigned RegNo) {
1192 Instrumentation->SetInitialFrameRegister(RegNo);
1193 }
1194
DefaultMemSIOperand(SMLoc Loc)1195 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
1196 bool Parse32 = is32BitMode() || Code16GCC;
1197 unsigned Basereg = is64BitMode() ? X86::RSI : (Parse32 ? X86::ESI : X86::SI);
1198 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1199 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1200 /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
1201 Loc, Loc, 0);
1202 }
1203
DefaultMemDIOperand(SMLoc Loc)1204 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
1205 bool Parse32 = is32BitMode() || Code16GCC;
1206 unsigned Basereg = is64BitMode() ? X86::RDI : (Parse32 ? X86::EDI : X86::DI);
1207 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1208 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1209 /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
1210 Loc, Loc, 0);
1211 }
1212
IsSIReg(unsigned Reg)1213 bool X86AsmParser::IsSIReg(unsigned Reg) {
1214 switch (Reg) {
1215 default: llvm_unreachable("Only (R|E)SI and (R|E)DI are expected!");
1216 case X86::RSI:
1217 case X86::ESI:
1218 case X86::SI:
1219 return true;
1220 case X86::RDI:
1221 case X86::EDI:
1222 case X86::DI:
1223 return false;
1224 }
1225 }
1226
GetSIDIForRegClass(unsigned RegClassID,unsigned Reg,bool IsSIReg)1227 unsigned X86AsmParser::GetSIDIForRegClass(unsigned RegClassID, unsigned Reg,
1228 bool IsSIReg) {
1229 switch (RegClassID) {
1230 default: llvm_unreachable("Unexpected register class");
1231 case X86::GR64RegClassID:
1232 return IsSIReg ? X86::RSI : X86::RDI;
1233 case X86::GR32RegClassID:
1234 return IsSIReg ? X86::ESI : X86::EDI;
1235 case X86::GR16RegClassID:
1236 return IsSIReg ? X86::SI : X86::DI;
1237 }
1238 }
1239
AddDefaultSrcDestOperands(OperandVector & Operands,std::unique_ptr<llvm::MCParsedAsmOperand> && Src,std::unique_ptr<llvm::MCParsedAsmOperand> && Dst)1240 void X86AsmParser::AddDefaultSrcDestOperands(
1241 OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
1242 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst) {
1243 if (isParsingIntelSyntax()) {
1244 Operands.push_back(std::move(Dst));
1245 Operands.push_back(std::move(Src));
1246 }
1247 else {
1248 Operands.push_back(std::move(Src));
1249 Operands.push_back(std::move(Dst));
1250 }
1251 }
1252
VerifyAndAdjustOperands(OperandVector & OrigOperands,OperandVector & FinalOperands)1253 bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands,
1254 OperandVector &FinalOperands) {
1255
1256 if (OrigOperands.size() > 1) {
1257 // Check if sizes match, OrigOperands also contains the instruction name
1258 assert(OrigOperands.size() == FinalOperands.size() + 1 &&
1259 "Operand size mismatch");
1260
1261 SmallVector<std::pair<SMLoc, std::string>, 2> Warnings;
1262 // Verify types match
1263 int RegClassID = -1;
1264 for (unsigned int i = 0; i < FinalOperands.size(); ++i) {
1265 X86Operand &OrigOp = static_cast<X86Operand &>(*OrigOperands[i + 1]);
1266 X86Operand &FinalOp = static_cast<X86Operand &>(*FinalOperands[i]);
1267
1268 if (FinalOp.isReg() &&
1269 (!OrigOp.isReg() || FinalOp.getReg() != OrigOp.getReg()))
1270 // Return false and let a normal complaint about bogus operands happen
1271 return false;
1272
1273 if (FinalOp.isMem()) {
1274
1275 if (!OrigOp.isMem())
1276 // Return false and let a normal complaint about bogus operands happen
1277 return false;
1278
1279 unsigned OrigReg = OrigOp.Mem.BaseReg;
1280 unsigned FinalReg = FinalOp.Mem.BaseReg;
1281
1282 // If we've already encounterd a register class, make sure all register
1283 // bases are of the same register class
1284 if (RegClassID != -1 &&
1285 !X86MCRegisterClasses[RegClassID].contains(OrigReg)) {
1286 return Error(OrigOp.getStartLoc(),
1287 "mismatching source and destination index registers");
1288 }
1289
1290 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(OrigReg))
1291 RegClassID = X86::GR64RegClassID;
1292 else if (X86MCRegisterClasses[X86::GR32RegClassID].contains(OrigReg))
1293 RegClassID = X86::GR32RegClassID;
1294 else if (X86MCRegisterClasses[X86::GR16RegClassID].contains(OrigReg))
1295 RegClassID = X86::GR16RegClassID;
1296 else
1297 // Unexpected register class type
1298 // Return false and let a normal complaint about bogus operands happen
1299 return false;
1300
1301 bool IsSI = IsSIReg(FinalReg);
1302 FinalReg = GetSIDIForRegClass(RegClassID, FinalReg, IsSI);
1303
1304 if (FinalReg != OrigReg) {
1305 std::string RegName = IsSI ? "ES:(R|E)SI" : "ES:(R|E)DI";
1306 Warnings.push_back(std::make_pair(
1307 OrigOp.getStartLoc(),
1308 "memory operand is only for determining the size, " + RegName +
1309 " will be used for the location"));
1310 }
1311
1312 FinalOp.Mem.Size = OrigOp.Mem.Size;
1313 FinalOp.Mem.SegReg = OrigOp.Mem.SegReg;
1314 FinalOp.Mem.BaseReg = FinalReg;
1315 }
1316 }
1317
1318 // Produce warnings only if all the operands passed the adjustment - prevent
1319 // legal cases like "movsd (%rax), %xmm0" mistakenly produce warnings
1320 for (auto &WarningMsg : Warnings) {
1321 Warning(WarningMsg.first, WarningMsg.second);
1322 }
1323
1324 // Remove old operands
1325 for (unsigned int i = 0; i < FinalOperands.size(); ++i)
1326 OrigOperands.pop_back();
1327 }
1328 // OrigOperands.append(FinalOperands.begin(), FinalOperands.end());
1329 for (unsigned int i = 0; i < FinalOperands.size(); ++i)
1330 OrigOperands.push_back(std::move(FinalOperands[i]));
1331
1332 return false;
1333 }
1334
ParseOperand()1335 std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
1336 if (isParsingIntelSyntax())
1337 return ParseIntelOperand();
1338 return ParseATTOperand();
1339 }
1340
CreateMemForInlineAsm(unsigned SegReg,const MCExpr * Disp,unsigned BaseReg,unsigned IndexReg,unsigned Scale,SMLoc Start,SMLoc End,unsigned Size,StringRef Identifier,const InlineAsmIdentifierInfo & Info)1341 std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
1342 unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
1343 unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
1344 const InlineAsmIdentifierInfo &Info) {
1345 // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
1346 // some other label reference.
1347 if (Info.isKind(InlineAsmIdentifierInfo::IK_Label)) {
1348 // Insert an explicit size if the user didn't have one.
1349 if (!Size) {
1350 Size = getPointerWidth();
1351 InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start,
1352 /*Len=*/0, Size);
1353 }
1354 // Create an absolute memory reference in order to match against
1355 // instructions taking a PC relative operand.
1356 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size,
1357 Identifier, Info.Label.Decl);
1358 }
1359 // We either have a direct symbol reference, or an offset from a symbol. The
1360 // parser always puts the symbol on the LHS, so look there for size
1361 // calculation purposes.
1362 unsigned FrontendSize = 0;
1363 void *Decl = nullptr;
1364 bool IsGlobalLV = false;
1365 if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
1366 // Size is in terms of bits in this context.
1367 FrontendSize = Info.Var.Type * 8;
1368 Decl = Info.Var.Decl;
1369 IsGlobalLV = Info.Var.IsGlobalLV;
1370 }
1371 // It is widely common for MS InlineAsm to use a global variable and one/two
1372 // registers in a mmory expression, and though unaccessible via rip/eip.
1373 if (IsGlobalLV && (BaseReg || IndexReg)) {
1374 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End);
1375 // Otherwise, we set the base register to a non-zero value
1376 // if we don't know the actual value at this time. This is necessary to
1377 // get the matching correct in some cases.
1378 } else {
1379 BaseReg = BaseReg ? BaseReg : 1;
1380 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1381 IndexReg, Scale, Start, End, Size, Identifier,
1382 Decl, FrontendSize);
1383 }
1384 }
1385
1386 // Some binary bitwise operators have a named synonymous
1387 // Query a candidate string for being such a named operator
1388 // and if so - invoke the appropriate handler
ParseIntelNamedOperator(StringRef Name,IntelExprStateMachine & SM)1389 bool X86AsmParser::ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM) {
1390 // A named operator should be either lower or upper case, but not a mix
1391 if (Name.compare(Name.lower()) && Name.compare(Name.upper()))
1392 return false;
1393 if (Name.equals_lower("not"))
1394 SM.onNot();
1395 else if (Name.equals_lower("or"))
1396 SM.onOr();
1397 else if (Name.equals_lower("shl"))
1398 SM.onLShift();
1399 else if (Name.equals_lower("shr"))
1400 SM.onRShift();
1401 else if (Name.equals_lower("xor"))
1402 SM.onXor();
1403 else if (Name.equals_lower("and"))
1404 SM.onAnd();
1405 else if (Name.equals_lower("mod"))
1406 SM.onMod();
1407 else
1408 return false;
1409 return true;
1410 }
1411
ParseIntelExpression(IntelExprStateMachine & SM,SMLoc & End)1412 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1413 MCAsmParser &Parser = getParser();
1414 const AsmToken &Tok = Parser.getTok();
1415 StringRef ErrMsg;
1416
1417 AsmToken::TokenKind PrevTK = AsmToken::Error;
1418 bool Done = false;
1419 while (!Done) {
1420 bool UpdateLocLex = true;
1421 AsmToken::TokenKind TK = getLexer().getKind();
1422
1423 switch (TK) {
1424 default:
1425 if ((Done = SM.isValidEndState()))
1426 break;
1427 return Error(Tok.getLoc(), "unknown token in expression");
1428 case AsmToken::EndOfStatement:
1429 Done = true;
1430 break;
1431 case AsmToken::Real:
1432 // DotOperator: [ebx].0
1433 UpdateLocLex = false;
1434 if (ParseIntelDotOperator(SM, End))
1435 return true;
1436 break;
1437 case AsmToken::At:
1438 case AsmToken::String:
1439 case AsmToken::Identifier: {
1440 SMLoc IdentLoc = Tok.getLoc();
1441 StringRef Identifier = Tok.getString();
1442 UpdateLocLex = false;
1443 // Register
1444 unsigned Reg;
1445 if (Tok.is(AsmToken::Identifier) && !ParseRegister(Reg, IdentLoc, End)) {
1446 if (SM.onRegister(Reg, ErrMsg))
1447 return Error(Tok.getLoc(), ErrMsg);
1448 break;
1449 }
1450 // Operator synonymous ("not", "or" etc.)
1451 if ((UpdateLocLex = ParseIntelNamedOperator(Identifier, SM)))
1452 break;
1453 // Symbol reference, when parsing assembly content
1454 InlineAsmIdentifierInfo Info;
1455 const MCExpr *Val;
1456 if (!isParsingInlineAsm()) {
1457 if (getParser().parsePrimaryExpr(Val, End)) {
1458 return Error(Tok.getLoc(), "Unexpected identifier!");
1459 } else if (SM.onIdentifierExpr(Val, Identifier, Info, false, ErrMsg)) {
1460 return Error(IdentLoc, ErrMsg);
1461 } else
1462 break;
1463 }
1464 // MS InlineAsm operators (TYPE/LENGTH/SIZE)
1465 if (unsigned OpKind = IdentifyIntelInlineAsmOperator(Identifier)) {
1466 if (OpKind == IOK_OFFSET)
1467 return Error(IdentLoc, "Dealing OFFSET operator as part of"
1468 "a compound immediate expression is yet to be supported");
1469 if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) {
1470 if (SM.onInteger(Val, ErrMsg))
1471 return Error(IdentLoc, ErrMsg);
1472 } else
1473 return true;
1474 break;
1475 }
1476 // MS Dot Operator expression
1477 if (Identifier.count('.') && PrevTK == AsmToken::RBrac) {
1478 if (ParseIntelDotOperator(SM, End))
1479 return true;
1480 break;
1481 }
1482 // MS InlineAsm identifier
1483 // Call parseIdentifier() to combine @ with the identifier behind it.
1484 if (TK == AsmToken::At && Parser.parseIdentifier(Identifier))
1485 return Error(IdentLoc, "expected identifier");
1486 if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, false, End))
1487 return true;
1488 else if (SM.onIdentifierExpr(Val, Identifier, Info, true, ErrMsg))
1489 return Error(IdentLoc, ErrMsg);
1490 break;
1491 }
1492 case AsmToken::Integer: {
1493 // Look for 'b' or 'f' following an Integer as a directional label
1494 SMLoc Loc = getTok().getLoc();
1495 int64_t IntVal = getTok().getIntVal();
1496 End = consumeToken();
1497 UpdateLocLex = false;
1498 if (getLexer().getKind() == AsmToken::Identifier) {
1499 StringRef IDVal = getTok().getString();
1500 if (IDVal == "f" || IDVal == "b") {
1501 MCSymbol *Sym =
1502 getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b");
1503 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1504 const MCExpr *Val =
1505 MCSymbolRefExpr::create(Sym, Variant, getContext());
1506 if (IDVal == "b" && Sym->isUndefined())
1507 return Error(Loc, "invalid reference to undefined symbol");
1508 StringRef Identifier = Sym->getName();
1509 InlineAsmIdentifierInfo Info;
1510 if (SM.onIdentifierExpr(Val, Identifier, Info,
1511 isParsingInlineAsm(), ErrMsg))
1512 return Error(Loc, ErrMsg);
1513 End = consumeToken();
1514 } else {
1515 if (SM.onInteger(IntVal, ErrMsg))
1516 return Error(Loc, ErrMsg);
1517 }
1518 } else {
1519 if (SM.onInteger(IntVal, ErrMsg))
1520 return Error(Loc, ErrMsg);
1521 }
1522 break;
1523 }
1524 case AsmToken::Plus:
1525 if (SM.onPlus(ErrMsg))
1526 return Error(getTok().getLoc(), ErrMsg);
1527 break;
1528 case AsmToken::Minus:
1529 if (SM.onMinus(ErrMsg))
1530 return Error(getTok().getLoc(), ErrMsg);
1531 break;
1532 case AsmToken::Tilde: SM.onNot(); break;
1533 case AsmToken::Star: SM.onStar(); break;
1534 case AsmToken::Slash: SM.onDivide(); break;
1535 case AsmToken::Percent: SM.onMod(); break;
1536 case AsmToken::Pipe: SM.onOr(); break;
1537 case AsmToken::Caret: SM.onXor(); break;
1538 case AsmToken::Amp: SM.onAnd(); break;
1539 case AsmToken::LessLess:
1540 SM.onLShift(); break;
1541 case AsmToken::GreaterGreater:
1542 SM.onRShift(); break;
1543 case AsmToken::LBrac:
1544 if (SM.onLBrac())
1545 return Error(Tok.getLoc(), "unexpected bracket encountered");
1546 break;
1547 case AsmToken::RBrac:
1548 if (SM.onRBrac())
1549 return Error(Tok.getLoc(), "unexpected bracket encountered");
1550 break;
1551 case AsmToken::LParen: SM.onLParen(); break;
1552 case AsmToken::RParen: SM.onRParen(); break;
1553 }
1554 if (SM.hadError())
1555 return Error(Tok.getLoc(), "unknown token in expression");
1556
1557 if (!Done && UpdateLocLex)
1558 End = consumeToken();
1559
1560 PrevTK = TK;
1561 }
1562 return false;
1563 }
1564
RewriteIntelExpression(IntelExprStateMachine & SM,SMLoc Start,SMLoc End)1565 void X86AsmParser::RewriteIntelExpression(IntelExprStateMachine &SM,
1566 SMLoc Start, SMLoc End) {
1567 SMLoc Loc = Start;
1568 unsigned ExprLen = End.getPointer() - Start.getPointer();
1569 // Skip everything before a symbol displacement (if we have one)
1570 if (SM.getSym()) {
1571 StringRef SymName = SM.getSymName();
1572 if (unsigned Len = SymName.data() - Start.getPointer())
1573 InstInfo->AsmRewrites->emplace_back(AOK_Skip, Start, Len);
1574 Loc = SMLoc::getFromPointer(SymName.data() + SymName.size());
1575 ExprLen = End.getPointer() - (SymName.data() + SymName.size());
1576 // If we have only a symbol than there's no need for complex rewrite,
1577 // simply skip everything after it
1578 if (!(SM.getBaseReg() || SM.getIndexReg() || SM.getImm())) {
1579 if (ExprLen)
1580 InstInfo->AsmRewrites->emplace_back(AOK_Skip, Loc, ExprLen);
1581 return;
1582 }
1583 }
1584 // Build an Intel Expression rewrite
1585 StringRef BaseRegStr;
1586 StringRef IndexRegStr;
1587 if (SM.getBaseReg())
1588 BaseRegStr = X86IntelInstPrinter::getRegisterName(SM.getBaseReg());
1589 if (SM.getIndexReg())
1590 IndexRegStr = X86IntelInstPrinter::getRegisterName(SM.getIndexReg());
1591 // Emit it
1592 IntelExpr Expr(BaseRegStr, IndexRegStr, SM.getScale(), SM.getImm(), SM.isMemExpr());
1593 InstInfo->AsmRewrites->emplace_back(Loc, ExprLen, Expr);
1594 }
1595
1596 // Inline assembly may use variable names with namespace alias qualifiers.
ParseIntelInlineAsmIdentifier(const MCExpr * & Val,StringRef & Identifier,InlineAsmIdentifierInfo & Info,bool IsUnevaluatedOperand,SMLoc & End)1597 bool X86AsmParser::ParseIntelInlineAsmIdentifier(const MCExpr *&Val,
1598 StringRef &Identifier,
1599 InlineAsmIdentifierInfo &Info,
1600 bool IsUnevaluatedOperand,
1601 SMLoc &End) {
1602 MCAsmParser &Parser = getParser();
1603 assert(isParsingInlineAsm() && "Expected to be parsing inline assembly.");
1604 Val = nullptr;
1605
1606 StringRef LineBuf(Identifier.data());
1607 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1608
1609 const AsmToken &Tok = Parser.getTok();
1610 SMLoc Loc = Tok.getLoc();
1611
1612 // Advance the token stream until the end of the current token is
1613 // after the end of what the frontend claimed.
1614 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
1615 do {
1616 End = Tok.getEndLoc();
1617 getLexer().Lex();
1618 } while (End.getPointer() < EndPtr);
1619 Identifier = LineBuf;
1620
1621 // The frontend should end parsing on an assembler token boundary, unless it
1622 // failed parsing.
1623 assert((End.getPointer() == EndPtr ||
1624 Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) &&
1625 "frontend claimed part of a token?");
1626
1627 // If the identifier lookup was unsuccessful, assume that we are dealing with
1628 // a label.
1629 if (Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) {
1630 StringRef InternalName =
1631 SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
1632 Loc, false);
1633 assert(InternalName.size() && "We should have an internal name here.");
1634 // Push a rewrite for replacing the identifier name with the internal name.
1635 InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(),
1636 InternalName);
1637 } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
1638 return false;
1639 // Create the symbol reference.
1640 MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
1641 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1642 Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext());
1643 return false;
1644 }
1645
1646 //ParseRoundingModeOp - Parse AVX-512 rounding mode operand
1647 std::unique_ptr<X86Operand>
ParseRoundingModeOp(SMLoc Start)1648 X86AsmParser::ParseRoundingModeOp(SMLoc Start) {
1649 MCAsmParser &Parser = getParser();
1650 const AsmToken &Tok = Parser.getTok();
1651 // Eat "{" and mark the current place.
1652 const SMLoc consumedToken = consumeToken();
1653 if (Tok.getIdentifier().startswith("r")){
1654 int rndMode = StringSwitch<int>(Tok.getIdentifier())
1655 .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
1656 .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF)
1657 .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF)
1658 .Case("rz", X86::STATIC_ROUNDING::TO_ZERO)
1659 .Default(-1);
1660 if (-1 == rndMode)
1661 return ErrorOperand(Tok.getLoc(), "Invalid rounding mode.");
1662 Parser.Lex(); // Eat "r*" of r*-sae
1663 if (!getLexer().is(AsmToken::Minus))
1664 return ErrorOperand(Tok.getLoc(), "Expected - at this point");
1665 Parser.Lex(); // Eat "-"
1666 Parser.Lex(); // Eat the sae
1667 if (!getLexer().is(AsmToken::RCurly))
1668 return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1669 SMLoc End = Tok.getEndLoc();
1670 Parser.Lex(); // Eat "}"
1671 const MCExpr *RndModeOp =
1672 MCConstantExpr::create(rndMode, Parser.getContext());
1673 return X86Operand::CreateImm(RndModeOp, Start, End);
1674 }
1675 if(Tok.getIdentifier().equals("sae")){
1676 Parser.Lex(); // Eat the sae
1677 if (!getLexer().is(AsmToken::RCurly))
1678 return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1679 Parser.Lex(); // Eat "}"
1680 return X86Operand::CreateToken("{sae}", consumedToken);
1681 }
1682 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1683 }
1684
1685 /// Parse the '.' operator.
ParseIntelDotOperator(IntelExprStateMachine & SM,SMLoc & End)1686 bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End) {
1687 const AsmToken &Tok = getTok();
1688 unsigned Offset;
1689
1690 // Drop the optional '.'.
1691 StringRef DotDispStr = Tok.getString();
1692 if (DotDispStr.startswith("."))
1693 DotDispStr = DotDispStr.drop_front(1);
1694
1695 // .Imm gets lexed as a real.
1696 if (Tok.is(AsmToken::Real)) {
1697 APInt DotDisp;
1698 DotDispStr.getAsInteger(10, DotDisp);
1699 Offset = DotDisp.getZExtValue();
1700 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1701 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1702 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1703 Offset))
1704 return Error(Tok.getLoc(), "Unable to lookup field reference!");
1705 } else
1706 return Error(Tok.getLoc(), "Unexpected token type!");
1707
1708 // Eat the DotExpression and update End
1709 End = SMLoc::getFromPointer(DotDispStr.data());
1710 const char *DotExprEndLoc = DotDispStr.data() + DotDispStr.size();
1711 while (Tok.getLoc().getPointer() < DotExprEndLoc)
1712 Lex();
1713 SM.addImm(Offset);
1714 return false;
1715 }
1716
1717 /// Parse the 'offset' operator. This operator is used to specify the
1718 /// location rather then the content of a variable.
ParseIntelOffsetOfOperator()1719 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
1720 MCAsmParser &Parser = getParser();
1721 const AsmToken &Tok = Parser.getTok();
1722 SMLoc OffsetOfLoc = Tok.getLoc();
1723 Parser.Lex(); // Eat offset.
1724
1725 const MCExpr *Val;
1726 InlineAsmIdentifierInfo Info;
1727 SMLoc Start = Tok.getLoc(), End;
1728 StringRef Identifier = Tok.getString();
1729 if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info,
1730 /*Unevaluated=*/false, End))
1731 return nullptr;
1732
1733 void *Decl = nullptr;
1734 // FIXME: MS evaluates "offset <Constant>" to the underlying integral
1735 if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
1736 return ErrorOperand(Start, "offset operator cannot yet handle constants");
1737 else if (Info.isKind(InlineAsmIdentifierInfo::IK_Var))
1738 Decl = Info.Var.Decl;
1739 // Don't emit the offset operator.
1740 InstInfo->AsmRewrites->emplace_back(AOK_Skip, OffsetOfLoc, 7);
1741
1742 // The offset operator will have an 'r' constraint, thus we need to create
1743 // register operand to ensure proper matching. Just pick a GPR based on
1744 // the size of a pointer.
1745 bool Parse32 = is32BitMode() || Code16GCC;
1746 unsigned RegNo = is64BitMode() ? X86::RBX : (Parse32 ? X86::EBX : X86::BX);
1747
1748 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1749 OffsetOfLoc, Identifier, Decl);
1750 }
1751
1752 // Query a candidate string for being an Intel assembly operator
1753 // Report back its kind, or IOK_INVALID if does not evaluated as a known one
IdentifyIntelInlineAsmOperator(StringRef Name)1754 unsigned X86AsmParser::IdentifyIntelInlineAsmOperator(StringRef Name) {
1755 return StringSwitch<unsigned>(Name)
1756 .Cases("TYPE","type",IOK_TYPE)
1757 .Cases("SIZE","size",IOK_SIZE)
1758 .Cases("LENGTH","length",IOK_LENGTH)
1759 .Cases("OFFSET","offset",IOK_OFFSET)
1760 .Default(IOK_INVALID);
1761 }
1762
1763 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1764 /// returns the number of elements in an array. It returns the value 1 for
1765 /// non-array variables. The SIZE operator returns the size of a C or C++
1766 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1767 /// TYPE operator returns the size of a C or C++ type or variable. If the
1768 /// variable is an array, TYPE returns the size of a single element.
ParseIntelInlineAsmOperator(unsigned OpKind)1769 unsigned X86AsmParser::ParseIntelInlineAsmOperator(unsigned OpKind) {
1770 MCAsmParser &Parser = getParser();
1771 const AsmToken &Tok = Parser.getTok();
1772 Parser.Lex(); // Eat operator.
1773
1774 const MCExpr *Val = nullptr;
1775 InlineAsmIdentifierInfo Info;
1776 SMLoc Start = Tok.getLoc(), End;
1777 StringRef Identifier = Tok.getString();
1778 if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info,
1779 /*Unevaluated=*/true, End))
1780 return 0;
1781
1782 if (!Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
1783 Error(Start, "unable to lookup expression");
1784 return 0;
1785 }
1786
1787 unsigned CVal = 0;
1788 switch(OpKind) {
1789 default: llvm_unreachable("Unexpected operand kind!");
1790 case IOK_LENGTH: CVal = Info.Var.Length; break;
1791 case IOK_SIZE: CVal = Info.Var.Size; break;
1792 case IOK_TYPE: CVal = Info.Var.Type; break;
1793 }
1794
1795 return CVal;
1796 }
1797
ParseIntelMemoryOperandSize(unsigned & Size)1798 bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) {
1799 Size = StringSwitch<unsigned>(getTok().getString())
1800 .Cases("BYTE", "byte", 8)
1801 .Cases("WORD", "word", 16)
1802 .Cases("DWORD", "dword", 32)
1803 .Cases("FLOAT", "float", 32)
1804 .Cases("LONG", "long", 32)
1805 .Cases("FWORD", "fword", 48)
1806 .Cases("DOUBLE", "double", 64)
1807 .Cases("QWORD", "qword", 64)
1808 .Cases("MMWORD","mmword", 64)
1809 .Cases("XWORD", "xword", 80)
1810 .Cases("TBYTE", "tbyte", 80)
1811 .Cases("XMMWORD", "xmmword", 128)
1812 .Cases("YMMWORD", "ymmword", 256)
1813 .Cases("ZMMWORD", "zmmword", 512)
1814 .Default(0);
1815 if (Size) {
1816 const AsmToken &Tok = Lex(); // Eat operand size (e.g., byte, word).
1817 if (!(Tok.getString().equals("PTR") || Tok.getString().equals("ptr")))
1818 return Error(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
1819 Lex(); // Eat ptr.
1820 }
1821 return false;
1822 }
1823
ParseIntelOperand()1824 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
1825 MCAsmParser &Parser = getParser();
1826 const AsmToken &Tok = Parser.getTok();
1827 SMLoc Start, End;
1828
1829 // FIXME: Offset operator
1830 // Should be handled as part of immediate expression, as other operators
1831 // Currently, only supported as a stand-alone operand
1832 if (isParsingInlineAsm())
1833 if (IdentifyIntelInlineAsmOperator(Tok.getString()) == IOK_OFFSET)
1834 return ParseIntelOffsetOfOperator();
1835
1836 // Parse optional Size directive.
1837 unsigned Size;
1838 if (ParseIntelMemoryOperandSize(Size))
1839 return nullptr;
1840 bool PtrInOperand = bool(Size);
1841
1842 Start = Tok.getLoc();
1843
1844 // Rounding mode operand.
1845 if (getLexer().is(AsmToken::LCurly))
1846 return ParseRoundingModeOp(Start);
1847
1848 // Register operand.
1849 unsigned RegNo = 0;
1850 if (Tok.is(AsmToken::Identifier) && !ParseRegister(RegNo, Start, End)) {
1851 if (RegNo == X86::RIP)
1852 return ErrorOperand(Start, "rip can only be used as a base register");
1853 // A Register followed by ':' is considered a segment override
1854 if (Tok.isNot(AsmToken::Colon))
1855 return !PtrInOperand ? X86Operand::CreateReg(RegNo, Start, End) :
1856 ErrorOperand(Start, "expected memory operand after 'ptr', "
1857 "found register operand instead");
1858 // An alleged segment override. check if we have a valid segment register
1859 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
1860 return ErrorOperand(Start, "invalid segment register");
1861 // Eat ':' and update Start location
1862 Start = Lex().getLoc();
1863 }
1864
1865 // Immediates and Memory
1866 IntelExprStateMachine SM;
1867 if (ParseIntelExpression(SM, End))
1868 return nullptr;
1869
1870 if (isParsingInlineAsm())
1871 RewriteIntelExpression(SM, Start, Tok.getLoc());
1872
1873 int64_t Imm = SM.getImm();
1874 const MCExpr *Disp = SM.getSym();
1875 const MCExpr *ImmDisp = MCConstantExpr::create(Imm, getContext());
1876 if (Disp && Imm)
1877 Disp = MCBinaryExpr::createAdd(Disp, ImmDisp, getContext());
1878 if (!Disp)
1879 Disp = ImmDisp;
1880
1881 // RegNo != 0 specifies a valid segment register,
1882 // and we are parsing a segment override
1883 if (!SM.isMemExpr() && !RegNo)
1884 return X86Operand::CreateImm(Disp, Start, End);
1885
1886 StringRef ErrMsg;
1887 unsigned BaseReg = SM.getBaseReg();
1888 unsigned IndexReg = SM.getIndexReg();
1889 unsigned Scale = SM.getScale();
1890
1891 if (Scale == 0 && BaseReg != X86::ESP && BaseReg != X86::RSP &&
1892 (IndexReg == X86::ESP || IndexReg == X86::RSP))
1893 std::swap(BaseReg, IndexReg);
1894
1895 // If BaseReg is a vector register and IndexReg is not, swap them unless
1896 // Scale was specified in which case it would be an error.
1897 if (Scale == 0 &&
1898 !(X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
1899 X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
1900 X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg)) &&
1901 (X86MCRegisterClasses[X86::VR128XRegClassID].contains(BaseReg) ||
1902 X86MCRegisterClasses[X86::VR256XRegClassID].contains(BaseReg) ||
1903 X86MCRegisterClasses[X86::VR512RegClassID].contains(BaseReg)))
1904 std::swap(BaseReg, IndexReg);
1905
1906 if (Scale != 0 &&
1907 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg))
1908 return ErrorOperand(Start, "16-bit addresses cannot have a scale");
1909
1910 // If there was no explicit scale specified, change it to 1.
1911 if (Scale == 0)
1912 Scale = 1;
1913
1914 // If this is a 16-bit addressing mode with the base and index in the wrong
1915 // order, swap them so CheckBaseRegAndIndexRegAndScale doesn't fail. It is
1916 // shared with att syntax where order matters.
1917 if ((BaseReg == X86::SI || BaseReg == X86::DI) &&
1918 (IndexReg == X86::BX || IndexReg == X86::BP))
1919 std::swap(BaseReg, IndexReg);
1920
1921 if ((BaseReg || IndexReg) &&
1922 CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
1923 ErrMsg))
1924 return ErrorOperand(Start, ErrMsg);
1925 if (isParsingInlineAsm())
1926 return CreateMemForInlineAsm(RegNo, Disp, BaseReg, IndexReg,
1927 Scale, Start, End, Size, SM.getSymName(),
1928 SM.getIdentifierInfo());
1929 if (!(BaseReg || IndexReg || RegNo))
1930 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size);
1931 return X86Operand::CreateMem(getPointerWidth(), RegNo, Disp,
1932 BaseReg, IndexReg, Scale, Start, End, Size);
1933 }
1934
ParseATTOperand()1935 std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
1936 MCAsmParser &Parser = getParser();
1937 switch (getLexer().getKind()) {
1938 default:
1939 // Parse a memory operand with no segment register.
1940 return ParseMemOperand(0, Parser.getTok().getLoc());
1941 case AsmToken::Percent: {
1942 // Read the register.
1943 unsigned RegNo;
1944 SMLoc Start, End;
1945 if (ParseRegister(RegNo, Start, End)) return nullptr;
1946 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1947 Error(Start, "%eiz and %riz can only be used as index registers",
1948 SMRange(Start, End));
1949 return nullptr;
1950 }
1951 if (RegNo == X86::RIP) {
1952 Error(Start, "%rip can only be used as a base register",
1953 SMRange(Start, End));
1954 return nullptr;
1955 }
1956
1957 // If this is a segment register followed by a ':', then this is the start
1958 // of a memory reference, otherwise this is a normal register reference.
1959 if (getLexer().isNot(AsmToken::Colon))
1960 return X86Operand::CreateReg(RegNo, Start, End);
1961
1962 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
1963 return ErrorOperand(Start, "invalid segment register");
1964
1965 getParser().Lex(); // Eat the colon.
1966 return ParseMemOperand(RegNo, Start);
1967 }
1968 case AsmToken::Dollar: {
1969 // $42 -> immediate.
1970 SMLoc Start = Parser.getTok().getLoc(), End;
1971 Parser.Lex();
1972 const MCExpr *Val;
1973 if (getParser().parseExpression(Val, End))
1974 return nullptr;
1975 return X86Operand::CreateImm(Val, Start, End);
1976 }
1977 case AsmToken::LCurly:{
1978 SMLoc Start = Parser.getTok().getLoc();
1979 return ParseRoundingModeOp(Start);
1980 }
1981 }
1982 }
1983
1984 // true on failure, false otherwise
1985 // If no {z} mark was found - Parser doesn't advance
ParseZ(std::unique_ptr<X86Operand> & Z,const SMLoc & StartLoc)1986 bool X86AsmParser::ParseZ(std::unique_ptr<X86Operand> &Z,
1987 const SMLoc &StartLoc) {
1988 MCAsmParser &Parser = getParser();
1989 // Assuming we are just pass the '{' mark, quering the next token
1990 // Searched for {z}, but none was found. Return false, as no parsing error was
1991 // encountered
1992 if (!(getLexer().is(AsmToken::Identifier) &&
1993 (getLexer().getTok().getIdentifier() == "z")))
1994 return false;
1995 Parser.Lex(); // Eat z
1996 // Query and eat the '}' mark
1997 if (!getLexer().is(AsmToken::RCurly))
1998 return Error(getLexer().getLoc(), "Expected } at this point");
1999 Parser.Lex(); // Eat '}'
2000 // Assign Z with the {z} mark opernad
2001 Z = X86Operand::CreateToken("{z}", StartLoc);
2002 return false;
2003 }
2004
2005 // true on failure, false otherwise
HandleAVX512Operand(OperandVector & Operands,const MCParsedAsmOperand & Op)2006 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
2007 const MCParsedAsmOperand &Op) {
2008 MCAsmParser &Parser = getParser();
2009 if (getLexer().is(AsmToken::LCurly)) {
2010 // Eat "{" and mark the current place.
2011 const SMLoc consumedToken = consumeToken();
2012 // Distinguish {1to<NUM>} from {%k<NUM>}.
2013 if(getLexer().is(AsmToken::Integer)) {
2014 // Parse memory broadcasting ({1to<NUM>}).
2015 if (getLexer().getTok().getIntVal() != 1)
2016 return TokError("Expected 1to<NUM> at this point");
2017 Parser.Lex(); // Eat "1" of 1to8
2018 if (!getLexer().is(AsmToken::Identifier) ||
2019 !getLexer().getTok().getIdentifier().startswith("to"))
2020 return TokError("Expected 1to<NUM> at this point");
2021 // Recognize only reasonable suffixes.
2022 const char *BroadcastPrimitive =
2023 StringSwitch<const char*>(getLexer().getTok().getIdentifier())
2024 .Case("to2", "{1to2}")
2025 .Case("to4", "{1to4}")
2026 .Case("to8", "{1to8}")
2027 .Case("to16", "{1to16}")
2028 .Default(nullptr);
2029 if (!BroadcastPrimitive)
2030 return TokError("Invalid memory broadcast primitive.");
2031 Parser.Lex(); // Eat "toN" of 1toN
2032 if (!getLexer().is(AsmToken::RCurly))
2033 return TokError("Expected } at this point");
2034 Parser.Lex(); // Eat "}"
2035 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
2036 consumedToken));
2037 // No AVX512 specific primitives can pass
2038 // after memory broadcasting, so return.
2039 return false;
2040 } else {
2041 // Parse either {k}{z}, {z}{k}, {k} or {z}
2042 // last one have no meaning, but GCC accepts it
2043 // Currently, we're just pass a '{' mark
2044 std::unique_ptr<X86Operand> Z;
2045 if (ParseZ(Z, consumedToken))
2046 return true;
2047 // Reaching here means that parsing of the allegadly '{z}' mark yielded
2048 // no errors.
2049 // Query for the need of further parsing for a {%k<NUM>} mark
2050 if (!Z || getLexer().is(AsmToken::LCurly)) {
2051 SMLoc StartLoc = Z ? consumeToken() : consumedToken;
2052 // Parse an op-mask register mark ({%k<NUM>}), which is now to be
2053 // expected
2054 unsigned RegNo;
2055 SMLoc RegLoc;
2056 if (!ParseRegister(RegNo, RegLoc, StartLoc) &&
2057 X86MCRegisterClasses[X86::VK1RegClassID].contains(RegNo)) {
2058 if (RegNo == X86::K0)
2059 return Error(RegLoc, "Register k0 can't be used as write mask");
2060 if (!getLexer().is(AsmToken::RCurly))
2061 return Error(getLexer().getLoc(), "Expected } at this point");
2062 Operands.push_back(X86Operand::CreateToken("{", StartLoc));
2063 Operands.push_back(
2064 X86Operand::CreateReg(RegNo, StartLoc, StartLoc));
2065 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
2066 } else
2067 return Error(getLexer().getLoc(),
2068 "Expected an op-mask register at this point");
2069 // {%k<NUM>} mark is found, inquire for {z}
2070 if (getLexer().is(AsmToken::LCurly) && !Z) {
2071 // Have we've found a parsing error, or found no (expected) {z} mark
2072 // - report an error
2073 if (ParseZ(Z, consumeToken()) || !Z)
2074 return Error(getLexer().getLoc(),
2075 "Expected a {z} mark at this point");
2076
2077 }
2078 // '{z}' on its own is meaningless, hence should be ignored.
2079 // on the contrary - have it been accompanied by a K register,
2080 // allow it.
2081 if (Z)
2082 Operands.push_back(std::move(Z));
2083 }
2084 }
2085 }
2086 return false;
2087 }
2088
2089 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
2090 /// has already been parsed if present.
ParseMemOperand(unsigned SegReg,SMLoc MemStart)2091 std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
2092 SMLoc MemStart) {
2093
2094 MCAsmParser &Parser = getParser();
2095 // We have to disambiguate a parenthesized expression "(4+5)" from the start
2096 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
2097 // only way to do this without lookahead is to eat the '(' and see what is
2098 // after it.
2099 const MCExpr *Disp = MCConstantExpr::create(0, getParser().getContext());
2100 if (getLexer().isNot(AsmToken::LParen)) {
2101 SMLoc ExprEnd;
2102 if (getParser().parseExpression(Disp, ExprEnd)) return nullptr;
2103 // Disp may be a variable, handle register values.
2104 if (auto *RE = dyn_cast<X86MCExpr>(Disp))
2105 return X86Operand::CreateReg(RE->getRegNo(), MemStart, ExprEnd);
2106
2107 // After parsing the base expression we could either have a parenthesized
2108 // memory address or not. If not, return now. If so, eat the (.
2109 if (getLexer().isNot(AsmToken::LParen)) {
2110 // Unless we have a segment register, treat this as an immediate.
2111 if (SegReg == 0)
2112 return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, ExprEnd);
2113 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
2114 MemStart, ExprEnd);
2115 }
2116
2117 // Eat the '('.
2118 Parser.Lex();
2119 } else {
2120 // Okay, we have a '('. We don't know if this is an expression or not, but
2121 // so we have to eat the ( to see beyond it.
2122 SMLoc LParenLoc = Parser.getTok().getLoc();
2123 Parser.Lex(); // Eat the '('.
2124
2125 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
2126 // Nothing to do here, fall into the code below with the '(' part of the
2127 // memory operand consumed.
2128 } else {
2129 SMLoc ExprEnd;
2130 getLexer().UnLex(AsmToken(AsmToken::LParen, "("));
2131
2132 // It must be either an parenthesized expression, or an expression that
2133 // begins from a parenthesized expression, parse it now. Example: (1+2) or
2134 // (1+2)+3
2135 if (getParser().parseExpression(Disp, ExprEnd))
2136 return nullptr;
2137
2138 // After parsing the base expression we could either have a parenthesized
2139 // memory address or not. If not, return now. If so, eat the (.
2140 if (getLexer().isNot(AsmToken::LParen)) {
2141 // Unless we have a segment register, treat this as an immediate.
2142 if (SegReg == 0)
2143 return X86Operand::CreateMem(getPointerWidth(), Disp, LParenLoc,
2144 ExprEnd);
2145 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
2146 MemStart, ExprEnd);
2147 }
2148
2149 // Eat the '('.
2150 Parser.Lex();
2151 }
2152 }
2153
2154 // If we reached here, then we just ate the ( of the memory operand. Process
2155 // the rest of the memory operand.
2156 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
2157 SMLoc IndexLoc, BaseLoc;
2158
2159 if (getLexer().is(AsmToken::Percent)) {
2160 SMLoc StartLoc, EndLoc;
2161 BaseLoc = Parser.getTok().getLoc();
2162 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr;
2163 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
2164 Error(StartLoc, "eiz and riz can only be used as index registers",
2165 SMRange(StartLoc, EndLoc));
2166 return nullptr;
2167 }
2168 }
2169
2170 if (getLexer().is(AsmToken::Comma)) {
2171 Parser.Lex(); // Eat the comma.
2172 IndexLoc = Parser.getTok().getLoc();
2173
2174 // Following the comma we should have either an index register, or a scale
2175 // value. We don't support the later form, but we want to parse it
2176 // correctly.
2177 //
2178 // Not that even though it would be completely consistent to support syntax
2179 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
2180 if (getLexer().is(AsmToken::Percent)) {
2181 SMLoc L;
2182 if (ParseRegister(IndexReg, L, L))
2183 return nullptr;
2184 if (BaseReg == X86::RIP) {
2185 Error(IndexLoc, "%rip as base register can not have an index register");
2186 return nullptr;
2187 }
2188 if (IndexReg == X86::RIP) {
2189 Error(IndexLoc, "%rip is not allowed as an index register");
2190 return nullptr;
2191 }
2192
2193 if (getLexer().isNot(AsmToken::RParen)) {
2194 // Parse the scale amount:
2195 // ::= ',' [scale-expression]
2196 if (parseToken(AsmToken::Comma, "expected comma in scale expression"))
2197 return nullptr;
2198
2199 if (getLexer().isNot(AsmToken::RParen)) {
2200 SMLoc Loc = Parser.getTok().getLoc();
2201
2202 int64_t ScaleVal;
2203 if (getParser().parseAbsoluteExpression(ScaleVal)){
2204 Error(Loc, "expected scale expression");
2205 return nullptr;
2206 }
2207
2208 // Validate the scale amount.
2209 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
2210 ScaleVal != 1) {
2211 Error(Loc, "scale factor in 16-bit address must be 1");
2212 return nullptr;
2213 }
2214 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 &&
2215 ScaleVal != 8) {
2216 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
2217 return nullptr;
2218 }
2219 Scale = (unsigned)ScaleVal;
2220 }
2221 }
2222 } else if (getLexer().isNot(AsmToken::RParen)) {
2223 // A scale amount without an index is ignored.
2224 // index.
2225 SMLoc Loc = Parser.getTok().getLoc();
2226
2227 int64_t Value;
2228 if (getParser().parseAbsoluteExpression(Value))
2229 return nullptr;
2230
2231 if (Value != 1)
2232 Warning(Loc, "scale factor without index register is ignored");
2233 Scale = 1;
2234 }
2235 }
2236
2237 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
2238 SMLoc MemEnd = Parser.getTok().getEndLoc();
2239 if (parseToken(AsmToken::RParen, "unexpected token in memory operand"))
2240 return nullptr;
2241
2242 // This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" ->
2243 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
2244 // documented form in various unofficial manuals, so a lot of code uses it.
2245 if (BaseReg == X86::DX && IndexReg == 0 && Scale == 1 &&
2246 SegReg == 0 && isa<MCConstantExpr>(Disp) &&
2247 cast<MCConstantExpr>(Disp)->getValue() == 0)
2248 return X86Operand::CreateDXReg(BaseLoc, BaseLoc);
2249
2250 StringRef ErrMsg;
2251 if (CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
2252 ErrMsg)) {
2253 Error(BaseLoc, ErrMsg);
2254 return nullptr;
2255 }
2256
2257 if (SegReg || BaseReg || IndexReg)
2258 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
2259 IndexReg, Scale, MemStart, MemEnd);
2260 return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, MemEnd);
2261 }
2262
2263 // Parse either a standard primary expression or a register.
parsePrimaryExpr(const MCExpr * & Res,SMLoc & EndLoc)2264 bool X86AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
2265 MCAsmParser &Parser = getParser();
2266 if (Parser.parsePrimaryExpr(Res, EndLoc)) {
2267 SMLoc StartLoc = Parser.getTok().getLoc();
2268 // Normal Expression parse fails, check if it could be a register.
2269 unsigned RegNo;
2270 bool TryRegParse =
2271 getTok().is(AsmToken::Percent) ||
2272 (isParsingIntelSyntax() && getTok().is(AsmToken::Identifier));
2273 if (!TryRegParse || ParseRegister(RegNo, StartLoc, EndLoc))
2274 return true;
2275 // Clear previous parse error and return correct expression.
2276 Parser.clearPendingErrors();
2277 Res = X86MCExpr::create(RegNo, Parser.getContext());
2278 return false;
2279 }
2280
2281 return false;
2282 }
2283
ParseInstruction(ParseInstructionInfo & Info,StringRef Name,SMLoc NameLoc,OperandVector & Operands)2284 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
2285 SMLoc NameLoc, OperandVector &Operands) {
2286 MCAsmParser &Parser = getParser();
2287 InstInfo = &Info;
2288 StringRef PatchedName = Name;
2289
2290 if ((Name.equals("jmp") || Name.equals("jc") || Name.equals("jz")) &&
2291 isParsingIntelSyntax() && isParsingInlineAsm()) {
2292 StringRef NextTok = Parser.getTok().getString();
2293 if (NextTok == "short") {
2294 SMLoc NameEndLoc =
2295 NameLoc.getFromPointer(NameLoc.getPointer() + Name.size());
2296 // Eat the short keyword
2297 Parser.Lex();
2298 // MS ignores the short keyword, it determines the jmp type based
2299 // on the distance of the label
2300 InstInfo->AsmRewrites->emplace_back(AOK_Skip, NameEndLoc,
2301 NextTok.size() + 1);
2302 }
2303 }
2304
2305 // FIXME: Hack to recognize setneb as setne.
2306 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
2307 PatchedName != "setb" && PatchedName != "setnb")
2308 PatchedName = PatchedName.substr(0, Name.size()-1);
2309
2310 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
2311 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
2312 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
2313 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
2314 bool IsVCMP = PatchedName[0] == 'v';
2315 unsigned CCIdx = IsVCMP ? 4 : 3;
2316 unsigned ComparisonCode = StringSwitch<unsigned>(
2317 PatchedName.slice(CCIdx, PatchedName.size() - 2))
2318 .Case("eq", 0x00)
2319 .Case("eq_oq", 0x00)
2320 .Case("lt", 0x01)
2321 .Case("lt_os", 0x01)
2322 .Case("le", 0x02)
2323 .Case("le_os", 0x02)
2324 .Case("unord", 0x03)
2325 .Case("unord_q", 0x03)
2326 .Case("neq", 0x04)
2327 .Case("neq_uq", 0x04)
2328 .Case("nlt", 0x05)
2329 .Case("nlt_us", 0x05)
2330 .Case("nle", 0x06)
2331 .Case("nle_us", 0x06)
2332 .Case("ord", 0x07)
2333 .Case("ord_q", 0x07)
2334 /* AVX only from here */
2335 .Case("eq_uq", 0x08)
2336 .Case("nge", 0x09)
2337 .Case("nge_us", 0x09)
2338 .Case("ngt", 0x0A)
2339 .Case("ngt_us", 0x0A)
2340 .Case("false", 0x0B)
2341 .Case("false_oq", 0x0B)
2342 .Case("neq_oq", 0x0C)
2343 .Case("ge", 0x0D)
2344 .Case("ge_os", 0x0D)
2345 .Case("gt", 0x0E)
2346 .Case("gt_os", 0x0E)
2347 .Case("true", 0x0F)
2348 .Case("true_uq", 0x0F)
2349 .Case("eq_os", 0x10)
2350 .Case("lt_oq", 0x11)
2351 .Case("le_oq", 0x12)
2352 .Case("unord_s", 0x13)
2353 .Case("neq_us", 0x14)
2354 .Case("nlt_uq", 0x15)
2355 .Case("nle_uq", 0x16)
2356 .Case("ord_s", 0x17)
2357 .Case("eq_us", 0x18)
2358 .Case("nge_uq", 0x19)
2359 .Case("ngt_uq", 0x1A)
2360 .Case("false_os", 0x1B)
2361 .Case("neq_os", 0x1C)
2362 .Case("ge_oq", 0x1D)
2363 .Case("gt_oq", 0x1E)
2364 .Case("true_us", 0x1F)
2365 .Default(~0U);
2366 if (ComparisonCode != ~0U && (IsVCMP || ComparisonCode < 8)) {
2367
2368 Operands.push_back(X86Operand::CreateToken(PatchedName.slice(0, CCIdx),
2369 NameLoc));
2370
2371 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2372 getParser().getContext());
2373 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2374
2375 PatchedName = PatchedName.substr(PatchedName.size() - 2);
2376 }
2377 }
2378
2379 // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2380 if (PatchedName.startswith("vpcmp") &&
2381 (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2382 PatchedName.endswith("d") || PatchedName.endswith("q"))) {
2383 unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2384 unsigned ComparisonCode = StringSwitch<unsigned>(
2385 PatchedName.slice(5, PatchedName.size() - CCIdx))
2386 .Case("eq", 0x0) // Only allowed on unsigned. Checked below.
2387 .Case("lt", 0x1)
2388 .Case("le", 0x2)
2389 //.Case("false", 0x3) // Not a documented alias.
2390 .Case("neq", 0x4)
2391 .Case("nlt", 0x5)
2392 .Case("nle", 0x6)
2393 //.Case("true", 0x7) // Not a documented alias.
2394 .Default(~0U);
2395 if (ComparisonCode != ~0U && (ComparisonCode != 0 || CCIdx == 2)) {
2396 Operands.push_back(X86Operand::CreateToken("vpcmp", NameLoc));
2397
2398 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2399 getParser().getContext());
2400 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2401
2402 PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
2403 }
2404 }
2405
2406 // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2407 if (PatchedName.startswith("vpcom") &&
2408 (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2409 PatchedName.endswith("d") || PatchedName.endswith("q"))) {
2410 unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2411 unsigned ComparisonCode = StringSwitch<unsigned>(
2412 PatchedName.slice(5, PatchedName.size() - CCIdx))
2413 .Case("lt", 0x0)
2414 .Case("le", 0x1)
2415 .Case("gt", 0x2)
2416 .Case("ge", 0x3)
2417 .Case("eq", 0x4)
2418 .Case("neq", 0x5)
2419 .Case("false", 0x6)
2420 .Case("true", 0x7)
2421 .Default(~0U);
2422 if (ComparisonCode != ~0U) {
2423 Operands.push_back(X86Operand::CreateToken("vpcom", NameLoc));
2424
2425 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2426 getParser().getContext());
2427 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2428
2429 PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
2430 }
2431 }
2432
2433
2434 // Determine whether this is an instruction prefix.
2435 // FIXME:
2436 // Enhance prefixes integrity robustness. for example, following forms
2437 // are currently tolerated:
2438 // repz repnz <insn> ; GAS errors for the use of two similar prefixes
2439 // lock addq %rax, %rbx ; Destination operand must be of memory type
2440 // xacquire <insn> ; xacquire must be accompanied by 'lock'
2441 bool isPrefix = StringSwitch<bool>(Name)
2442 .Cases("rex64", "data32", "data16", true)
2443 .Cases("xacquire", "xrelease", true)
2444 .Cases("acquire", "release", isParsingIntelSyntax())
2445 .Default(false);
2446
2447 auto isLockRepeatNtPrefix = [](StringRef N) {
2448 return StringSwitch<bool>(N)
2449 .Cases("lock", "rep", "repe", "repz", "repne", "repnz", "notrack", true)
2450 .Default(false);
2451 };
2452
2453 bool CurlyAsEndOfStatement = false;
2454
2455 unsigned Flags = X86::IP_NO_PREFIX;
2456 while (isLockRepeatNtPrefix(Name.lower())) {
2457 unsigned Prefix =
2458 StringSwitch<unsigned>(Name)
2459 .Cases("lock", "lock", X86::IP_HAS_LOCK)
2460 .Cases("rep", "repe", "repz", X86::IP_HAS_REPEAT)
2461 .Cases("repne", "repnz", X86::IP_HAS_REPEAT_NE)
2462 .Cases("notrack", "notrack", X86::IP_HAS_NOTRACK)
2463 .Default(X86::IP_NO_PREFIX); // Invalid prefix (impossible)
2464 Flags |= Prefix;
2465 if (getLexer().is(AsmToken::EndOfStatement)) {
2466 // We don't have real instr with the given prefix
2467 // let's use the prefix as the instr.
2468 // TODO: there could be several prefixes one after another
2469 Flags = X86::IP_NO_PREFIX;
2470 break;
2471 }
2472 Name = Parser.getTok().getString();
2473 Parser.Lex(); // eat the prefix
2474 // Hack: we could have something like "rep # some comment" or
2475 // "lock; cmpxchg16b $1" or "lock\0A\09incl" or "lock/incl"
2476 while (Name.startswith(";") || Name.startswith("\n") ||
2477 Name.startswith("#") || Name.startswith("\t") ||
2478 Name.startswith("/")) {
2479 Name = Parser.getTok().getString();
2480 Parser.Lex(); // go to next prefix or instr
2481 }
2482 }
2483
2484 if (Flags)
2485 PatchedName = Name;
2486
2487 // Hacks to handle 'data16' and 'data32'
2488 if (PatchedName == "data16" && is16BitMode()) {
2489 return Error(NameLoc, "redundant data16 prefix");
2490 }
2491 if (PatchedName == "data32") {
2492 if (is32BitMode())
2493 return Error(NameLoc, "redundant data32 prefix");
2494 if (is64BitMode())
2495 return Error(NameLoc, "'data32' is not supported in 64-bit mode");
2496 // Hack to 'data16' for the table lookup.
2497 PatchedName = "data16";
2498 }
2499
2500 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
2501
2502 // This does the actual operand parsing. Don't parse any more if we have a
2503 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
2504 // just want to parse the "lock" as the first instruction and the "incl" as
2505 // the next one.
2506 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
2507 // Parse '*' modifier.
2508 if (getLexer().is(AsmToken::Star))
2509 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
2510
2511 // Read the operands.
2512 while(1) {
2513 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
2514 Operands.push_back(std::move(Op));
2515 if (HandleAVX512Operand(Operands, *Operands.back()))
2516 return true;
2517 } else {
2518 return true;
2519 }
2520 // check for comma and eat it
2521 if (getLexer().is(AsmToken::Comma))
2522 Parser.Lex();
2523 else
2524 break;
2525 }
2526
2527 // In MS inline asm curly braces mark the beginning/end of a block,
2528 // therefore they should be interepreted as end of statement
2529 CurlyAsEndOfStatement =
2530 isParsingIntelSyntax() && isParsingInlineAsm() &&
2531 (getLexer().is(AsmToken::LCurly) || getLexer().is(AsmToken::RCurly));
2532 if (getLexer().isNot(AsmToken::EndOfStatement) && !CurlyAsEndOfStatement)
2533 return TokError("unexpected token in argument list");
2534 }
2535
2536 // Consume the EndOfStatement or the prefix separator Slash
2537 if (getLexer().is(AsmToken::EndOfStatement) ||
2538 (isPrefix && getLexer().is(AsmToken::Slash)))
2539 Parser.Lex();
2540 else if (CurlyAsEndOfStatement)
2541 // Add an actual EndOfStatement before the curly brace
2542 Info.AsmRewrites->emplace_back(AOK_EndOfStatement,
2543 getLexer().getTok().getLoc(), 0);
2544
2545 // This is for gas compatibility and cannot be done in td.
2546 // Adding "p" for some floating point with no argument.
2547 // For example: fsub --> fsubp
2548 bool IsFp =
2549 Name == "fsub" || Name == "fdiv" || Name == "fsubr" || Name == "fdivr";
2550 if (IsFp && Operands.size() == 1) {
2551 const char *Repl = StringSwitch<const char *>(Name)
2552 .Case("fsub", "fsubp")
2553 .Case("fdiv", "fdivp")
2554 .Case("fsubr", "fsubrp")
2555 .Case("fdivr", "fdivrp");
2556 static_cast<X86Operand &>(*Operands[0]).setTokenValue(Repl);
2557 }
2558
2559 // Moving a 32 or 16 bit value into a segment register has the same
2560 // behavior. Modify such instructions to always take shorter form.
2561 if ((Name == "mov" || Name == "movw" || Name == "movl") &&
2562 (Operands.size() == 3)) {
2563 X86Operand &Op1 = (X86Operand &)*Operands[1];
2564 X86Operand &Op2 = (X86Operand &)*Operands[2];
2565 SMLoc Loc = Op1.getEndLoc();
2566 if (Op1.isReg() && Op2.isReg() &&
2567 X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(
2568 Op2.getReg()) &&
2569 (X86MCRegisterClasses[X86::GR16RegClassID].contains(Op1.getReg()) ||
2570 X86MCRegisterClasses[X86::GR32RegClassID].contains(Op1.getReg()))) {
2571 // Change instruction name to match new instruction.
2572 if (Name != "mov" && Name[3] == (is16BitMode() ? 'l' : 'w')) {
2573 Name = is16BitMode() ? "movw" : "movl";
2574 Operands[0] = X86Operand::CreateToken(Name, NameLoc);
2575 }
2576 // Select the correct equivalent 16-/32-bit source register.
2577 unsigned Reg =
2578 getX86SubSuperRegisterOrZero(Op1.getReg(), is16BitMode() ? 16 : 32);
2579 Operands[1] = X86Operand::CreateReg(Reg, Loc, Loc);
2580 }
2581 }
2582
2583 // This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" ->
2584 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
2585 // documented form in various unofficial manuals, so a lot of code uses it.
2586 if ((Name == "outb" || Name == "outsb" || Name == "outw" || Name == "outsw" ||
2587 Name == "outl" || Name == "outsl" || Name == "out" || Name == "outs") &&
2588 Operands.size() == 3) {
2589 X86Operand &Op = (X86Operand &)*Operands.back();
2590 if (Op.isDXReg())
2591 Operands.back() = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
2592 Op.getEndLoc());
2593 }
2594 // Same hack for "in[s]?[bwl]? (%dx), %al" -> "inb %dx, %al".
2595 if ((Name == "inb" || Name == "insb" || Name == "inw" || Name == "insw" ||
2596 Name == "inl" || Name == "insl" || Name == "in" || Name == "ins") &&
2597 Operands.size() == 3) {
2598 X86Operand &Op = (X86Operand &)*Operands[1];
2599 if (Op.isDXReg())
2600 Operands[1] = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
2601 Op.getEndLoc());
2602 }
2603
2604 SmallVector<std::unique_ptr<MCParsedAsmOperand>, 2> TmpOperands;
2605 bool HadVerifyError = false;
2606
2607 // Append default arguments to "ins[bwld]"
2608 if (Name.startswith("ins") &&
2609 (Operands.size() == 1 || Operands.size() == 3) &&
2610 (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd" ||
2611 Name == "ins")) {
2612
2613 AddDefaultSrcDestOperands(TmpOperands,
2614 X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
2615 DefaultMemDIOperand(NameLoc));
2616 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2617 }
2618
2619 // Append default arguments to "outs[bwld]"
2620 if (Name.startswith("outs") &&
2621 (Operands.size() == 1 || Operands.size() == 3) &&
2622 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2623 Name == "outsd" || Name == "outs")) {
2624 AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
2625 X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2626 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2627 }
2628
2629 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2630 // values of $SIREG according to the mode. It would be nice if this
2631 // could be achieved with InstAlias in the tables.
2632 if (Name.startswith("lods") &&
2633 (Operands.size() == 1 || Operands.size() == 2) &&
2634 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2635 Name == "lodsl" || Name == "lodsd" || Name == "lodsq")) {
2636 TmpOperands.push_back(DefaultMemSIOperand(NameLoc));
2637 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2638 }
2639
2640 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2641 // values of $DIREG according to the mode. It would be nice if this
2642 // could be achieved with InstAlias in the tables.
2643 if (Name.startswith("stos") &&
2644 (Operands.size() == 1 || Operands.size() == 2) &&
2645 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2646 Name == "stosl" || Name == "stosd" || Name == "stosq")) {
2647 TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
2648 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2649 }
2650
2651 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2652 // values of $DIREG according to the mode. It would be nice if this
2653 // could be achieved with InstAlias in the tables.
2654 if (Name.startswith("scas") &&
2655 (Operands.size() == 1 || Operands.size() == 2) &&
2656 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2657 Name == "scasl" || Name == "scasd" || Name == "scasq")) {
2658 TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
2659 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2660 }
2661
2662 // Add default SI and DI operands to "cmps[bwlq]".
2663 if (Name.startswith("cmps") &&
2664 (Operands.size() == 1 || Operands.size() == 3) &&
2665 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2666 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
2667 AddDefaultSrcDestOperands(TmpOperands, DefaultMemDIOperand(NameLoc),
2668 DefaultMemSIOperand(NameLoc));
2669 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2670 }
2671
2672 // Add default SI and DI operands to "movs[bwlq]".
2673 if (((Name.startswith("movs") &&
2674 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2675 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2676 (Name.startswith("smov") &&
2677 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2678 Name == "smovl" || Name == "smovd" || Name == "smovq"))) &&
2679 (Operands.size() == 1 || Operands.size() == 3)) {
2680 if (Name == "movsd" && Operands.size() == 1 && !isParsingIntelSyntax())
2681 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
2682 AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
2683 DefaultMemDIOperand(NameLoc));
2684 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2685 }
2686
2687 // Check if we encountered an error for one the string insturctions
2688 if (HadVerifyError) {
2689 return HadVerifyError;
2690 }
2691
2692 // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>. Canonicalize to
2693 // "shift <op>".
2694 if ((Name.startswith("shr") || Name.startswith("sar") ||
2695 Name.startswith("shl") || Name.startswith("sal") ||
2696 Name.startswith("rcl") || Name.startswith("rcr") ||
2697 Name.startswith("rol") || Name.startswith("ror")) &&
2698 Operands.size() == 3) {
2699 if (isParsingIntelSyntax()) {
2700 // Intel syntax
2701 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
2702 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2703 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2704 Operands.pop_back();
2705 } else {
2706 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2707 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2708 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2709 Operands.erase(Operands.begin() + 1);
2710 }
2711 }
2712
2713 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2714 // instalias with an immediate operand yet.
2715 if (Name == "int" && Operands.size() == 2) {
2716 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2717 if (Op1.isImm())
2718 if (auto *CE = dyn_cast<MCConstantExpr>(Op1.getImm()))
2719 if (CE->getValue() == 3) {
2720 Operands.erase(Operands.begin() + 1);
2721 static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
2722 }
2723 }
2724
2725 // Transforms "xlat mem8" into "xlatb"
2726 if ((Name == "xlat" || Name == "xlatb") && Operands.size() == 2) {
2727 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2728 if (Op1.isMem8()) {
2729 Warning(Op1.getStartLoc(), "memory operand is only for determining the "
2730 "size, (R|E)BX will be used for the location");
2731 Operands.pop_back();
2732 static_cast<X86Operand &>(*Operands[0]).setTokenValue("xlatb");
2733 }
2734 }
2735
2736 if (Flags)
2737 Operands.push_back(X86Operand::CreatePrefix(Flags, NameLoc, NameLoc));
2738 return false;
2739 }
2740
processInstruction(MCInst & Inst,const OperandVector & Ops)2741 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
2742 return false;
2743 }
2744
validateInstruction(MCInst & Inst,const OperandVector & Ops)2745 bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
2746 const MCRegisterInfo *MRI = getContext().getRegisterInfo();
2747
2748 switch (Inst.getOpcode()) {
2749 case X86::VGATHERDPDYrm:
2750 case X86::VGATHERDPDrm:
2751 case X86::VGATHERDPSYrm:
2752 case X86::VGATHERDPSrm:
2753 case X86::VGATHERQPDYrm:
2754 case X86::VGATHERQPDrm:
2755 case X86::VGATHERQPSYrm:
2756 case X86::VGATHERQPSrm:
2757 case X86::VPGATHERDDYrm:
2758 case X86::VPGATHERDDrm:
2759 case X86::VPGATHERDQYrm:
2760 case X86::VPGATHERDQrm:
2761 case X86::VPGATHERQDYrm:
2762 case X86::VPGATHERQDrm:
2763 case X86::VPGATHERQQYrm:
2764 case X86::VPGATHERQQrm: {
2765 unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
2766 unsigned Mask = MRI->getEncodingValue(Inst.getOperand(1).getReg());
2767 unsigned Index =
2768 MRI->getEncodingValue(Inst.getOperand(3 + X86::AddrIndexReg).getReg());
2769 if (Dest == Mask || Dest == Index || Mask == Index)
2770 return Warning(Ops[0]->getStartLoc(), "mask, index, and destination "
2771 "registers should be distinct");
2772 break;
2773 }
2774 case X86::VGATHERDPDZ128rm:
2775 case X86::VGATHERDPDZ256rm:
2776 case X86::VGATHERDPDZrm:
2777 case X86::VGATHERDPSZ128rm:
2778 case X86::VGATHERDPSZ256rm:
2779 case X86::VGATHERDPSZrm:
2780 case X86::VGATHERQPDZ128rm:
2781 case X86::VGATHERQPDZ256rm:
2782 case X86::VGATHERQPDZrm:
2783 case X86::VGATHERQPSZ128rm:
2784 case X86::VGATHERQPSZ256rm:
2785 case X86::VGATHERQPSZrm:
2786 case X86::VPGATHERDDZ128rm:
2787 case X86::VPGATHERDDZ256rm:
2788 case X86::VPGATHERDDZrm:
2789 case X86::VPGATHERDQZ128rm:
2790 case X86::VPGATHERDQZ256rm:
2791 case X86::VPGATHERDQZrm:
2792 case X86::VPGATHERQDZ128rm:
2793 case X86::VPGATHERQDZ256rm:
2794 case X86::VPGATHERQDZrm:
2795 case X86::VPGATHERQQZ128rm:
2796 case X86::VPGATHERQQZ256rm:
2797 case X86::VPGATHERQQZrm: {
2798 unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
2799 unsigned Index =
2800 MRI->getEncodingValue(Inst.getOperand(4 + X86::AddrIndexReg).getReg());
2801 if (Dest == Index)
2802 return Warning(Ops[0]->getStartLoc(), "index and destination registers "
2803 "should be distinct");
2804 break;
2805 }
2806 case X86::V4FMADDPSrm:
2807 case X86::V4FMADDPSrmk:
2808 case X86::V4FMADDPSrmkz:
2809 case X86::V4FMADDSSrm:
2810 case X86::V4FMADDSSrmk:
2811 case X86::V4FMADDSSrmkz:
2812 case X86::V4FNMADDPSrm:
2813 case X86::V4FNMADDPSrmk:
2814 case X86::V4FNMADDPSrmkz:
2815 case X86::V4FNMADDSSrm:
2816 case X86::V4FNMADDSSrmk:
2817 case X86::V4FNMADDSSrmkz:
2818 case X86::VP4DPWSSDSrm:
2819 case X86::VP4DPWSSDSrmk:
2820 case X86::VP4DPWSSDSrmkz:
2821 case X86::VP4DPWSSDrm:
2822 case X86::VP4DPWSSDrmk:
2823 case X86::VP4DPWSSDrmkz: {
2824 unsigned Src2 = Inst.getOperand(Inst.getNumOperands() -
2825 X86::AddrNumOperands - 1).getReg();
2826 unsigned Src2Enc = MRI->getEncodingValue(Src2);
2827 if (Src2Enc % 4 != 0) {
2828 StringRef RegName = X86IntelInstPrinter::getRegisterName(Src2);
2829 unsigned GroupStart = (Src2Enc / 4) * 4;
2830 unsigned GroupEnd = GroupStart + 3;
2831 return Warning(Ops[0]->getStartLoc(),
2832 "source register '" + RegName + "' implicitly denotes '" +
2833 RegName.take_front(3) + Twine(GroupStart) + "' to '" +
2834 RegName.take_front(3) + Twine(GroupEnd) +
2835 "' source group");
2836 }
2837 break;
2838 }
2839 }
2840
2841 return false;
2842 }
2843
2844 static const char *getSubtargetFeatureName(uint64_t Val);
2845
EmitInstruction(MCInst & Inst,OperandVector & Operands,MCStreamer & Out)2846 void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
2847 MCStreamer &Out) {
2848 Instrumentation->InstrumentAndEmitInstruction(
2849 Inst, Operands, getContext(), MII, Out,
2850 getParser().shouldPrintSchedInfo());
2851 }
2852
MatchAndEmitInstruction(SMLoc IDLoc,unsigned & Opcode,OperandVector & Operands,MCStreamer & Out,uint64_t & ErrorInfo,bool MatchingInlineAsm)2853 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2854 OperandVector &Operands,
2855 MCStreamer &Out, uint64_t &ErrorInfo,
2856 bool MatchingInlineAsm) {
2857 if (isParsingIntelSyntax())
2858 return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
2859 MatchingInlineAsm);
2860 return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
2861 MatchingInlineAsm);
2862 }
2863
MatchFPUWaitAlias(SMLoc IDLoc,X86Operand & Op,OperandVector & Operands,MCStreamer & Out,bool MatchingInlineAsm)2864 void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
2865 OperandVector &Operands, MCStreamer &Out,
2866 bool MatchingInlineAsm) {
2867 // FIXME: This should be replaced with a real .td file alias mechanism.
2868 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
2869 // call.
2870 const char *Repl = StringSwitch<const char *>(Op.getToken())
2871 .Case("finit", "fninit")
2872 .Case("fsave", "fnsave")
2873 .Case("fstcw", "fnstcw")
2874 .Case("fstcww", "fnstcw")
2875 .Case("fstenv", "fnstenv")
2876 .Case("fstsw", "fnstsw")
2877 .Case("fstsww", "fnstsw")
2878 .Case("fclex", "fnclex")
2879 .Default(nullptr);
2880 if (Repl) {
2881 MCInst Inst;
2882 Inst.setOpcode(X86::WAIT);
2883 Inst.setLoc(IDLoc);
2884 if (!MatchingInlineAsm)
2885 EmitInstruction(Inst, Operands, Out);
2886 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
2887 }
2888 }
2889
ErrorMissingFeature(SMLoc IDLoc,uint64_t ErrorInfo,bool MatchingInlineAsm)2890 bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
2891 bool MatchingInlineAsm) {
2892 assert(ErrorInfo && "Unknown missing feature!");
2893 SmallString<126> Msg;
2894 raw_svector_ostream OS(Msg);
2895 OS << "instruction requires:";
2896 uint64_t Mask = 1;
2897 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2898 if (ErrorInfo & Mask)
2899 OS << ' ' << getSubtargetFeatureName(ErrorInfo & Mask);
2900 Mask <<= 1;
2901 }
2902 return Error(IDLoc, OS.str(), SMRange(), MatchingInlineAsm);
2903 }
2904
getPrefixes(OperandVector & Operands)2905 static unsigned getPrefixes(OperandVector &Operands) {
2906 unsigned Result = 0;
2907 X86Operand &Prefix = static_cast<X86Operand &>(*Operands.back());
2908 if (Prefix.isPrefix()) {
2909 Result = Prefix.getPrefix();
2910 Operands.pop_back();
2911 }
2912 return Result;
2913 }
2914
MatchAndEmitATTInstruction(SMLoc IDLoc,unsigned & Opcode,OperandVector & Operands,MCStreamer & Out,uint64_t & ErrorInfo,bool MatchingInlineAsm)2915 bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
2916 OperandVector &Operands,
2917 MCStreamer &Out,
2918 uint64_t &ErrorInfo,
2919 bool MatchingInlineAsm) {
2920 assert(!Operands.empty() && "Unexpect empty operand list!");
2921 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2922 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2923 SMRange EmptyRange = None;
2924
2925 // First, handle aliases that expand to multiple instructions.
2926 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
2927
2928 bool WasOriginallyInvalidOperand = false;
2929 unsigned Prefixes = getPrefixes(Operands);
2930
2931 MCInst Inst;
2932
2933 if (Prefixes)
2934 Inst.setFlags(Prefixes);
2935
2936 // First, try a direct match.
2937 switch (MatchInstruction(Operands, Inst, ErrorInfo, MatchingInlineAsm,
2938 isParsingIntelSyntax())) {
2939 default: llvm_unreachable("Unexpected match result!");
2940 case Match_Success:
2941 if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
2942 return true;
2943 // Some instructions need post-processing to, for example, tweak which
2944 // encoding is selected. Loop on it while changes happen so the
2945 // individual transformations can chain off each other.
2946 if (!MatchingInlineAsm)
2947 while (processInstruction(Inst, Operands))
2948 ;
2949
2950 Inst.setLoc(IDLoc);
2951 if (!MatchingInlineAsm)
2952 EmitInstruction(Inst, Operands, Out);
2953 Opcode = Inst.getOpcode();
2954 return false;
2955 case Match_MissingFeature:
2956 return ErrorMissingFeature(IDLoc, ErrorInfo, MatchingInlineAsm);
2957 case Match_InvalidOperand:
2958 WasOriginallyInvalidOperand = true;
2959 break;
2960 case Match_MnemonicFail:
2961 break;
2962 }
2963
2964 // FIXME: Ideally, we would only attempt suffix matches for things which are
2965 // valid prefixes, and we could just infer the right unambiguous
2966 // type. However, that requires substantially more matcher support than the
2967 // following hack.
2968
2969 // Change the operand to point to a temporary token.
2970 StringRef Base = Op.getToken();
2971 SmallString<16> Tmp;
2972 Tmp += Base;
2973 Tmp += ' ';
2974 Op.setTokenValue(Tmp);
2975
2976 // If this instruction starts with an 'f', then it is a floating point stack
2977 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2978 // 80-bit floating point, which use the suffixes s,l,t respectively.
2979 //
2980 // Otherwise, we assume that this may be an integer instruction, which comes
2981 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
2982 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2983
2984 // Check for the various suffix matches.
2985 uint64_t ErrorInfoIgnore;
2986 uint64_t ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
2987 unsigned Match[4];
2988
2989 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
2990 Tmp.back() = Suffixes[I];
2991 Match[I] = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
2992 MatchingInlineAsm, isParsingIntelSyntax());
2993 // If this returned as a missing feature failure, remember that.
2994 if (Match[I] == Match_MissingFeature)
2995 ErrorInfoMissingFeature = ErrorInfoIgnore;
2996 }
2997
2998 // Restore the old token.
2999 Op.setTokenValue(Base);
3000
3001 // If exactly one matched, then we treat that as a successful match (and the
3002 // instruction will already have been filled in correctly, since the failing
3003 // matches won't have modified it).
3004 unsigned NumSuccessfulMatches =
3005 std::count(std::begin(Match), std::end(Match), Match_Success);
3006 if (NumSuccessfulMatches == 1) {
3007 Inst.setLoc(IDLoc);
3008 if (!MatchingInlineAsm)
3009 EmitInstruction(Inst, Operands, Out);
3010 Opcode = Inst.getOpcode();
3011 return false;
3012 }
3013
3014 // Otherwise, the match failed, try to produce a decent error message.
3015
3016 // If we had multiple suffix matches, then identify this as an ambiguous
3017 // match.
3018 if (NumSuccessfulMatches > 1) {
3019 char MatchChars[4];
3020 unsigned NumMatches = 0;
3021 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I)
3022 if (Match[I] == Match_Success)
3023 MatchChars[NumMatches++] = Suffixes[I];
3024
3025 SmallString<126> Msg;
3026 raw_svector_ostream OS(Msg);
3027 OS << "ambiguous instructions require an explicit suffix (could be ";
3028 for (unsigned i = 0; i != NumMatches; ++i) {
3029 if (i != 0)
3030 OS << ", ";
3031 if (i + 1 == NumMatches)
3032 OS << "or ";
3033 OS << "'" << Base << MatchChars[i] << "'";
3034 }
3035 OS << ")";
3036 Error(IDLoc, OS.str(), EmptyRange, MatchingInlineAsm);
3037 return true;
3038 }
3039
3040 // Okay, we know that none of the variants matched successfully.
3041
3042 // If all of the instructions reported an invalid mnemonic, then the original
3043 // mnemonic was invalid.
3044 if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
3045 if (!WasOriginallyInvalidOperand) {
3046 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
3047 Op.getLocRange(), MatchingInlineAsm);
3048 }
3049
3050 // Recover location info for the operand if we know which was the problem.
3051 if (ErrorInfo != ~0ULL) {
3052 if (ErrorInfo >= Operands.size())
3053 return Error(IDLoc, "too few operands for instruction", EmptyRange,
3054 MatchingInlineAsm);
3055
3056 X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
3057 if (Operand.getStartLoc().isValid()) {
3058 SMRange OperandRange = Operand.getLocRange();
3059 return Error(Operand.getStartLoc(), "invalid operand for instruction",
3060 OperandRange, MatchingInlineAsm);
3061 }
3062 }
3063
3064 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
3065 MatchingInlineAsm);
3066 }
3067
3068 // If one instruction matched with a missing feature, report this as a
3069 // missing feature.
3070 if (std::count(std::begin(Match), std::end(Match),
3071 Match_MissingFeature) == 1) {
3072 ErrorInfo = ErrorInfoMissingFeature;
3073 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
3074 MatchingInlineAsm);
3075 }
3076
3077 // If one instruction matched with an invalid operand, report this as an
3078 // operand failure.
3079 if (std::count(std::begin(Match), std::end(Match),
3080 Match_InvalidOperand) == 1) {
3081 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
3082 MatchingInlineAsm);
3083 }
3084
3085 // If all of these were an outright failure, report it in a useless way.
3086 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
3087 EmptyRange, MatchingInlineAsm);
3088 return true;
3089 }
3090
MatchAndEmitIntelInstruction(SMLoc IDLoc,unsigned & Opcode,OperandVector & Operands,MCStreamer & Out,uint64_t & ErrorInfo,bool MatchingInlineAsm)3091 bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
3092 OperandVector &Operands,
3093 MCStreamer &Out,
3094 uint64_t &ErrorInfo,
3095 bool MatchingInlineAsm) {
3096 assert(!Operands.empty() && "Unexpect empty operand list!");
3097 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
3098 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
3099 StringRef Mnemonic = Op.getToken();
3100 SMRange EmptyRange = None;
3101 StringRef Base = Op.getToken();
3102 unsigned Prefixes = getPrefixes(Operands);
3103
3104 // First, handle aliases that expand to multiple instructions.
3105 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
3106
3107 MCInst Inst;
3108
3109 if (Prefixes)
3110 Inst.setFlags(Prefixes);
3111
3112 // Find one unsized memory operand, if present.
3113 X86Operand *UnsizedMemOp = nullptr;
3114 for (const auto &Op : Operands) {
3115 X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
3116 if (X86Op->isMemUnsized()) {
3117 UnsizedMemOp = X86Op;
3118 // Have we found an unqualified memory operand,
3119 // break. IA allows only one memory operand.
3120 break;
3121 }
3122 }
3123
3124 // Allow some instructions to have implicitly pointer-sized operands. This is
3125 // compatible with gas.
3126 if (UnsizedMemOp) {
3127 static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
3128 for (const char *Instr : PtrSizedInstrs) {
3129 if (Mnemonic == Instr) {
3130 UnsizedMemOp->Mem.Size = getPointerWidth();
3131 break;
3132 }
3133 }
3134 }
3135
3136 SmallVector<unsigned, 8> Match;
3137 uint64_t ErrorInfoMissingFeature = 0;
3138
3139 // If unsized push has immediate operand we should default the default pointer
3140 // size for the size.
3141 if (Mnemonic == "push" && Operands.size() == 2) {
3142 auto *X86Op = static_cast<X86Operand *>(Operands[1].get());
3143 if (X86Op->isImm()) {
3144 // If it's not a constant fall through and let remainder take care of it.
3145 const auto *CE = dyn_cast<MCConstantExpr>(X86Op->getImm());
3146 unsigned Size = getPointerWidth();
3147 if (CE &&
3148 (isIntN(Size, CE->getValue()) || isUIntN(Size, CE->getValue()))) {
3149 SmallString<16> Tmp;
3150 Tmp += Base;
3151 Tmp += (is64BitMode())
3152 ? "q"
3153 : (is32BitMode()) ? "l" : (is16BitMode()) ? "w" : " ";
3154 Op.setTokenValue(Tmp);
3155 // Do match in ATT mode to allow explicit suffix usage.
3156 Match.push_back(MatchInstruction(Operands, Inst, ErrorInfo,
3157 MatchingInlineAsm,
3158 false /*isParsingIntelSyntax()*/));
3159 Op.setTokenValue(Base);
3160 }
3161 }
3162 }
3163
3164 // If an unsized memory operand is present, try to match with each memory
3165 // operand size. In Intel assembly, the size is not part of the instruction
3166 // mnemonic.
3167 if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
3168 static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
3169 for (unsigned Size : MopSizes) {
3170 UnsizedMemOp->Mem.Size = Size;
3171 uint64_t ErrorInfoIgnore;
3172 unsigned LastOpcode = Inst.getOpcode();
3173 unsigned M = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
3174 MatchingInlineAsm, isParsingIntelSyntax());
3175 if (Match.empty() || LastOpcode != Inst.getOpcode())
3176 Match.push_back(M);
3177
3178 // If this returned as a missing feature failure, remember that.
3179 if (Match.back() == Match_MissingFeature)
3180 ErrorInfoMissingFeature = ErrorInfoIgnore;
3181 }
3182
3183 // Restore the size of the unsized memory operand if we modified it.
3184 UnsizedMemOp->Mem.Size = 0;
3185 }
3186
3187 // If we haven't matched anything yet, this is not a basic integer or FPU
3188 // operation. There shouldn't be any ambiguity in our mnemonic table, so try
3189 // matching with the unsized operand.
3190 if (Match.empty()) {
3191 Match.push_back(MatchInstruction(
3192 Operands, Inst, ErrorInfo, MatchingInlineAsm, isParsingIntelSyntax()));
3193 // If this returned as a missing feature failure, remember that.
3194 if (Match.back() == Match_MissingFeature)
3195 ErrorInfoMissingFeature = ErrorInfo;
3196 }
3197
3198 // Restore the size of the unsized memory operand if we modified it.
3199 if (UnsizedMemOp)
3200 UnsizedMemOp->Mem.Size = 0;
3201
3202 // If it's a bad mnemonic, all results will be the same.
3203 if (Match.back() == Match_MnemonicFail) {
3204 return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
3205 Op.getLocRange(), MatchingInlineAsm);
3206 }
3207
3208 unsigned NumSuccessfulMatches =
3209 std::count(std::begin(Match), std::end(Match), Match_Success);
3210
3211 // If matching was ambiguous and we had size information from the frontend,
3212 // try again with that. This handles cases like "movxz eax, m8/m16".
3213 if (UnsizedMemOp && NumSuccessfulMatches > 1 &&
3214 UnsizedMemOp->getMemFrontendSize()) {
3215 UnsizedMemOp->Mem.Size = UnsizedMemOp->getMemFrontendSize();
3216 unsigned M = MatchInstruction(
3217 Operands, Inst, ErrorInfo, MatchingInlineAsm, isParsingIntelSyntax());
3218 if (M == Match_Success)
3219 NumSuccessfulMatches = 1;
3220
3221 // Add a rewrite that encodes the size information we used from the
3222 // frontend.
3223 InstInfo->AsmRewrites->emplace_back(
3224 AOK_SizeDirective, UnsizedMemOp->getStartLoc(),
3225 /*Len=*/0, UnsizedMemOp->getMemFrontendSize());
3226 }
3227
3228 // If exactly one matched, then we treat that as a successful match (and the
3229 // instruction will already have been filled in correctly, since the failing
3230 // matches won't have modified it).
3231 if (NumSuccessfulMatches == 1) {
3232 if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
3233 return true;
3234 // Some instructions need post-processing to, for example, tweak which
3235 // encoding is selected. Loop on it while changes happen so the individual
3236 // transformations can chain off each other.
3237 if (!MatchingInlineAsm)
3238 while (processInstruction(Inst, Operands))
3239 ;
3240 Inst.setLoc(IDLoc);
3241 if (!MatchingInlineAsm)
3242 EmitInstruction(Inst, Operands, Out);
3243 Opcode = Inst.getOpcode();
3244 return false;
3245 } else if (NumSuccessfulMatches > 1) {
3246 assert(UnsizedMemOp &&
3247 "multiple matches only possible with unsized memory operands");
3248 return Error(UnsizedMemOp->getStartLoc(),
3249 "ambiguous operand size for instruction '" + Mnemonic + "\'",
3250 UnsizedMemOp->getLocRange());
3251 }
3252
3253 // If one instruction matched with a missing feature, report this as a
3254 // missing feature.
3255 if (std::count(std::begin(Match), std::end(Match),
3256 Match_MissingFeature) == 1) {
3257 ErrorInfo = ErrorInfoMissingFeature;
3258 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
3259 MatchingInlineAsm);
3260 }
3261
3262 // If one instruction matched with an invalid operand, report this as an
3263 // operand failure.
3264 if (std::count(std::begin(Match), std::end(Match),
3265 Match_InvalidOperand) == 1) {
3266 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
3267 MatchingInlineAsm);
3268 }
3269
3270 // If all of these were an outright failure, report it in a useless way.
3271 return Error(IDLoc, "unknown instruction mnemonic", EmptyRange,
3272 MatchingInlineAsm);
3273 }
3274
OmitRegisterFromClobberLists(unsigned RegNo)3275 bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
3276 return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
3277 }
3278
ParseDirective(AsmToken DirectiveID)3279 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
3280 MCAsmParser &Parser = getParser();
3281 StringRef IDVal = DirectiveID.getIdentifier();
3282 if (IDVal.startswith(".code"))
3283 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
3284 else if (IDVal.startswith(".att_syntax")) {
3285 getParser().setParsingInlineAsm(false);
3286 if (getLexer().isNot(AsmToken::EndOfStatement)) {
3287 if (Parser.getTok().getString() == "prefix")
3288 Parser.Lex();
3289 else if (Parser.getTok().getString() == "noprefix")
3290 return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
3291 "supported: registers must have a "
3292 "'%' prefix in .att_syntax");
3293 }
3294 getParser().setAssemblerDialect(0);
3295 return false;
3296 } else if (IDVal.startswith(".intel_syntax")) {
3297 getParser().setAssemblerDialect(1);
3298 getParser().setParsingInlineAsm(true);
3299 if (getLexer().isNot(AsmToken::EndOfStatement)) {
3300 if (Parser.getTok().getString() == "noprefix")
3301 Parser.Lex();
3302 else if (Parser.getTok().getString() == "prefix")
3303 return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
3304 "supported: registers must not have "
3305 "a '%' prefix in .intel_syntax");
3306 }
3307 return false;
3308 } else if (IDVal == ".even")
3309 return parseDirectiveEven(DirectiveID.getLoc());
3310 else if (IDVal == ".cv_fpo_proc")
3311 return parseDirectiveFPOProc(DirectiveID.getLoc());
3312 else if (IDVal == ".cv_fpo_setframe")
3313 return parseDirectiveFPOSetFrame(DirectiveID.getLoc());
3314 else if (IDVal == ".cv_fpo_pushreg")
3315 return parseDirectiveFPOPushReg(DirectiveID.getLoc());
3316 else if (IDVal == ".cv_fpo_stackalloc")
3317 return parseDirectiveFPOStackAlloc(DirectiveID.getLoc());
3318 else if (IDVal == ".cv_fpo_endprologue")
3319 return parseDirectiveFPOEndPrologue(DirectiveID.getLoc());
3320 else if (IDVal == ".cv_fpo_endproc")
3321 return parseDirectiveFPOEndProc(DirectiveID.getLoc());
3322
3323 return true;
3324 }
3325
3326 /// parseDirectiveEven
3327 /// ::= .even
parseDirectiveEven(SMLoc L)3328 bool X86AsmParser::parseDirectiveEven(SMLoc L) {
3329 if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
3330 return false;
3331
3332 const MCSection *Section = getStreamer().getCurrentSectionOnly();
3333 if (!Section) {
3334 getStreamer().InitSections(false);
3335 Section = getStreamer().getCurrentSectionOnly();
3336 }
3337 if (Section->UseCodeAlign())
3338 getStreamer().EmitCodeAlignment(2, 0);
3339 else
3340 getStreamer().EmitValueToAlignment(2, 0, 1, 0);
3341 return false;
3342 }
3343
3344 /// ParseDirectiveCode
3345 /// ::= .code16 | .code32 | .code64
ParseDirectiveCode(StringRef IDVal,SMLoc L)3346 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
3347 MCAsmParser &Parser = getParser();
3348 Code16GCC = false;
3349 if (IDVal == ".code16") {
3350 Parser.Lex();
3351 if (!is16BitMode()) {
3352 SwitchMode(X86::Mode16Bit);
3353 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
3354 }
3355 } else if (IDVal == ".code16gcc") {
3356 // .code16gcc parses as if in 32-bit mode, but emits code in 16-bit mode.
3357 Parser.Lex();
3358 Code16GCC = true;
3359 if (!is16BitMode()) {
3360 SwitchMode(X86::Mode16Bit);
3361 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
3362 }
3363 } else if (IDVal == ".code32") {
3364 Parser.Lex();
3365 if (!is32BitMode()) {
3366 SwitchMode(X86::Mode32Bit);
3367 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
3368 }
3369 } else if (IDVal == ".code64") {
3370 Parser.Lex();
3371 if (!is64BitMode()) {
3372 SwitchMode(X86::Mode64Bit);
3373 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
3374 }
3375 } else {
3376 Error(L, "unknown directive " + IDVal);
3377 return false;
3378 }
3379
3380 return false;
3381 }
3382
3383 // .cv_fpo_proc foo
parseDirectiveFPOProc(SMLoc L)3384 bool X86AsmParser::parseDirectiveFPOProc(SMLoc L) {
3385 MCAsmParser &Parser = getParser();
3386 StringRef ProcName;
3387 int64_t ParamsSize;
3388 if (Parser.parseIdentifier(ProcName))
3389 return Parser.TokError("expected symbol name");
3390 if (Parser.parseIntToken(ParamsSize, "expected parameter byte count"))
3391 return true;
3392 if (!isUIntN(32, ParamsSize))
3393 return Parser.TokError("parameters size out of range");
3394 if (Parser.parseEOL("unexpected tokens"))
3395 return addErrorSuffix(" in '.cv_fpo_proc' directive");
3396 MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName);
3397 return getTargetStreamer().emitFPOProc(ProcSym, ParamsSize, L);
3398 }
3399
3400 // .cv_fpo_setframe ebp
parseDirectiveFPOSetFrame(SMLoc L)3401 bool X86AsmParser::parseDirectiveFPOSetFrame(SMLoc L) {
3402 MCAsmParser &Parser = getParser();
3403 unsigned Reg;
3404 SMLoc DummyLoc;
3405 if (ParseRegister(Reg, DummyLoc, DummyLoc) ||
3406 Parser.parseEOL("unexpected tokens"))
3407 return addErrorSuffix(" in '.cv_fpo_setframe' directive");
3408 return getTargetStreamer().emitFPOSetFrame(Reg, L);
3409 }
3410
3411 // .cv_fpo_pushreg ebx
parseDirectiveFPOPushReg(SMLoc L)3412 bool X86AsmParser::parseDirectiveFPOPushReg(SMLoc L) {
3413 MCAsmParser &Parser = getParser();
3414 unsigned Reg;
3415 SMLoc DummyLoc;
3416 if (ParseRegister(Reg, DummyLoc, DummyLoc) ||
3417 Parser.parseEOL("unexpected tokens"))
3418 return addErrorSuffix(" in '.cv_fpo_pushreg' directive");
3419 return getTargetStreamer().emitFPOPushReg(Reg, L);
3420 }
3421
3422 // .cv_fpo_stackalloc 20
parseDirectiveFPOStackAlloc(SMLoc L)3423 bool X86AsmParser::parseDirectiveFPOStackAlloc(SMLoc L) {
3424 MCAsmParser &Parser = getParser();
3425 int64_t Offset;
3426 if (Parser.parseIntToken(Offset, "expected offset") ||
3427 Parser.parseEOL("unexpected tokens"))
3428 return addErrorSuffix(" in '.cv_fpo_stackalloc' directive");
3429 return getTargetStreamer().emitFPOStackAlloc(Offset, L);
3430 }
3431
3432 // .cv_fpo_endprologue
parseDirectiveFPOEndPrologue(SMLoc L)3433 bool X86AsmParser::parseDirectiveFPOEndPrologue(SMLoc L) {
3434 MCAsmParser &Parser = getParser();
3435 if (Parser.parseEOL("unexpected tokens"))
3436 return addErrorSuffix(" in '.cv_fpo_endprologue' directive");
3437 return getTargetStreamer().emitFPOEndPrologue(L);
3438 }
3439
3440 // .cv_fpo_endproc
parseDirectiveFPOEndProc(SMLoc L)3441 bool X86AsmParser::parseDirectiveFPOEndProc(SMLoc L) {
3442 MCAsmParser &Parser = getParser();
3443 if (Parser.parseEOL("unexpected tokens"))
3444 return addErrorSuffix(" in '.cv_fpo_endproc' directive");
3445 return getTargetStreamer().emitFPOEndProc(L);
3446 }
3447
3448 // Force static initialization.
LLVMInitializeX86AsmParser()3449 extern "C" void LLVMInitializeX86AsmParser() {
3450 RegisterMCAsmParser<X86AsmParser> X(getTheX86_32Target());
3451 RegisterMCAsmParser<X86AsmParser> Y(getTheX86_64Target());
3452 }
3453
3454 #define GET_REGISTER_MATCHER
3455 #define GET_MATCHER_IMPLEMENTATION
3456 #define GET_SUBTARGET_FEATURE_NAME
3457 #include "X86GenAsmMatcher.inc"
3458