1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "X86AsmInstrumentation.h"
12 #include "X86AsmParserCommon.h"
13 #include "X86Operand.h"
14 #include "X86ISelLowering.h"
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/SmallString.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/ADT/Twine.h"
21 #include "llvm/MC/MCContext.h"
22 #include "llvm/MC/MCExpr.h"
23 #include "llvm/MC/MCInst.h"
24 #include "llvm/MC/MCInstrInfo.h"
25 #include "llvm/MC/MCParser/MCAsmLexer.h"
26 #include "llvm/MC/MCParser/MCAsmParser.h"
27 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
28 #include "llvm/MC/MCRegisterInfo.h"
29 #include "llvm/MC/MCStreamer.h"
30 #include "llvm/MC/MCSubtargetInfo.h"
31 #include "llvm/MC/MCSymbol.h"
32 #include "llvm/MC/MCTargetAsmParser.h"
33 #include "llvm/Support/SourceMgr.h"
34 #include "llvm/Support/TargetRegistry.h"
35 #include "llvm/Support/raw_ostream.h"
36 #include <algorithm>
37 #include <memory>
38
39 using namespace llvm;
40
41 namespace {
42
/// Precedence of every InfixCalculatorTok, indexed by the token's enumerator
/// value. A larger number binds more tightly. The entries must stay in the
/// same order as the InfixCalculatorTok enumerators.
static const char OpPrecedence[] = {
    /* IC_OR       */ 0,
    /* IC_AND      */ 1,
    /* IC_LSHIFT   */ 2,
    /* IC_RSHIFT   */ 2,
    /* IC_PLUS     */ 3,
    /* IC_MINUS    */ 3,
    /* IC_MULTIPLY */ 4,
    /* IC_DIVIDE   */ 4,
    /* IC_RPAREN   */ 5,
    /* IC_LPAREN   */ 6,
    /* IC_IMM      */ 0,
    /* IC_REGISTER */ 0
};
57
58 class X86AsmParser : public MCTargetAsmParser {
59 MCSubtargetInfo &STI;
60 const MCInstrInfo &MII;
61 ParseInstructionInfo *InstInfo;
62 std::unique_ptr<X86AsmInstrumentation> Instrumentation;
63 private:
consumeToken()64 SMLoc consumeToken() {
65 MCAsmParser &Parser = getParser();
66 SMLoc Result = Parser.getTok().getLoc();
67 Parser.Lex();
68 return Result;
69 }
70
/// Token kinds handled by InfixCalculator. The enumerator values are used as
/// indices into OpPrecedence, so the order here must match that table.
enum InfixCalculatorTok {
  IC_OR = 0,   // '|'
  IC_AND,      // '&'
  IC_LSHIFT,   // '<<'
  IC_RSHIFT,   // '>>'
  IC_PLUS,     // '+'
  IC_MINUS,    // '-'
  IC_MULTIPLY, // '*'
  IC_DIVIDE,   // '/'
  IC_RPAREN,   // closing parenthesis
  IC_LPAREN,   // opening parenthesis
  IC_IMM,      // operand: immediate value
  IC_REGISTER  // operand: register placeholder
};
85
86 class InfixCalculator {
87 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
88 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
89 SmallVector<ICToken, 4> PostfixStack;
90
91 public:
popOperand()92 int64_t popOperand() {
93 assert (!PostfixStack.empty() && "Poped an empty stack!");
94 ICToken Op = PostfixStack.pop_back_val();
95 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
96 && "Expected and immediate or register!");
97 return Op.second;
98 }
pushOperand(InfixCalculatorTok Op,int64_t Val=0)99 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
100 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
101 "Unexpected operand!");
102 PostfixStack.push_back(std::make_pair(Op, Val));
103 }
104
popOperator()105 void popOperator() { InfixOperatorStack.pop_back(); }
pushOperator(InfixCalculatorTok Op)106 void pushOperator(InfixCalculatorTok Op) {
107 // Push the new operator if the stack is empty.
108 if (InfixOperatorStack.empty()) {
109 InfixOperatorStack.push_back(Op);
110 return;
111 }
112
113 // Push the new operator if it has a higher precedence than the operator
114 // on the top of the stack or the operator on the top of the stack is a
115 // left parentheses.
116 unsigned Idx = InfixOperatorStack.size() - 1;
117 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
118 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
119 InfixOperatorStack.push_back(Op);
120 return;
121 }
122
123 // The operator on the top of the stack has higher precedence than the
124 // new operator.
125 unsigned ParenCount = 0;
126 while (1) {
127 // Nothing to process.
128 if (InfixOperatorStack.empty())
129 break;
130
131 Idx = InfixOperatorStack.size() - 1;
132 StackOp = InfixOperatorStack[Idx];
133 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
134 break;
135
136 // If we have an even parentheses count and we see a left parentheses,
137 // then stop processing.
138 if (!ParenCount && StackOp == IC_LPAREN)
139 break;
140
141 if (StackOp == IC_RPAREN) {
142 ++ParenCount;
143 InfixOperatorStack.pop_back();
144 } else if (StackOp == IC_LPAREN) {
145 --ParenCount;
146 InfixOperatorStack.pop_back();
147 } else {
148 InfixOperatorStack.pop_back();
149 PostfixStack.push_back(std::make_pair(StackOp, 0));
150 }
151 }
152 // Push the new operator.
153 InfixOperatorStack.push_back(Op);
154 }
execute()155 int64_t execute() {
156 // Push any remaining operators onto the postfix stack.
157 while (!InfixOperatorStack.empty()) {
158 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
159 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
160 PostfixStack.push_back(std::make_pair(StackOp, 0));
161 }
162
163 if (PostfixStack.empty())
164 return 0;
165
166 SmallVector<ICToken, 16> OperandStack;
167 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
168 ICToken Op = PostfixStack[i];
169 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
170 OperandStack.push_back(Op);
171 } else {
172 assert (OperandStack.size() > 1 && "Too few operands.");
173 int64_t Val;
174 ICToken Op2 = OperandStack.pop_back_val();
175 ICToken Op1 = OperandStack.pop_back_val();
176 switch (Op.first) {
177 default:
178 report_fatal_error("Unexpected operator!");
179 break;
180 case IC_PLUS:
181 Val = Op1.second + Op2.second;
182 OperandStack.push_back(std::make_pair(IC_IMM, Val));
183 break;
184 case IC_MINUS:
185 Val = Op1.second - Op2.second;
186 OperandStack.push_back(std::make_pair(IC_IMM, Val));
187 break;
188 case IC_MULTIPLY:
189 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
190 "Multiply operation with an immediate and a register!");
191 Val = Op1.second * Op2.second;
192 OperandStack.push_back(std::make_pair(IC_IMM, Val));
193 break;
194 case IC_DIVIDE:
195 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
196 "Divide operation with an immediate and a register!");
197 assert (Op2.second != 0 && "Division by zero!");
198 Val = Op1.second / Op2.second;
199 OperandStack.push_back(std::make_pair(IC_IMM, Val));
200 break;
201 case IC_OR:
202 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
203 "Or operation with an immediate and a register!");
204 Val = Op1.second | Op2.second;
205 OperandStack.push_back(std::make_pair(IC_IMM, Val));
206 break;
207 case IC_AND:
208 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
209 "And operation with an immediate and a register!");
210 Val = Op1.second & Op2.second;
211 OperandStack.push_back(std::make_pair(IC_IMM, Val));
212 break;
213 case IC_LSHIFT:
214 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
215 "Left shift operation with an immediate and a register!");
216 Val = Op1.second << Op2.second;
217 OperandStack.push_back(std::make_pair(IC_IMM, Val));
218 break;
219 case IC_RSHIFT:
220 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
221 "Right shift operation with an immediate and a register!");
222 Val = Op1.second >> Op2.second;
223 OperandStack.push_back(std::make_pair(IC_IMM, Val));
224 break;
225 }
226 }
227 }
228 assert (OperandStack.size() == 1 && "Expected a single result.");
229 return OperandStack.pop_back_val().second;
230 }
231 };
232
/// States of IntelExprStateMachine. Each state names the kind of token that
/// was consumed last; IES_ERROR marks an illegal token sequence.
enum IntelExprState {
  // Binary / unary operators.
  IES_OR,
  IES_AND,
  IES_LSHIFT,
  IES_RSHIFT,
  IES_PLUS,
  IES_MINUS,
  IES_NOT,
  IES_MULTIPLY,
  IES_DIVIDE,
  // Brackets and parentheses.
  IES_LBRAC,
  IES_RBRAC,
  IES_LPAREN,
  IES_RPAREN,
  // Operands.
  IES_REGISTER,
  IES_INTEGER,
  IES_IDENTIFIER,
  // Illegal token sequence.
  IES_ERROR
};
252
253 class IntelExprStateMachine {
254 IntelExprState State, PrevState;
255 unsigned BaseReg, IndexReg, TmpReg, Scale;
256 int64_t Imm;
257 const MCExpr *Sym;
258 StringRef SymName;
259 bool StopOnLBrac, AddImmPrefix;
260 InfixCalculator IC;
261 InlineAsmIdentifierInfo Info;
262 public:
IntelExprStateMachine(int64_t imm,bool stoponlbrac,bool addimmprefix)263 IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
264 State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
265 Scale(1), Imm(imm), Sym(nullptr), StopOnLBrac(stoponlbrac),
266 AddImmPrefix(addimmprefix) { Info.clear(); }
267
getBaseReg()268 unsigned getBaseReg() { return BaseReg; }
getIndexReg()269 unsigned getIndexReg() { return IndexReg; }
getScale()270 unsigned getScale() { return Scale; }
getSym()271 const MCExpr *getSym() { return Sym; }
getSymName()272 StringRef getSymName() { return SymName; }
getImm()273 int64_t getImm() { return Imm + IC.execute(); }
isValidEndState()274 bool isValidEndState() {
275 return State == IES_RBRAC || State == IES_INTEGER;
276 }
getStopOnLBrac()277 bool getStopOnLBrac() { return StopOnLBrac; }
getAddImmPrefix()278 bool getAddImmPrefix() { return AddImmPrefix; }
hadError()279 bool hadError() { return State == IES_ERROR; }
280
getIdentifierInfo()281 InlineAsmIdentifierInfo &getIdentifierInfo() {
282 return Info;
283 }
284
onOr()285 void onOr() {
286 IntelExprState CurrState = State;
287 switch (State) {
288 default:
289 State = IES_ERROR;
290 break;
291 case IES_INTEGER:
292 case IES_RPAREN:
293 case IES_REGISTER:
294 State = IES_OR;
295 IC.pushOperator(IC_OR);
296 break;
297 }
298 PrevState = CurrState;
299 }
onAnd()300 void onAnd() {
301 IntelExprState CurrState = State;
302 switch (State) {
303 default:
304 State = IES_ERROR;
305 break;
306 case IES_INTEGER:
307 case IES_RPAREN:
308 case IES_REGISTER:
309 State = IES_AND;
310 IC.pushOperator(IC_AND);
311 break;
312 }
313 PrevState = CurrState;
314 }
onLShift()315 void onLShift() {
316 IntelExprState CurrState = State;
317 switch (State) {
318 default:
319 State = IES_ERROR;
320 break;
321 case IES_INTEGER:
322 case IES_RPAREN:
323 case IES_REGISTER:
324 State = IES_LSHIFT;
325 IC.pushOperator(IC_LSHIFT);
326 break;
327 }
328 PrevState = CurrState;
329 }
onRShift()330 void onRShift() {
331 IntelExprState CurrState = State;
332 switch (State) {
333 default:
334 State = IES_ERROR;
335 break;
336 case IES_INTEGER:
337 case IES_RPAREN:
338 case IES_REGISTER:
339 State = IES_RSHIFT;
340 IC.pushOperator(IC_RSHIFT);
341 break;
342 }
343 PrevState = CurrState;
344 }
onPlus()345 void onPlus() {
346 IntelExprState CurrState = State;
347 switch (State) {
348 default:
349 State = IES_ERROR;
350 break;
351 case IES_INTEGER:
352 case IES_RPAREN:
353 case IES_REGISTER:
354 State = IES_PLUS;
355 IC.pushOperator(IC_PLUS);
356 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
357 // If we already have a BaseReg, then assume this is the IndexReg with
358 // a scale of 1.
359 if (!BaseReg) {
360 BaseReg = TmpReg;
361 } else {
362 assert (!IndexReg && "BaseReg/IndexReg already set!");
363 IndexReg = TmpReg;
364 Scale = 1;
365 }
366 }
367 break;
368 }
369 PrevState = CurrState;
370 }
onMinus()371 void onMinus() {
372 IntelExprState CurrState = State;
373 switch (State) {
374 default:
375 State = IES_ERROR;
376 break;
377 case IES_PLUS:
378 case IES_NOT:
379 case IES_MULTIPLY:
380 case IES_DIVIDE:
381 case IES_LPAREN:
382 case IES_RPAREN:
383 case IES_LBRAC:
384 case IES_RBRAC:
385 case IES_INTEGER:
386 case IES_REGISTER:
387 State = IES_MINUS;
388 // Only push the minus operator if it is not a unary operator.
389 if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
390 CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
391 CurrState == IES_LPAREN || CurrState == IES_LBRAC))
392 IC.pushOperator(IC_MINUS);
393 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
394 // If we already have a BaseReg, then assume this is the IndexReg with
395 // a scale of 1.
396 if (!BaseReg) {
397 BaseReg = TmpReg;
398 } else {
399 assert (!IndexReg && "BaseReg/IndexReg already set!");
400 IndexReg = TmpReg;
401 Scale = 1;
402 }
403 }
404 break;
405 }
406 PrevState = CurrState;
407 }
onNot()408 void onNot() {
409 IntelExprState CurrState = State;
410 switch (State) {
411 default:
412 State = IES_ERROR;
413 break;
414 case IES_PLUS:
415 case IES_NOT:
416 State = IES_NOT;
417 break;
418 }
419 PrevState = CurrState;
420 }
onRegister(unsigned Reg)421 void onRegister(unsigned Reg) {
422 IntelExprState CurrState = State;
423 switch (State) {
424 default:
425 State = IES_ERROR;
426 break;
427 case IES_PLUS:
428 case IES_LPAREN:
429 State = IES_REGISTER;
430 TmpReg = Reg;
431 IC.pushOperand(IC_REGISTER);
432 break;
433 case IES_MULTIPLY:
434 // Index Register - Scale * Register
435 if (PrevState == IES_INTEGER) {
436 assert (!IndexReg && "IndexReg already set!");
437 State = IES_REGISTER;
438 IndexReg = Reg;
439 // Get the scale and replace the 'Scale * Register' with '0'.
440 Scale = IC.popOperand();
441 IC.pushOperand(IC_IMM);
442 IC.popOperator();
443 } else {
444 State = IES_ERROR;
445 }
446 break;
447 }
448 PrevState = CurrState;
449 }
onIdentifierExpr(const MCExpr * SymRef,StringRef SymRefName)450 void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
451 PrevState = State;
452 switch (State) {
453 default:
454 State = IES_ERROR;
455 break;
456 case IES_PLUS:
457 case IES_MINUS:
458 case IES_NOT:
459 State = IES_INTEGER;
460 Sym = SymRef;
461 SymName = SymRefName;
462 IC.pushOperand(IC_IMM);
463 break;
464 }
465 }
onInteger(int64_t TmpInt,StringRef & ErrMsg)466 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
467 IntelExprState CurrState = State;
468 switch (State) {
469 default:
470 State = IES_ERROR;
471 break;
472 case IES_PLUS:
473 case IES_MINUS:
474 case IES_NOT:
475 case IES_OR:
476 case IES_AND:
477 case IES_LSHIFT:
478 case IES_RSHIFT:
479 case IES_DIVIDE:
480 case IES_MULTIPLY:
481 case IES_LPAREN:
482 State = IES_INTEGER;
483 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
484 // Index Register - Register * Scale
485 assert (!IndexReg && "IndexReg already set!");
486 IndexReg = TmpReg;
487 Scale = TmpInt;
488 if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
489 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
490 return true;
491 }
492 // Get the scale and replace the 'Register * Scale' with '0'.
493 IC.popOperator();
494 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
495 PrevState == IES_OR || PrevState == IES_AND ||
496 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
497 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
498 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
499 PrevState == IES_NOT) &&
500 CurrState == IES_MINUS) {
501 // Unary minus. No need to pop the minus operand because it was never
502 // pushed.
503 IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
504 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
505 PrevState == IES_OR || PrevState == IES_AND ||
506 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
507 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
508 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
509 PrevState == IES_NOT) &&
510 CurrState == IES_NOT) {
511 // Unary not. No need to pop the not operand because it was never
512 // pushed.
513 IC.pushOperand(IC_IMM, ~TmpInt); // Push ~Imm.
514 } else {
515 IC.pushOperand(IC_IMM, TmpInt);
516 }
517 break;
518 }
519 PrevState = CurrState;
520 return false;
521 }
onStar()522 void onStar() {
523 PrevState = State;
524 switch (State) {
525 default:
526 State = IES_ERROR;
527 break;
528 case IES_INTEGER:
529 case IES_REGISTER:
530 case IES_RPAREN:
531 State = IES_MULTIPLY;
532 IC.pushOperator(IC_MULTIPLY);
533 break;
534 }
535 }
onDivide()536 void onDivide() {
537 PrevState = State;
538 switch (State) {
539 default:
540 State = IES_ERROR;
541 break;
542 case IES_INTEGER:
543 case IES_RPAREN:
544 State = IES_DIVIDE;
545 IC.pushOperator(IC_DIVIDE);
546 break;
547 }
548 }
onLBrac()549 void onLBrac() {
550 PrevState = State;
551 switch (State) {
552 default:
553 State = IES_ERROR;
554 break;
555 case IES_RBRAC:
556 State = IES_PLUS;
557 IC.pushOperator(IC_PLUS);
558 break;
559 }
560 }
onRBrac()561 void onRBrac() {
562 IntelExprState CurrState = State;
563 switch (State) {
564 default:
565 State = IES_ERROR;
566 break;
567 case IES_INTEGER:
568 case IES_REGISTER:
569 case IES_RPAREN:
570 State = IES_RBRAC;
571 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
572 // If we already have a BaseReg, then assume this is the IndexReg with
573 // a scale of 1.
574 if (!BaseReg) {
575 BaseReg = TmpReg;
576 } else {
577 assert (!IndexReg && "BaseReg/IndexReg already set!");
578 IndexReg = TmpReg;
579 Scale = 1;
580 }
581 }
582 break;
583 }
584 PrevState = CurrState;
585 }
onLParen()586 void onLParen() {
587 IntelExprState CurrState = State;
588 switch (State) {
589 default:
590 State = IES_ERROR;
591 break;
592 case IES_PLUS:
593 case IES_MINUS:
594 case IES_NOT:
595 case IES_OR:
596 case IES_AND:
597 case IES_LSHIFT:
598 case IES_RSHIFT:
599 case IES_MULTIPLY:
600 case IES_DIVIDE:
601 case IES_LPAREN:
602 // FIXME: We don't handle this type of unary minus or not, yet.
603 if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
604 PrevState == IES_OR || PrevState == IES_AND ||
605 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
606 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
607 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
608 PrevState == IES_NOT) &&
609 (CurrState == IES_MINUS || CurrState == IES_NOT)) {
610 State = IES_ERROR;
611 break;
612 }
613 State = IES_LPAREN;
614 IC.pushOperator(IC_LPAREN);
615 break;
616 }
617 PrevState = CurrState;
618 }
onRParen()619 void onRParen() {
620 PrevState = State;
621 switch (State) {
622 default:
623 State = IES_ERROR;
624 break;
625 case IES_INTEGER:
626 case IES_REGISTER:
627 case IES_RPAREN:
628 State = IES_RPAREN;
629 IC.pushOperator(IC_RPAREN);
630 break;
631 }
632 }
633 };
634
Error(SMLoc L,const Twine & Msg,ArrayRef<SMRange> Ranges=None,bool MatchingInlineAsm=false)635 bool Error(SMLoc L, const Twine &Msg,
636 ArrayRef<SMRange> Ranges = None,
637 bool MatchingInlineAsm = false) {
638 MCAsmParser &Parser = getParser();
639 if (MatchingInlineAsm) return true;
640 return Parser.Error(L, Msg, Ranges);
641 }
642
ErrorAndEatStatement(SMLoc L,const Twine & Msg,ArrayRef<SMRange> Ranges=None,bool MatchingInlineAsm=false)643 bool ErrorAndEatStatement(SMLoc L, const Twine &Msg,
644 ArrayRef<SMRange> Ranges = None,
645 bool MatchingInlineAsm = false) {
646 MCAsmParser &Parser = getParser();
647 Parser.eatToEndOfStatement();
648 return Error(L, Msg, Ranges, MatchingInlineAsm);
649 }
650
ErrorOperand(SMLoc Loc,StringRef Msg)651 std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) {
652 Error(Loc, Msg);
653 return nullptr;
654 }
655
656 std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
657 std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
658 std::unique_ptr<X86Operand> ParseOperand();
659 std::unique_ptr<X86Operand> ParseATTOperand();
660 std::unique_ptr<X86Operand> ParseIntelOperand();
661 std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
662 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
663 std::unique_ptr<X86Operand> ParseIntelOperator(unsigned OpKind);
664 std::unique_ptr<X86Operand>
665 ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
666 std::unique_ptr<X86Operand>
667 ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc, unsigned Size);
668 std::unique_ptr<X86Operand> ParseRoundingModeOp(SMLoc Start, SMLoc End);
669 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
670 std::unique_ptr<X86Operand> ParseIntelBracExpression(unsigned SegReg,
671 SMLoc Start,
672 int64_t ImmDisp,
673 unsigned Size);
674 bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
675 InlineAsmIdentifierInfo &Info,
676 bool IsUnevaluatedOperand, SMLoc &End);
677
678 std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
679
680 std::unique_ptr<X86Operand>
681 CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
682 unsigned IndexReg, unsigned Scale, SMLoc Start,
683 SMLoc End, unsigned Size, StringRef Identifier,
684 InlineAsmIdentifierInfo &Info);
685
686 bool ParseDirectiveWord(unsigned Size, SMLoc L);
687 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
688
689 bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
690 bool processInstruction(MCInst &Inst, const OperandVector &Ops);
691
692 /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
693 /// instrumentation around Inst.
694 void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
695
696 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
697 OperandVector &Operands, MCStreamer &Out,
698 uint64_t &ErrorInfo,
699 bool MatchingInlineAsm) override;
700
701 void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
702 MCStreamer &Out, bool MatchingInlineAsm);
703
704 bool ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
705 bool MatchingInlineAsm);
706
707 bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
708 OperandVector &Operands, MCStreamer &Out,
709 uint64_t &ErrorInfo,
710 bool MatchingInlineAsm);
711
712 bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
713 OperandVector &Operands, MCStreamer &Out,
714 uint64_t &ErrorInfo,
715 bool MatchingInlineAsm);
716
717 bool OmitRegisterFromClobberLists(unsigned RegNo) override;
718
719 /// doSrcDstMatch - Returns true if operands are matching in their
720 /// word size (%si and %di, %esi and %edi, etc.). Order depends on
721 /// the parsing mode (Intel vs. AT&T).
722 bool doSrcDstMatch(X86Operand &Op1, X86Operand &Op2);
723
724 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
725 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
726 /// \return \c true if no parsing errors occurred, \c false otherwise.
727 bool HandleAVX512Operand(OperandVector &Operands,
728 const MCParsedAsmOperand &Op);
729
is64BitMode() const730 bool is64BitMode() const {
731 // FIXME: Can tablegen auto-generate this?
732 return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
733 }
is32BitMode() const734 bool is32BitMode() const {
735 // FIXME: Can tablegen auto-generate this?
736 return (STI.getFeatureBits() & X86::Mode32Bit) != 0;
737 }
is16BitMode() const738 bool is16BitMode() const {
739 // FIXME: Can tablegen auto-generate this?
740 return (STI.getFeatureBits() & X86::Mode16Bit) != 0;
741 }
SwitchMode(uint64_t mode)742 void SwitchMode(uint64_t mode) {
743 uint64_t oldMode = STI.getFeatureBits() &
744 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit);
745 unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(oldMode | mode));
746 setAvailableFeatures(FB);
747 assert(mode == (STI.getFeatureBits() &
748 (X86::Mode64Bit | X86::Mode32Bit | X86::Mode16Bit)));
749 }
750
getPointerWidth()751 unsigned getPointerWidth() {
752 if (is16BitMode()) return 16;
753 if (is32BitMode()) return 32;
754 if (is64BitMode()) return 64;
755 llvm_unreachable("invalid mode");
756 }
757
isParsingIntelSyntax()758 bool isParsingIntelSyntax() {
759 return getParser().getAssemblerDialect();
760 }
761
762 /// @name Auto-generated Matcher Functions
763 /// {
764
765 #define GET_ASSEMBLER_HEADER
766 #include "X86GenAsmMatcher.inc"
767
768 /// }
769
770 public:
/// Bind the parser to its subtarget and instruction info, compute the set of
/// available features from the subtarget's feature bits, and create the
/// instrumentation helper from \p Options.
X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &Parser,
             const MCInstrInfo &mii, const MCTargetOptions &Options)
    : MCTargetAsmParser(), STI(sti), MII(mii), InstInfo(nullptr) {
  // Initialize the set of available features.
  const uint64_t Features = STI.getFeatureBits();
  setAvailableFeatures(ComputeAvailableFeatures(Features));

  MCContext &Ctx = Parser.getContext();
  Instrumentation.reset(CreateX86AsmInstrumentation(Options, Ctx, STI));
}
780
781 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
782
783 void SetFrameRegister(unsigned RegNo) override;
784
785 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
786 SMLoc NameLoc, OperandVector &Operands) override;
787
788 bool ParseDirective(AsmToken DirectiveID) override;
789 };
790 } // end anonymous namespace
791
792 /// @name Auto-generated Match Functions
793 /// {
794
795 static unsigned MatchRegisterName(StringRef Name);
796
797 /// }
798
CheckBaseRegAndIndexReg(unsigned BaseReg,unsigned IndexReg,StringRef & ErrMsg)799 static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg,
800 StringRef &ErrMsg) {
801 // If we have both a base register and an index register make sure they are
802 // both 64-bit or 32-bit registers.
803 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
804 if (BaseReg != 0 && IndexReg != 0) {
805 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
806 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
807 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
808 IndexReg != X86::RIZ) {
809 ErrMsg = "base register is 64-bit, but index register is not";
810 return true;
811 }
812 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
813 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
814 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
815 IndexReg != X86::EIZ){
816 ErrMsg = "base register is 32-bit, but index register is not";
817 return true;
818 }
819 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
820 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
821 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
822 ErrMsg = "base register is 16-bit, but index register is not";
823 return true;
824 }
825 if (((BaseReg == X86::BX || BaseReg == X86::BP) &&
826 IndexReg != X86::SI && IndexReg != X86::DI) ||
827 ((BaseReg == X86::SI || BaseReg == X86::DI) &&
828 IndexReg != X86::BX && IndexReg != X86::BP)) {
829 ErrMsg = "invalid 16-bit base/index register combination";
830 return true;
831 }
832 }
833 }
834 return false;
835 }
836
doSrcDstMatch(X86Operand & Op1,X86Operand & Op2)837 bool X86AsmParser::doSrcDstMatch(X86Operand &Op1, X86Operand &Op2)
838 {
839 // Return true and let a normal complaint about bogus operands happen.
840 if (!Op1.isMem() || !Op2.isMem())
841 return true;
842
843 // Actually these might be the other way round if Intel syntax is
844 // being used. It doesn't matter.
845 unsigned diReg = Op1.Mem.BaseReg;
846 unsigned siReg = Op2.Mem.BaseReg;
847
848 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(siReg))
849 return X86MCRegisterClasses[X86::GR16RegClassID].contains(diReg);
850 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(siReg))
851 return X86MCRegisterClasses[X86::GR32RegClassID].contains(diReg);
852 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(siReg))
853 return X86MCRegisterClasses[X86::GR64RegClassID].contains(diReg);
854 // Again, return true and let another error happen.
855 return true;
856 }
857
ParseRegister(unsigned & RegNo,SMLoc & StartLoc,SMLoc & EndLoc)858 bool X86AsmParser::ParseRegister(unsigned &RegNo,
859 SMLoc &StartLoc, SMLoc &EndLoc) {
860 MCAsmParser &Parser = getParser();
861 RegNo = 0;
862 const AsmToken &PercentTok = Parser.getTok();
863 StartLoc = PercentTok.getLoc();
864
865 // If we encounter a %, ignore it. This code handles registers with and
866 // without the prefix, unprefixed registers can occur in cfi directives.
867 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
868 Parser.Lex(); // Eat percent token.
869
870 const AsmToken &Tok = Parser.getTok();
871 EndLoc = Tok.getEndLoc();
872
873 if (Tok.isNot(AsmToken::Identifier)) {
874 if (isParsingIntelSyntax()) return true;
875 return Error(StartLoc, "invalid register name",
876 SMRange(StartLoc, EndLoc));
877 }
878
879 RegNo = MatchRegisterName(Tok.getString());
880
881 // If the match failed, try the register name as lowercase.
882 if (RegNo == 0)
883 RegNo = MatchRegisterName(Tok.getString().lower());
884
885 if (!is64BitMode()) {
886 // FIXME: This should be done using Requires<Not64BitMode> and
887 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
888 // checked.
889 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
890 // REX prefix.
891 if (RegNo == X86::RIZ ||
892 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
893 X86II::isX86_64NonExtLowByteReg(RegNo) ||
894 X86II::isX86_64ExtendedReg(RegNo))
895 return Error(StartLoc, "register %"
896 + Tok.getString() + " is only available in 64-bit mode",
897 SMRange(StartLoc, EndLoc));
898 }
899
900 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
901 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
902 RegNo = X86::ST0;
903 Parser.Lex(); // Eat 'st'
904
905 // Check to see if we have '(4)' after %st.
906 if (getLexer().isNot(AsmToken::LParen))
907 return false;
908 // Lex the paren.
909 getParser().Lex();
910
911 const AsmToken &IntTok = Parser.getTok();
912 if (IntTok.isNot(AsmToken::Integer))
913 return Error(IntTok.getLoc(), "expected stack index");
914 switch (IntTok.getIntVal()) {
915 case 0: RegNo = X86::ST0; break;
916 case 1: RegNo = X86::ST1; break;
917 case 2: RegNo = X86::ST2; break;
918 case 3: RegNo = X86::ST3; break;
919 case 4: RegNo = X86::ST4; break;
920 case 5: RegNo = X86::ST5; break;
921 case 6: RegNo = X86::ST6; break;
922 case 7: RegNo = X86::ST7; break;
923 default: return Error(IntTok.getLoc(), "invalid stack index");
924 }
925
926 if (getParser().Lex().isNot(AsmToken::RParen))
927 return Error(Parser.getTok().getLoc(), "expected ')'");
928
929 EndLoc = Parser.getTok().getEndLoc();
930 Parser.Lex(); // Eat ')'
931 return false;
932 }
933
934 EndLoc = Parser.getTok().getEndLoc();
935
936 // If this is "db[0-7]", match it as an alias
937 // for dr[0-7].
938 if (RegNo == 0 && Tok.getString().size() == 3 &&
939 Tok.getString().startswith("db")) {
940 switch (Tok.getString()[2]) {
941 case '0': RegNo = X86::DR0; break;
942 case '1': RegNo = X86::DR1; break;
943 case '2': RegNo = X86::DR2; break;
944 case '3': RegNo = X86::DR3; break;
945 case '4': RegNo = X86::DR4; break;
946 case '5': RegNo = X86::DR5; break;
947 case '6': RegNo = X86::DR6; break;
948 case '7': RegNo = X86::DR7; break;
949 }
950
951 if (RegNo != 0) {
952 EndLoc = Parser.getTok().getEndLoc();
953 Parser.Lex(); // Eat it.
954 return false;
955 }
956 }
957
958 if (RegNo == 0) {
959 if (isParsingIntelSyntax()) return true;
960 return Error(StartLoc, "invalid register name",
961 SMRange(StartLoc, EndLoc));
962 }
963
964 Parser.Lex(); // Eat identifier token.
965 return false;
966 }
967
SetFrameRegister(unsigned RegNo)968 void X86AsmParser::SetFrameRegister(unsigned RegNo) {
969 Instrumentation->SetInitialFrameRegister(RegNo);
970 }
971
DefaultMemSIOperand(SMLoc Loc)972 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
973 unsigned basereg =
974 is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI);
975 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
976 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
977 /*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1,
978 Loc, Loc, 0);
979 }
980
DefaultMemDIOperand(SMLoc Loc)981 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
982 unsigned basereg =
983 is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI);
984 const MCExpr *Disp = MCConstantExpr::Create(0, getContext());
985 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
986 /*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1,
987 Loc, Loc, 0);
988 }
989
ParseOperand()990 std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
991 if (isParsingIntelSyntax())
992 return ParseIntelOperand();
993 return ParseATTOperand();
994 }
995
996 /// getIntelMemOperandSize - Return intel memory operand size.
getIntelMemOperandSize(StringRef OpStr)997 static unsigned getIntelMemOperandSize(StringRef OpStr) {
998 unsigned Size = StringSwitch<unsigned>(OpStr)
999 .Cases("BYTE", "byte", 8)
1000 .Cases("WORD", "word", 16)
1001 .Cases("DWORD", "dword", 32)
1002 .Cases("QWORD", "qword", 64)
1003 .Cases("XWORD", "xword", 80)
1004 .Cases("XMMWORD", "xmmword", 128)
1005 .Cases("YMMWORD", "ymmword", 256)
1006 .Cases("ZMMWORD", "zmmword", 512)
1007 .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter
1008 .Default(0);
1009 return Size;
1010 }
1011
CreateMemForInlineAsm(unsigned SegReg,const MCExpr * Disp,unsigned BaseReg,unsigned IndexReg,unsigned Scale,SMLoc Start,SMLoc End,unsigned Size,StringRef Identifier,InlineAsmIdentifierInfo & Info)1012 std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
1013 unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
1014 unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
1015 InlineAsmIdentifierInfo &Info) {
1016 // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
1017 // some other label reference.
1018 if (isa<MCSymbolRefExpr>(Disp) && Info.OpDecl && !Info.IsVarDecl) {
1019 // Insert an explicit size if the user didn't have one.
1020 if (!Size) {
1021 Size = getPointerWidth();
1022 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1023 /*Len=*/0, Size));
1024 }
1025
1026 // Create an absolute memory reference in order to match against
1027 // instructions taking a PC relative operand.
1028 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size,
1029 Identifier, Info.OpDecl);
1030 }
1031
1032 // We either have a direct symbol reference, or an offset from a symbol. The
1033 // parser always puts the symbol on the LHS, so look there for size
1034 // calculation purposes.
1035 const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
1036 bool IsSymRef =
1037 isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
1038 if (IsSymRef) {
1039 if (!Size) {
1040 Size = Info.Type * 8; // Size is in terms of bits in this context.
1041 if (Size)
1042 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_SizeDirective, Start,
1043 /*Len=*/0, Size));
1044 }
1045 }
1046
1047 // When parsing inline assembly we set the base register to a non-zero value
1048 // if we don't know the actual value at this time. This is necessary to
1049 // get the matching correct in some cases.
1050 BaseReg = BaseReg ? BaseReg : 1;
1051 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1052 IndexReg, Scale, Start, End, Size, Identifier,
1053 Info.OpDecl);
1054 }
1055
1056 static void
RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> * AsmRewrites,StringRef SymName,int64_t ImmDisp,int64_t FinalImmDisp,SMLoc & BracLoc,SMLoc & StartInBrac,SMLoc & End)1057 RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> *AsmRewrites,
1058 StringRef SymName, int64_t ImmDisp,
1059 int64_t FinalImmDisp, SMLoc &BracLoc,
1060 SMLoc &StartInBrac, SMLoc &End) {
1061 // Remove the '[' and ']' from the IR string.
1062 AsmRewrites->push_back(AsmRewrite(AOK_Skip, BracLoc, 1));
1063 AsmRewrites->push_back(AsmRewrite(AOK_Skip, End, 1));
1064
1065 // If ImmDisp is non-zero, then we parsed a displacement before the
1066 // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
1067 // If ImmDisp doesn't match the displacement computed by the state machine
1068 // then we have an additional displacement in the bracketed expression.
1069 if (ImmDisp != FinalImmDisp) {
1070 if (ImmDisp) {
1071 // We have an immediate displacement before the bracketed expression.
1072 // Adjust this to match the final immediate displacement.
1073 bool Found = false;
1074 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1075 E = AsmRewrites->end(); I != E; ++I) {
1076 if ((*I).Loc.getPointer() > BracLoc.getPointer())
1077 continue;
1078 if ((*I).Kind == AOK_ImmPrefix || (*I).Kind == AOK_Imm) {
1079 assert (!Found && "ImmDisp already rewritten.");
1080 (*I).Kind = AOK_Imm;
1081 (*I).Len = BracLoc.getPointer() - (*I).Loc.getPointer();
1082 (*I).Val = FinalImmDisp;
1083 Found = true;
1084 break;
1085 }
1086 }
1087 assert (Found && "Unable to rewrite ImmDisp.");
1088 (void)Found;
1089 } else {
1090 // We have a symbolic and an immediate displacement, but no displacement
1091 // before the bracketed expression. Put the immediate displacement
1092 // before the bracketed expression.
1093 AsmRewrites->push_back(AsmRewrite(AOK_Imm, BracLoc, 0, FinalImmDisp));
1094 }
1095 }
1096 // Remove all the ImmPrefix rewrites within the brackets.
1097 for (SmallVectorImpl<AsmRewrite>::iterator I = AsmRewrites->begin(),
1098 E = AsmRewrites->end(); I != E; ++I) {
1099 if ((*I).Loc.getPointer() < StartInBrac.getPointer())
1100 continue;
1101 if ((*I).Kind == AOK_ImmPrefix)
1102 (*I).Kind = AOK_Delete;
1103 }
1104 const char *SymLocPtr = SymName.data();
1105 // Skip everything before the symbol.
1106 if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
1107 assert(Len > 0 && "Expected a non-negative length.");
1108 AsmRewrites->push_back(AsmRewrite(AOK_Skip, StartInBrac, Len));
1109 }
1110 // Skip everything after the symbol.
1111 if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
1112 SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
1113 assert(Len > 0 && "Expected a non-negative length.");
1114 AsmRewrites->push_back(AsmRewrite(AOK_Skip, Loc, Len));
1115 }
1116 }
1117
ParseIntelExpression(IntelExprStateMachine & SM,SMLoc & End)1118 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1119 MCAsmParser &Parser = getParser();
1120 const AsmToken &Tok = Parser.getTok();
1121
1122 bool Done = false;
1123 while (!Done) {
1124 bool UpdateLocLex = true;
1125
1126 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1127 // identifier. Don't try an parse it as a register.
1128 if (Tok.getString().startswith("."))
1129 break;
1130
1131 // If we're parsing an immediate expression, we don't expect a '['.
1132 if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
1133 break;
1134
1135 AsmToken::TokenKind TK = getLexer().getKind();
1136 switch (TK) {
1137 default: {
1138 if (SM.isValidEndState()) {
1139 Done = true;
1140 break;
1141 }
1142 return Error(Tok.getLoc(), "unknown token in expression");
1143 }
1144 case AsmToken::EndOfStatement: {
1145 Done = true;
1146 break;
1147 }
1148 case AsmToken::String:
1149 case AsmToken::Identifier: {
1150 // This could be a register or a symbolic displacement.
1151 unsigned TmpReg;
1152 const MCExpr *Val;
1153 SMLoc IdentLoc = Tok.getLoc();
1154 StringRef Identifier = Tok.getString();
1155 if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) {
1156 SM.onRegister(TmpReg);
1157 UpdateLocLex = false;
1158 break;
1159 } else {
1160 if (!isParsingInlineAsm()) {
1161 if (getParser().parsePrimaryExpr(Val, End))
1162 return Error(Tok.getLoc(), "Unexpected identifier!");
1163 } else {
1164 // This is a dot operator, not an adjacent identifier.
1165 if (Identifier.find('.') != StringRef::npos) {
1166 return false;
1167 } else {
1168 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1169 if (ParseIntelIdentifier(Val, Identifier, Info,
1170 /*Unevaluated=*/false, End))
1171 return true;
1172 }
1173 }
1174 SM.onIdentifierExpr(Val, Identifier);
1175 UpdateLocLex = false;
1176 break;
1177 }
1178 return Error(Tok.getLoc(), "Unexpected identifier!");
1179 }
1180 case AsmToken::Integer: {
1181 StringRef ErrMsg;
1182 if (isParsingInlineAsm() && SM.getAddImmPrefix())
1183 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix,
1184 Tok.getLoc()));
1185 // Look for 'b' or 'f' following an Integer as a directional label
1186 SMLoc Loc = getTok().getLoc();
1187 int64_t IntVal = getTok().getIntVal();
1188 End = consumeToken();
1189 UpdateLocLex = false;
1190 if (getLexer().getKind() == AsmToken::Identifier) {
1191 StringRef IDVal = getTok().getString();
1192 if (IDVal == "f" || IDVal == "b") {
1193 MCSymbol *Sym =
1194 getContext().GetDirectionalLocalSymbol(IntVal, IDVal == "b");
1195 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1196 const MCExpr *Val =
1197 MCSymbolRefExpr::Create(Sym, Variant, getContext());
1198 if (IDVal == "b" && Sym->isUndefined())
1199 return Error(Loc, "invalid reference to undefined symbol");
1200 StringRef Identifier = Sym->getName();
1201 SM.onIdentifierExpr(Val, Identifier);
1202 End = consumeToken();
1203 } else {
1204 if (SM.onInteger(IntVal, ErrMsg))
1205 return Error(Loc, ErrMsg);
1206 }
1207 } else {
1208 if (SM.onInteger(IntVal, ErrMsg))
1209 return Error(Loc, ErrMsg);
1210 }
1211 break;
1212 }
1213 case AsmToken::Plus: SM.onPlus(); break;
1214 case AsmToken::Minus: SM.onMinus(); break;
1215 case AsmToken::Tilde: SM.onNot(); break;
1216 case AsmToken::Star: SM.onStar(); break;
1217 case AsmToken::Slash: SM.onDivide(); break;
1218 case AsmToken::Pipe: SM.onOr(); break;
1219 case AsmToken::Amp: SM.onAnd(); break;
1220 case AsmToken::LessLess:
1221 SM.onLShift(); break;
1222 case AsmToken::GreaterGreater:
1223 SM.onRShift(); break;
1224 case AsmToken::LBrac: SM.onLBrac(); break;
1225 case AsmToken::RBrac: SM.onRBrac(); break;
1226 case AsmToken::LParen: SM.onLParen(); break;
1227 case AsmToken::RParen: SM.onRParen(); break;
1228 }
1229 if (SM.hadError())
1230 return Error(Tok.getLoc(), "unknown token in expression");
1231
1232 if (!Done && UpdateLocLex)
1233 End = consumeToken();
1234 }
1235 return false;
1236 }
1237
1238 std::unique_ptr<X86Operand>
ParseIntelBracExpression(unsigned SegReg,SMLoc Start,int64_t ImmDisp,unsigned Size)1239 X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
1240 int64_t ImmDisp, unsigned Size) {
1241 MCAsmParser &Parser = getParser();
1242 const AsmToken &Tok = Parser.getTok();
1243 SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
1244 if (getLexer().isNot(AsmToken::LBrac))
1245 return ErrorOperand(BracLoc, "Expected '[' token!");
1246 Parser.Lex(); // Eat '['
1247
1248 SMLoc StartInBrac = Tok.getLoc();
1249 // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ]. We
1250 // may have already parsed an immediate displacement before the bracketed
1251 // expression.
1252 IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
1253 if (ParseIntelExpression(SM, End))
1254 return nullptr;
1255
1256 const MCExpr *Disp = nullptr;
1257 if (const MCExpr *Sym = SM.getSym()) {
1258 // A symbolic displacement.
1259 Disp = Sym;
1260 if (isParsingInlineAsm())
1261 RewriteIntelBracExpression(InstInfo->AsmRewrites, SM.getSymName(),
1262 ImmDisp, SM.getImm(), BracLoc, StartInBrac,
1263 End);
1264 }
1265
1266 if (SM.getImm() || !Disp) {
1267 const MCExpr *Imm = MCConstantExpr::Create(SM.getImm(), getContext());
1268 if (Disp)
1269 Disp = MCBinaryExpr::CreateAdd(Disp, Imm, getContext());
1270 else
1271 Disp = Imm; // An immediate displacement only.
1272 }
1273
1274 // Parse struct field access. Intel requires a dot, but MSVC doesn't. MSVC
1275 // will in fact do global lookup the field name inside all global typedefs,
1276 // but we don't emulate that.
1277 if (Tok.getString().find('.') != StringRef::npos) {
1278 const MCExpr *NewDisp;
1279 if (ParseIntelDotOperator(Disp, NewDisp))
1280 return nullptr;
1281
1282 End = Tok.getEndLoc();
1283 Parser.Lex(); // Eat the field.
1284 Disp = NewDisp;
1285 }
1286
1287 int BaseReg = SM.getBaseReg();
1288 int IndexReg = SM.getIndexReg();
1289 int Scale = SM.getScale();
1290 if (!isParsingInlineAsm()) {
1291 // handle [-42]
1292 if (!BaseReg && !IndexReg) {
1293 if (!SegReg)
1294 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size);
1295 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
1296 Start, End, Size);
1297 }
1298 StringRef ErrMsg;
1299 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1300 Error(StartInBrac, ErrMsg);
1301 return nullptr;
1302 }
1303 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1304 IndexReg, Scale, Start, End, Size);
1305 }
1306
1307 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1308 return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
1309 End, Size, SM.getSymName(), Info);
1310 }
1311
1312 // Inline assembly may use variable names with namespace alias qualifiers.
ParseIntelIdentifier(const MCExpr * & Val,StringRef & Identifier,InlineAsmIdentifierInfo & Info,bool IsUnevaluatedOperand,SMLoc & End)1313 bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
1314 StringRef &Identifier,
1315 InlineAsmIdentifierInfo &Info,
1316 bool IsUnevaluatedOperand, SMLoc &End) {
1317 MCAsmParser &Parser = getParser();
1318 assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
1319 Val = nullptr;
1320
1321 StringRef LineBuf(Identifier.data());
1322 void *Result =
1323 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1324
1325 const AsmToken &Tok = Parser.getTok();
1326 SMLoc Loc = Tok.getLoc();
1327
1328 // Advance the token stream until the end of the current token is
1329 // after the end of what the frontend claimed.
1330 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
1331 while (true) {
1332 End = Tok.getEndLoc();
1333 getLexer().Lex();
1334
1335 assert(End.getPointer() <= EndPtr && "frontend claimed part of a token?");
1336 if (End.getPointer() == EndPtr) break;
1337 }
1338 Identifier = LineBuf;
1339
1340 // If the identifier lookup was unsuccessful, assume that we are dealing with
1341 // a label.
1342 if (!Result) {
1343 StringRef InternalName =
1344 SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
1345 Loc, false);
1346 assert(InternalName.size() && "We should have an internal name here.");
1347 // Push a rewrite for replacing the identifier name with the internal name.
1348 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Label, Loc,
1349 Identifier.size(),
1350 InternalName));
1351 }
1352
1353 // Create the symbol reference.
1354 MCSymbol *Sym = getContext().GetOrCreateSymbol(Identifier);
1355 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1356 Val = MCSymbolRefExpr::Create(Sym, Variant, getParser().getContext());
1357 return false;
1358 }
1359
1360 /// \brief Parse intel style segment override.
1361 std::unique_ptr<X86Operand>
ParseIntelSegmentOverride(unsigned SegReg,SMLoc Start,unsigned Size)1362 X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
1363 unsigned Size) {
1364 MCAsmParser &Parser = getParser();
1365 assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
1366 const AsmToken &Tok = Parser.getTok(); // Eat colon.
1367 if (Tok.isNot(AsmToken::Colon))
1368 return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1369 Parser.Lex(); // Eat ':'
1370
1371 int64_t ImmDisp = 0;
1372 if (getLexer().is(AsmToken::Integer)) {
1373 ImmDisp = Tok.getIntVal();
1374 AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
1375
1376 if (isParsingInlineAsm())
1377 InstInfo->AsmRewrites->push_back(
1378 AsmRewrite(AOK_ImmPrefix, ImmDispToken.getLoc()));
1379
1380 if (getLexer().isNot(AsmToken::LBrac)) {
1381 // An immediate following a 'segment register', 'colon' token sequence can
1382 // be followed by a bracketed expression. If it isn't we know we have our
1383 // final segment override.
1384 const MCExpr *Disp = MCConstantExpr::Create(ImmDisp, getContext());
1385 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
1386 /*BaseReg=*/0, /*IndexReg=*/0, /*Scale=*/1,
1387 Start, ImmDispToken.getEndLoc(), Size);
1388 }
1389 }
1390
1391 if (getLexer().is(AsmToken::LBrac))
1392 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
1393
1394 const MCExpr *Val;
1395 SMLoc End;
1396 if (!isParsingInlineAsm()) {
1397 if (getParser().parsePrimaryExpr(Val, End))
1398 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1399
1400 return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size);
1401 }
1402
1403 InlineAsmIdentifierInfo Info;
1404 StringRef Identifier = Tok.getString();
1405 if (ParseIntelIdentifier(Val, Identifier, Info,
1406 /*Unevaluated=*/false, End))
1407 return nullptr;
1408 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
1409 /*Scale=*/1, Start, End, Size, Identifier, Info);
1410 }
1411
1412 //ParseRoundingModeOp - Parse AVX-512 rounding mode operand
1413 std::unique_ptr<X86Operand>
ParseRoundingModeOp(SMLoc Start,SMLoc End)1414 X86AsmParser::ParseRoundingModeOp(SMLoc Start, SMLoc End) {
1415 MCAsmParser &Parser = getParser();
1416 const AsmToken &Tok = Parser.getTok();
1417 consumeToken(); // Eat "{"
1418 if (Tok.getIdentifier().startswith("r")){
1419 int rndMode = StringSwitch<int>(Tok.getIdentifier())
1420 .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
1421 .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF)
1422 .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF)
1423 .Case("rz", X86::STATIC_ROUNDING::TO_ZERO)
1424 .Default(-1);
1425 if (-1 == rndMode)
1426 return ErrorOperand(Tok.getLoc(), "Invalid rounding mode.");
1427 Parser.Lex(); // Eat "r*" of r*-sae
1428 if (!getLexer().is(AsmToken::Minus))
1429 return ErrorOperand(Tok.getLoc(), "Expected - at this point");
1430 Parser.Lex(); // Eat "-"
1431 Parser.Lex(); // Eat the sae
1432 if (!getLexer().is(AsmToken::RCurly))
1433 return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1434 Parser.Lex(); // Eat "}"
1435 const MCExpr *RndModeOp =
1436 MCConstantExpr::Create(rndMode, Parser.getContext());
1437 return X86Operand::CreateImm(RndModeOp, Start, End);
1438 }
1439 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1440 }
1441 /// ParseIntelMemOperand - Parse intel style memory operand.
ParseIntelMemOperand(int64_t ImmDisp,SMLoc Start,unsigned Size)1442 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp,
1443 SMLoc Start,
1444 unsigned Size) {
1445 MCAsmParser &Parser = getParser();
1446 const AsmToken &Tok = Parser.getTok();
1447 SMLoc End;
1448
1449 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1450 if (getLexer().is(AsmToken::LBrac))
1451 return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size);
1452 assert(ImmDisp == 0);
1453
1454 const MCExpr *Val;
1455 if (!isParsingInlineAsm()) {
1456 if (getParser().parsePrimaryExpr(Val, End))
1457 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1458
1459 return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size);
1460 }
1461
1462 InlineAsmIdentifierInfo Info;
1463 StringRef Identifier = Tok.getString();
1464 if (ParseIntelIdentifier(Val, Identifier, Info,
1465 /*Unevaluated=*/false, End))
1466 return nullptr;
1467
1468 if (!getLexer().is(AsmToken::LBrac))
1469 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
1470 /*Scale=*/1, Start, End, Size, Identifier, Info);
1471
1472 Parser.Lex(); // Eat '['
1473
1474 // Parse Identifier [ ImmDisp ]
1475 IntelExprStateMachine SM(/*ImmDisp=*/0, /*StopOnLBrac=*/true,
1476 /*AddImmPrefix=*/false);
1477 if (ParseIntelExpression(SM, End))
1478 return nullptr;
1479
1480 if (SM.getSym()) {
1481 Error(Start, "cannot use more than one symbol in memory operand");
1482 return nullptr;
1483 }
1484 if (SM.getBaseReg()) {
1485 Error(Start, "cannot use base register with variable reference");
1486 return nullptr;
1487 }
1488 if (SM.getIndexReg()) {
1489 Error(Start, "cannot use index register with variable reference");
1490 return nullptr;
1491 }
1492
1493 const MCExpr *Disp = MCConstantExpr::Create(SM.getImm(), getContext());
1494 // BaseReg is non-zero to avoid assertions. In the context of inline asm,
1495 // we're pointing to a local variable in memory, so the base register is
1496 // really the frame or stack pointer.
1497 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1498 /*BaseReg=*/1, /*IndexReg=*/0, /*Scale=*/1,
1499 Start, End, Size, Identifier, Info.OpDecl);
1500 }
1501
1502 /// Parse the '.' operator.
ParseIntelDotOperator(const MCExpr * Disp,const MCExpr * & NewDisp)1503 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1504 const MCExpr *&NewDisp) {
1505 MCAsmParser &Parser = getParser();
1506 const AsmToken &Tok = Parser.getTok();
1507 int64_t OrigDispVal, DotDispVal;
1508
1509 // FIXME: Handle non-constant expressions.
1510 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
1511 OrigDispVal = OrigDisp->getValue();
1512 else
1513 return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
1514
1515 // Drop the optional '.'.
1516 StringRef DotDispStr = Tok.getString();
1517 if (DotDispStr.startswith("."))
1518 DotDispStr = DotDispStr.drop_front(1);
1519
1520 // .Imm gets lexed as a real.
1521 if (Tok.is(AsmToken::Real)) {
1522 APInt DotDisp;
1523 DotDispStr.getAsInteger(10, DotDisp);
1524 DotDispVal = DotDisp.getZExtValue();
1525 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1526 unsigned DotDisp;
1527 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1528 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1529 DotDisp))
1530 return Error(Tok.getLoc(), "Unable to lookup field reference!");
1531 DotDispVal = DotDisp;
1532 } else
1533 return Error(Tok.getLoc(), "Unexpected token type!");
1534
1535 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1536 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1537 unsigned Len = DotDispStr.size();
1538 unsigned Val = OrigDispVal + DotDispVal;
1539 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_DotOperator, Loc, Len,
1540 Val));
1541 }
1542
1543 NewDisp = MCConstantExpr::Create(OrigDispVal + DotDispVal, getContext());
1544 return false;
1545 }
1546
1547 /// Parse the 'offset' operator. This operator is used to specify the
1548 /// location rather then the content of a variable.
ParseIntelOffsetOfOperator()1549 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
1550 MCAsmParser &Parser = getParser();
1551 const AsmToken &Tok = Parser.getTok();
1552 SMLoc OffsetOfLoc = Tok.getLoc();
1553 Parser.Lex(); // Eat offset.
1554
1555 const MCExpr *Val;
1556 InlineAsmIdentifierInfo Info;
1557 SMLoc Start = Tok.getLoc(), End;
1558 StringRef Identifier = Tok.getString();
1559 if (ParseIntelIdentifier(Val, Identifier, Info,
1560 /*Unevaluated=*/false, End))
1561 return nullptr;
1562
1563 // Don't emit the offset operator.
1564 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
1565
1566 // The offset operator will have an 'r' constraint, thus we need to create
1567 // register operand to ensure proper matching. Just pick a GPR based on
1568 // the size of a pointer.
1569 unsigned RegNo =
1570 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
1571 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1572 OffsetOfLoc, Identifier, Info.OpDecl);
1573 }
1574
/// Selects which inline-asm operator ParseIntelOperator evaluates.
enum IntelOperatorKind {
  IOK_LENGTH, ///< 'LENGTH' operator.
  IOK_SIZE,   ///< 'SIZE' operator.
  IOK_TYPE    ///< 'TYPE' operator.
};
1580
1581 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1582 /// returns the number of elements in an array. It returns the value 1 for
1583 /// non-array variables. The SIZE operator returns the size of a C or C++
1584 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1585 /// TYPE operator returns the size of a C or C++ type or variable. If the
1586 /// variable is an array, TYPE returns the size of a single element.
ParseIntelOperator(unsigned OpKind)1587 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1588 MCAsmParser &Parser = getParser();
1589 const AsmToken &Tok = Parser.getTok();
1590 SMLoc TypeLoc = Tok.getLoc();
1591 Parser.Lex(); // Eat operator.
1592
1593 const MCExpr *Val = nullptr;
1594 InlineAsmIdentifierInfo Info;
1595 SMLoc Start = Tok.getLoc(), End;
1596 StringRef Identifier = Tok.getString();
1597 if (ParseIntelIdentifier(Val, Identifier, Info,
1598 /*Unevaluated=*/true, End))
1599 return nullptr;
1600
1601 if (!Info.OpDecl)
1602 return ErrorOperand(Start, "unable to lookup expression");
1603
1604 unsigned CVal = 0;
1605 switch(OpKind) {
1606 default: llvm_unreachable("Unexpected operand kind!");
1607 case IOK_LENGTH: CVal = Info.Length; break;
1608 case IOK_SIZE: CVal = Info.Size; break;
1609 case IOK_TYPE: CVal = Info.Type; break;
1610 }
1611
1612 // Rewrite the type operator and the C or C++ type or variable in terms of an
1613 // immediate. E.g. TYPE foo -> $$4
1614 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1615 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, TypeLoc, Len, CVal));
1616
1617 const MCExpr *Imm = MCConstantExpr::Create(CVal, getContext());
1618 return X86Operand::CreateImm(Imm, Start, End);
1619 }
1620
ParseIntelOperand()1621 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
1622 MCAsmParser &Parser = getParser();
1623 const AsmToken &Tok = Parser.getTok();
1624 SMLoc Start, End;
1625
1626 // Offset, length, type and size operators.
1627 if (isParsingInlineAsm()) {
1628 StringRef AsmTokStr = Tok.getString();
1629 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
1630 return ParseIntelOffsetOfOperator();
1631 if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
1632 return ParseIntelOperator(IOK_LENGTH);
1633 if (AsmTokStr == "size" || AsmTokStr == "SIZE")
1634 return ParseIntelOperator(IOK_SIZE);
1635 if (AsmTokStr == "type" || AsmTokStr == "TYPE")
1636 return ParseIntelOperator(IOK_TYPE);
1637 }
1638
1639 unsigned Size = getIntelMemOperandSize(Tok.getString());
1640 if (Size) {
1641 Parser.Lex(); // Eat operand size (e.g., byte, word).
1642 if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
1643 return ErrorOperand(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
1644 Parser.Lex(); // Eat ptr.
1645 }
1646 Start = Tok.getLoc();
1647
1648 // Immediate.
1649 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
1650 getLexer().is(AsmToken::Tilde) || getLexer().is(AsmToken::LParen)) {
1651 AsmToken StartTok = Tok;
1652 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1653 /*AddImmPrefix=*/false);
1654 if (ParseIntelExpression(SM, End))
1655 return nullptr;
1656
1657 int64_t Imm = SM.getImm();
1658 if (isParsingInlineAsm()) {
1659 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1660 if (StartTok.getString().size() == Len)
1661 // Just add a prefix if this wasn't a complex immediate expression.
1662 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_ImmPrefix, Start));
1663 else
1664 // Otherwise, rewrite the complex expression as a single immediate.
1665 InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Imm, Start, Len, Imm));
1666 }
1667
1668 if (getLexer().isNot(AsmToken::LBrac)) {
1669 // If a directional label (ie. 1f or 2b) was parsed above from
1670 // ParseIntelExpression() then SM.getSym() was set to a pointer to
1671 // to the MCExpr with the directional local symbol and this is a
1672 // memory operand not an immediate operand.
1673 if (SM.getSym())
1674 return X86Operand::CreateMem(getPointerWidth(), SM.getSym(), Start, End,
1675 Size);
1676
1677 const MCExpr *ImmExpr = MCConstantExpr::Create(Imm, getContext());
1678 return X86Operand::CreateImm(ImmExpr, Start, End);
1679 }
1680
1681 // Only positive immediates are valid.
1682 if (Imm < 0)
1683 return ErrorOperand(Start, "expected a positive immediate displacement "
1684 "before bracketed expr.");
1685
1686 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1687 return ParseIntelMemOperand(Imm, Start, Size);
1688 }
1689
1690 // rounding mode token
1691 if (STI.getFeatureBits() & X86::FeatureAVX512 &&
1692 getLexer().is(AsmToken::LCurly))
1693 return ParseRoundingModeOp(Start, End);
1694
1695 // Register.
1696 unsigned RegNo = 0;
1697 if (!ParseRegister(RegNo, Start, End)) {
1698 // If this is a segment register followed by a ':', then this is the start
1699 // of a segment override, otherwise this is a normal register reference.
1700 if (getLexer().isNot(AsmToken::Colon))
1701 return X86Operand::CreateReg(RegNo, Start, End);
1702
1703 return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
1704 }
1705
1706 // Memory operand.
1707 return ParseIntelMemOperand(/*Disp=*/0, Start, Size);
1708 }
1709
ParseATTOperand()1710 std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
1711 MCAsmParser &Parser = getParser();
1712 switch (getLexer().getKind()) {
1713 default:
1714 // Parse a memory operand with no segment register.
1715 return ParseMemOperand(0, Parser.getTok().getLoc());
1716 case AsmToken::Percent: {
1717 // Read the register.
1718 unsigned RegNo;
1719 SMLoc Start, End;
1720 if (ParseRegister(RegNo, Start, End)) return nullptr;
1721 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1722 Error(Start, "%eiz and %riz can only be used as index registers",
1723 SMRange(Start, End));
1724 return nullptr;
1725 }
1726
1727 // If this is a segment register followed by a ':', then this is the start
1728 // of a memory reference, otherwise this is a normal register reference.
1729 if (getLexer().isNot(AsmToken::Colon))
1730 return X86Operand::CreateReg(RegNo, Start, End);
1731
1732 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
1733 return ErrorOperand(Start, "invalid segment register");
1734
1735 getParser().Lex(); // Eat the colon.
1736 return ParseMemOperand(RegNo, Start);
1737 }
1738 case AsmToken::Dollar: {
1739 // $42 -> immediate.
1740 SMLoc Start = Parser.getTok().getLoc(), End;
1741 Parser.Lex();
1742 const MCExpr *Val;
1743 if (getParser().parseExpression(Val, End))
1744 return nullptr;
1745 return X86Operand::CreateImm(Val, Start, End);
1746 }
1747 case AsmToken::LCurly:{
1748 SMLoc Start = Parser.getTok().getLoc(), End;
1749 if (STI.getFeatureBits() & X86::FeatureAVX512)
1750 return ParseRoundingModeOp(Start, End);
1751 return ErrorOperand(Start, "unknown token in expression");
1752 }
1753 }
1754 }
1755
HandleAVX512Operand(OperandVector & Operands,const MCParsedAsmOperand & Op)1756 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
1757 const MCParsedAsmOperand &Op) {
1758 MCAsmParser &Parser = getParser();
1759 if(STI.getFeatureBits() & X86::FeatureAVX512) {
1760 if (getLexer().is(AsmToken::LCurly)) {
1761 // Eat "{" and mark the current place.
1762 const SMLoc consumedToken = consumeToken();
1763 // Distinguish {1to<NUM>} from {%k<NUM>}.
1764 if(getLexer().is(AsmToken::Integer)) {
1765 // Parse memory broadcasting ({1to<NUM>}).
1766 if (getLexer().getTok().getIntVal() != 1)
1767 return !ErrorAndEatStatement(getLexer().getLoc(),
1768 "Expected 1to<NUM> at this point");
1769 Parser.Lex(); // Eat "1" of 1to8
1770 if (!getLexer().is(AsmToken::Identifier) ||
1771 !getLexer().getTok().getIdentifier().startswith("to"))
1772 return !ErrorAndEatStatement(getLexer().getLoc(),
1773 "Expected 1to<NUM> at this point");
1774 // Recognize only reasonable suffixes.
1775 const char *BroadcastPrimitive =
1776 StringSwitch<const char*>(getLexer().getTok().getIdentifier())
1777 .Case("to2", "{1to2}")
1778 .Case("to4", "{1to4}")
1779 .Case("to8", "{1to8}")
1780 .Case("to16", "{1to16}")
1781 .Default(nullptr);
1782 if (!BroadcastPrimitive)
1783 return !ErrorAndEatStatement(getLexer().getLoc(),
1784 "Invalid memory broadcast primitive.");
1785 Parser.Lex(); // Eat "toN" of 1toN
1786 if (!getLexer().is(AsmToken::RCurly))
1787 return !ErrorAndEatStatement(getLexer().getLoc(),
1788 "Expected } at this point");
1789 Parser.Lex(); // Eat "}"
1790 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
1791 consumedToken));
1792 // No AVX512 specific primitives can pass
1793 // after memory broadcasting, so return.
1794 return true;
1795 } else {
1796 // Parse mask register {%k1}
1797 Operands.push_back(X86Operand::CreateToken("{", consumedToken));
1798 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
1799 Operands.push_back(std::move(Op));
1800 if (!getLexer().is(AsmToken::RCurly))
1801 return !ErrorAndEatStatement(getLexer().getLoc(),
1802 "Expected } at this point");
1803 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
1804
1805 // Parse "zeroing non-masked" semantic {z}
1806 if (getLexer().is(AsmToken::LCurly)) {
1807 Operands.push_back(X86Operand::CreateToken("{z}", consumeToken()));
1808 if (!getLexer().is(AsmToken::Identifier) ||
1809 getLexer().getTok().getIdentifier() != "z")
1810 return !ErrorAndEatStatement(getLexer().getLoc(),
1811 "Expected z at this point");
1812 Parser.Lex(); // Eat the z
1813 if (!getLexer().is(AsmToken::RCurly))
1814 return !ErrorAndEatStatement(getLexer().getLoc(),
1815 "Expected } at this point");
1816 Parser.Lex(); // Eat the }
1817 }
1818 }
1819 }
1820 }
1821 }
1822 return true;
1823 }
1824
1825 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
1826 /// has already been parsed if present.
ParseMemOperand(unsigned SegReg,SMLoc MemStart)1827 std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
1828 SMLoc MemStart) {
1829
1830 MCAsmParser &Parser = getParser();
1831 // We have to disambiguate a parenthesized expression "(4+5)" from the start
1832 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
1833 // only way to do this without lookahead is to eat the '(' and see what is
1834 // after it.
1835 const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
1836 if (getLexer().isNot(AsmToken::LParen)) {
1837 SMLoc ExprEnd;
1838 if (getParser().parseExpression(Disp, ExprEnd)) return nullptr;
1839
1840 // After parsing the base expression we could either have a parenthesized
1841 // memory address or not. If not, return now. If so, eat the (.
1842 if (getLexer().isNot(AsmToken::LParen)) {
1843 // Unless we have a segment register, treat this as an immediate.
1844 if (SegReg == 0)
1845 return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, ExprEnd);
1846 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
1847 MemStart, ExprEnd);
1848 }
1849
1850 // Eat the '('.
1851 Parser.Lex();
1852 } else {
1853 // Okay, we have a '('. We don't know if this is an expression or not, but
1854 // so we have to eat the ( to see beyond it.
1855 SMLoc LParenLoc = Parser.getTok().getLoc();
1856 Parser.Lex(); // Eat the '('.
1857
1858 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
1859 // Nothing to do here, fall into the code below with the '(' part of the
1860 // memory operand consumed.
1861 } else {
1862 SMLoc ExprEnd;
1863
1864 // It must be an parenthesized expression, parse it now.
1865 if (getParser().parseParenExpression(Disp, ExprEnd))
1866 return nullptr;
1867
1868 // After parsing the base expression we could either have a parenthesized
1869 // memory address or not. If not, return now. If so, eat the (.
1870 if (getLexer().isNot(AsmToken::LParen)) {
1871 // Unless we have a segment register, treat this as an immediate.
1872 if (SegReg == 0)
1873 return X86Operand::CreateMem(getPointerWidth(), Disp, LParenLoc,
1874 ExprEnd);
1875 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
1876 MemStart, ExprEnd);
1877 }
1878
1879 // Eat the '('.
1880 Parser.Lex();
1881 }
1882 }
1883
1884 // If we reached here, then we just ate the ( of the memory operand. Process
1885 // the rest of the memory operand.
1886 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
1887 SMLoc IndexLoc, BaseLoc;
1888
1889 if (getLexer().is(AsmToken::Percent)) {
1890 SMLoc StartLoc, EndLoc;
1891 BaseLoc = Parser.getTok().getLoc();
1892 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr;
1893 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
1894 Error(StartLoc, "eiz and riz can only be used as index registers",
1895 SMRange(StartLoc, EndLoc));
1896 return nullptr;
1897 }
1898 }
1899
1900 if (getLexer().is(AsmToken::Comma)) {
1901 Parser.Lex(); // Eat the comma.
1902 IndexLoc = Parser.getTok().getLoc();
1903
1904 // Following the comma we should have either an index register, or a scale
1905 // value. We don't support the later form, but we want to parse it
1906 // correctly.
1907 //
1908 // Not that even though it would be completely consistent to support syntax
1909 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
1910 if (getLexer().is(AsmToken::Percent)) {
1911 SMLoc L;
1912 if (ParseRegister(IndexReg, L, L)) return nullptr;
1913
1914 if (getLexer().isNot(AsmToken::RParen)) {
1915 // Parse the scale amount:
1916 // ::= ',' [scale-expression]
1917 if (getLexer().isNot(AsmToken::Comma)) {
1918 Error(Parser.getTok().getLoc(),
1919 "expected comma in scale expression");
1920 return nullptr;
1921 }
1922 Parser.Lex(); // Eat the comma.
1923
1924 if (getLexer().isNot(AsmToken::RParen)) {
1925 SMLoc Loc = Parser.getTok().getLoc();
1926
1927 int64_t ScaleVal;
1928 if (getParser().parseAbsoluteExpression(ScaleVal)){
1929 Error(Loc, "expected scale expression");
1930 return nullptr;
1931 }
1932
1933 // Validate the scale amount.
1934 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1935 ScaleVal != 1) {
1936 Error(Loc, "scale factor in 16-bit address must be 1");
1937 return nullptr;
1938 }
1939 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
1940 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
1941 return nullptr;
1942 }
1943 Scale = (unsigned)ScaleVal;
1944 }
1945 }
1946 } else if (getLexer().isNot(AsmToken::RParen)) {
1947 // A scale amount without an index is ignored.
1948 // index.
1949 SMLoc Loc = Parser.getTok().getLoc();
1950
1951 int64_t Value;
1952 if (getParser().parseAbsoluteExpression(Value))
1953 return nullptr;
1954
1955 if (Value != 1)
1956 Warning(Loc, "scale factor without index register is ignored");
1957 Scale = 1;
1958 }
1959 }
1960
1961 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
1962 if (getLexer().isNot(AsmToken::RParen)) {
1963 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
1964 return nullptr;
1965 }
1966 SMLoc MemEnd = Parser.getTok().getEndLoc();
1967 Parser.Lex(); // Eat the ')'.
1968
1969 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
1970 // and then only in non-64-bit modes. Except for DX, which is a special case
1971 // because an unofficial form of in/out instructions uses it.
1972 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1973 (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP &&
1974 BaseReg != X86::SI && BaseReg != X86::DI)) &&
1975 BaseReg != X86::DX) {
1976 Error(BaseLoc, "invalid 16-bit base register");
1977 return nullptr;
1978 }
1979 if (BaseReg == 0 &&
1980 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
1981 Error(IndexLoc, "16-bit memory operand may not include only index register");
1982 return nullptr;
1983 }
1984
1985 StringRef ErrMsg;
1986 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
1987 Error(BaseLoc, ErrMsg);
1988 return nullptr;
1989 }
1990
1991 if (SegReg || BaseReg || IndexReg)
1992 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1993 IndexReg, Scale, MemStart, MemEnd);
1994 return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, MemEnd);
1995 }
1996
ParseInstruction(ParseInstructionInfo & Info,StringRef Name,SMLoc NameLoc,OperandVector & Operands)1997 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1998 SMLoc NameLoc, OperandVector &Operands) {
1999 MCAsmParser &Parser = getParser();
2000 InstInfo = &Info;
2001 StringRef PatchedName = Name;
2002
2003 // FIXME: Hack to recognize setneb as setne.
2004 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
2005 PatchedName != "setb" && PatchedName != "setnb")
2006 PatchedName = PatchedName.substr(0, Name.size()-1);
2007
2008 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
2009 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
2010 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
2011 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
2012 bool IsVCMP = PatchedName[0] == 'v';
2013 unsigned CCIdx = IsVCMP ? 4 : 3;
2014 unsigned ComparisonCode = StringSwitch<unsigned>(
2015 PatchedName.slice(CCIdx, PatchedName.size() - 2))
2016 .Case("eq", 0x00)
2017 .Case("lt", 0x01)
2018 .Case("le", 0x02)
2019 .Case("unord", 0x03)
2020 .Case("neq", 0x04)
2021 .Case("nlt", 0x05)
2022 .Case("nle", 0x06)
2023 .Case("ord", 0x07)
2024 /* AVX only from here */
2025 .Case("eq_uq", 0x08)
2026 .Case("nge", 0x09)
2027 .Case("ngt", 0x0A)
2028 .Case("false", 0x0B)
2029 .Case("neq_oq", 0x0C)
2030 .Case("ge", 0x0D)
2031 .Case("gt", 0x0E)
2032 .Case("true", 0x0F)
2033 .Case("eq_os", 0x10)
2034 .Case("lt_oq", 0x11)
2035 .Case("le_oq", 0x12)
2036 .Case("unord_s", 0x13)
2037 .Case("neq_us", 0x14)
2038 .Case("nlt_uq", 0x15)
2039 .Case("nle_uq", 0x16)
2040 .Case("ord_s", 0x17)
2041 .Case("eq_us", 0x18)
2042 .Case("nge_uq", 0x19)
2043 .Case("ngt_uq", 0x1A)
2044 .Case("false_os", 0x1B)
2045 .Case("neq_os", 0x1C)
2046 .Case("ge_oq", 0x1D)
2047 .Case("gt_oq", 0x1E)
2048 .Case("true_us", 0x1F)
2049 .Default(~0U);
2050 if (ComparisonCode != ~0U && (IsVCMP || ComparisonCode < 8)) {
2051
2052 Operands.push_back(X86Operand::CreateToken(PatchedName.slice(0, CCIdx),
2053 NameLoc));
2054
2055 const MCExpr *ImmOp = MCConstantExpr::Create(ComparisonCode,
2056 getParser().getContext());
2057 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2058
2059 PatchedName = PatchedName.substr(PatchedName.size() - 2);
2060 }
2061 }
2062
2063 // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2064 if (PatchedName.startswith("vpcmp") &&
2065 (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2066 PatchedName.endswith("d") || PatchedName.endswith("q"))) {
2067 unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2068 unsigned ComparisonCode = StringSwitch<unsigned>(
2069 PatchedName.slice(5, PatchedName.size() - CCIdx))
2070 .Case("eq", 0x0) // Only allowed on unsigned. Checked below.
2071 .Case("lt", 0x1)
2072 .Case("le", 0x2)
2073 //.Case("false", 0x3) // Not a documented alias.
2074 .Case("neq", 0x4)
2075 .Case("nlt", 0x5)
2076 .Case("nle", 0x6)
2077 //.Case("true", 0x7) // Not a documented alias.
2078 .Default(~0U);
2079 if (ComparisonCode != ~0U && (ComparisonCode != 0 || CCIdx == 2)) {
2080 Operands.push_back(X86Operand::CreateToken("vpcmp", NameLoc));
2081
2082 const MCExpr *ImmOp = MCConstantExpr::Create(ComparisonCode,
2083 getParser().getContext());
2084 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2085
2086 PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
2087 }
2088 }
2089
2090 // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2091 if (PatchedName.startswith("vpcom") &&
2092 (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2093 PatchedName.endswith("d") || PatchedName.endswith("q"))) {
2094 unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2095 unsigned ComparisonCode = StringSwitch<unsigned>(
2096 PatchedName.slice(5, PatchedName.size() - CCIdx))
2097 .Case("lt", 0x0)
2098 .Case("le", 0x1)
2099 .Case("gt", 0x2)
2100 .Case("ge", 0x3)
2101 .Case("eq", 0x4)
2102 .Case("neq", 0x5)
2103 .Case("false", 0x6)
2104 .Case("true", 0x7)
2105 .Default(~0U);
2106 if (ComparisonCode != ~0U) {
2107 Operands.push_back(X86Operand::CreateToken("vpcom", NameLoc));
2108
2109 const MCExpr *ImmOp = MCConstantExpr::Create(ComparisonCode,
2110 getParser().getContext());
2111 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2112
2113 PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
2114 }
2115 }
2116
2117 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
2118
2119 // Determine whether this is an instruction prefix.
2120 bool isPrefix =
2121 Name == "lock" || Name == "rep" ||
2122 Name == "repe" || Name == "repz" ||
2123 Name == "repne" || Name == "repnz" ||
2124 Name == "rex64" || Name == "data16";
2125
2126
2127 // This does the actual operand parsing. Don't parse any more if we have a
2128 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
2129 // just want to parse the "lock" as the first instruction and the "incl" as
2130 // the next one.
2131 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
2132
2133 // Parse '*' modifier.
2134 if (getLexer().is(AsmToken::Star))
2135 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
2136
2137 // Read the operands.
2138 while(1) {
2139 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
2140 Operands.push_back(std::move(Op));
2141 if (!HandleAVX512Operand(Operands, *Operands.back()))
2142 return true;
2143 } else {
2144 Parser.eatToEndOfStatement();
2145 return true;
2146 }
2147 // check for comma and eat it
2148 if (getLexer().is(AsmToken::Comma))
2149 Parser.Lex();
2150 else
2151 break;
2152 }
2153
2154 if (getLexer().isNot(AsmToken::EndOfStatement))
2155 return ErrorAndEatStatement(getLexer().getLoc(),
2156 "unexpected token in argument list");
2157 }
2158
2159 // Consume the EndOfStatement or the prefix separator Slash
2160 if (getLexer().is(AsmToken::EndOfStatement) ||
2161 (isPrefix && getLexer().is(AsmToken::Slash)))
2162 Parser.Lex();
2163
2164 // This is a terrible hack to handle "out[bwl]? %al, (%dx)" ->
2165 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
2166 // documented form in various unofficial manuals, so a lot of code uses it.
2167 if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
2168 Operands.size() == 3) {
2169 X86Operand &Op = (X86Operand &)*Operands.back();
2170 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2171 isa<MCConstantExpr>(Op.Mem.Disp) &&
2172 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2173 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2174 SMLoc Loc = Op.getEndLoc();
2175 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2176 }
2177 }
2178 // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al".
2179 if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") &&
2180 Operands.size() == 3) {
2181 X86Operand &Op = (X86Operand &)*Operands[1];
2182 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2183 isa<MCConstantExpr>(Op.Mem.Disp) &&
2184 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2185 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2186 SMLoc Loc = Op.getEndLoc();
2187 Operands[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2188 }
2189 }
2190
2191 // Append default arguments to "ins[bwld]"
2192 if (Name.startswith("ins") && Operands.size() == 1 &&
2193 (Name == "insb" || Name == "insw" || Name == "insl" ||
2194 Name == "insd" )) {
2195 if (isParsingIntelSyntax()) {
2196 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2197 Operands.push_back(DefaultMemDIOperand(NameLoc));
2198 } else {
2199 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2200 Operands.push_back(DefaultMemDIOperand(NameLoc));
2201 }
2202 }
2203
2204 // Append default arguments to "outs[bwld]"
2205 if (Name.startswith("outs") && Operands.size() == 1 &&
2206 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2207 Name == "outsd" )) {
2208 if (isParsingIntelSyntax()) {
2209 Operands.push_back(DefaultMemSIOperand(NameLoc));
2210 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2211 } else {
2212 Operands.push_back(DefaultMemSIOperand(NameLoc));
2213 Operands.push_back(X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2214 }
2215 }
2216
2217 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2218 // values of $SIREG according to the mode. It would be nice if this
2219 // could be achieved with InstAlias in the tables.
2220 if (Name.startswith("lods") && Operands.size() == 1 &&
2221 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2222 Name == "lodsl" || Name == "lodsd" || Name == "lodsq"))
2223 Operands.push_back(DefaultMemSIOperand(NameLoc));
2224
2225 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2226 // values of $DIREG according to the mode. It would be nice if this
2227 // could be achieved with InstAlias in the tables.
2228 if (Name.startswith("stos") && Operands.size() == 1 &&
2229 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2230 Name == "stosl" || Name == "stosd" || Name == "stosq"))
2231 Operands.push_back(DefaultMemDIOperand(NameLoc));
2232
2233 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2234 // values of $DIREG according to the mode. It would be nice if this
2235 // could be achieved with InstAlias in the tables.
2236 if (Name.startswith("scas") && Operands.size() == 1 &&
2237 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2238 Name == "scasl" || Name == "scasd" || Name == "scasq"))
2239 Operands.push_back(DefaultMemDIOperand(NameLoc));
2240
2241 // Add default SI and DI operands to "cmps[bwlq]".
2242 if (Name.startswith("cmps") &&
2243 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2244 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
2245 if (Operands.size() == 1) {
2246 if (isParsingIntelSyntax()) {
2247 Operands.push_back(DefaultMemSIOperand(NameLoc));
2248 Operands.push_back(DefaultMemDIOperand(NameLoc));
2249 } else {
2250 Operands.push_back(DefaultMemDIOperand(NameLoc));
2251 Operands.push_back(DefaultMemSIOperand(NameLoc));
2252 }
2253 } else if (Operands.size() == 3) {
2254 X86Operand &Op = (X86Operand &)*Operands[1];
2255 X86Operand &Op2 = (X86Operand &)*Operands[2];
2256 if (!doSrcDstMatch(Op, Op2))
2257 return Error(Op.getStartLoc(),
2258 "mismatching source and destination index registers");
2259 }
2260 }
2261
2262 // Add default SI and DI operands to "movs[bwlq]".
2263 if ((Name.startswith("movs") &&
2264 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2265 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2266 (Name.startswith("smov") &&
2267 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2268 Name == "smovl" || Name == "smovd" || Name == "smovq"))) {
2269 if (Operands.size() == 1) {
2270 if (Name == "movsd")
2271 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
2272 if (isParsingIntelSyntax()) {
2273 Operands.push_back(DefaultMemDIOperand(NameLoc));
2274 Operands.push_back(DefaultMemSIOperand(NameLoc));
2275 } else {
2276 Operands.push_back(DefaultMemSIOperand(NameLoc));
2277 Operands.push_back(DefaultMemDIOperand(NameLoc));
2278 }
2279 } else if (Operands.size() == 3) {
2280 X86Operand &Op = (X86Operand &)*Operands[1];
2281 X86Operand &Op2 = (X86Operand &)*Operands[2];
2282 if (!doSrcDstMatch(Op, Op2))
2283 return Error(Op.getStartLoc(),
2284 "mismatching source and destination index registers");
2285 }
2286 }
2287
2288 // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>. Canonicalize to
2289 // "shift <op>".
2290 if ((Name.startswith("shr") || Name.startswith("sar") ||
2291 Name.startswith("shl") || Name.startswith("sal") ||
2292 Name.startswith("rcl") || Name.startswith("rcr") ||
2293 Name.startswith("rol") || Name.startswith("ror")) &&
2294 Operands.size() == 3) {
2295 if (isParsingIntelSyntax()) {
2296 // Intel syntax
2297 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
2298 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2299 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2300 Operands.pop_back();
2301 } else {
2302 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2303 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2304 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2305 Operands.erase(Operands.begin() + 1);
2306 }
2307 }
2308
2309 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2310 // instalias with an immediate operand yet.
2311 if (Name == "int" && Operands.size() == 2) {
2312 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2313 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2314 cast<MCConstantExpr>(Op1.getImm())->getValue() == 3) {
2315 Operands.erase(Operands.begin() + 1);
2316 static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
2317 }
2318 }
2319
2320 return false;
2321 }
2322
convertToSExti8(MCInst & Inst,unsigned Opcode,unsigned Reg,bool isCmp)2323 static bool convertToSExti8(MCInst &Inst, unsigned Opcode, unsigned Reg,
2324 bool isCmp) {
2325 MCInst TmpInst;
2326 TmpInst.setOpcode(Opcode);
2327 if (!isCmp)
2328 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2329 TmpInst.addOperand(MCOperand::CreateReg(Reg));
2330 TmpInst.addOperand(Inst.getOperand(0));
2331 Inst = TmpInst;
2332 return true;
2333 }
2334
convert16i16to16ri8(MCInst & Inst,unsigned Opcode,bool isCmp=false)2335 static bool convert16i16to16ri8(MCInst &Inst, unsigned Opcode,
2336 bool isCmp = false) {
2337 if (!Inst.getOperand(0).isImm() ||
2338 !isImmSExti16i8Value(Inst.getOperand(0).getImm()))
2339 return false;
2340
2341 return convertToSExti8(Inst, Opcode, X86::AX, isCmp);
2342 }
2343
convert32i32to32ri8(MCInst & Inst,unsigned Opcode,bool isCmp=false)2344 static bool convert32i32to32ri8(MCInst &Inst, unsigned Opcode,
2345 bool isCmp = false) {
2346 if (!Inst.getOperand(0).isImm() ||
2347 !isImmSExti32i8Value(Inst.getOperand(0).getImm()))
2348 return false;
2349
2350 return convertToSExti8(Inst, Opcode, X86::EAX, isCmp);
2351 }
2352
convert64i32to64ri8(MCInst & Inst,unsigned Opcode,bool isCmp=false)2353 static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode,
2354 bool isCmp = false) {
2355 if (!Inst.getOperand(0).isImm() ||
2356 !isImmSExti64i8Value(Inst.getOperand(0).getImm()))
2357 return false;
2358
2359 return convertToSExti8(Inst, Opcode, X86::RAX, isCmp);
2360 }
2361
validateInstruction(MCInst & Inst,const OperandVector & Ops)2362 bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
2363 switch (Inst.getOpcode()) {
2364 default: return true;
2365 case X86::INT:
2366 X86Operand &Op = static_cast<X86Operand &>(*Ops[1]);
2367 assert(Op.isImm() && "expected immediate");
2368 int64_t Res;
2369 if (!Op.getImm()->EvaluateAsAbsolute(Res) || Res > 255) {
2370 Error(Op.getStartLoc(), "interrupt vector must be in range [0-255]");
2371 return false;
2372 }
2373 return true;
2374 }
2375 llvm_unreachable("handle the instruction appropriately");
2376 }
2377
processInstruction(MCInst & Inst,const OperandVector & Ops)2378 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
2379 switch (Inst.getOpcode()) {
2380 default: return false;
2381 case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8);
2382 case X86::AND32i32: return convert32i32to32ri8(Inst, X86::AND32ri8);
2383 case X86::AND64i32: return convert64i32to64ri8(Inst, X86::AND64ri8);
2384 case X86::XOR16i16: return convert16i16to16ri8(Inst, X86::XOR16ri8);
2385 case X86::XOR32i32: return convert32i32to32ri8(Inst, X86::XOR32ri8);
2386 case X86::XOR64i32: return convert64i32to64ri8(Inst, X86::XOR64ri8);
2387 case X86::OR16i16: return convert16i16to16ri8(Inst, X86::OR16ri8);
2388 case X86::OR32i32: return convert32i32to32ri8(Inst, X86::OR32ri8);
2389 case X86::OR64i32: return convert64i32to64ri8(Inst, X86::OR64ri8);
2390 case X86::CMP16i16: return convert16i16to16ri8(Inst, X86::CMP16ri8, true);
2391 case X86::CMP32i32: return convert32i32to32ri8(Inst, X86::CMP32ri8, true);
2392 case X86::CMP64i32: return convert64i32to64ri8(Inst, X86::CMP64ri8, true);
2393 case X86::ADD16i16: return convert16i16to16ri8(Inst, X86::ADD16ri8);
2394 case X86::ADD32i32: return convert32i32to32ri8(Inst, X86::ADD32ri8);
2395 case X86::ADD64i32: return convert64i32to64ri8(Inst, X86::ADD64ri8);
2396 case X86::SUB16i16: return convert16i16to16ri8(Inst, X86::SUB16ri8);
2397 case X86::SUB32i32: return convert32i32to32ri8(Inst, X86::SUB32ri8);
2398 case X86::SUB64i32: return convert64i32to64ri8(Inst, X86::SUB64ri8);
2399 case X86::ADC16i16: return convert16i16to16ri8(Inst, X86::ADC16ri8);
2400 case X86::ADC32i32: return convert32i32to32ri8(Inst, X86::ADC32ri8);
2401 case X86::ADC64i32: return convert64i32to64ri8(Inst, X86::ADC64ri8);
2402 case X86::SBB16i16: return convert16i16to16ri8(Inst, X86::SBB16ri8);
2403 case X86::SBB32i32: return convert32i32to32ri8(Inst, X86::SBB32ri8);
2404 case X86::SBB64i32: return convert64i32to64ri8(Inst, X86::SBB64ri8);
2405 case X86::VMOVAPDrr:
2406 case X86::VMOVAPDYrr:
2407 case X86::VMOVAPSrr:
2408 case X86::VMOVAPSYrr:
2409 case X86::VMOVDQArr:
2410 case X86::VMOVDQAYrr:
2411 case X86::VMOVDQUrr:
2412 case X86::VMOVDQUYrr:
2413 case X86::VMOVUPDrr:
2414 case X86::VMOVUPDYrr:
2415 case X86::VMOVUPSrr:
2416 case X86::VMOVUPSYrr: {
2417 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2418 !X86II::isX86_64ExtendedReg(Inst.getOperand(1).getReg()))
2419 return false;
2420
2421 unsigned NewOpc;
2422 switch (Inst.getOpcode()) {
2423 default: llvm_unreachable("Invalid opcode");
2424 case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
2425 case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
2426 case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
2427 case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
2428 case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
2429 case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
2430 case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
2431 case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
2432 case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
2433 case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
2434 case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
2435 case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
2436 }
2437 Inst.setOpcode(NewOpc);
2438 return true;
2439 }
2440 case X86::VMOVSDrr:
2441 case X86::VMOVSSrr: {
2442 if (X86II::isX86_64ExtendedReg(Inst.getOperand(0).getReg()) ||
2443 !X86II::isX86_64ExtendedReg(Inst.getOperand(2).getReg()))
2444 return false;
2445 unsigned NewOpc;
2446 switch (Inst.getOpcode()) {
2447 default: llvm_unreachable("Invalid opcode");
2448 case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
2449 case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
2450 }
2451 Inst.setOpcode(NewOpc);
2452 return true;
2453 }
2454 }
2455 }
2456
2457 static const char *getSubtargetFeatureName(uint64_t Val);
2458
EmitInstruction(MCInst & Inst,OperandVector & Operands,MCStreamer & Out)2459 void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
2460 MCStreamer &Out) {
2461 Instrumentation->InstrumentAndEmitInstruction(Inst, Operands, getContext(),
2462 MII, Out);
2463 }
2464
MatchAndEmitInstruction(SMLoc IDLoc,unsigned & Opcode,OperandVector & Operands,MCStreamer & Out,uint64_t & ErrorInfo,bool MatchingInlineAsm)2465 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2466 OperandVector &Operands,
2467 MCStreamer &Out, uint64_t &ErrorInfo,
2468 bool MatchingInlineAsm) {
2469 if (isParsingIntelSyntax())
2470 return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
2471 MatchingInlineAsm);
2472 return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
2473 MatchingInlineAsm);
2474 }
2475
MatchFPUWaitAlias(SMLoc IDLoc,X86Operand & Op,OperandVector & Operands,MCStreamer & Out,bool MatchingInlineAsm)2476 void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
2477 OperandVector &Operands, MCStreamer &Out,
2478 bool MatchingInlineAsm) {
2479 // FIXME: This should be replaced with a real .td file alias mechanism.
2480 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
2481 // call.
2482 const char *Repl = StringSwitch<const char *>(Op.getToken())
2483 .Case("finit", "fninit")
2484 .Case("fsave", "fnsave")
2485 .Case("fstcw", "fnstcw")
2486 .Case("fstcww", "fnstcw")
2487 .Case("fstenv", "fnstenv")
2488 .Case("fstsw", "fnstsw")
2489 .Case("fstsww", "fnstsw")
2490 .Case("fclex", "fnclex")
2491 .Default(nullptr);
2492 if (Repl) {
2493 MCInst Inst;
2494 Inst.setOpcode(X86::WAIT);
2495 Inst.setLoc(IDLoc);
2496 if (!MatchingInlineAsm)
2497 EmitInstruction(Inst, Operands, Out);
2498 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
2499 }
2500 }
2501
ErrorMissingFeature(SMLoc IDLoc,uint64_t ErrorInfo,bool MatchingInlineAsm)2502 bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
2503 bool MatchingInlineAsm) {
2504 assert(ErrorInfo && "Unknown missing feature!");
2505 ArrayRef<SMRange> EmptyRanges = None;
2506 SmallString<126> Msg;
2507 raw_svector_ostream OS(Msg);
2508 OS << "instruction requires:";
2509 uint64_t Mask = 1;
2510 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2511 if (ErrorInfo & Mask)
2512 OS << ' ' << getSubtargetFeatureName(ErrorInfo & Mask);
2513 Mask <<= 1;
2514 }
2515 return Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2516 }
2517
MatchAndEmitATTInstruction(SMLoc IDLoc,unsigned & Opcode,OperandVector & Operands,MCStreamer & Out,uint64_t & ErrorInfo,bool MatchingInlineAsm)2518 bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
2519 OperandVector &Operands,
2520 MCStreamer &Out,
2521 uint64_t &ErrorInfo,
2522 bool MatchingInlineAsm) {
2523 assert(!Operands.empty() && "Unexpect empty operand list!");
2524 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2525 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2526 ArrayRef<SMRange> EmptyRanges = None;
2527
2528 // First, handle aliases that expand to multiple instructions.
2529 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
2530
2531 bool WasOriginallyInvalidOperand = false;
2532 MCInst Inst;
2533
2534 // First, try a direct match.
2535 switch (MatchInstructionImpl(Operands, Inst,
2536 ErrorInfo, MatchingInlineAsm,
2537 isParsingIntelSyntax())) {
2538 default: llvm_unreachable("Unexpected match result!");
2539 case Match_Success:
2540 if (!validateInstruction(Inst, Operands))
2541 return true;
2542
2543 // Some instructions need post-processing to, for example, tweak which
2544 // encoding is selected. Loop on it while changes happen so the
2545 // individual transformations can chain off each other.
2546 if (!MatchingInlineAsm)
2547 while (processInstruction(Inst, Operands))
2548 ;
2549
2550 Inst.setLoc(IDLoc);
2551 if (!MatchingInlineAsm)
2552 EmitInstruction(Inst, Operands, Out);
2553 Opcode = Inst.getOpcode();
2554 return false;
2555 case Match_MissingFeature:
2556 return ErrorMissingFeature(IDLoc, ErrorInfo, MatchingInlineAsm);
2557 case Match_InvalidOperand:
2558 WasOriginallyInvalidOperand = true;
2559 break;
2560 case Match_MnemonicFail:
2561 break;
2562 }
2563
2564 // FIXME: Ideally, we would only attempt suffix matches for things which are
2565 // valid prefixes, and we could just infer the right unambiguous
2566 // type. However, that requires substantially more matcher support than the
2567 // following hack.
2568
2569 // Change the operand to point to a temporary token.
2570 StringRef Base = Op.getToken();
2571 SmallString<16> Tmp;
2572 Tmp += Base;
2573 Tmp += ' ';
2574 Op.setTokenValue(Tmp);
2575
2576 // If this instruction starts with an 'f', then it is a floating point stack
2577 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2578 // 80-bit floating point, which use the suffixes s,l,t respectively.
2579 //
2580 // Otherwise, we assume that this may be an integer instruction, which comes
2581 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
2582 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2583
2584 // Check for the various suffix matches.
2585 uint64_t ErrorInfoIgnore;
2586 uint64_t ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
2587 unsigned Match[4];
2588
2589 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
2590 Tmp.back() = Suffixes[I];
2591 Match[I] = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2592 MatchingInlineAsm, isParsingIntelSyntax());
2593 // If this returned as a missing feature failure, remember that.
2594 if (Match[I] == Match_MissingFeature)
2595 ErrorInfoMissingFeature = ErrorInfoIgnore;
2596 }
2597
2598 // Restore the old token.
2599 Op.setTokenValue(Base);
2600
2601 // If exactly one matched, then we treat that as a successful match (and the
2602 // instruction will already have been filled in correctly, since the failing
2603 // matches won't have modified it).
2604 unsigned NumSuccessfulMatches =
2605 std::count(std::begin(Match), std::end(Match), Match_Success);
2606 if (NumSuccessfulMatches == 1) {
2607 Inst.setLoc(IDLoc);
2608 if (!MatchingInlineAsm)
2609 EmitInstruction(Inst, Operands, Out);
2610 Opcode = Inst.getOpcode();
2611 return false;
2612 }
2613
2614 // Otherwise, the match failed, try to produce a decent error message.
2615
2616 // If we had multiple suffix matches, then identify this as an ambiguous
2617 // match.
2618 if (NumSuccessfulMatches > 1) {
2619 char MatchChars[4];
2620 unsigned NumMatches = 0;
2621 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I)
2622 if (Match[I] == Match_Success)
2623 MatchChars[NumMatches++] = Suffixes[I];
2624
2625 SmallString<126> Msg;
2626 raw_svector_ostream OS(Msg);
2627 OS << "ambiguous instructions require an explicit suffix (could be ";
2628 for (unsigned i = 0; i != NumMatches; ++i) {
2629 if (i != 0)
2630 OS << ", ";
2631 if (i + 1 == NumMatches)
2632 OS << "or ";
2633 OS << "'" << Base << MatchChars[i] << "'";
2634 }
2635 OS << ")";
2636 Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2637 return true;
2638 }
2639
2640 // Okay, we know that none of the variants matched successfully.
2641
2642 // If all of the instructions reported an invalid mnemonic, then the original
2643 // mnemonic was invalid.
2644 if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
2645 if (!WasOriginallyInvalidOperand) {
2646 ArrayRef<SMRange> Ranges =
2647 MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
2648 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2649 Ranges, MatchingInlineAsm);
2650 }
2651
2652 // Recover location info for the operand if we know which was the problem.
2653 if (ErrorInfo != ~0ULL) {
2654 if (ErrorInfo >= Operands.size())
2655 return Error(IDLoc, "too few operands for instruction",
2656 EmptyRanges, MatchingInlineAsm);
2657
2658 X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
2659 if (Operand.getStartLoc().isValid()) {
2660 SMRange OperandRange = Operand.getLocRange();
2661 return Error(Operand.getStartLoc(), "invalid operand for instruction",
2662 OperandRange, MatchingInlineAsm);
2663 }
2664 }
2665
2666 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2667 MatchingInlineAsm);
2668 }
2669
2670 // If one instruction matched with a missing feature, report this as a
2671 // missing feature.
2672 if (std::count(std::begin(Match), std::end(Match),
2673 Match_MissingFeature) == 1) {
2674 ErrorInfo = ErrorInfoMissingFeature;
2675 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
2676 MatchingInlineAsm);
2677 }
2678
2679 // If one instruction matched with an invalid operand, report this as an
2680 // operand failure.
2681 if (std::count(std::begin(Match), std::end(Match),
2682 Match_InvalidOperand) == 1) {
2683 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2684 MatchingInlineAsm);
2685 }
2686
2687 // If all of these were an outright failure, report it in a useless way.
2688 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2689 EmptyRanges, MatchingInlineAsm);
2690 return true;
2691 }
2692
MatchAndEmitIntelInstruction(SMLoc IDLoc,unsigned & Opcode,OperandVector & Operands,MCStreamer & Out,uint64_t & ErrorInfo,bool MatchingInlineAsm)2693 bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
2694 OperandVector &Operands,
2695 MCStreamer &Out,
2696 uint64_t &ErrorInfo,
2697 bool MatchingInlineAsm) {
2698 assert(!Operands.empty() && "Unexpect empty operand list!");
2699 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2700 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2701 StringRef Mnemonic = Op.getToken();
2702 ArrayRef<SMRange> EmptyRanges = None;
2703
2704 // First, handle aliases that expand to multiple instructions.
2705 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
2706
2707 MCInst Inst;
2708
2709 // Find one unsized memory operand, if present.
2710 X86Operand *UnsizedMemOp = nullptr;
2711 for (const auto &Op : Operands) {
2712 X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
2713 if (X86Op->isMemUnsized())
2714 UnsizedMemOp = X86Op;
2715 }
2716
2717 // Allow some instructions to have implicitly pointer-sized operands. This is
2718 // compatible with gas.
2719 if (UnsizedMemOp) {
2720 static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
2721 for (const char *Instr : PtrSizedInstrs) {
2722 if (Mnemonic == Instr) {
2723 UnsizedMemOp->Mem.Size = getPointerWidth();
2724 break;
2725 }
2726 }
2727 }
2728
2729 // If an unsized memory operand is present, try to match with each memory
2730 // operand size. In Intel assembly, the size is not part of the instruction
2731 // mnemonic.
2732 SmallVector<unsigned, 8> Match;
2733 uint64_t ErrorInfoMissingFeature = 0;
2734 if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
2735 static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
2736 for (unsigned Size : MopSizes) {
2737 UnsizedMemOp->Mem.Size = Size;
2738 uint64_t ErrorInfoIgnore;
2739 unsigned LastOpcode = Inst.getOpcode();
2740 unsigned M =
2741 MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2742 MatchingInlineAsm, isParsingIntelSyntax());
2743 if (Match.empty() || LastOpcode != Inst.getOpcode())
2744 Match.push_back(M);
2745
2746 // If this returned as a missing feature failure, remember that.
2747 if (Match.back() == Match_MissingFeature)
2748 ErrorInfoMissingFeature = ErrorInfoIgnore;
2749 }
2750
2751 // Restore the size of the unsized memory operand if we modified it.
2752 if (UnsizedMemOp)
2753 UnsizedMemOp->Mem.Size = 0;
2754 }
2755
2756 // If we haven't matched anything yet, this is not a basic integer or FPU
2757 // operation. There shouldn't be any ambiguity in our mnemonic table, so try
2758 // matching with the unsized operand.
2759 if (Match.empty()) {
2760 Match.push_back(MatchInstructionImpl(Operands, Inst, ErrorInfo,
2761 MatchingInlineAsm,
2762 isParsingIntelSyntax()));
2763 // If this returned as a missing feature failure, remember that.
2764 if (Match.back() == Match_MissingFeature)
2765 ErrorInfoMissingFeature = ErrorInfo;
2766 }
2767
2768 // Restore the size of the unsized memory operand if we modified it.
2769 if (UnsizedMemOp)
2770 UnsizedMemOp->Mem.Size = 0;
2771
2772 // If it's a bad mnemonic, all results will be the same.
2773 if (Match.back() == Match_MnemonicFail) {
2774 ArrayRef<SMRange> Ranges =
2775 MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
2776 return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
2777 Ranges, MatchingInlineAsm);
2778 }
2779
2780 // If exactly one matched, then we treat that as a successful match (and the
2781 // instruction will already have been filled in correctly, since the failing
2782 // matches won't have modified it).
2783 unsigned NumSuccessfulMatches =
2784 std::count(std::begin(Match), std::end(Match), Match_Success);
2785 if (NumSuccessfulMatches == 1) {
2786 if (!validateInstruction(Inst, Operands))
2787 return true;
2788
2789 // Some instructions need post-processing to, for example, tweak which
2790 // encoding is selected. Loop on it while changes happen so the individual
2791 // transformations can chain off each other.
2792 if (!MatchingInlineAsm)
2793 while (processInstruction(Inst, Operands))
2794 ;
2795 Inst.setLoc(IDLoc);
2796 if (!MatchingInlineAsm)
2797 EmitInstruction(Inst, Operands, Out);
2798 Opcode = Inst.getOpcode();
2799 return false;
2800 } else if (NumSuccessfulMatches > 1) {
2801 assert(UnsizedMemOp &&
2802 "multiple matches only possible with unsized memory operands");
2803 ArrayRef<SMRange> Ranges =
2804 MatchingInlineAsm ? EmptyRanges : UnsizedMemOp->getLocRange();
2805 return Error(UnsizedMemOp->getStartLoc(),
2806 "ambiguous operand size for instruction '" + Mnemonic + "\'",
2807 Ranges, MatchingInlineAsm);
2808 }
2809
2810 // If one instruction matched with a missing feature, report this as a
2811 // missing feature.
2812 if (std::count(std::begin(Match), std::end(Match),
2813 Match_MissingFeature) == 1) {
2814 ErrorInfo = ErrorInfoMissingFeature;
2815 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
2816 MatchingInlineAsm);
2817 }
2818
2819 // If one instruction matched with an invalid operand, report this as an
2820 // operand failure.
2821 if (std::count(std::begin(Match), std::end(Match),
2822 Match_InvalidOperand) == 1) {
2823 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2824 MatchingInlineAsm);
2825 }
2826
2827 // If all of these were an outright failure, report it in a useless way.
2828 return Error(IDLoc, "unknown instruction mnemonic", EmptyRanges,
2829 MatchingInlineAsm);
2830 }
2831
OmitRegisterFromClobberLists(unsigned RegNo)2832 bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
2833 return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
2834 }
2835
ParseDirective(AsmToken DirectiveID)2836 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
2837 MCAsmParser &Parser = getParser();
2838 StringRef IDVal = DirectiveID.getIdentifier();
2839 if (IDVal == ".word")
2840 return ParseDirectiveWord(2, DirectiveID.getLoc());
2841 else if (IDVal.startswith(".code"))
2842 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
2843 else if (IDVal.startswith(".att_syntax")) {
2844 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2845 if (Parser.getTok().getString() == "prefix")
2846 Parser.Lex();
2847 else if (Parser.getTok().getString() == "noprefix")
2848 return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
2849 "supported: registers must have a "
2850 "'%' prefix in .att_syntax");
2851 }
2852 getParser().setAssemblerDialect(0);
2853 return false;
2854 } else if (IDVal.startswith(".intel_syntax")) {
2855 getParser().setAssemblerDialect(1);
2856 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2857 if (Parser.getTok().getString() == "noprefix")
2858 Parser.Lex();
2859 else if (Parser.getTok().getString() == "prefix")
2860 return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
2861 "supported: registers must not have "
2862 "a '%' prefix in .intel_syntax");
2863 }
2864 return false;
2865 }
2866 return true;
2867 }
2868
2869 /// ParseDirectiveWord
2870 /// ::= .word [ expression (, expression)* ]
ParseDirectiveWord(unsigned Size,SMLoc L)2871 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
2872 MCAsmParser &Parser = getParser();
2873 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2874 for (;;) {
2875 const MCExpr *Value;
2876 if (getParser().parseExpression(Value))
2877 return false;
2878
2879 getParser().getStreamer().EmitValue(Value, Size);
2880
2881 if (getLexer().is(AsmToken::EndOfStatement))
2882 break;
2883
2884 // FIXME: Improve diagnostic.
2885 if (getLexer().isNot(AsmToken::Comma)) {
2886 Error(L, "unexpected token in directive");
2887 return false;
2888 }
2889 Parser.Lex();
2890 }
2891 }
2892
2893 Parser.Lex();
2894 return false;
2895 }
2896
2897 /// ParseDirectiveCode
2898 /// ::= .code16 | .code32 | .code64
ParseDirectiveCode(StringRef IDVal,SMLoc L)2899 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
2900 MCAsmParser &Parser = getParser();
2901 if (IDVal == ".code16") {
2902 Parser.Lex();
2903 if (!is16BitMode()) {
2904 SwitchMode(X86::Mode16Bit);
2905 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
2906 }
2907 } else if (IDVal == ".code32") {
2908 Parser.Lex();
2909 if (!is32BitMode()) {
2910 SwitchMode(X86::Mode32Bit);
2911 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
2912 }
2913 } else if (IDVal == ".code64") {
2914 Parser.Lex();
2915 if (!is64BitMode()) {
2916 SwitchMode(X86::Mode64Bit);
2917 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
2918 }
2919 } else {
2920 Error(L, "unknown directive " + IDVal);
2921 return false;
2922 }
2923
2924 return false;
2925 }
2926
2927 // Force static initialization.
LLVMInitializeX86AsmParser()2928 extern "C" void LLVMInitializeX86AsmParser() {
2929 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
2930 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
2931 }
2932
2933 #define GET_REGISTER_MATCHER
2934 #define GET_MATCHER_IMPLEMENTATION
2935 #define GET_SUBTARGET_FEATURE_NAME
2936 #include "X86GenAsmMatcher.inc"
2937