1 //===- llvm/MC/MCTargetAsmParser.h - Target Assembly Parser -----*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #ifndef LLVM_MC_MCPARSER_MCTARGETASMPARSER_H 11 #define LLVM_MC_MCPARSER_MCTARGETASMPARSER_H 12 13 #include "llvm/ADT/StringRef.h" 14 #include "llvm/MC/MCExpr.h" 15 #include "llvm/MC/MCInstrInfo.h" 16 #include "llvm/MC/MCParser/MCAsmLexer.h" 17 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 18 #include "llvm/MC/MCParser/MCAsmParserExtension.h" 19 #include "llvm/MC/MCTargetOptions.h" 20 #include "llvm/Support/SMLoc.h" 21 #include <cstdint> 22 #include <memory> 23 24 namespace llvm { 25 26 class MCInst; 27 class MCParsedAsmOperand; 28 class MCStreamer; 29 class MCSubtargetInfo; 30 template <typename T> class SmallVectorImpl; 31 32 using OperandVector = SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>>; 33 34 enum AsmRewriteKind { 35 AOK_Align, // Rewrite align as .align. 36 AOK_EVEN, // Rewrite even as .even. 37 AOK_Emit, // Rewrite _emit as .byte. 38 AOK_Input, // Rewrite in terms of $N. 39 AOK_Output, // Rewrite in terms of $N. 40 AOK_SizeDirective, // Add a sizing directive (e.g., dword ptr). 41 AOK_Label, // Rewrite local labels. 42 AOK_EndOfStatement, // Add EndOfStatement (e.g., "\n\t"). 43 AOK_Skip, // Skip emission (e.g., offset/type operators). 44 AOK_IntelExpr // SizeDirective SymDisp [BaseReg + IndexReg * Scale + ImmDisp] 45 }; 46 47 const char AsmRewritePrecedence [] = { 48 2, // AOK_Align 49 2, // AOK_EVEN 50 2, // AOK_Emit 51 3, // AOK_Input 52 3, // AOK_Output 53 5, // AOK_SizeDirective 54 1, // AOK_Label 55 5, // AOK_EndOfStatement 56 2, // AOK_Skip 57 2 // AOK_IntelExpr 58 }; 59 60 // Represnt the various parts which makes up an intel expression, 61 // used for emitting compound intel expressions 62 struct IntelExpr { 63 bool NeedBracs; 64 int64_t Imm; 65 StringRef BaseReg; 66 StringRef IndexReg; 67 unsigned Scale; 68 NeedBracsIntelExpr69 IntelExpr(bool needBracs = false) : NeedBracs(needBracs), Imm(0), 70 BaseReg(StringRef()), IndexReg(StringRef()), 71 Scale(1) {} 72 // Compund immediate expression IntelExprIntelExpr73 IntelExpr(int64_t imm, bool needBracs) : IntelExpr(needBracs) { 74 Imm = imm; 75 } 76 // [Reg + ImmediateExpression] 77 // We don't bother to emit an immediate expression evaluated to zero 78 IntelExpr(StringRef reg, int64_t imm = 0, unsigned scale = 0, 79 bool needBracs = true) : IntelExprIntelExpr80 IntelExpr(imm, needBracs) { 81 IndexReg = reg; 82 if (scale) 83 Scale = scale; 84 } 85 // [BaseReg + IndexReg * ScaleExpression + ImmediateExpression] 86 IntelExpr(StringRef baseReg, StringRef indexReg, unsigned scale = 0, 87 int64_t imm = 0, bool needBracs = true) : IntelExprIntelExpr88 IntelExpr(indexReg, imm, scale, needBracs) { 89 BaseReg = baseReg; 90 } hasBaseRegIntelExpr91 bool hasBaseReg() const { 92 return BaseReg.size(); 93 } hasIndexRegIntelExpr94 bool hasIndexReg() const { 95 return IndexReg.size(); 96 } hasRegsIntelExpr97 bool hasRegs() const { 98 return hasBaseReg() || hasIndexReg(); 99 } isValidIntelExpr100 bool isValid() const { 101 return (Scale == 1) || 102 (hasIndexReg() && (Scale == 2 || Scale == 4 || Scale == 8)); 103 } 104 }; 105 106 struct AsmRewrite { 107 AsmRewriteKind Kind; 108 SMLoc Loc; 109 unsigned Len; 110 int64_t Val; 111 StringRef Label; 112 IntelExpr IntelExp; 113 114 public: 115 AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len = 0, int64_t val = 0) KindAsmRewrite116 : Kind(kind), Loc(loc), Len(len), Val(val) {} AsmRewriteAsmRewrite117 AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len, StringRef label) 118 : AsmRewrite(kind, loc, len) { Label = label; } AsmRewriteAsmRewrite119 AsmRewrite(SMLoc loc, unsigned len, IntelExpr exp) 120 : AsmRewrite(AOK_IntelExpr, loc, len) { IntelExp = exp; } 121 }; 122 123 struct ParseInstructionInfo { 124 SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr; 125 126 ParseInstructionInfo() = default; ParseInstructionInfoParseInstructionInfo127 ParseInstructionInfo(SmallVectorImpl<AsmRewrite> *rewrites) 128 : AsmRewrites(rewrites) {} 129 }; 130 131 enum OperandMatchResultTy { 132 MatchOperand_Success, // operand matched successfully 133 MatchOperand_NoMatch, // operand did not match 134 MatchOperand_ParseFail // operand matched but had errors 135 }; 136 137 enum class DiagnosticPredicateTy { 138 Match, 139 NearMatch, 140 NoMatch, 141 }; 142 143 // When an operand is parsed, the assembler will try to iterate through a set of 144 // possible operand classes that the operand might match and call the 145 // corresponding PredicateMethod to determine that. 146 // 147 // If there are two AsmOperands that would give a specific diagnostic if there 148 // is no match, there is currently no mechanism to distinguish which operand is 149 // a closer match. The DiagnosticPredicate distinguishes between 'completely 150 // no match' and 'near match', so the assembler can decide whether to give a 151 // specific diagnostic, or use 'InvalidOperand' and continue to find a 152 // 'better matching' diagnostic. 153 // 154 // For example: 155 // opcode opnd0, onpd1, opnd2 156 // 157 // where: 158 // opnd2 could be an 'immediate of range [-8, 7]' 159 // opnd2 could be a 'register + shift/extend'. 160 // 161 // If opnd2 is a valid register, but with a wrong shift/extend suffix, it makes 162 // little sense to give a diagnostic that the operand should be an immediate 163 // in range [-8, 7]. 164 // 165 // This is a light-weight alternative to the 'NearMissInfo' approach 166 // below which collects *all* possible diagnostics. This alternative 167 // is optional and fully backward compatible with existing 168 // PredicateMethods that return a 'bool' (match or no match). 169 struct DiagnosticPredicate { 170 DiagnosticPredicateTy Type; 171 DiagnosticPredicateDiagnosticPredicate172 explicit DiagnosticPredicate(bool Match) 173 : Type(Match ? DiagnosticPredicateTy::Match 174 : DiagnosticPredicateTy::NearMatch) {} DiagnosticPredicateDiagnosticPredicate175 DiagnosticPredicate(DiagnosticPredicateTy T) : Type(T) {} 176 DiagnosticPredicate(const DiagnosticPredicate &) = default; 177 178 operator bool() const { return Type == DiagnosticPredicateTy::Match; } isMatchDiagnosticPredicate179 bool isMatch() const { return Type == DiagnosticPredicateTy::Match; } isNearMatchDiagnosticPredicate180 bool isNearMatch() const { return Type == DiagnosticPredicateTy::NearMatch; } isNoMatchDiagnosticPredicate181 bool isNoMatch() const { return Type == DiagnosticPredicateTy::NoMatch; } 182 }; 183 184 // When matching of an assembly instruction fails, there may be multiple 185 // encodings that are close to being a match. It's often ambiguous which one 186 // the programmer intended to use, so we want to report an error which mentions 187 // each of these "near-miss" encodings. This struct contains information about 188 // one such encoding, and why it did not match the parsed instruction. 189 class NearMissInfo { 190 public: 191 enum NearMissKind { 192 NoNearMiss, 193 NearMissOperand, 194 NearMissFeature, 195 NearMissPredicate, 196 NearMissTooFewOperands, 197 }; 198 199 // The encoding is valid for the parsed assembly string. This is only used 200 // internally to the table-generated assembly matcher. getSuccess()201 static NearMissInfo getSuccess() { return NearMissInfo(); } 202 203 // The instruction encoding is not valid because it requires some target 204 // features that are not currently enabled. MissingFeatures has a bit set for 205 // each feature that the encoding needs but which is not enabled. getMissedFeature(uint64_t MissingFeatures)206 static NearMissInfo getMissedFeature(uint64_t MissingFeatures) { 207 NearMissInfo Result; 208 Result.Kind = NearMissFeature; 209 Result.Features = MissingFeatures; 210 return Result; 211 } 212 213 // The instruction encoding is not valid because the target-specific 214 // predicate function returned an error code. FailureCode is the 215 // target-specific error code returned by the predicate. getMissedPredicate(unsigned FailureCode)216 static NearMissInfo getMissedPredicate(unsigned FailureCode) { 217 NearMissInfo Result; 218 Result.Kind = NearMissPredicate; 219 Result.PredicateError = FailureCode; 220 return Result; 221 } 222 223 // The instruction encoding is not valid because one (and only one) parsed 224 // operand is not of the correct type. OperandError is the error code 225 // relating to the operand class expected by the encoding. OperandClass is 226 // the type of the expected operand. Opcode is the opcode of the encoding. 227 // OperandIndex is the index into the parsed operand list. getMissedOperand(unsigned OperandError,unsigned OperandClass,unsigned Opcode,unsigned OperandIndex)228 static NearMissInfo getMissedOperand(unsigned OperandError, 229 unsigned OperandClass, unsigned Opcode, 230 unsigned OperandIndex) { 231 NearMissInfo Result; 232 Result.Kind = NearMissOperand; 233 Result.MissedOperand.Error = OperandError; 234 Result.MissedOperand.Class = OperandClass; 235 Result.MissedOperand.Opcode = Opcode; 236 Result.MissedOperand.Index = OperandIndex; 237 return Result; 238 } 239 240 // The instruction encoding is not valid because it expects more operands 241 // than were parsed. OperandClass is the class of the expected operand that 242 // was not provided. Opcode is the instruction encoding. getTooFewOperands(unsigned OperandClass,unsigned Opcode)243 static NearMissInfo getTooFewOperands(unsigned OperandClass, 244 unsigned Opcode) { 245 NearMissInfo Result; 246 Result.Kind = NearMissTooFewOperands; 247 Result.TooFewOperands.Class = OperandClass; 248 Result.TooFewOperands.Opcode = Opcode; 249 return Result; 250 } 251 252 operator bool() const { return Kind != NoNearMiss; } 253 getKind()254 NearMissKind getKind() const { return Kind; } 255 256 // Feature flags required by the instruction, that the current target does 257 // not have. getFeatures()258 uint64_t getFeatures() const { 259 assert(Kind == NearMissFeature); 260 return Features; 261 } 262 // Error code returned by the target predicate when validating this 263 // instruction encoding. getPredicateError()264 unsigned getPredicateError() const { 265 assert(Kind == NearMissPredicate); 266 return PredicateError; 267 } 268 // MatchClassKind of the operand that we expected to see. getOperandClass()269 unsigned getOperandClass() const { 270 assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands); 271 return MissedOperand.Class; 272 } 273 // Opcode of the encoding we were trying to match. getOpcode()274 unsigned getOpcode() const { 275 assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands); 276 return MissedOperand.Opcode; 277 } 278 // Error code returned when validating the operand. getOperandError()279 unsigned getOperandError() const { 280 assert(Kind == NearMissOperand); 281 return MissedOperand.Error; 282 } 283 // Index of the actual operand we were trying to match in the list of parsed 284 // operands. getOperandIndex()285 unsigned getOperandIndex() const { 286 assert(Kind == NearMissOperand); 287 return MissedOperand.Index; 288 } 289 290 private: 291 NearMissKind Kind; 292 293 // These two structs share a common prefix, so we can safely rely on the fact 294 // that they overlap in the union. 295 struct MissedOpInfo { 296 unsigned Class; 297 unsigned Opcode; 298 unsigned Error; 299 unsigned Index; 300 }; 301 302 struct TooFewOperandsInfo { 303 unsigned Class; 304 unsigned Opcode; 305 }; 306 307 union { 308 uint64_t Features; 309 unsigned PredicateError; 310 MissedOpInfo MissedOperand; 311 TooFewOperandsInfo TooFewOperands; 312 }; 313 NearMissInfo()314 NearMissInfo() : Kind(NoNearMiss) {} 315 }; 316 317 /// MCTargetAsmParser - Generic interface to target specific assembly parsers. 318 class MCTargetAsmParser : public MCAsmParserExtension { 319 public: 320 enum MatchResultTy { 321 Match_InvalidOperand, 322 Match_InvalidTiedOperand, 323 Match_MissingFeature, 324 Match_MnemonicFail, 325 Match_Success, 326 Match_NearMisses, 327 FIRST_TARGET_MATCH_RESULT_TY 328 }; 329 330 protected: // Can only create subclasses. 331 MCTargetAsmParser(MCTargetOptions const &, const MCSubtargetInfo &STI, 332 const MCInstrInfo &MII); 333 334 /// Create a copy of STI and return a non-const reference to it. 335 MCSubtargetInfo ©STI(); 336 337 /// AvailableFeatures - The current set of available features. 338 uint64_t AvailableFeatures = 0; 339 340 /// ParsingInlineAsm - Are we parsing ms-style inline assembly? 341 bool ParsingInlineAsm = false; 342 343 /// SemaCallback - The Sema callback implementation. Must be set when parsing 344 /// ms-style inline assembly. 345 MCAsmParserSemaCallback *SemaCallback; 346 347 /// Set of options which affects instrumentation of inline assembly. 348 MCTargetOptions MCOptions; 349 350 /// Current STI. 351 const MCSubtargetInfo *STI; 352 353 const MCInstrInfo &MII; 354 355 public: 356 MCTargetAsmParser(const MCTargetAsmParser &) = delete; 357 MCTargetAsmParser &operator=(const MCTargetAsmParser &) = delete; 358 359 ~MCTargetAsmParser() override; 360 361 const MCSubtargetInfo &getSTI() const; 362 getAvailableFeatures()363 uint64_t getAvailableFeatures() const { return AvailableFeatures; } setAvailableFeatures(uint64_t Value)364 void setAvailableFeatures(uint64_t Value) { AvailableFeatures = Value; } 365 isParsingInlineAsm()366 bool isParsingInlineAsm () { return ParsingInlineAsm; } setParsingInlineAsm(bool Value)367 void setParsingInlineAsm (bool Value) { ParsingInlineAsm = Value; } 368 getTargetOptions()369 MCTargetOptions getTargetOptions() const { return MCOptions; } 370 setSemaCallback(MCAsmParserSemaCallback * Callback)371 void setSemaCallback(MCAsmParserSemaCallback *Callback) { 372 SemaCallback = Callback; 373 } 374 375 // Target-specific parsing of expression. parsePrimaryExpr(const MCExpr * & Res,SMLoc & EndLoc)376 virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { 377 return getParser().parsePrimaryExpr(Res, EndLoc); 378 } 379 380 virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 381 SMLoc &EndLoc) = 0; 382 383 /// Sets frame register corresponding to the current MachineFunction. SetFrameRegister(unsigned RegNo)384 virtual void SetFrameRegister(unsigned RegNo) {} 385 386 /// ParseInstruction - Parse one assembly instruction. 387 /// 388 /// The parser is positioned following the instruction name. The target 389 /// specific instruction parser should parse the entire instruction and 390 /// construct the appropriate MCInst, or emit an error. On success, the entire 391 /// line should be parsed up to and including the end-of-statement token. On 392 /// failure, the parser is not required to read to the end of the line. 393 // 394 /// \param Name - The instruction name. 395 /// \param NameLoc - The source location of the name. 396 /// \param Operands [out] - The list of parsed operands, this returns 397 /// ownership of them to the caller. 398 /// \return True on failure. 399 virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 400 SMLoc NameLoc, OperandVector &Operands) = 0; ParseInstruction(ParseInstructionInfo & Info,StringRef Name,AsmToken Token,OperandVector & Operands)401 virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 402 AsmToken Token, OperandVector &Operands) { 403 return ParseInstruction(Info, Name, Token.getLoc(), Operands); 404 } 405 406 /// ParseDirective - Parse a target specific assembler directive 407 /// 408 /// The parser is positioned following the directive name. The target 409 /// specific directive parser should parse the entire directive doing or 410 /// recording any target specific work, or return true and do nothing if the 411 /// directive is not target specific. If the directive is specific for 412 /// the target, the entire line is parsed up to and including the 413 /// end-of-statement token and false is returned. 414 /// 415 /// \param DirectiveID - the identifier token of the directive. 416 virtual bool ParseDirective(AsmToken DirectiveID) = 0; 417 418 /// MatchAndEmitInstruction - Recognize a series of operands of a parsed 419 /// instruction as an actual MCInst and emit it to the specified MCStreamer. 420 /// This returns false on success and returns true on failure to match. 421 /// 422 /// On failure, the target parser is responsible for emitting a diagnostic 423 /// explaining the match failure. 424 virtual bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 425 OperandVector &Operands, MCStreamer &Out, 426 uint64_t &ErrorInfo, 427 bool MatchingInlineAsm) = 0; 428 429 /// Allows targets to let registers opt out of clobber lists. OmitRegisterFromClobberLists(unsigned RegNo)430 virtual bool OmitRegisterFromClobberLists(unsigned RegNo) { return false; } 431 432 /// Allow a target to add special case operand matching for things that 433 /// tblgen doesn't/can't handle effectively. For example, literal 434 /// immediates on ARM. TableGen expects a token operand, but the parser 435 /// will recognize them as immediates. validateTargetOperandClass(MCParsedAsmOperand & Op,unsigned Kind)436 virtual unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 437 unsigned Kind) { 438 return Match_InvalidOperand; 439 } 440 441 /// Validate the instruction match against any complex target predicates 442 /// before rendering any operands to it. 443 virtual unsigned checkEarlyTargetMatchPredicate(MCInst & Inst,const OperandVector & Operands)444 checkEarlyTargetMatchPredicate(MCInst &Inst, const OperandVector &Operands) { 445 return Match_Success; 446 } 447 448 /// checkTargetMatchPredicate - Validate the instruction match against 449 /// any complex target predicates not expressible via match classes. checkTargetMatchPredicate(MCInst & Inst)450 virtual unsigned checkTargetMatchPredicate(MCInst &Inst) { 451 return Match_Success; 452 } 453 454 virtual void convertToMapAndConstraints(unsigned Kind, 455 const OperandVector &Operands) = 0; 456 457 /// Returns whether two registers are equal and is used by the tied-operands 458 /// checks in the AsmMatcher. This method can be overridden allow e.g. a 459 /// sub- or super-register as the tied operand. regsEqual(const MCParsedAsmOperand & Op1,const MCParsedAsmOperand & Op2)460 virtual bool regsEqual(const MCParsedAsmOperand &Op1, 461 const MCParsedAsmOperand &Op2) const { 462 assert(Op1.isReg() && Op2.isReg() && "Operands not all regs"); 463 return Op1.getReg() == Op2.getReg(); 464 } 465 466 // Return whether this parser uses assignment statements with equals tokens equalIsAsmAssignment()467 virtual bool equalIsAsmAssignment() { return true; }; 468 // Return whether this start of statement identifier is a label isLabel(AsmToken & Token)469 virtual bool isLabel(AsmToken &Token) { return true; }; 470 // Return whether this parser accept star as start of statement starIsStartOfStatement()471 virtual bool starIsStartOfStatement() { return false; }; 472 applyModifierToExpr(const MCExpr * E,MCSymbolRefExpr::VariantKind,MCContext & Ctx)473 virtual const MCExpr *applyModifierToExpr(const MCExpr *E, 474 MCSymbolRefExpr::VariantKind, 475 MCContext &Ctx) { 476 return nullptr; 477 } 478 onLabelParsed(MCSymbol * Symbol)479 virtual void onLabelParsed(MCSymbol *Symbol) {} 480 481 /// Ensure that all previously parsed instructions have been emitted to the 482 /// output streamer, if the target does not emit them immediately. flushPendingInstructions(MCStreamer & Out)483 virtual void flushPendingInstructions(MCStreamer &Out) {} 484 createTargetUnaryExpr(const MCExpr * E,AsmToken::TokenKind OperatorToken,MCContext & Ctx)485 virtual const MCExpr *createTargetUnaryExpr(const MCExpr *E, 486 AsmToken::TokenKind OperatorToken, 487 MCContext &Ctx) { 488 return nullptr; 489 } 490 }; 491 492 } // end namespace llvm 493 494 #endif // LLVM_MC_MCPARSER_MCTARGETASMPARSER_H 495