1 //===- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface ------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_MC_MCPARSER_MCASMLEXER_H 10 #define LLVM_MC_MCPARSER_MCASMLEXER_H 11 12 #include "llvm/ADT/ArrayRef.h" 13 #include "llvm/ADT/SmallVector.h" 14 #include "llvm/MC/MCAsmMacro.h" 15 #include <algorithm> 16 #include <cassert> 17 #include <cstddef> 18 #include <cstdint> 19 #include <string> 20 21 namespace llvm { 22 23 /// A callback class which is notified of each comment in an assembly file as 24 /// it is lexed. 25 class AsmCommentConsumer { 26 public: 27 virtual ~AsmCommentConsumer() = default; 28 29 /// Callback function for when a comment is lexed. Loc is the start of the 30 /// comment text (excluding the comment-start marker). CommentText is the text 31 /// of the comment, excluding the comment start and end markers, and the 32 /// newline for single-line comments. 33 virtual void HandleComment(SMLoc Loc, StringRef CommentText) = 0; 34 }; 35 36 37 /// Generic assembler lexer interface, for use by target specific assembly 38 /// lexers. 39 class MCAsmLexer { 40 /// The current token, stored in the base class for faster access. 41 SmallVector<AsmToken, 1> CurTok; 42 43 /// The location and description of the current error 44 SMLoc ErrLoc; 45 std::string Err; 46 47 protected: // Can only create subclasses. 48 const char *TokStart = nullptr; 49 bool SkipSpace = true; 50 bool AllowAtInIdentifier; 51 bool IsAtStartOfStatement = true; 52 bool LexMasmHexFloats = false; 53 bool LexMasmIntegers = false; 54 bool LexMasmStrings = false; 55 bool UseMasmDefaultRadix = false; 56 unsigned DefaultRadix = 10; 57 AsmCommentConsumer *CommentConsumer = nullptr; 58 59 MCAsmLexer(); 60 61 virtual AsmToken LexToken() = 0; 62 SetError(SMLoc errLoc,const std::string & err)63 void SetError(SMLoc errLoc, const std::string &err) { 64 ErrLoc = errLoc; 65 Err = err; 66 } 67 68 public: 69 MCAsmLexer(const MCAsmLexer &) = delete; 70 MCAsmLexer &operator=(const MCAsmLexer &) = delete; 71 virtual ~MCAsmLexer(); 72 73 /// Consume the next token from the input stream and return it. 74 /// 75 /// The lexer will continuously return the end-of-file token once the end of 76 /// the main input file has been reached. Lex()77 const AsmToken &Lex() { 78 assert(!CurTok.empty()); 79 // Mark if we parsing out a EndOfStatement. 80 IsAtStartOfStatement = CurTok.front().getKind() == AsmToken::EndOfStatement; 81 CurTok.erase(CurTok.begin()); 82 // LexToken may generate multiple tokens via UnLex but will always return 83 // the first one. Place returned value at head of CurTok vector. 84 if (CurTok.empty()) { 85 AsmToken T = LexToken(); 86 CurTok.insert(CurTok.begin(), T); 87 } 88 return CurTok.front(); 89 } 90 UnLex(AsmToken const & Token)91 void UnLex(AsmToken const &Token) { 92 IsAtStartOfStatement = false; 93 CurTok.insert(CurTok.begin(), Token); 94 } 95 isAtStartOfStatement()96 bool isAtStartOfStatement() { return IsAtStartOfStatement; } 97 98 virtual StringRef LexUntilEndOfStatement() = 0; 99 100 /// Get the current source location. 101 SMLoc getLoc() const; 102 103 /// Get the current (last) lexed token. getTok()104 const AsmToken &getTok() const { 105 return CurTok[0]; 106 } 107 108 /// Look ahead at the next token to be lexed. 109 const AsmToken peekTok(bool ShouldSkipSpace = true) { 110 AsmToken Tok; 111 112 MutableArrayRef<AsmToken> Buf(Tok); 113 size_t ReadCount = peekTokens(Buf, ShouldSkipSpace); 114 115 assert(ReadCount == 1); 116 (void)ReadCount; 117 118 return Tok; 119 } 120 121 /// Look ahead an arbitrary number of tokens. 122 virtual size_t peekTokens(MutableArrayRef<AsmToken> Buf, 123 bool ShouldSkipSpace = true) = 0; 124 125 /// Get the current error location getErrLoc()126 SMLoc getErrLoc() { 127 return ErrLoc; 128 } 129 130 /// Get the current error string getErr()131 const std::string &getErr() { 132 return Err; 133 } 134 135 /// Get the kind of current token. getKind()136 AsmToken::TokenKind getKind() const { return getTok().getKind(); } 137 138 /// Check if the current token has kind \p K. is(AsmToken::TokenKind K)139 bool is(AsmToken::TokenKind K) const { return getTok().is(K); } 140 141 /// Check if the current token has kind \p K. isNot(AsmToken::TokenKind K)142 bool isNot(AsmToken::TokenKind K) const { return getTok().isNot(K); } 143 144 /// Set whether spaces should be ignored by the lexer setSkipSpace(bool val)145 void setSkipSpace(bool val) { SkipSpace = val; } 146 getAllowAtInIdentifier()147 bool getAllowAtInIdentifier() { return AllowAtInIdentifier; } setAllowAtInIdentifier(bool v)148 void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; } 149 setCommentConsumer(AsmCommentConsumer * CommentConsumer)150 void setCommentConsumer(AsmCommentConsumer *CommentConsumer) { 151 this->CommentConsumer = CommentConsumer; 152 } 153 154 /// Set whether to lex masm-style binary (e.g., 0b1101) and radix-specified 155 /// literals (e.g., 0ABCh [hex], 576t [decimal], 77o [octal], 1101y [binary]). setLexMasmIntegers(bool V)156 void setLexMasmIntegers(bool V) { LexMasmIntegers = V; } 157 158 /// Set whether to use masm-style default-radix integer literals. If disabled, 159 /// assume decimal unless prefixed (e.g., 0x2c [hex], 077 [octal]). useMasmDefaultRadix(bool V)160 void useMasmDefaultRadix(bool V) { UseMasmDefaultRadix = V; } 161 getMasmDefaultRadix()162 unsigned getMasmDefaultRadix() const { return DefaultRadix; } setMasmDefaultRadix(unsigned Radix)163 void setMasmDefaultRadix(unsigned Radix) { DefaultRadix = Radix; } 164 165 /// Set whether to lex masm-style hex float literals, such as 3f800000r. setLexMasmHexFloats(bool V)166 void setLexMasmHexFloats(bool V) { LexMasmHexFloats = V; } 167 168 /// Set whether to lex masm-style string literals, such as 'Can''t find file' 169 /// and "This ""value"" not found". setLexMasmStrings(bool V)170 void setLexMasmStrings(bool V) { LexMasmStrings = V; } 171 }; 172 173 } // end namespace llvm 174 175 #endif // LLVM_MC_MCPARSER_MCASMLEXER_H 176