1 //===-- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface -----*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #ifndef LLVM_MC_MCPARSER_MCASMLEXER_H 11 #define LLVM_MC_MCPARSER_MCASMLEXER_H 12 13 #include "llvm/ADT/APInt.h" 14 #include "llvm/ADT/ArrayRef.h" 15 #include "llvm/ADT/SmallVector.h" 16 #include "llvm/ADT/StringRef.h" 17 #include "llvm/Support/Compiler.h" 18 #include "llvm/Support/DataTypes.h" 19 #include "llvm/Support/SMLoc.h" 20 #include <utility> 21 22 namespace llvm { 23 24 /// Target independent representation for an assembler token. 25 class AsmToken { 26 public: 27 enum TokenKind { 28 // Markers 29 Eof, Error, 30 31 // String values. 32 Identifier, 33 String, 34 35 // Integer values. 36 Integer, 37 BigNum, // larger than 64 bits 38 39 // Real values. 40 Real, 41 42 // Comments 43 Comment, 44 HashDirective, 45 // No-value. 46 EndOfStatement, 47 Colon, 48 Space, 49 Plus, Minus, Tilde, 50 Slash, // '/' 51 BackSlash, // '\' 52 LParen, RParen, LBrac, RBrac, LCurly, RCurly, 53 Star, Dot, Comma, Dollar, Equal, EqualEqual, 54 55 Pipe, PipePipe, Caret, 56 Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash, 57 Less, LessEqual, LessLess, LessGreater, 58 Greater, GreaterEqual, GreaterGreater, At 59 }; 60 61 private: 62 TokenKind Kind; 63 64 /// A reference to the entire token contents; this is always a pointer into 65 /// a memory buffer owned by the source manager. 66 StringRef Str; 67 68 APInt IntVal; 69 70 public: AsmToken()71 AsmToken() {} AsmToken(TokenKind Kind,StringRef Str,APInt IntVal)72 AsmToken(TokenKind Kind, StringRef Str, APInt IntVal) 73 : Kind(Kind), Str(Str), IntVal(std::move(IntVal)) {} 74 AsmToken(TokenKind Kind, StringRef Str, int64_t IntVal = 0) Kind(Kind)75 : Kind(Kind), Str(Str), IntVal(64, IntVal, true) {} 76 getKind()77 TokenKind getKind() const { return Kind; } is(TokenKind K)78 bool is(TokenKind K) const { return Kind == K; } isNot(TokenKind K)79 bool isNot(TokenKind K) const { return Kind != K; } 80 81 SMLoc getLoc() const; 82 SMLoc getEndLoc() const; 83 SMRange getLocRange() const; 84 85 /// Get the contents of a string token (without quotes). getStringContents()86 StringRef getStringContents() const { 87 assert(Kind == String && "This token isn't a string!"); 88 return Str.slice(1, Str.size() - 1); 89 } 90 91 /// Get the identifier string for the current token, which should be an 92 /// identifier or a string. This gets the portion of the string which should 93 /// be used as the identifier, e.g., it does not include the quotes on 94 /// strings. getIdentifier()95 StringRef getIdentifier() const { 96 if (Kind == Identifier) 97 return getString(); 98 return getStringContents(); 99 } 100 101 /// Get the string for the current token, this includes all characters (for 102 /// example, the quotes on strings) in the token. 103 /// 104 /// The returned StringRef points into the source manager's memory buffer, and 105 /// is safe to store across calls to Lex(). getString()106 StringRef getString() const { return Str; } 107 108 // FIXME: Don't compute this in advance, it makes every token larger, and is 109 // also not generally what we want (it is nicer for recovery etc. to lex 123br 110 // as a single token, then diagnose as an invalid number). getIntVal()111 int64_t getIntVal() const { 112 assert(Kind == Integer && "This token isn't an integer!"); 113 return IntVal.getZExtValue(); 114 } 115 getAPIntVal()116 APInt getAPIntVal() const { 117 assert((Kind == Integer || Kind == BigNum) && 118 "This token isn't an integer!"); 119 return IntVal; 120 } 121 }; 122 123 /// Generic assembler lexer interface, for use by target specific assembly 124 /// lexers. 125 class MCAsmLexer { 126 /// The current token, stored in the base class for faster access. 127 SmallVector<AsmToken, 1> CurTok; 128 129 /// The location and description of the current error 130 SMLoc ErrLoc; 131 std::string Err; 132 133 MCAsmLexer(const MCAsmLexer &) = delete; 134 void operator=(const MCAsmLexer &) = delete; 135 protected: // Can only create subclasses. 136 const char *TokStart; 137 bool SkipSpace; 138 bool AllowAtInIdentifier; 139 140 MCAsmLexer(); 141 142 virtual AsmToken LexToken() = 0; 143 SetError(SMLoc errLoc,const std::string & err)144 void SetError(SMLoc errLoc, const std::string &err) { 145 ErrLoc = errLoc; 146 Err = err; 147 } 148 149 public: 150 virtual ~MCAsmLexer(); 151 152 /// Consume the next token from the input stream and return it. 153 /// 154 /// The lexer will continuosly return the end-of-file token once the end of 155 /// the main input file has been reached. Lex()156 const AsmToken &Lex() { 157 assert(!CurTok.empty()); 158 CurTok.erase(CurTok.begin()); 159 // LexToken may generate multiple tokens via UnLex but will always return 160 // the first one. Place returned value at head of CurTok vector. 161 if (CurTok.empty()) { 162 AsmToken T = LexToken(); 163 CurTok.insert(CurTok.begin(), T); 164 } 165 return CurTok.front(); 166 } 167 UnLex(AsmToken const & Token)168 void UnLex(AsmToken const &Token) { 169 CurTok.insert(CurTok.begin(), Token); 170 } 171 172 virtual StringRef LexUntilEndOfStatement() = 0; 173 174 /// Get the current source location. 175 SMLoc getLoc() const; 176 177 /// Get the current (last) lexed token. getTok()178 const AsmToken &getTok() const { 179 return CurTok[0]; 180 } 181 182 /// Look ahead at the next token to be lexed. 183 const AsmToken peekTok(bool ShouldSkipSpace = true) { 184 AsmToken Tok; 185 186 MutableArrayRef<AsmToken> Buf(Tok); 187 size_t ReadCount = peekTokens(Buf, ShouldSkipSpace); 188 189 assert(ReadCount == 1); 190 (void)ReadCount; 191 192 return Tok; 193 } 194 195 /// Look ahead an arbitrary number of tokens. 196 virtual size_t peekTokens(MutableArrayRef<AsmToken> Buf, 197 bool ShouldSkipSpace = true) = 0; 198 199 /// Get the current error location getErrLoc()200 SMLoc getErrLoc() { 201 return ErrLoc; 202 } 203 204 /// Get the current error string getErr()205 const std::string &getErr() { 206 return Err; 207 } 208 209 /// Get the kind of current token. getKind()210 AsmToken::TokenKind getKind() const { return getTok().getKind(); } 211 212 /// Check if the current token has kind \p K. is(AsmToken::TokenKind K)213 bool is(AsmToken::TokenKind K) const { return getTok().is(K); } 214 215 /// Check if the current token has kind \p K. isNot(AsmToken::TokenKind K)216 bool isNot(AsmToken::TokenKind K) const { return getTok().isNot(K); } 217 218 /// Set whether spaces should be ignored by the lexer setSkipSpace(bool val)219 void setSkipSpace(bool val) { SkipSpace = val; } 220 getAllowAtInIdentifier()221 bool getAllowAtInIdentifier() { return AllowAtInIdentifier; } setAllowAtInIdentifier(bool v)222 void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; } 223 }; 224 225 } // End llvm namespace 226 227 #endif 228