1 //===-- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface -----*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #ifndef LLVM_MC_MCPARSER_MCASMLEXER_H
11 #define LLVM_MC_MCPARSER_MCASMLEXER_H
12 
13 #include "llvm/ADT/APInt.h"
14 #include "llvm/ADT/ArrayRef.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/Support/Compiler.h"
18 #include "llvm/Support/DataTypes.h"
19 #include "llvm/Support/SMLoc.h"
20 #include <utility>
21 
22 namespace llvm {
23 
24 /// Target independent representation for an assembler token.
25 class AsmToken {
26 public:
27   enum TokenKind {
28     // Markers
29     Eof, Error,
30 
31     // String values.
32     Identifier,
33     String,
34 
35     // Integer values.
36     Integer,
37     BigNum, // larger than 64 bits
38 
39     // Real values.
40     Real,
41 
42     // Comments
43     Comment,
44     HashDirective,
45     // No-value.
46     EndOfStatement,
47     Colon,
48     Space,
49     Plus, Minus, Tilde,
50     Slash,     // '/'
51     BackSlash, // '\'
52     LParen, RParen, LBrac, RBrac, LCurly, RCurly,
53     Star, Dot, Comma, Dollar, Equal, EqualEqual,
54 
55     Pipe, PipePipe, Caret,
56     Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash,
57     Less, LessEqual, LessLess, LessGreater,
58     Greater, GreaterEqual, GreaterGreater, At
59   };
60 
61 private:
62   TokenKind Kind;
63 
64   /// A reference to the entire token contents; this is always a pointer into
65   /// a memory buffer owned by the source manager.
66   StringRef Str;
67 
68   APInt IntVal;
69 
70 public:
AsmToken()71   AsmToken() {}
AsmToken(TokenKind Kind,StringRef Str,APInt IntVal)72   AsmToken(TokenKind Kind, StringRef Str, APInt IntVal)
73       : Kind(Kind), Str(Str), IntVal(std::move(IntVal)) {}
74   AsmToken(TokenKind Kind, StringRef Str, int64_t IntVal = 0)
Kind(Kind)75       : Kind(Kind), Str(Str), IntVal(64, IntVal, true) {}
76 
getKind()77   TokenKind getKind() const { return Kind; }
is(TokenKind K)78   bool is(TokenKind K) const { return Kind == K; }
isNot(TokenKind K)79   bool isNot(TokenKind K) const { return Kind != K; }
80 
81   SMLoc getLoc() const;
82   SMLoc getEndLoc() const;
83   SMRange getLocRange() const;
84 
85   /// Get the contents of a string token (without quotes).
getStringContents()86   StringRef getStringContents() const {
87     assert(Kind == String && "This token isn't a string!");
88     return Str.slice(1, Str.size() - 1);
89   }
90 
91   /// Get the identifier string for the current token, which should be an
92   /// identifier or a string. This gets the portion of the string which should
93   /// be used as the identifier, e.g., it does not include the quotes on
94   /// strings.
getIdentifier()95   StringRef getIdentifier() const {
96     if (Kind == Identifier)
97       return getString();
98     return getStringContents();
99   }
100 
101   /// Get the string for the current token, this includes all characters (for
102   /// example, the quotes on strings) in the token.
103   ///
104   /// The returned StringRef points into the source manager's memory buffer, and
105   /// is safe to store across calls to Lex().
getString()106   StringRef getString() const { return Str; }
107 
108   // FIXME: Don't compute this in advance, it makes every token larger, and is
109   // also not generally what we want (it is nicer for recovery etc. to lex 123br
110   // as a single token, then diagnose as an invalid number).
getIntVal()111   int64_t getIntVal() const {
112     assert(Kind == Integer && "This token isn't an integer!");
113     return IntVal.getZExtValue();
114   }
115 
getAPIntVal()116   APInt getAPIntVal() const {
117     assert((Kind == Integer || Kind == BigNum) &&
118            "This token isn't an integer!");
119     return IntVal;
120   }
121 };
122 
123 /// Generic assembler lexer interface, for use by target specific assembly
124 /// lexers.
125 class MCAsmLexer {
126   /// The current token, stored in the base class for faster access.
127   SmallVector<AsmToken, 1> CurTok;
128 
129   /// The location and description of the current error
130   SMLoc ErrLoc;
131   std::string Err;
132 
133   MCAsmLexer(const MCAsmLexer &) = delete;
134   void operator=(const MCAsmLexer &) = delete;
135 protected: // Can only create subclasses.
136   const char *TokStart;
137   bool SkipSpace;
138   bool AllowAtInIdentifier;
139 
140   MCAsmLexer();
141 
142   virtual AsmToken LexToken() = 0;
143 
SetError(SMLoc errLoc,const std::string & err)144   void SetError(SMLoc errLoc, const std::string &err) {
145     ErrLoc = errLoc;
146     Err = err;
147   }
148 
149 public:
150   virtual ~MCAsmLexer();
151 
152   /// Consume the next token from the input stream and return it.
153   ///
154   /// The lexer will continuosly return the end-of-file token once the end of
155   /// the main input file has been reached.
Lex()156   const AsmToken &Lex() {
157     assert(!CurTok.empty());
158     CurTok.erase(CurTok.begin());
159     // LexToken may generate multiple tokens via UnLex but will always return
160     // the first one. Place returned value at head of CurTok vector.
161     if (CurTok.empty()) {
162       AsmToken T = LexToken();
163       CurTok.insert(CurTok.begin(), T);
164     }
165     return CurTok.front();
166   }
167 
UnLex(AsmToken const & Token)168   void UnLex(AsmToken const &Token) {
169     CurTok.insert(CurTok.begin(), Token);
170   }
171 
172   virtual StringRef LexUntilEndOfStatement() = 0;
173 
174   /// Get the current source location.
175   SMLoc getLoc() const;
176 
177   /// Get the current (last) lexed token.
getTok()178   const AsmToken &getTok() const {
179     return CurTok[0];
180   }
181 
182   /// Look ahead at the next token to be lexed.
183   const AsmToken peekTok(bool ShouldSkipSpace = true) {
184     AsmToken Tok;
185 
186     MutableArrayRef<AsmToken> Buf(Tok);
187     size_t ReadCount = peekTokens(Buf, ShouldSkipSpace);
188 
189     assert(ReadCount == 1);
190     (void)ReadCount;
191 
192     return Tok;
193   }
194 
195   /// Look ahead an arbitrary number of tokens.
196   virtual size_t peekTokens(MutableArrayRef<AsmToken> Buf,
197                             bool ShouldSkipSpace = true) = 0;
198 
199   /// Get the current error location
getErrLoc()200   SMLoc getErrLoc() {
201     return ErrLoc;
202   }
203 
204   /// Get the current error string
getErr()205   const std::string &getErr() {
206     return Err;
207   }
208 
209   /// Get the kind of current token.
getKind()210   AsmToken::TokenKind getKind() const { return getTok().getKind(); }
211 
212   /// Check if the current token has kind \p K.
is(AsmToken::TokenKind K)213   bool is(AsmToken::TokenKind K) const { return getTok().is(K); }
214 
215   /// Check if the current token has kind \p K.
isNot(AsmToken::TokenKind K)216   bool isNot(AsmToken::TokenKind K) const { return getTok().isNot(K); }
217 
218   /// Set whether spaces should be ignored by the lexer
setSkipSpace(bool val)219   void setSkipSpace(bool val) { SkipSpace = val; }
220 
getAllowAtInIdentifier()221   bool getAllowAtInIdentifier() { return AllowAtInIdentifier; }
setAllowAtInIdentifier(bool v)222   void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; }
223 };
224 
225 } // End llvm namespace
226 
227 #endif
228