1 //===-- llvm-mcmarkup.cpp - Parse the MC assembly markup tags -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Example simple parser implementation for the MC assembly markup language.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "llvm/Support/CommandLine.h"
15 #include "llvm/Support/Format.h"
16 #include "llvm/Support/ManagedStatic.h"
17 #include "llvm/Support/MemoryBuffer.h"
18 #include "llvm/Support/PrettyStackTrace.h"
19 #include "llvm/Support/Signals.h"
20 #include "llvm/Support/SourceMgr.h"
21 #include "llvm/Support/raw_ostream.h"
22 #include <system_error>
23 using namespace llvm;
24
// Positional command-line arguments naming the inputs to process. Zero or
// more may be given; main() substitutes "-" when the list is empty.
static cl::list<std::string>
InputFilenames(cl::Positional, cl::desc("<input files>"),
               cl::ZeroOrMore);
// -dump-tags: print each tag (name and modifiers) on its own line instead of
// echoing the input text.
static cl::opt<bool>
DumpTags("dump-tags", cl::desc("List all tags encountered in input"));

// Name the tool was invoked as (assigned from argv[0] in main()); used as the
// prefix of error messages.
static StringRef ToolName;
32
33 /// Trivial lexer for the markup parser. Input is always handled a character
34 /// at a time. The lexer just encapsulates EOF and lookahead handling.
35 class MarkupLexer {
36 StringRef::const_iterator Start;
37 StringRef::const_iterator CurPtr;
38 StringRef::const_iterator End;
39 public:
MarkupLexer(StringRef Source)40 MarkupLexer(StringRef Source)
41 : Start(Source.begin()), CurPtr(Source.begin()), End(Source.end()) {}
42 // When processing non-markup, input is consumed a character at a time.
isEOF()43 bool isEOF() { return CurPtr == End; }
getNextChar()44 int getNextChar() {
45 if (CurPtr == End) return EOF;
46 return *CurPtr++;
47 }
peekNextChar()48 int peekNextChar() {
49 if (CurPtr == End) return EOF;
50 return *CurPtr;
51 }
getPosition() const52 StringRef::const_iterator getPosition() const { return CurPtr; }
53 };
54
55 /// A markup tag is a name and a (usually empty) list of modifiers.
56 class MarkupTag {
57 StringRef Name;
58 StringRef Modifiers;
59 SMLoc StartLoc;
60 public:
MarkupTag(StringRef n,StringRef m,SMLoc Loc)61 MarkupTag(StringRef n, StringRef m, SMLoc Loc)
62 : Name(n), Modifiers(m), StartLoc(Loc) {}
getName() const63 StringRef getName() const { return Name; }
getModifiers() const64 StringRef getModifiers() const { return Modifiers; }
getLoc() const65 SMLoc getLoc() const { return StartLoc; }
66 };
67
68 /// A simple parser implementation for creating MarkupTags from input text.
69 class MarkupParser {
70 MarkupLexer &Lex;
71 SourceMgr &SM;
72 public:
MarkupParser(MarkupLexer & lex,SourceMgr & SrcMgr)73 MarkupParser(MarkupLexer &lex, SourceMgr &SrcMgr) : Lex(lex), SM(SrcMgr) {}
74 /// Create a MarkupTag from the current position in the MarkupLexer.
75 /// The parseTag() method should be called when the lexer has processed
76 /// the opening '<' character. Input will be consumed up to and including
77 /// the ':' which terminates the tag open.
78 MarkupTag parseTag();
79 /// Issue a diagnostic and terminate program execution.
80 void FatalError(SMLoc Loc, StringRef Msg);
81 };
82
FatalError(SMLoc Loc,StringRef Msg)83 void MarkupParser::FatalError(SMLoc Loc, StringRef Msg) {
84 SM.PrintMessage(Loc, SourceMgr::DK_Error, Msg);
85 exit(1);
86 }
87
88 // Example handler for when a tag is recognized.
processStartTag(MarkupTag & Tag)89 static void processStartTag(MarkupTag &Tag) {
90 // If we're just printing the tags, do that, otherwise do some simple
91 // colorization.
92 if (DumpTags) {
93 outs() << Tag.getName();
94 if (Tag.getModifiers().size())
95 outs() << " " << Tag.getModifiers();
96 outs() << "\n";
97 return;
98 }
99
100 if (!outs().has_colors())
101 return;
102 // Color registers as red and immediates as cyan. Those don't have nested
103 // tags, so don't bother keeping a stack of colors to reset to.
104 if (Tag.getName() == "reg")
105 outs().changeColor(raw_ostream::RED);
106 else if (Tag.getName() == "imm")
107 outs().changeColor(raw_ostream::CYAN);
108 }
109
110 // Example handler for when the end of a tag is recognized.
processEndTag(MarkupTag & Tag)111 static void processEndTag(MarkupTag &Tag) {
112 // If we're printing the tags, there's nothing more to do here. Otherwise,
113 // set the color back the normal.
114 if (DumpTags)
115 return;
116 if (!outs().has_colors())
117 return;
118 // Just reset to basic white.
119 outs().changeColor(raw_ostream::WHITE, false);
120 }
121
parseTag()122 MarkupTag MarkupParser::parseTag() {
123 // First off, extract the tag into it's own StringRef so we can look at it
124 // outside of the context of consuming input.
125 StringRef::const_iterator Start = Lex.getPosition();
126 SMLoc Loc = SMLoc::getFromPointer(Start - 1);
127 while(Lex.getNextChar() != ':') {
128 // EOF is an error.
129 if (Lex.isEOF())
130 FatalError(SMLoc::getFromPointer(Start), "unterminated markup tag");
131 }
132 StringRef RawTag(Start, Lex.getPosition() - Start - 1);
133 std::pair<StringRef, StringRef> SplitTag = RawTag.split(' ');
134 return MarkupTag(SplitTag.first, SplitTag.second, Loc);
135 }
136
parseMCMarkup(StringRef Filename)137 static void parseMCMarkup(StringRef Filename) {
138 ErrorOr<std::unique_ptr<MemoryBuffer>> BufferPtr =
139 MemoryBuffer::getFileOrSTDIN(Filename);
140 if (std::error_code EC = BufferPtr.getError()) {
141 errs() << ToolName << ": " << EC.message() << '\n';
142 return;
143 }
144 std::unique_ptr<MemoryBuffer> &Buffer = BufferPtr.get();
145
146 SourceMgr SrcMgr;
147
148 StringRef InputSource = Buffer->getBuffer();
149
150 // Tell SrcMgr about this buffer, which is what the parser will pick up.
151 SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc());
152
153 MarkupLexer Lex(InputSource);
154 MarkupParser Parser(Lex, SrcMgr);
155
156 SmallVector<MarkupTag, 4> TagStack;
157
158 for (int CurChar = Lex.getNextChar();
159 CurChar != EOF;
160 CurChar = Lex.getNextChar()) {
161 switch (CurChar) {
162 case '<': {
163 // A "<<" is output as a literal '<' and does not start a markup tag.
164 if (Lex.peekNextChar() == '<') {
165 (void)Lex.getNextChar();
166 break;
167 }
168 // Parse the markup entry.
169 TagStack.push_back(Parser.parseTag());
170
171 // Do any special handling for the start of a tag.
172 processStartTag(TagStack.back());
173 continue;
174 }
175 case '>': {
176 SMLoc Loc = SMLoc::getFromPointer(Lex.getPosition() - 1);
177 // A ">>" is output as a literal '>' and does not end a markup tag.
178 if (Lex.peekNextChar() == '>') {
179 (void)Lex.getNextChar();
180 break;
181 }
182 // Close out the innermost tag.
183 if (TagStack.empty())
184 Parser.FatalError(Loc, "'>' without matching '<'");
185
186 // Do any special handling for the end of a tag.
187 processEndTag(TagStack.back());
188
189 TagStack.pop_back();
190 continue;
191 }
192 default:
193 break;
194 }
195 // For anything else, just echo the character back out.
196 if (!DumpTags && CurChar != EOF)
197 outs() << (char)CurChar;
198 }
199
200 // If there are any unterminated markup tags, issue diagnostics for them.
201 while (!TagStack.empty()) {
202 MarkupTag &Tag = TagStack.back();
203 SrcMgr.PrintMessage(Tag.getLoc(), SourceMgr::DK_Error,
204 "unterminated markup tag");
205 TagStack.pop_back();
206 }
207 }
208
main(int argc,char ** argv)209 int main(int argc, char **argv) {
210 // Print a stack trace if we signal out.
211 sys::PrintStackTraceOnErrorSignal(argv[0]);
212 PrettyStackTraceProgram X(argc, argv);
213
214 llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
215 cl::ParseCommandLineOptions(argc, argv, "llvm MC markup parser\n");
216
217 ToolName = argv[0];
218
219 // If no input files specified, read from stdin.
220 if (InputFilenames.size() == 0)
221 InputFilenames.push_back("-");
222
223 std::for_each(InputFilenames.begin(), InputFilenames.end(),
224 parseMCMarkup);
225 return 0;
226 }
227