1 //===--- Parser.h - Matcher expression parser -----*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief Simple matcher expression parser.
12 ///
13 /// The parser understands matcher expressions of the form:
14 ///   MatcherName(Arg0, Arg1, ..., ArgN)
/// as well as simple literals such as strings and unsigned integers.
16 /// The parser does not know how to process the matchers. It delegates this task
17 /// to a Sema object received as an argument.
18 ///
19 /// \code
20 /// Grammar for the expressions supported:
21 /// <Expression>        := <Literal> | <NamedValue> | <MatcherExpression>
22 /// <Literal>           := <StringLiteral> | <Unsigned>
23 /// <StringLiteral>     := "quoted string"
24 /// <Unsigned>          := [0-9]+
25 /// <NamedValue>        := <Identifier>
26 /// <MatcherExpression> := <Identifier>(<ArgumentList>) |
27 ///                        <Identifier>(<ArgumentList>).bind(<StringLiteral>)
28 /// <Identifier>        := [a-zA-Z]+
29 /// <ArgumentList>      := <Expression> | <Expression>,<ArgumentList>
30 /// \endcode
31 ///
32 //===----------------------------------------------------------------------===//
33 
34 #ifndef LLVM_CLANG_ASTMATCHERS_DYNAMIC_PARSER_H
35 #define LLVM_CLANG_ASTMATCHERS_DYNAMIC_PARSER_H
36 
37 #include "clang/ASTMatchers/Dynamic/Diagnostics.h"
38 #include "clang/ASTMatchers/Dynamic/Registry.h"
39 #include "clang/ASTMatchers/Dynamic/VariantValue.h"
40 #include "clang/Basic/LLVM.h"
41 #include "llvm/ADT/ArrayRef.h"
42 #include "llvm/ADT/Optional.h"
43 #include "llvm/ADT/StringRef.h"
44 
45 namespace clang {
46 namespace ast_matchers {
47 namespace dynamic {
48 
49 /// \brief Matcher expression parser.
50 class Parser {
51 public:
52   /// \brief Interface to connect the parser with the registry and more.
53   ///
54   /// The parser uses the Sema instance passed into
55   /// parseMatcherExpression() to handle all matcher tokens. The simplest
56   /// processor implementation would simply call into the registry to create
57   /// the matchers.
58   /// However, a more complex processor might decide to intercept the matcher
59   /// creation and do some extra work. For example, it could apply some
60   /// transformation to the matcher by adding some id() nodes, or could detect
61   /// specific matcher nodes for more efficient lookup.
62   class Sema {
63   public:
64     virtual ~Sema();
65 
66     /// \brief Process a matcher expression.
67     ///
68     /// All the arguments passed here have already been processed.
69     ///
70     /// \param Ctor A matcher constructor looked up by lookupMatcherCtor.
71     ///
72     /// \param NameRange The location of the name in the matcher source.
73     ///   Useful for error reporting.
74     ///
75     /// \param BindID The ID to use to bind the matcher, or a null \c StringRef
76     ///   if no ID is specified.
77     ///
78     /// \param Args The argument list for the matcher.
79     ///
80     /// \return The matcher objects constructed by the processor, or a null
81     ///   matcher if an error occurred. In that case, \c Error will contain a
82     ///   description of the error.
83     virtual VariantMatcher actOnMatcherExpression(MatcherCtor Ctor,
84                                                   SourceRange NameRange,
85                                                   StringRef BindID,
86                                                   ArrayRef<ParserValue> Args,
87                                                   Diagnostics *Error) = 0;
88 
89     /// \brief Look up a matcher by name.
90     ///
91     /// \param MatcherName The matcher name found by the parser.
92     ///
93     /// \return The matcher constructor, or Optional<MatcherCtor>() if not
94     /// found.
95     virtual llvm::Optional<MatcherCtor>
96     lookupMatcherCtor(StringRef MatcherName) = 0;
97 
98     /// \brief Compute the list of completion types for \p Context.
99     ///
100     /// Each element of \p Context represents a matcher invocation, going from
101     /// outermost to innermost. Elements are pairs consisting of a reference to
102     /// the matcher constructor and the index of the next element in the
103     /// argument list of that matcher (or for the last element, the index of
104     /// the completion point in the argument list). An empty list requests
105     /// completion for the root matcher.
106     virtual std::vector<ArgKind> getAcceptedCompletionTypes(
107         llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context);
108 
109     /// \brief Compute the list of completions that match any of
110     /// \p AcceptedTypes.
111     ///
112     /// \param AcceptedTypes All types accepted for this completion.
113     ///
114     /// \return All completions for the specified types.
115     /// Completions should be valid when used in \c lookupMatcherCtor().
116     /// The matcher constructed from the return of \c lookupMatcherCtor()
117     /// should be convertible to some type in \p AcceptedTypes.
118     virtual std::vector<MatcherCompletion>
119     getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes);
120   };
121 
122   /// \brief Sema implementation that uses the matcher registry to process the
123   ///   tokens.
124   class RegistrySema : public Parser::Sema {
125    public:
126      ~RegistrySema() override;
127 
128     llvm::Optional<MatcherCtor>
129     lookupMatcherCtor(StringRef MatcherName) override;
130 
131     VariantMatcher actOnMatcherExpression(MatcherCtor Ctor,
132                                           SourceRange NameRange,
133                                           StringRef BindID,
134                                           ArrayRef<ParserValue> Args,
135                                           Diagnostics *Error) override;
136 
137     std::vector<ArgKind> getAcceptedCompletionTypes(
138         llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context) override;
139 
140     std::vector<MatcherCompletion>
141     getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes) override;
142   };
143 
144   typedef llvm::StringMap<VariantValue> NamedValueMap;
145 
146   /// \brief Parse a matcher expression.
147   ///
148   /// \param MatcherCode The matcher expression to parse.
149   ///
150   /// \param S The Sema instance that will help the parser
151   ///   construct the matchers. If null, it uses the default registry.
152   ///
153   /// \param NamedValues A map of precomputed named values.  This provides
154   ///   the dictionary for the <NamedValue> rule of the grammar.
155   ///   If null, it is ignored.
156   ///
157   /// \return The matcher object constructed by the processor, or an empty
158   ///   Optional if an error occurred. In that case, \c Error will contain a
159   ///   description of the error.
160   ///   The caller takes ownership of the DynTypedMatcher object returned.
161   static llvm::Optional<DynTypedMatcher>
162   parseMatcherExpression(StringRef MatcherCode, Sema *S,
163                          const NamedValueMap *NamedValues,
164                          Diagnostics *Error);
165   static llvm::Optional<DynTypedMatcher>
parseMatcherExpression(StringRef MatcherCode,Sema * S,Diagnostics * Error)166   parseMatcherExpression(StringRef MatcherCode, Sema *S,
167                          Diagnostics *Error) {
168     return parseMatcherExpression(MatcherCode, S, nullptr, Error);
169   }
170   static llvm::Optional<DynTypedMatcher>
parseMatcherExpression(StringRef MatcherCode,Diagnostics * Error)171   parseMatcherExpression(StringRef MatcherCode, Diagnostics *Error) {
172     return parseMatcherExpression(MatcherCode, nullptr, Error);
173   }
174 
175   /// \brief Parse an expression.
176   ///
177   /// Parses any expression supported by this parser. In general, the
178   /// \c parseMatcherExpression function is a better approach to get a matcher
179   /// object.
180   ///
181   /// \param S The Sema instance that will help the parser
182   ///   construct the matchers. If null, it uses the default registry.
183   ///
184   /// \param NamedValues A map of precomputed named values.  This provides
185   ///   the dictionary for the <NamedValue> rule of the grammar.
186   ///   If null, it is ignored.
187   static bool parseExpression(StringRef Code, Sema *S,
188                               const NamedValueMap *NamedValues,
189                               VariantValue *Value, Diagnostics *Error);
parseExpression(StringRef Code,Sema * S,VariantValue * Value,Diagnostics * Error)190   static bool parseExpression(StringRef Code, Sema *S,
191                               VariantValue *Value, Diagnostics *Error) {
192     return parseExpression(Code, S, nullptr, Value, Error);
193   }
parseExpression(StringRef Code,VariantValue * Value,Diagnostics * Error)194   static bool parseExpression(StringRef Code, VariantValue *Value,
195                               Diagnostics *Error) {
196     return parseExpression(Code, nullptr, Value, Error);
197   }
198 
199   /// \brief Complete an expression at the given offset.
200   ///
201   /// \param S The Sema instance that will help the parser
202   ///   construct the matchers. If null, it uses the default registry.
203   ///
204   /// \param NamedValues A map of precomputed named values.  This provides
205   ///   the dictionary for the <NamedValue> rule of the grammar.
206   ///   If null, it is ignored.
207   ///
208   /// \return The list of completions, which may be empty if there are no
209   /// available completions or if an error occurred.
210   static std::vector<MatcherCompletion>
211   completeExpression(StringRef Code, unsigned CompletionOffset, Sema *S,
212                      const NamedValueMap *NamedValues);
213   static std::vector<MatcherCompletion>
completeExpression(StringRef Code,unsigned CompletionOffset,Sema * S)214   completeExpression(StringRef Code, unsigned CompletionOffset, Sema *S) {
215     return completeExpression(Code, CompletionOffset, S, nullptr);
216   }
217   static std::vector<MatcherCompletion>
completeExpression(StringRef Code,unsigned CompletionOffset)218   completeExpression(StringRef Code, unsigned CompletionOffset) {
219     return completeExpression(Code, CompletionOffset, nullptr);
220   }
221 
222 private:
223   class CodeTokenizer;
224   struct ScopedContextEntry;
225   struct TokenInfo;
226 
227   Parser(CodeTokenizer *Tokenizer, Sema *S,
228          const NamedValueMap *NamedValues,
229          Diagnostics *Error);
230 
231   bool parseExpressionImpl(VariantValue *Value);
232   bool parseMatcherExpressionImpl(const TokenInfo &NameToken,
233                                   VariantValue *Value);
234   bool parseIdentifierPrefixImpl(VariantValue *Value);
235 
236   void addCompletion(const TokenInfo &CompToken,
237                      const MatcherCompletion &Completion);
238   void addExpressionCompletions();
239 
240   std::vector<MatcherCompletion>
241   getNamedValueCompletions(ArrayRef<ArgKind> AcceptedTypes);
242 
243   CodeTokenizer *const Tokenizer;
244   Sema *const S;
245   const NamedValueMap *const NamedValues;
246   Diagnostics *const Error;
247 
248   typedef std::vector<std::pair<MatcherCtor, unsigned> > ContextStackTy;
249   ContextStackTy ContextStack;
250   std::vector<MatcherCompletion> Completions;
251 };
252 
253 }  // namespace dynamic
254 }  // namespace ast_matchers
255 }  // namespace clang
256 
#endif  // LLVM_CLANG_ASTMATCHERS_DYNAMIC_PARSER_H
258