1 | //===- Parser.h - Matcher expression parser ---------------------*- C++ -*-===// |
---|---|
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file |
10 | /// Simple matcher expression parser. |
11 | /// |
12 | /// The parser understands matcher expressions of the form: |
13 | /// MatcherName(Arg0, Arg1, ..., ArgN) |
14 | /// as well as simple types like strings. |
15 | /// The parser does not know how to process the matchers. It delegates this task |
16 | /// to a Sema object received as an argument. |
17 | /// |
18 | /// \code |
19 | /// Grammar for the expressions supported: |
20 | /// <Expression> := <Literal> | <NamedValue> | <MatcherExpression> |
21 | /// <Literal> := <StringLiteral> | <Boolean> | <Double> | <Unsigned> |
22 | /// <StringLiteral> := "quoted string" |
23 | /// <Boolean> := true | false |
24 | /// <Double> := [0-9]+.[0-9]* | [0-9]+.[0-9]*[eE][-+]?[0-9]+ |
25 | /// <Unsigned> := [0-9]+ |
26 | /// <NamedValue> := <Identifier> |
27 | /// <MatcherExpression> := <Identifier>(<ArgumentList>) | |
28 | /// <Identifier>(<ArgumentList>).bind(<StringLiteral>) |
29 | /// <Identifier> := [a-zA-Z]+ |
30 | /// <ArgumentList> := <Expression> | <Expression>,<ArgumentList> |
31 | /// \endcode |
32 | // |
33 | //===----------------------------------------------------------------------===// |
34 | |
35 | #ifndef LLVM_CLANG_ASTMATCHERS_DYNAMIC_PARSER_H |
36 | #define LLVM_CLANG_ASTMATCHERS_DYNAMIC_PARSER_H |
37 | |
38 | #include "clang/ASTMatchers/ASTMatchersInternal.h" |
39 | #include "clang/ASTMatchers/Dynamic/Registry.h" |
40 | #include "clang/ASTMatchers/Dynamic/VariantValue.h" |
41 | #include "llvm/ADT/ArrayRef.h" |
42 | #include "llvm/ADT/Optional.h" |
43 | #include "llvm/ADT/StringMap.h" |
44 | #include "llvm/ADT/StringRef.h" |
45 | #include <utility> |
46 | #include <vector> |
47 | |
48 | namespace clang { |
49 | namespace ast_matchers { |
50 | namespace dynamic { |
51 | |
52 | class Diagnostics; |
53 | |
54 | /// Matcher expression parser. |
55 | class Parser { |
56 | public: |
57 | /// Abstract interface that connects the parser with the registry and more. |
58 | /// |
59 | /// The parser uses the Sema instance passed into |
60 | /// parseMatcherExpression() to handle all matcher tokens. The simplest |
61 | /// processor implementation would simply call into the registry to create |
62 | /// the matchers. |
63 | /// However, a more complex processor might decide to intercept the matcher |
64 | /// creation and do some extra work. For example, it could apply some |
65 | /// transformation to the matcher by adding some id() nodes, or could detect |
66 | /// specific matcher nodes for more efficient lookup. |
67 | class Sema { |
68 | public: |
69 | virtual ~Sema(); |
70 | |
71 | /// Process a matcher expression. |
72 | /// |
73 | /// All the arguments passed here have already been processed. |
74 | /// |
75 | /// \param Ctor A matcher constructor looked up by \c lookupMatcherCtor(). |
76 | /// |
77 | /// \param NameRange The location of the name in the matcher source. |
78 | /// Useful for error reporting. |
79 | /// |
80 | /// \param BindID The ID to use to bind the matcher, or a null \c StringRef |
81 | /// if no ID is specified. |
82 | /// |
83 | /// \param Args The argument list for the matcher. |
84 | /// |
85 | /// \return The matcher objects constructed by the processor, or a null |
86 | /// matcher if an error occurred. In that case, \c Error will contain a |
87 | /// description of the error. |
88 | virtual VariantMatcher actOnMatcherExpression(MatcherCtor Ctor, |
89 | SourceRange NameRange, |
90 | StringRef BindID, |
91 | ArrayRef<ParserValue> Args, |
92 | Diagnostics *Error) = 0; |
93 | |
94 | /// Look up a matcher constructor by name. |
95 | /// |
96 | /// \param MatcherName The matcher name found by the parser. |
97 | /// |
98 | /// \return The matcher constructor, or an empty Optional<MatcherCtor> if |
99 | /// no matcher with that name is found. |
100 | virtual llvm::Optional<MatcherCtor> |
101 | lookupMatcherCtor(StringRef MatcherName) = 0; |
102 | |
103 | /// Compute the list of completion types for \p Context. |
104 | /// |
105 | /// Each element of \p Context represents a matcher invocation, going from |
106 | /// outermost to innermost. Elements are pairs consisting of a reference to |
107 | /// the matcher constructor and the index of the next element in the |
108 | /// argument list of that matcher (or for the last element, the index of |
109 | /// the completion point in the argument list). An empty list requests |
110 | /// completion for the root matcher. Not pure virtual: overriding is optional. |
111 | virtual std::vector<ArgKind> getAcceptedCompletionTypes( |
112 | llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context); |
113 | |
114 | /// Compute the list of completions that match any of |
115 | /// \p AcceptedTypes. Not pure virtual: overriding is optional. |
116 | /// |
117 | /// \param AcceptedTypes All types accepted for this completion. |
118 | /// |
119 | /// \return All completions for the specified types. |
120 | /// Completions should be valid when used in \c lookupMatcherCtor(). |
121 | /// The matcher constructed from the return of \c lookupMatcherCtor() |
122 | /// should be convertible to some type in \p AcceptedTypes. |
123 | virtual std::vector<MatcherCompletion> |
124 | getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes); |
125 | }; |
126 | |
127 | /// Sema implementation that uses the matcher registry to process the |
128 | /// tokens. It overrides every virtual member function of Parser::Sema. |
129 | class RegistrySema : public Parser::Sema { |
130 | public: |
131 | ~RegistrySema() override; |
132 | |
133 | llvm::Optional<MatcherCtor> |
134 | lookupMatcherCtor(StringRef MatcherName) override; |
135 | |
136 | VariantMatcher actOnMatcherExpression(MatcherCtor Ctor, |
137 | SourceRange NameRange, |
138 | StringRef BindID, |
139 | ArrayRef<ParserValue> Args, |
140 | Diagnostics *Error) override; |
141 | |
142 | std::vector<ArgKind> getAcceptedCompletionTypes( |
143 | llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context) override; |
144 | |
145 | std::vector<MatcherCompletion> |
146 | getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes) override; |
147 | }; |
148 | |
149 | using NamedValueMap = llvm::StringMap<VariantValue>; /**< Maps a name to its precomputed VariantValue. */ |
150 | |
151 | /// Parse a matcher expression. |
152 | /// |
153 | /// \param MatcherCode The matcher expression to parse. |
154 | /// |
155 | /// \param S The Sema instance that will help the parser construct the matchers. |
156 | /// If null, it uses the default registry. Overloads without \p S pass null. |
157 | /// |
158 | /// \param NamedValues A map of precomputed named values. This provides |
159 | /// the dictionary for the <NamedValue> rule of the grammar. |
160 | /// If null, it is ignored. Overloads without this parameter pass null. |
161 | /// |
162 | /// \return The matcher object constructed by the processor, or an empty |
163 | /// Optional if an error occurred. In that case, \c Error will contain a |
164 | /// description of the error. |
165 | /// The DynTypedMatcher is returned by value inside the Optional. |
166 | static llvm::Optional<DynTypedMatcher> |
167 | parseMatcherExpression(StringRef MatcherCode, Sema *S, |
168 | const NamedValueMap *NamedValues, |
169 | Diagnostics *Error); |
170 | static llvm::Optional<DynTypedMatcher> |
171 | parseMatcherExpression(StringRef MatcherCode, Sema *S, |
172 | Diagnostics *Error) { |
173 | return parseMatcherExpression(MatcherCode, S, nullptr, Error); |
174 | } |
175 | static llvm::Optional<DynTypedMatcher> |
176 | parseMatcherExpression(StringRef MatcherCode, Diagnostics *Error) { |
177 | return parseMatcherExpression(MatcherCode, nullptr, Error); |
178 | } |
179 | |
180 | /// Parse the expression in \p Code. |
181 | /// |
182 | /// Parses any expression supported by this parser. In general, the |
183 | /// \c parseMatcherExpression function is a better approach to get a matcher |
184 | /// object. |
185 | /// |
186 | /// \param S The Sema instance that will help the parser construct the matchers. |
187 | /// If null, it uses the default registry. Overloads without \p S pass null. |
188 | /// |
189 | /// \param NamedValues A map of precomputed named values. This provides |
190 | /// the dictionary for the <NamedValue> rule of the grammar. |
191 | /// If null, it is ignored. Overloads without this parameter pass null. |
192 | static bool parseExpression(StringRef Code, Sema *S, |
193 | const NamedValueMap *NamedValues, |
194 | VariantValue *Value, Diagnostics *Error); |
195 | static bool parseExpression(StringRef Code, Sema *S, |
196 | VariantValue *Value, Diagnostics *Error) { |
197 | return parseExpression(Code, S, nullptr, Value, Error); |
198 | } |
199 | static bool parseExpression(StringRef Code, VariantValue *Value, |
200 | Diagnostics *Error) { |
201 | return parseExpression(Code, nullptr, Value, Error); |
202 | } |
203 | |
204 | /// Complete the expression in \p Code at the offset \p CompletionOffset. |
205 | /// |
206 | /// \param S The Sema instance that will help the parser construct the matchers. |
207 | /// If null, it uses the default registry. Overloads without \p S pass null. |
208 | /// |
209 | /// \param NamedValues A map of precomputed named values. This provides |
210 | /// the dictionary for the <NamedValue> rule of the grammar. |
211 | /// If null, it is ignored. Overloads without this parameter pass null. |
212 | /// |
213 | /// \return The list of completions, which may be empty if there are no |
214 | /// available completions or if an error occurred. |
215 | static std::vector<MatcherCompletion> |
216 | completeExpression(StringRef Code, unsigned CompletionOffset, Sema *S, |
217 | const NamedValueMap *NamedValues); |
218 | static std::vector<MatcherCompletion> |
219 | completeExpression(StringRef Code, unsigned CompletionOffset, Sema *S) { |
220 | return completeExpression(Code, CompletionOffset, S, nullptr); |
221 | } |
222 | static std::vector<MatcherCompletion> |
223 | completeExpression(StringRef Code, unsigned CompletionOffset) { |
224 | return completeExpression(Code, CompletionOffset, nullptr); |
225 | } |
226 | |
227 | private: |
228 | class CodeTokenizer; /* This and the next two nested types are only forward-declared here. */ |
229 | struct ScopedContextEntry; |
230 | struct TokenInfo; |
231 | |
232 | Parser(CodeTokenizer *Tokenizer, Sema *S, |
233 | const NamedValueMap *NamedValues, |
234 | Diagnostics *Error); /* Private constructor; the public entry points are static. */ |
235 | |
236 | bool parseBindID(std::string &BindID); /* NOTE(review): std::string is used, but <string> is not included directly here. */ |
237 | bool parseExpressionImpl(VariantValue *Value); |
238 | bool parseMatcherExpressionImpl(const TokenInfo &NameToken, |
239 | VariantValue *Value); |
240 | bool parseIdentifierPrefixImpl(VariantValue *Value); |
241 | |
242 | void addCompletion(const TokenInfo &CompToken, |
243 | const MatcherCompletion &Completion); |
244 | void addExpressionCompletions(); |
245 | |
246 | std::vector<MatcherCompletion> |
247 | getNamedValueCompletions(ArrayRef<ArgKind> AcceptedTypes); |
248 | |
249 | CodeTokenizer *const Tokenizer; /* This and the next three members are const pointers: never reseated. */ |
250 | Sema *const S; |
251 | const NamedValueMap *const NamedValues; |
252 | Diagnostics *const Error; |
253 | |
254 | using ContextStackTy = std::vector<std::pair<MatcherCtor, unsigned>>; /* Same element type as Sema::getAcceptedCompletionTypes()'s Context. */ |
255 | |
256 | ContextStackTy ContextStack; |
257 | std::vector<MatcherCompletion> Completions; |
258 | }; |
259 | |
260 | } // namespace dynamic |
261 | } // namespace ast_matchers |
262 | } // namespace clang |
263 | |
264 | #endif // LLVM_CLANG_ASTMATCHERS_DYNAMIC_PARSER_H |
265 |