| 1 | //===- Parser.h - Matcher expression parser ---------------------*- C++ -*-===// |
|---|---|
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | /// \file |
| 10 | /// Simple matcher expression parser. |
| 11 | /// |
| 12 | /// The parser understands matcher expressions of the form: |
| 13 | /// MatcherName(Arg0, Arg1, ..., ArgN) |
| 14 | /// as well as simple types like strings. |
| 15 | /// The parser does not know how to process the matchers. It delegates this task |
| 16 | /// to a Sema object received as an argument. |
| 17 | /// |
| 18 | /// \code |
| 19 | /// Grammar for the expressions supported: |
| 20 | /// <Expression> := <Literal> | <NamedValue> | <MatcherExpression> |
| 21 | /// <Literal> := <StringLiteral> | <Boolean> | <Double> | <Unsigned> |
| 22 | /// <StringLiteral> := "quoted string" |
| 23 | /// <Boolean> := true | false |
| 24 | /// <Double> := [0-9]+.[0-9]* | [0-9]+.[0-9]*[eE][-+]?[0-9]+ |
| 25 | /// <Unsigned> := [0-9]+ |
| 26 | /// <NamedValue> := <Identifier> |
| 27 | /// <MatcherExpression> := <Identifier>(<ArgumentList>) | |
| 28 | /// <Identifier>(<ArgumentList>).bind(<StringLiteral>) |
| 29 | /// <Identifier> := [a-zA-Z]+ |
| 30 | /// <ArgumentList> := <Expression> | <Expression>,<ArgumentList> |
| 31 | /// \endcode |
| 32 | // |
| 33 | //===----------------------------------------------------------------------===// |
| 34 | |
| 35 | #ifndef LLVM_CLANG_ASTMATCHERS_DYNAMIC_PARSER_H |
| 36 | #define LLVM_CLANG_ASTMATCHERS_DYNAMIC_PARSER_H |
| 37 | |
| 38 | #include "clang/ASTMatchers/ASTMatchersInternal.h" |
| 39 | #include "clang/ASTMatchers/Dynamic/Registry.h" |
| 40 | #include "clang/ASTMatchers/Dynamic/VariantValue.h" |
| 41 | #include "llvm/ADT/ArrayRef.h" |
| 42 | #include "llvm/ADT/Optional.h" |
| 43 | #include "llvm/ADT/StringMap.h" |
| 44 | #include "llvm/ADT/StringRef.h" |
| 45 | #include <utility> |
| 46 | #include <vector> |
| 47 | |
| 48 | namespace clang { |
| 49 | namespace ast_matchers { |
| 50 | namespace dynamic { |
| 51 | |
| 52 | class Diagnostics; |
| 53 | |
| 54 | /// Matcher expression parser; all public entry points are static. |
| 55 | class Parser { |
| 56 | public: |
| 57 | /// Interface to connect the parser with the registry and more. |
| 58 | /// |
| 59 | /// The parser uses the Sema instance passed into parseMatcherExpression() |
| 60 | /// to handle all matcher tokens. The simplest implementation would simply |
| 61 | /// call into the registry to create the matchers; a more complex one might |
| 62 | /// intercept matcher creation to do extra work, e.g. transform the matcher |
| 63 | /// by adding some id() nodes, or detect specific matcher nodes for more |
| 64 | /// efficient lookup. |
| 65 | /// Subclasses must implement actOnMatcherExpression() and |
| 66 | /// lookupMatcherCtor(); the two completion hooks are virtual but not pure. |
| 67 | class Sema { |
| 68 | public: |
| 69 | virtual ~Sema(); |
| 70 | |
| 71 | /// Process a matcher expression. |
| 72 | /// |
| 73 | /// All the arguments passed here have already been processed. |
| 74 | /// |
| 75 | /// \param Ctor A matcher constructor looked up by \c lookupMatcherCtor(). |
| 76 | /// |
| 77 | /// \param NameRange The location of the name in the matcher source. |
| 78 | /// Useful for error reporting. |
| 79 | /// |
| 80 | /// \param BindID The ID to use to bind the matcher, or a null \c StringRef |
| 81 | /// if no ID is specified. |
| 82 | /// |
| 83 | /// \param Args The argument list for the matcher. |
| 84 | /// \param Error Receives a description of the error when one occurs. |
| 85 | /// |
| 86 | /// \return The matcher objects constructed by the processor, or a null |
| 87 | /// matcher if an error occurred (see \p Error). |
| 88 | virtual VariantMatcher actOnMatcherExpression(MatcherCtor Ctor, |
| 89 | SourceRange NameRange, |
| 90 | StringRef BindID, |
| 91 | ArrayRef<ParserValue> Args, |
| 92 | Diagnostics *Error) = 0; |
| 93 | |
| 94 | /// Look up a matcher by name. |
| 95 | /// |
| 96 | /// \param MatcherName The matcher name found by the parser. |
| 97 | /// |
| 98 | /// \return The matcher constructor, or an empty Optional<MatcherCtor> if |
| 99 | /// no matcher with that name was found. |
| 100 | virtual llvm::Optional<MatcherCtor> |
| 101 | lookupMatcherCtor(StringRef MatcherName) = 0; |
| 102 | |
| 103 | /// Compute the list of completion types for \p Context. |
| 104 | /// |
| 105 | /// Each element of \p Context represents a matcher invocation, going from |
| 106 | /// outermost to innermost. Elements are pairs consisting of a reference to |
| 107 | /// the matcher constructor and the index of the next element in the |
| 108 | /// argument list of that matcher (or for the last element, the index of |
| 109 | /// the completion point in the argument list). An empty list requests |
| 110 | /// completion for the root matcher. |
| 111 | virtual std::vector<ArgKind> getAcceptedCompletionTypes( |
| 112 | llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context); |
| 113 | |
| 114 | /// Compute the list of completions that match any of |
| 115 | /// \p AcceptedTypes. |
| 116 | /// |
| 117 | /// \param AcceptedTypes All types accepted for this completion. |
| 118 | /// |
| 119 | /// \return All completions for the specified types. |
| 120 | /// Completions should be valid when used in \c lookupMatcherCtor(). |
| 121 | /// The matcher constructed from the return of \c lookupMatcherCtor() |
| 122 | /// should be convertible to some type in \p AcceptedTypes. |
| 123 | virtual std::vector<MatcherCompletion> |
| 124 | getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes); |
| 125 | }; |
| 126 | |
| 127 | /// Sema implementation that uses the matcher registry to process the |
| 128 | /// tokens. Overrides every virtual hook declared by Sema. |
| 129 | class RegistrySema : public Parser::Sema { |
| 130 | public: |
| 131 | ~RegistrySema() override; |
| 132 | |
| 133 | llvm::Optional<MatcherCtor> |
| 134 | lookupMatcherCtor(StringRef MatcherName) override; |
| 135 | |
| 136 | VariantMatcher actOnMatcherExpression(MatcherCtor Ctor, |
| 137 | SourceRange NameRange, |
| 138 | StringRef BindID, |
| 139 | ArrayRef<ParserValue> Args, |
| 140 | Diagnostics *Error) override; |
| 141 | |
| 142 | std::vector<ArgKind> getAcceptedCompletionTypes( |
| 143 | llvm::ArrayRef<std::pair<MatcherCtor, unsigned>> Context) override; |
| 144 | |
| 145 | std::vector<MatcherCompletion> |
| 146 | getMatcherCompletions(llvm::ArrayRef<ArgKind> AcceptedTypes) override; |
| 147 | }; |
| 148 | |
| 149 | using NamedValueMap = llvm::StringMap<VariantValue>; |
| 150 | |
| 151 | /// Parse a matcher expression. |
| 152 | /// |
| 153 | /// The overloads taking fewer arguments forward here, passing null for the |
| 154 | /// omitted \p NamedValues and \p S. |
| 155 | /// |
| 156 | /// \param MatcherCode The matcher expression to parse. |
| 157 | /// \param S The Sema instance that will help the parser construct the |
| 158 | /// matchers. If null, it uses the default registry. |
| 159 | /// \param NamedValues A map of precomputed named values. This provides |
| 160 | /// the dictionary for the <NamedValue> rule of the grammar. If null, it is |
| 161 | /// ignored. |
| 162 | /// \param Error Receives a description of the error when one occurs. |
| 163 | /// |
| 164 | /// \return The matcher object constructed by the processor, returned by |
| 165 | /// value, or an empty Optional if an error occurred (see \p Error). |
| 166 | static llvm::Optional<DynTypedMatcher> |
| 167 | parseMatcherExpression(StringRef MatcherCode, Sema *S, |
| 168 | const NamedValueMap *NamedValues, |
| 169 | Diagnostics *Error); |
| 170 | static llvm::Optional<DynTypedMatcher> |
| 171 | parseMatcherExpression(StringRef MatcherCode, Sema *S, |
| 172 | Diagnostics *Error) { |
| 173 | return parseMatcherExpression(MatcherCode, S, nullptr, Error); |
| 174 | } |
| 175 | static llvm::Optional<DynTypedMatcher> |
| 176 | parseMatcherExpression(StringRef MatcherCode, Diagnostics *Error) { |
| 177 | return parseMatcherExpression(MatcherCode, nullptr, Error); |
| 178 | } |
| 179 | |
| 180 | /// Parse an expression. |
| 181 | /// |
| 182 | /// Parses any expression supported by this parser. In general, the |
| 183 | /// \c parseMatcherExpression function is a better approach to get a matcher |
| 184 | /// object. The shorter overloads forward here, passing null for what they omit. |
| 185 | /// |
| 186 | /// \param Code The expression source to parse. |
| 187 | /// \param S The Sema instance that will help the parser construct the |
| 188 | /// matchers. If null, it uses the default registry. |
| 189 | /// \param NamedValues A map of precomputed named values; the dictionary for |
| 190 | /// the <NamedValue> rule of the grammar. If null, it is ignored. |
| 191 | /// \param Value Presumably receives the parsed value; NOTE(review): confirm, and document the bool result. |
| 192 | static bool parseExpression(StringRef Code, Sema *S, |
| 193 | const NamedValueMap *NamedValues, |
| 194 | VariantValue *Value, Diagnostics *Error); |
| 195 | static bool parseExpression(StringRef Code, Sema *S, |
| 196 | VariantValue *Value, Diagnostics *Error) { |
| 197 | return parseExpression(Code, S, nullptr, Value, Error); |
| 198 | } |
| 199 | static bool parseExpression(StringRef Code, VariantValue *Value, |
| 200 | Diagnostics *Error) { |
| 201 | return parseExpression(Code, nullptr, Value, Error); |
| 202 | } |
| 203 | |
| 204 | /// Complete an expression at the given offset. |
| 205 | /// |
| 206 | /// \param Code The expression source to complete. |
| 207 | /// \param CompletionOffset The offset at which completion is requested. |
| 208 | /// \param S The Sema instance that will help the parser construct the |
| 209 | /// matchers. If null, it uses the default registry. |
| 210 | /// \param NamedValues A map of precomputed named values; the dictionary for |
| 211 | /// the <NamedValue> rule of the grammar. If null, it is ignored. |
| 212 | /// |
| 213 | /// \return The list of completions, which may be empty if there are no |
| 214 | /// available completions or if an error occurred. |
| 215 | static std::vector<MatcherCompletion> |
| 216 | completeExpression(StringRef Code, unsigned CompletionOffset, Sema *S, |
| 217 | const NamedValueMap *NamedValues); |
| 218 | static std::vector<MatcherCompletion> |
| 219 | completeExpression(StringRef Code, unsigned CompletionOffset, Sema *S) { |
| 220 | return completeExpression(Code, CompletionOffset, S, nullptr); |
| 221 | } |
| 222 | static std::vector<MatcherCompletion> |
| 223 | completeExpression(StringRef Code, unsigned CompletionOffset) { |
| 224 | return completeExpression(Code, CompletionOffset, nullptr); |
| 225 | } |
| 226 | |
| 227 | private: |
| 228 | class CodeTokenizer; |
| 229 | struct ScopedContextEntry; |
| 230 | struct TokenInfo; |
| 231 | |
| 232 | Parser(CodeTokenizer *Tokenizer, Sema *S, |
| 233 | const NamedValueMap *NamedValues, |
| 234 | Diagnostics *Error); |
| 235 | |
| 236 | bool parseBindID(std::string &BindID); |
| 237 | bool parseExpressionImpl(VariantValue *Value); |
| 238 | bool parseMatcherExpressionImpl(const TokenInfo &NameToken, |
| 239 | VariantValue *Value); |
| 240 | bool parseIdentifierPrefixImpl(VariantValue *Value); |
| 241 | |
| 242 | void addCompletion(const TokenInfo &CompToken, |
| 243 | const MatcherCompletion &Completion); |
| 244 | void addExpressionCompletions(); |
| 245 | |
| 246 | std::vector<MatcherCompletion> |
| 247 | getNamedValueCompletions(ArrayRef<ArgKind> AcceptedTypes); |
| 248 | |
| 249 | CodeTokenizer *const Tokenizer; |
| 250 | Sema *const S; |
| 251 | const NamedValueMap *const NamedValues; |
| 252 | Diagnostics *const Error; |
| 253 | |
| 254 | using ContextStackTy = std::vector<std::pair<MatcherCtor, unsigned>>; |
| 255 | |
| 256 | ContextStackTy ContextStack; |
| 257 | std::vector<MatcherCompletion> Completions; |
| 258 | }; |
| 259 | |
| 260 | } // namespace dynamic |
| 261 | } // namespace ast_matchers |
| 262 | } // namespace clang |
| 263 | |
| 264 | #endif // LLVM_CLANG_ASTMATCHERS_DYNAMIC_PARSER_H |
| 265 |