1 | //===- Replacement.h - Framework for clang refactoring tools ----*- C++ -*-===// |
---|---|
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Classes supporting refactorings that span multiple translation units. |
10 | // While single translation unit refactorings are supported via the Rewriter, |
11 | // when refactoring multiple translation units changes must be stored in a |
12 | // SourceManager independent form, duplicate changes need to be removed, and |
13 | // all changes must be applied at once at the end of the refactoring so that |
14 | // the code is always parseable. |
15 | // |
16 | //===----------------------------------------------------------------------===// |
17 | |
18 | #ifndef LLVM_CLANG_TOOLING_CORE_REPLACEMENT_H |
19 | #define LLVM_CLANG_TOOLING_CORE_REPLACEMENT_H |
20 | |
21 | #include "clang/Basic/LangOptions.h" |
22 | #include "clang/Basic/SourceLocation.h" |
23 | #include "llvm/ADT/Optional.h" |
24 | #include "llvm/ADT/StringRef.h" |
25 | #include "llvm/Support/Compiler.h" |
26 | #include "llvm/Support/Error.h" |
27 | #include "llvm/Support/raw_ostream.h" |
28 | #include <map> |
29 | #include <set> |
30 | #include <string> |
31 | #include <system_error> |
32 | #include <utility> |
33 | #include <vector> |
34 | |
35 | namespace clang { |
36 | |
37 | class FileManager; |
38 | class Rewriter; |
39 | class SourceManager; |
40 | |
41 | namespace tooling { |
42 | |
/// A source range independent of the \c SourceManager.
///
/// Describes the half-open byte interval [Offset, Offset + Length). The end of
/// the interval is computed in a type wider than the stored fields, so ranges
/// that reach the top of the \c unsigned domain (e.g. an offset of UINT_MAX)
/// are compared correctly instead of wrapping around to a small value.
class Range {
public:
  /// Creates the empty range at offset 0.
  Range() = default;

  /// Creates the range [Offset, Offset + Length).
  Range(unsigned Offset, unsigned Length) : Offset(Offset), Length(Length) {}

  /// Accessors.
  /// @{
  unsigned getOffset() const { return Offset; }
  unsigned getLength() const { return Length; }
  /// @}

  /// \name Range Predicates
  /// @{
  /// Whether this range overlaps with \p RHS or not.
  ///
  /// Directly adjacent ranges (one ends exactly where the other starts) do
  /// not overlap.
  bool overlapsWith(Range RHS) const {
    return getEnd() > RHS.Offset && Offset < RHS.getEnd();
  }

  /// Whether this range contains \p RHS or not.
  ///
  /// \p RHS is contained when it starts at or after this range's start and
  /// ends at or before this range's end.
  bool contains(Range RHS) const {
    return RHS.Offset >= Offset && RHS.getEnd() <= getEnd();
  }

  /// Whether this range equals to \p RHS or not.
  bool operator==(const Range &RHS) const {
    return Offset == RHS.getOffset() && Length == RHS.getLength();
  }
  /// @}

private:
  /// One-past-the-end offset. Widened before the addition so that
  /// Offset + Length cannot wrap around in \c unsigned arithmetic.
  unsigned long long getEnd() const {
    return static_cast<unsigned long long>(Offset) + Length;
  }

  unsigned Offset = 0;
  unsigned Length = 0;
};
78 | |
79 | /// A text replacement. |
80 | /// |
81 | /// Represents a SourceManager independent replacement of a range of text in a |
82 | /// specific file. |
83 | class Replacement { |
84 | public: |
85 | /// Creates an invalid (not applicable) replacement. |
86 | Replacement(); |
87 | |
88 | /// Creates a replacement of the range [Offset, Offset+Length) in |
89 | /// FilePath with ReplacementText. |
90 | /// |
91 | /// \param FilePath A source file accessible via a SourceManager. |
92 | /// \param Offset The byte offset of the start of the range in the file. |
93 | /// \param Length The length of the range in bytes. |
94 | Replacement(StringRef FilePath, unsigned Offset, unsigned Length, |
95 | StringRef ReplacementText); |
96 | |
97 | /// Creates a Replacement of the range [Start, Start+Length) with |
98 | /// ReplacementText. |
99 | Replacement(const SourceManager &Sources, SourceLocation Start, |
100 | unsigned Length, StringRef ReplacementText); |
101 | |
102 | /// Creates a Replacement of the given range with ReplacementText. |
103 | Replacement(const SourceManager &Sources, const CharSourceRange &Range, |
104 | StringRef ReplacementText, |
105 | const LangOptions &LangOpts = LangOptions()); |
106 | |
107 | /// Creates a Replacement of the node with ReplacementText. |
108 | template <typename Node> |
109 | Replacement(const SourceManager &Sources, const Node &NodeToReplace, |
110 | StringRef ReplacementText, |
111 | const LangOptions &LangOpts = LangOptions()); |
112 | |
113 | /// Returns whether this replacement can be applied to a file. |
114 | /// |
115 | /// Only replacements that are in a valid file can be applied. |
116 | bool isApplicable() const; |
117 | |
118 | /// Accessors. |
119 | /// @{ |
120 | StringRef getFilePath() const { return FilePath; } |
121 | unsigned getOffset() const { return ReplacementRange.getOffset(); } |
122 | unsigned getLength() const { return ReplacementRange.getLength(); } |
123 | StringRef getReplacementText() const { return ReplacementText; } |
124 | /// @} |
125 | |
126 | /// Applies the replacement on the Rewriter. |
127 | bool apply(Rewriter &Rewrite) const; |
128 | |
129 | /// Returns a human readable string representation. |
130 | std::string toString() const; |
131 | |
132 | private: |
133 | void setFromSourceLocation(const SourceManager &Sources, SourceLocation Start, |
134 | unsigned Length, StringRef ReplacementText); |
135 | void setFromSourceRange(const SourceManager &Sources, |
136 | const CharSourceRange &Range, |
137 | StringRef ReplacementText, |
138 | const LangOptions &LangOpts); |
139 | |
140 | std::string FilePath; |
141 | Range ReplacementRange; |
142 | std::string ReplacementText; |
143 | }; |
144 | |
/// Kinds of failure reported through \c ReplacementError.
///
/// NOTE(review): the per-enumerator descriptions below are inferred from the
/// enumerator names; confirm against the definition of
/// ReplacementError::message().
enum class replacement_error {
  /// Presumably: a replacement could not be applied.
  fail_to_apply = 0,
  /// Presumably: a replacement's file path does not match the expected one.
  wrong_file_path = 1,
  /// Presumably: two replacements overlap in a conflicting way.
  overlap_conflict = 2,
  /// Presumably: two insertions conflict with each other.
  insert_conflict = 3,
};
151 | |
152 | /// Carries extra error information in replacement-related llvm::Error, |
153 | /// e.g. fail applying replacements and replacements conflict. |
154 | class ReplacementError : public llvm::ErrorInfo<ReplacementError> { |
155 | public: |
156 | ReplacementError(replacement_error Err) : Err(Err) {} |
157 | |
158 | /// Constructs an error related to an existing replacement. |
159 | ReplacementError(replacement_error Err, Replacement Existing) |
160 | : Err(Err), ExistingReplacement(std::move(Existing)) {} |
161 | |
162 | /// Constructs an error related to a new replacement and an existing |
163 | /// replacement in a set of replacements. |
164 | ReplacementError(replacement_error Err, Replacement New, Replacement Existing) |
165 | : Err(Err), NewReplacement(std::move(New)), |
166 | ExistingReplacement(std::move(Existing)) {} |
167 | |
168 | std::string message() const override; |
169 | |
170 | void log(raw_ostream &OS) const override { OS << message(); } |
171 | |
172 | replacement_error get() const { return Err; } |
173 | |
174 | static char ID; |
175 | |
176 | const llvm::Optional<Replacement> &getNewReplacement() const { |
177 | return NewReplacement; |
178 | } |
179 | |
180 | const llvm::Optional<Replacement> &getExistingReplacement() const { |
181 | return ExistingReplacement; |
182 | } |
183 | |
184 | private: |
185 | // Users are not expected to use error_code. |
186 | std::error_code convertToErrorCode() const override { |
187 | return llvm::inconvertibleErrorCode(); |
188 | } |
189 | |
190 | replacement_error Err; |
191 | |
192 | // A new replacement, which is to expected be added into a set of |
193 | // replacements, that is causing problem. |
194 | llvm::Optional<Replacement> NewReplacement; |
195 | |
196 | // An existing replacement in a replacements set that is causing problem. |
197 | llvm::Optional<Replacement> ExistingReplacement; |
198 | }; |
199 | |
/// Less-than operator between two Replacements.
///
/// Defined out of line. \c Replacements stores its elements in a
/// \c std::set<Replacement> with the default comparator, so this operator
/// determines the iteration order of that set.
bool operator<(const Replacement &LHS, const Replacement &RHS);

/// Equal-to operator between two Replacements.
///
/// Defined out of line. \c Replacements::operator== compares the underlying
/// sets, which in turn compares elements with this operator.
bool operator==(const Replacement &LHS, const Replacement &RHS);
205 | |
206 | /// Maintains a set of replacements that are conflict-free. |
207 | /// Two replacements are considered conflicts if they overlap or have the same |
208 | /// offset (i.e. order-dependent). |
209 | class Replacements { |
210 | private: |
211 | using ReplacementsImpl = std::set<Replacement>; |
212 | |
213 | public: |
214 | using const_iterator = ReplacementsImpl::const_iterator; |
215 | using const_reverse_iterator = ReplacementsImpl::const_reverse_iterator; |
216 | |
217 | Replacements() = default; |
218 | |
219 | explicit Replacements(const Replacement &R) { Replaces.insert(R); } |
220 | |
221 | /// Adds a new replacement \p R to the current set of replacements. |
222 | /// \p R must have the same file path as all existing replacements. |
223 | /// Returns `success` if the replacement is successfully inserted; otherwise, |
224 | /// it returns an llvm::Error, i.e. there is a conflict between R and the |
225 | /// existing replacements (i.e. they are order-dependent) or R's file path is |
226 | /// different from the filepath of existing replacements. Callers must |
227 | /// explicitly check the Error returned, and the returned error can be |
228 | /// converted to a string message with `llvm::toString()`. This prevents users |
229 | /// from adding order-dependent replacements. To control the order in which |
230 | /// order-dependent replacements are applied, use merge({R}) with R referring |
231 | /// to the changed code after applying all existing replacements. |
232 | /// Two replacements A and B are considered order-independent if applying them |
233 | /// in either order produces the same result. Note that the range of the |
234 | /// replacement that is applied later still refers to the original code. |
235 | /// These include (but not restricted to) replacements that: |
236 | /// - don't overlap (being directly adjacent is fine) and |
237 | /// - are overlapping deletions. |
238 | /// - are insertions at the same offset and applying them in either order |
239 | /// has the same effect, i.e. X + Y = Y + X when inserting X and Y |
240 | /// respectively. |
241 | /// - are identical replacements, i.e. applying the same replacement twice |
242 | /// is equivalent to applying it once. |
243 | /// Examples: |
244 | /// 1. Replacement A(0, 0, "a") and B(0, 0, "aa") are order-independent since |
245 | /// applying them in either order gives replacement (0, 0, "aaa"). |
246 | /// However, A(0, 0, "a") and B(0, 0, "b") are order-dependent since |
247 | /// applying A first gives (0, 0, "ab") while applying B first gives (B, A, |
248 | /// "ba"). |
249 | /// 2. Replacement A(0, 2, "123") and B(0, 2, "123") are order-independent |
250 | /// since applying them in either order gives (0, 2, "123"). |
251 | /// 3. Replacement A(0, 3, "123") and B(2, 3, "321") are order-independent |
252 | /// since either order gives (0, 5, "12321"). |
253 | /// 4. Replacement A(0, 3, "ab") and B(0, 3, "ab") are order-independent since |
254 | /// applying the same replacement twice is equivalent to applying it once. |
255 | /// Replacements with offset UINT_MAX are special - we do not detect conflicts |
256 | /// for such replacements since users may add them intentionally as a special |
257 | /// category of replacements. |
258 | llvm::Error add(const Replacement &R); |
259 | |
260 | /// Merges \p Replaces into the current replacements. \p Replaces |
261 | /// refers to code after applying the current replacements. |
262 | LLVM_NODISCARD Replacements merge(const Replacements &Replaces) const; |
263 | |
264 | // Returns the affected ranges in the changed code. |
265 | std::vector<Range> getAffectedRanges() const; |
266 | |
267 | // Returns the new offset in the code after replacements being applied. |
268 | // Note that if there is an insertion at Offset in the current replacements, |
269 | // \p Offset will be shifted to Offset + Length in inserted text. |
270 | unsigned getShiftedCodePosition(unsigned Position) const; |
271 | |
272 | unsigned size() const { return Replaces.size(); } |
273 | |
274 | void clear() { Replaces.clear(); } |
275 | |
276 | bool empty() const { return Replaces.empty(); } |
277 | |
278 | const_iterator begin() const { return Replaces.begin(); } |
279 | |
280 | const_iterator end() const { return Replaces.end(); } |
281 | |
282 | const_reverse_iterator rbegin() const { return Replaces.rbegin(); } |
283 | |
284 | const_reverse_iterator rend() const { return Replaces.rend(); } |
285 | |
286 | bool operator==(const Replacements &RHS) const { |
287 | return Replaces == RHS.Replaces; |
288 | } |
289 | |
290 | private: |
291 | Replacements(const_iterator Begin, const_iterator End) |
292 | : Replaces(Begin, End) {} |
293 | |
294 | // Returns `R` with new range that refers to code after `Replaces` being |
295 | // applied. |
296 | Replacement getReplacementInChangedCode(const Replacement &R) const; |
297 | |
298 | // Returns a set of replacements that is equivalent to the current |
299 | // replacements by merging all adjacent replacements. Two sets of replacements |
300 | // are considered equivalent if they have the same effect when they are |
301 | // applied. |
302 | Replacements getCanonicalReplacements() const; |
303 | |
304 | // If `R` and all existing replacements are order-indepedent, then merge it |
305 | // with `Replaces` and returns the merged replacements; otherwise, returns an |
306 | // error. |
307 | llvm::Expected<Replacements> |
308 | mergeIfOrderIndependent(const Replacement &R) const; |
309 | |
310 | ReplacementsImpl Replaces; |
311 | }; |
312 | |
/// Apply all replacements in \p Replaces to the Rewriter \p Rewrite.
///
/// Replacement applications happen independently of the success of
/// other applications.
///
/// \param Replaces The replacements to apply.
/// \param Rewrite The Rewriter the replacements are applied on.
///
/// \returns true if all replacements apply. false otherwise.
bool applyAllReplacements(const Replacements &Replaces, Rewriter &Rewrite);
320 | |
/// Applies all replacements in \p Replaces to \p Code.
///
/// This completely ignores the path stored in each replacement. If all
/// replacements are applied successfully, this returns the code with
/// replacements applied; otherwise, an llvm::Error carrying llvm::StringError
/// is returned (the Error message can be converted to string using
/// `llvm::toString()` and `std::error_code` in the `Error` should be ignored).
///
/// \param Code The text the replacements are applied to.
/// \param Replaces The replacements to apply.
llvm::Expected<std::string> applyAllReplacements(StringRef Code,
                                                 const Replacements &Replaces);
330 | |
/// Collection of Replacements generated from a single translation unit.
struct TranslationUnitReplacements {
  /// Name of the main source for the translation unit.
  std::string MainSourceFile;

  /// The replacements generated for this translation unit. This is a plain
  /// vector of \c Replacement, not the conflict-checked \c Replacements class;
  /// inside this struct the member name hides that class name.
  std::vector<Replacement> Replacements;
};
338 | |
/// Calculates the new ranges after \p Replaces are applied. These
/// include both the original \p Ranges and the affected ranges of \p Replaces
/// in the new code.
///
/// \param Replaces The replacements that are applied.
/// \param Ranges The ranges to carry over into the new code.
///
/// \pre Replacements must be for the same file.
///
/// \return The new ranges after \p Replaces are applied. The new ranges will be
/// sorted and non-overlapping.
std::vector<Range>
calculateRangesAfterReplacements(const Replacements &Replaces,
                                 const std::vector<Range> &Ranges);
350 | |
/// If there are multiple <File, Replacements> pairs with the same file
/// entry, we only keep one pair and discard the rest.
/// If a file does not exist, its corresponding replacements will be ignored.
///
/// \param FileMgr The file manager consulted for the files named in
/// \p FileToReplaces (NOTE(review): presumably used to resolve paths to file
/// entries and to check existence; confirm against the definition).
/// \param FileToReplaces Maps a file path to the replacements for that file.
///
/// \returns A map from file path to the replacements kept for that file.
std::map<std::string, Replacements> groupReplacementsByFile(
    FileManager &FileMgr,
    const std::map<std::string, Replacements> &FileToReplaces);
357 | |
358 | template <typename Node> |
359 | Replacement::Replacement(const SourceManager &Sources, |
360 | const Node &NodeToReplace, StringRef ReplacementText, |
361 | const LangOptions &LangOpts) { |
362 | const CharSourceRange Range = |
363 | CharSourceRange::getTokenRange(NodeToReplace->getSourceRange()); |
364 | setFromSourceRange(Sources, Range, ReplacementText, LangOpts); |
365 | } |
366 | |
367 | } // namespace tooling |
368 | |
369 | } // namespace clang |
370 | |
371 | #endif // LLVM_CLANG_TOOLING_CORE_REPLACEMENT_H |
372 |