1 | //===--- WhitespaceManager.h - Format C++ code ------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// WhitespaceManager class manages whitespace around tokens and their |
11 | /// replacements. |
12 | /// |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #ifndef LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H |
16 | #define LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H |
17 | |
18 | #include "TokenAnnotator.h" |
19 | #include "clang/Basic/SourceManager.h" |
20 | #include "clang/Format/Format.h" |
21 | #include <string> |
22 | |
23 | namespace clang { |
24 | namespace format { |
25 | |
26 | /// Manages the whitespaces around tokens and their replacements. |
27 | /// |
28 | /// This includes special handling for certain constructs, e.g. the alignment of |
29 | /// trailing line comments. |
30 | /// |
31 | /// To guarantee correctness of alignment operations, the \c WhitespaceManager |
32 | /// must be informed about every token in the source file; for each token, there |
33 | /// must be exactly one call to either \c replaceWhitespace or |
34 | /// \c addUntouchableToken. |
35 | /// |
36 | /// There may be multiple calls to \c breakToken for a given token. |
37 | class WhitespaceManager { |
38 | public: |
39 | WhitespaceManager(const SourceManager &SourceMgr, const FormatStyle &Style, |
40 | bool UseCRLF) |
41 | : SourceMgr(SourceMgr), Style(Style), UseCRLF(UseCRLF) {} |
42 | |
43 | /// Replaces the whitespace in front of \p Tok. Only call once for |
44 | /// each \c AnnotatedToken. |
45 | /// |
46 | /// \p StartOfTokenColumn is the column at which the token will start after |
47 | /// this replacement. It is needed for determining how \p Spaces is turned |
48 | /// into tabs and spaces for some format styles. |
49 | void replaceWhitespace(FormatToken &Tok, unsigned Newlines, unsigned Spaces, |
50 | unsigned StartOfTokenColumn, |
51 | bool InPPDirective = false); |
52 | |
53 | /// Adds information about an unchangeable token's whitespace. |
54 | /// |
55 | /// Needs to be called for every token for which \c replaceWhitespace |
56 | /// was not called. |
57 | void addUntouchableToken(const FormatToken &Tok, bool InPPDirective); |
58 | |
59 | llvm::Error addReplacement(const tooling::Replacement &Replacement); |
60 | |
61 | /// Inserts or replaces whitespace in the middle of a token. |
62 | /// |
63 | /// Inserts \p PreviousPostfix, \p Newlines, \p Spaces and \p CurrentPrefix |
64 | /// (in this order) at \p Offset inside \p Tok, replacing \p ReplaceChars |
65 | /// characters. |
66 | /// |
67 | /// Note: \p Spaces can be negative to retain information about initial |
68 | /// relative column offset between a line of a block comment and the start of |
69 | /// the comment. This negative offset may be compensated by trailing comment |
70 | /// alignment here. In all other cases negative \p Spaces will be truncated to |
71 | /// 0. |
72 | /// |
73 | /// When \p InPPDirective is true, escaped newlines are inserted. \p Spaces is |
74 | /// used to align backslashes correctly. |
75 | void replaceWhitespaceInToken(const FormatToken &Tok, unsigned Offset, |
76 | unsigned ReplaceChars, |
77 | StringRef PreviousPostfix, |
78 | StringRef CurrentPrefix, bool InPPDirective, |
79 | unsigned Newlines, int Spaces); |
80 | |
81 | /// Returns all the \c Replacements created during formatting. |
82 | const tooling::Replacements &generateReplacements(); |
83 | |
84 | /// Represents a change before a token, a break inside a token, |
85 | /// or the layout of an unchanged token (or whitespace within). |
86 | struct Change { |
87 | /// Functor to sort changes in original source order. |
88 | class IsBeforeInFile { |
89 | public: |
90 | IsBeforeInFile(const SourceManager &SourceMgr) : SourceMgr(SourceMgr) {} |
91 | bool operator()(const Change &C1, const Change &C2) const; |
92 | |
93 | private: |
94 | const SourceManager &SourceMgr; |
95 | }; |
96 | |
97 | /// Creates a \c Change. |
98 | /// |
99 | /// The generated \c Change will replace the characters at |
100 | /// \p OriginalWhitespaceRange with a concatenation of |
101 | /// \p PreviousLinePostfix, \p NewlinesBefore line breaks, \p Spaces spaces |
102 | /// and \p CurrentLinePrefix. |
103 | /// |
104 | /// \p StartOfTokenColumn and \p InPPDirective will be used to lay out |
105 | /// trailing comments and escaped newlines. |
106 | Change(const FormatToken &Tok, bool CreateReplacement, |
107 | SourceRange OriginalWhitespaceRange, int Spaces, |
108 | unsigned StartOfTokenColumn, unsigned NewlinesBefore, |
109 | StringRef PreviousLinePostfix, StringRef CurrentLinePrefix, |
110 | bool ContinuesPPDirective, bool IsInsideToken); |
111 | |
112 | // The kind of the token whose whitespace this change replaces, or in which |
113 | // this change inserts whitespace. |
114 | // FIXME: Currently this is not set correctly for breaks inside comments, as |
115 | // the \c BreakableToken is still doing its own alignment. |
116 | const FormatToken *Tok; |
117 | |
118 | bool CreateReplacement; |
119 | // Changes might be in the middle of a token, so we cannot just keep the |
120 | // FormatToken around to query its information. |
121 | SourceRange OriginalWhitespaceRange; |
122 | unsigned StartOfTokenColumn; |
123 | unsigned NewlinesBefore; |
124 | std::string PreviousLinePostfix; |
125 | std::string CurrentLinePrefix; |
126 | bool ContinuesPPDirective; |
127 | |
128 | // The number of spaces in front of the token or broken part of the token. |
129 | // This will be adapted when aligning tokens. |
130 | // Can be negative to retain information about the initial relative offset |
131 | // of the lines in a block comment. This is used when aligning trailing |
132 | // comments. Uncompensated negative offset is truncated to 0. |
133 | int Spaces; |
134 | |
135 | // If this change is inside of a token but not at the start of the token or |
136 | // directly after a newline. |
137 | bool IsInsideToken; |
138 | |
139 | // \c IsTrailingComment, \c TokenLength, \c PreviousEndOfTokenColumn and |
140 | // \c EscapedNewlineColumn will be calculated in |
141 | // \c calculateLineBreakInformation. |
142 | bool IsTrailingComment; |
143 | unsigned TokenLength; |
144 | unsigned PreviousEndOfTokenColumn; |
145 | unsigned EscapedNewlineColumn; |
146 | |
147 | // These fields are used to retain correct relative line indentation in a |
148 | // block comment when aligning trailing comments. |
149 | // |
150 | // If this Change represents a continuation of a block comment, |
151 | // \c StartOfBlockComment is pointer to the first Change in the block |
152 | // comment. \c IndentationOffset is a relative column offset to this |
153 | // change, so that the correct column can be reconstructed at the end of |
154 | // the alignment process. |
155 | const Change *StartOfBlockComment; |
156 | int IndentationOffset; |
157 | |
158 | // A combination of indent level and nesting level, which are used in |
159 | // tandem to compute lexical scope, for the purposes of deciding |
160 | // when to stop consecutive alignment runs. |
161 | std::pair<unsigned, unsigned> indentAndNestingLevel() const { |
162 | return std::make_pair(Tok->IndentLevel, Tok->NestingLevel); |
163 | } |
164 | }; |
165 | |
166 | private: |
167 | /// Calculate \c IsTrailingComment, \c TokenLength for the last tokens |
168 | /// or token parts in a line and \c PreviousEndOfTokenColumn and |
169 | /// \c EscapedNewlineColumn for the first tokens or token parts in a line. |
170 | void calculateLineBreakInformation(); |
171 | |
172 | /// Align consecutive assignments over all \c Changes. |
173 | void alignConsecutiveAssignments(); |
174 | |
175 | /// Align consecutive declarations over all \c Changes. |
176 | void alignConsecutiveDeclarations(); |
177 | |
178 | /// Align trailing comments over all \c Changes. |
179 | void alignTrailingComments(); |
180 | |
181 | /// Align trailing comments from change \p Start to change \p End at |
182 | /// the specified \p Column. |
183 | void alignTrailingComments(unsigned Start, unsigned End, unsigned Column); |
184 | |
185 | /// Align escaped newlines over all \c Changes. |
186 | void alignEscapedNewlines(); |
187 | |
188 | /// Align escaped newlines from change \p Start to change \p End at |
189 | /// the specified \p Column. |
190 | void alignEscapedNewlines(unsigned Start, unsigned End, unsigned Column); |
191 | |
192 | /// Fill \c Replaces with the replacements for all effective changes. |
193 | void generateChanges(); |
194 | |
195 | /// Stores \p Text as the replacement for the whitespace in \p Range. |
196 | void storeReplacement(SourceRange Range, StringRef Text); |
197 | void appendNewlineText(std::string &Text, unsigned Newlines); |
198 | void appendEscapedNewlineText(std::string &Text, unsigned Newlines, |
199 | unsigned PreviousEndOfTokenColumn, |
200 | unsigned EscapedNewlineColumn); |
201 | void appendIndentText(std::string &Text, unsigned IndentLevel, |
202 | unsigned Spaces, unsigned WhitespaceStartColumn); |
203 | |
204 | SmallVector<Change, 16> Changes; |
205 | const SourceManager &SourceMgr; |
206 | tooling::Replacements Replaces; |
207 | const FormatStyle &Style; |
208 | bool UseCRLF; |
209 | }; |
210 | |
211 | } // namespace format |
212 | } // namespace clang |
213 | |
214 | #endif |
215 | |