1 | //===--- FormatToken.cpp - Format C++ code --------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// This file implements specific functions of \c FormatTokens and their |
11 | /// roles. |
12 | /// |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "FormatToken.h" |
16 | #include "ContinuationIndenter.h" |
17 | #include "llvm/ADT/SmallVector.h" |
18 | #include "llvm/Support/Debug.h" |
19 | #include <climits> |
20 | |
21 | namespace clang { |
22 | namespace format { |
23 | |
24 | const char *getTokenTypeName(TokenType Type) { |
25 | static const char *const TokNames[] = { |
26 | #define TYPE(X) #X, |
27 | LIST_TOKEN_TYPES |
28 | #undef TYPE |
29 | nullptr}; |
30 | |
31 | if (Type < NUM_TOKEN_TYPES) |
32 | return TokNames[Type]; |
33 | llvm_unreachable("unknown TokenType"); |
34 | return nullptr; |
35 | } |
36 | |
37 | // FIXME: This is copy&pasted from Sema. Put it in a common place and remove |
38 | // duplication. |
39 | bool FormatToken::isSimpleTypeSpecifier() const { |
40 | switch (Tok.getKind()) { |
41 | case tok::kw_short: |
42 | case tok::kw_long: |
43 | case tok::kw___int64: |
44 | case tok::kw___int128: |
45 | case tok::kw_signed: |
46 | case tok::kw_unsigned: |
47 | case tok::kw_void: |
48 | case tok::kw_char: |
49 | case tok::kw_int: |
50 | case tok::kw_half: |
51 | case tok::kw_float: |
52 | case tok::kw_double: |
53 | case tok::kw__Float16: |
54 | case tok::kw___float128: |
55 | case tok::kw_wchar_t: |
56 | case tok::kw_bool: |
57 | case tok::kw___underlying_type: |
58 | case tok::annot_typename: |
59 | case tok::kw_char8_t: |
60 | case tok::kw_char16_t: |
61 | case tok::kw_char32_t: |
62 | case tok::kw_typeof: |
63 | case tok::kw_decltype: |
64 | return true; |
65 | default: |
66 | return false; |
67 | } |
68 | } |
69 | |
70 | TokenRole::~TokenRole() {} |
71 | |
72 | void TokenRole::precomputeFormattingInfos(const FormatToken *Token) {} |
73 | |
74 | unsigned CommaSeparatedList::formatAfterToken(LineState &State, |
75 | ContinuationIndenter *Indenter, |
76 | bool DryRun) { |
77 | if (State.NextToken == nullptr || !State.NextToken->Previous) |
78 | return 0; |
79 | |
80 | if (Formats.size() == 1) |
81 | return 0; // Handled by formatFromToken |
82 | |
83 | // Ensure that we start on the opening brace. |
84 | const FormatToken *LBrace = |
85 | State.NextToken->Previous->getPreviousNonComment(); |
86 | if (!LBrace || !LBrace->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) || |
87 | LBrace->BlockKind == BK_Block || LBrace->Type == TT_DictLiteral || |
88 | LBrace->Next->Type == TT_DesignatedInitializerPeriod) |
89 | return 0; |
90 | |
91 | // Calculate the number of code points we have to format this list. As the |
92 | // first token is already placed, we have to subtract it. |
93 | unsigned RemainingCodePoints = |
94 | Style.ColumnLimit - State.Column + State.NextToken->Previous->ColumnWidth; |
95 | |
96 | // Find the best ColumnFormat, i.e. the best number of columns to use. |
97 | const ColumnFormat *Format = getColumnFormat(RemainingCodePoints); |
98 | |
99 | // If no ColumnFormat can be used, the braced list would generally be |
100 | // bin-packed. Add a severe penalty to this so that column layouts are |
101 | // preferred if possible. |
102 | if (!Format) |
103 | return 10000; |
104 | |
105 | // Format the entire list. |
106 | unsigned Penalty = 0; |
107 | unsigned Column = 0; |
108 | unsigned Item = 0; |
109 | while (State.NextToken != LBrace->MatchingParen) { |
110 | bool NewLine = false; |
111 | unsigned ExtraSpaces = 0; |
112 | |
113 | // If the previous token was one of our commas, we are now on the next item. |
114 | if (Item < Commas.size() && State.NextToken->Previous == Commas[Item]) { |
115 | if (!State.NextToken->isTrailingComment()) { |
116 | ExtraSpaces += Format->ColumnSizes[Column] - ItemLengths[Item]; |
117 | ++Column; |
118 | } |
119 | ++Item; |
120 | } |
121 | |
122 | if (Column == Format->Columns || State.NextToken->MustBreakBefore) { |
123 | Column = 0; |
124 | NewLine = true; |
125 | } |
126 | |
127 | // Place token using the continuation indenter and store the penalty. |
128 | Penalty += Indenter->addTokenToState(State, NewLine, DryRun, ExtraSpaces); |
129 | } |
130 | return Penalty; |
131 | } |
132 | |
133 | unsigned CommaSeparatedList::formatFromToken(LineState &State, |
134 | ContinuationIndenter *Indenter, |
135 | bool DryRun) { |
136 | // Formatting with 1 Column isn't really a column layout, so we don't need the |
137 | // special logic here. We can just avoid bin packing any of the parameters. |
138 | if (Formats.size() == 1 || HasNestedBracedList) |
139 | State.Stack.back().AvoidBinPacking = true; |
140 | return 0; |
141 | } |
142 | |
143 | // Returns the lengths in code points between Begin and End (both included), |
144 | // assuming that the entire sequence is put on a single line. |
145 | static unsigned CodePointsBetween(const FormatToken *Begin, |
146 | const FormatToken *End) { |
147 | TotalLength >= Begin->TotalLength", "/home/seafit/code_projects/clang_source/clang/lib/Format/FormatToken.cpp", 147, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(End->TotalLength >= Begin->TotalLength); |
148 | return End->TotalLength - Begin->TotalLength + Begin->ColumnWidth; |
149 | } |
150 | |
151 | void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) { |
152 | // FIXME: At some point we might want to do this for other lists, too. |
153 | if (!Token->MatchingParen || |
154 | !Token->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare)) |
155 | return; |
156 | |
157 | // In C++11 braced list style, we should not format in columns unless they |
158 | // have many items (20 or more) or we allow bin-packing of function call |
159 | // arguments. |
160 | if (Style.Cpp11BracedListStyle && !Style.BinPackArguments && |
161 | Commas.size() < 19) |
162 | return; |
163 | |
164 | // Limit column layout for JavaScript array initializers to 20 or more items |
165 | // for now to introduce it carefully. We can become more aggressive if this |
166 | // necessary. |
167 | if (Token->is(TT_ArrayInitializerLSquare) && Commas.size() < 19) |
168 | return; |
169 | |
170 | // Column format doesn't really make sense if we don't align after brackets. |
171 | if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign) |
172 | return; |
173 | |
174 | FormatToken *ItemBegin = Token->Next; |
175 | while (ItemBegin->isTrailingComment()) |
176 | ItemBegin = ItemBegin->Next; |
177 | SmallVector<bool, 8> MustBreakBeforeItem; |
178 | |
179 | // The lengths of an item if it is put at the end of the line. This includes |
180 | // trailing comments which are otherwise ignored for column alignment. |
181 | SmallVector<unsigned, 8> EndOfLineItemLength; |
182 | |
183 | bool HasSeparatingComment = false; |
184 | for (unsigned i = 0, e = Commas.size() + 1; i != e; ++i) { |
185 | // Skip comments on their own line. |
186 | while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment()) { |
187 | ItemBegin = ItemBegin->Next; |
188 | HasSeparatingComment = i > 0; |
189 | } |
190 | |
191 | MustBreakBeforeItem.push_back(ItemBegin->MustBreakBefore); |
192 | if (ItemBegin->is(tok::l_brace)) |
193 | HasNestedBracedList = true; |
194 | const FormatToken *ItemEnd = nullptr; |
195 | if (i == Commas.size()) { |
196 | ItemEnd = Token->MatchingParen; |
197 | const FormatToken *NonCommentEnd = ItemEnd->getPreviousNonComment(); |
198 | ItemLengths.push_back(CodePointsBetween(ItemBegin, NonCommentEnd)); |
199 | if (Style.Cpp11BracedListStyle && |
200 | !ItemEnd->Previous->isTrailingComment()) { |
201 | // In Cpp11 braced list style, the } and possibly other subsequent |
202 | // tokens will need to stay on a line with the last element. |
203 | while (ItemEnd->Next && !ItemEnd->Next->CanBreakBefore) |
204 | ItemEnd = ItemEnd->Next; |
205 | } else { |
206 | // In other braced lists styles, the "}" can be wrapped to the new line. |
207 | ItemEnd = Token->MatchingParen->Previous; |
208 | } |
209 | } else { |
210 | ItemEnd = Commas[i]; |
211 | // The comma is counted as part of the item when calculating the length. |
212 | ItemLengths.push_back(CodePointsBetween(ItemBegin, ItemEnd)); |
213 | |
214 | // Consume trailing comments so the are included in EndOfLineItemLength. |
215 | if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline && |
216 | ItemEnd->Next->isTrailingComment()) |
217 | ItemEnd = ItemEnd->Next; |
218 | } |
219 | EndOfLineItemLength.push_back(CodePointsBetween(ItemBegin, ItemEnd)); |
220 | // If there is a trailing comma in the list, the next item will start at the |
221 | // closing brace. Don't create an extra item for this. |
222 | if (ItemEnd->getNextNonComment() == Token->MatchingParen) |
223 | break; |
224 | ItemBegin = ItemEnd->Next; |
225 | } |
226 | |
227 | // Don't use column layout for lists with few elements and in presence of |
228 | // separating comments. |
229 | if (Commas.size() < 5 || HasSeparatingComment) |
230 | return; |
231 | |
232 | if (Token->NestingLevel != 0 && Token->is(tok::l_brace) && Commas.size() < 19) |
233 | return; |
234 | |
235 | // We can never place more than ColumnLimit / 3 items in a row (because of the |
236 | // spaces and the comma). |
237 | unsigned MaxItems = Style.ColumnLimit / 3; |
238 | std::vector<unsigned> MinSizeInColumn; |
239 | MinSizeInColumn.reserve(MaxItems); |
240 | for (unsigned Columns = 1; Columns <= MaxItems; ++Columns) { |
241 | ColumnFormat Format; |
242 | Format.Columns = Columns; |
243 | Format.ColumnSizes.resize(Columns); |
244 | MinSizeInColumn.assign(Columns, UINT_MAX); |
245 | Format.LineCount = 1; |
246 | bool HasRowWithSufficientColumns = false; |
247 | unsigned Column = 0; |
248 | for (unsigned i = 0, e = ItemLengths.size(); i != e; ++i) { |
249 | assert(i < MustBreakBeforeItem.size()); |
250 | if (MustBreakBeforeItem[i] || Column == Columns) { |
251 | ++Format.LineCount; |
252 | Column = 0; |
253 | } |
254 | if (Column == Columns - 1) |
255 | HasRowWithSufficientColumns = true; |
256 | unsigned Length = |
257 | (Column == Columns - 1) ? EndOfLineItemLength[i] : ItemLengths[i]; |
258 | Format.ColumnSizes[Column] = std::max(Format.ColumnSizes[Column], Length); |
259 | MinSizeInColumn[Column] = std::min(MinSizeInColumn[Column], Length); |
260 | ++Column; |
261 | } |
262 | // If all rows are terminated early (e.g. by trailing comments), we don't |
263 | // need to look further. |
264 | if (!HasRowWithSufficientColumns) |
265 | break; |
266 | Format.TotalWidth = Columns - 1; // Width of the N-1 spaces. |
267 | |
268 | for (unsigned i = 0; i < Columns; ++i) |
269 | Format.TotalWidth += Format.ColumnSizes[i]; |
270 | |
271 | // Don't use this Format, if the difference between the longest and shortest |
272 | // element in a column exceeds a threshold to avoid excessive spaces. |
273 | if ([&] { |
274 | for (unsigned i = 0; i < Columns - 1; ++i) |
275 | if (Format.ColumnSizes[i] - MinSizeInColumn[i] > 10) |
276 | return true; |
277 | return false; |
278 | }()) |
279 | continue; |
280 | |
281 | // Ignore layouts that are bound to violate the column limit. |
282 | if (Format.TotalWidth > Style.ColumnLimit && Columns > 1) |
283 | continue; |
284 | |
285 | Formats.push_back(Format); |
286 | } |
287 | } |
288 | |
289 | const CommaSeparatedList::ColumnFormat * |
290 | CommaSeparatedList::getColumnFormat(unsigned RemainingCharacters) const { |
291 | const ColumnFormat *BestFormat = nullptr; |
292 | for (SmallVector<ColumnFormat, 4>::const_reverse_iterator |
293 | I = Formats.rbegin(), |
294 | E = Formats.rend(); |
295 | I != E; ++I) { |
296 | if (I->TotalWidth <= RemainingCharacters || I->Columns == 1) { |
297 | if (BestFormat && I->LineCount > BestFormat->LineCount) |
298 | break; |
299 | BestFormat = &*I; |
300 | } |
301 | } |
302 | return BestFormat; |
303 | } |
304 | |
305 | } // namespace format |
306 | } // namespace clang |
307 | |