1 | //===--- FormatToken.h - Format C++ code ------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// This file contains the declaration of the FormatToken, a wrapper |
11 | /// around Token with additional information related to formatting. |
12 | /// |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H |
16 | #define LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H |
17 | |
18 | #include "clang/Basic/IdentifierTable.h" |
19 | #include "clang/Basic/OperatorPrecedence.h" |
20 | #include "clang/Format/Format.h" |
21 | #include "clang/Lex/Lexer.h" |
22 | #include <memory> |
23 | #include <unordered_set> |
24 | |
25 | namespace clang { |
26 | namespace format { |
27 | |
// X-macro listing every token type known to the formatter.
//
// Users define TYPE(X) and then expand LIST_TOKEN_TYPES; the TokenType enum
// in this file expands each entry X to the enumerator TT_X. The order of the
// entries determines the enumerator values, so new entries change the values
// of everything listed after them.
#define LIST_TOKEN_TYPES \
  TYPE(ArrayInitializerLSquare) \
  TYPE(ArraySubscriptLSquare) \
  TYPE(AttributeColon) \
  TYPE(AttributeParen) \
  TYPE(AttributeSquare) \
  TYPE(BinaryOperator) \
  TYPE(BitFieldColon) \
  TYPE(BlockComment) \
  TYPE(CastRParen) \
  TYPE(ConditionalExpr) \
  TYPE(ConflictAlternative) \
  TYPE(ConflictEnd) \
  TYPE(ConflictStart) \
  TYPE(CtorInitializerColon) \
  TYPE(CtorInitializerComma) \
  TYPE(DesignatedInitializerLSquare) \
  TYPE(DesignatedInitializerPeriod) \
  TYPE(DictLiteral) \
  TYPE(ForEachMacro) \
  TYPE(FunctionAnnotationRParen) \
  TYPE(FunctionDeclarationName) \
  TYPE(FunctionLBrace) \
  TYPE(FunctionTypeLParen) \
  TYPE(ImplicitStringLiteral) \
  TYPE(InheritanceColon) \
  TYPE(InheritanceComma) \
  TYPE(InlineASMBrace) \
  TYPE(InlineASMColon) \
  TYPE(JavaAnnotation) \
  TYPE(JsComputedPropertyName) \
  TYPE(JsExponentiation) \
  TYPE(JsExponentiationEqual) \
  TYPE(JsFatArrow) \
  TYPE(JsNonNullAssertion) \
  TYPE(JsPrivateIdentifier) \
  TYPE(JsTypeColon) \
  TYPE(JsTypeOperator) \
  TYPE(JsTypeOptionalQuestion) \
  TYPE(LambdaArrow) \
  TYPE(LambdaLBrace) \
  TYPE(LambdaLSquare) \
  TYPE(LeadingJavaAnnotation) \
  TYPE(LineComment) \
  TYPE(MacroBlockBegin) \
  TYPE(MacroBlockEnd) \
  TYPE(ObjCBlockLBrace) \
  TYPE(ObjCBlockLParen) \
  TYPE(ObjCDecl) \
  TYPE(ObjCForIn) \
  TYPE(ObjCMethodExpr) \
  TYPE(ObjCMethodSpecifier) \
  TYPE(ObjCProperty) \
  TYPE(ObjCStringLiteral) \
  TYPE(OverloadedOperator) \
  TYPE(OverloadedOperatorLParen) \
  TYPE(PointerOrReference) \
  TYPE(PureVirtualSpecifier) \
  TYPE(RangeBasedForLoopColon) \
  TYPE(RegexLiteral) \
  TYPE(SelectorName) \
  TYPE(StartOfName) \
  TYPE(StatementMacro) \
  TYPE(StructuredBindingLSquare) \
  TYPE(TemplateCloser) \
  TYPE(TemplateOpener) \
  TYPE(TemplateString) \
  TYPE(ProtoExtensionLSquare) \
  TYPE(TrailingAnnotation) \
  TYPE(TrailingReturnArrow) \
  TYPE(TrailingUnaryOperator) \
  TYPE(UnaryOperator) \
  TYPE(CSharpStringLiteral) \
  TYPE(CSharpNullCoalescing) \
  TYPE(Unknown)
103 | |
104 | enum TokenType { |
105 | #define TYPE(X) TT_##X, |
106 | LIST_TOKEN_TYPES |
107 | #undef TYPE |
108 | NUM_TOKEN_TYPES |
109 | }; |
110 | |
/// Returns the name of the token type \p Type as a C string.
///
/// Only declared in this header; the definition lives elsewhere.
const char *getTokenTypeName(TokenType Type);
113 | |
/// The kind of block that a set of braces opens.
enum BraceBlockKind {
  BK_Unknown,   ///< Not (yet) classified; the default for a token.
  BK_Block,     ///< The braces open a block.
  BK_BracedInit ///< The braces open a braced initializer.
};

/// How the parameters of a function are packed.
enum ParameterPackingKind {
  PPK_BinPacked,
  PPK_OnePerLine,
  PPK_Inconclusive ///< The default for a token.
};

/// The formatting decision recorded for a token.
enum FormatDecision {
  FD_Unformatted, ///< No decision recorded; the default for a token.
  FD_Continue,
  FD_Break
};
121 | |
122 | class TokenRole; |
123 | class AnnotatedLine; |
124 | |
125 | /// A wrapper around a \c Token storing information about the |
126 | /// whitespace characters preceding it. |
127 | struct FormatToken { |
128 | FormatToken() {} |
129 | |
130 | /// The \c Token. |
131 | Token Tok; |
132 | |
133 | /// The number of newlines immediately before the \c Token. |
134 | /// |
135 | /// This can be used to determine what the user wrote in the original code |
136 | /// and thereby e.g. leave an empty line between two function definitions. |
137 | unsigned NewlinesBefore = 0; |
138 | |
139 | /// Whether there is at least one unescaped newline before the \c |
140 | /// Token. |
141 | bool HasUnescapedNewline = false; |
142 | |
143 | /// The range of the whitespace immediately preceding the \c Token. |
144 | SourceRange WhitespaceRange; |
145 | |
146 | /// The offset just past the last '\n' in this token's leading |
147 | /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'. |
148 | unsigned LastNewlineOffset = 0; |
149 | |
150 | /// The width of the non-whitespace parts of the token (or its first |
151 | /// line for multi-line tokens) in columns. |
152 | /// We need this to correctly measure number of columns a token spans. |
153 | unsigned ColumnWidth = 0; |
154 | |
155 | /// Contains the width in columns of the last line of a multi-line |
156 | /// token. |
157 | unsigned LastLineColumnWidth = 0; |
158 | |
159 | /// Whether the token text contains newlines (escaped or not). |
160 | bool IsMultiline = false; |
161 | |
162 | /// Indicates that this is the first token of the file. |
163 | bool IsFirst = false; |
164 | |
165 | /// Whether there must be a line break before this token. |
166 | /// |
167 | /// This happens for example when a preprocessor directive ended directly |
168 | /// before the token. |
169 | bool MustBreakBefore = false; |
170 | |
171 | /// The raw text of the token. |
172 | /// |
173 | /// Contains the raw token text without leading whitespace and without leading |
174 | /// escaped newlines. |
175 | StringRef TokenText; |
176 | |
177 | /// Set to \c true if this token is an unterminated literal. |
178 | bool IsUnterminatedLiteral = 0; |
179 | |
180 | /// Contains the kind of block if this token is a brace. |
181 | BraceBlockKind BlockKind = BK_Unknown; |
182 | |
183 | TokenType Type = TT_Unknown; |
184 | |
185 | /// The number of spaces that should be inserted before this token. |
186 | unsigned SpacesRequiredBefore = 0; |
187 | |
188 | /// \c true if it is allowed to break before this token. |
189 | bool CanBreakBefore = false; |
190 | |
191 | /// \c true if this is the ">" of "template<..>". |
192 | bool ClosesTemplateDeclaration = false; |
193 | |
194 | /// Number of parameters, if this is "(", "[" or "<". |
195 | unsigned ParameterCount = 0; |
196 | |
197 | /// Number of parameters that are nested blocks, |
198 | /// if this is "(", "[" or "<". |
199 | unsigned BlockParameterCount = 0; |
200 | |
201 | /// If this is a bracket ("<", "(", "[" or "{"), contains the kind of |
202 | /// the surrounding bracket. |
203 | tok::TokenKind ParentBracket = tok::unknown; |
204 | |
205 | /// A token can have a special role that can carry extra information |
206 | /// about the token's formatting. |
207 | std::unique_ptr<TokenRole> Role; |
208 | |
209 | /// If this is an opening parenthesis, how are the parameters packed? |
210 | ParameterPackingKind PackingKind = PPK_Inconclusive; |
211 | |
212 | /// The total length of the unwrapped line up to and including this |
213 | /// token. |
214 | unsigned TotalLength = 0; |
215 | |
216 | /// The original 0-based column of this token, including expanded tabs. |
217 | /// The configured TabWidth is used as tab width. |
218 | unsigned OriginalColumn = 0; |
219 | |
220 | /// The length of following tokens until the next natural split point, |
221 | /// or the next token that can be broken. |
222 | unsigned UnbreakableTailLength = 0; |
223 | |
224 | // FIXME: Come up with a 'cleaner' concept. |
225 | /// The binding strength of a token. This is a combined value of |
226 | /// operator precedence, parenthesis nesting, etc. |
227 | unsigned BindingStrength = 0; |
228 | |
229 | /// The nesting level of this token, i.e. the number of surrounding (), |
230 | /// [], {} or <>. |
231 | unsigned NestingLevel = 0; |
232 | |
233 | /// The indent level of this token. Copied from the surrounding line. |
234 | unsigned IndentLevel = 0; |
235 | |
236 | /// Penalty for inserting a line break before this token. |
237 | unsigned SplitPenalty = 0; |
238 | |
239 | /// If this is the first ObjC selector name in an ObjC method |
240 | /// definition or call, this contains the length of the longest name. |
241 | /// |
242 | /// This being set to 0 means that the selectors should not be colon-aligned, |
243 | /// e.g. because several of them are block-type. |
244 | unsigned LongestObjCSelectorName = 0; |
245 | |
246 | /// If this is the first ObjC selector name in an ObjC method |
247 | /// definition or call, this contains the number of parts that the whole |
248 | /// selector consist of. |
249 | unsigned ObjCSelectorNameParts = 0; |
250 | |
251 | /// The 0-based index of the parameter/argument. For ObjC it is set |
252 | /// for the selector name token. |
253 | /// For now calculated only for ObjC. |
254 | unsigned ParameterIndex = 0; |
255 | |
256 | /// Stores the number of required fake parentheses and the |
257 | /// corresponding operator precedence. |
258 | /// |
259 | /// If multiple fake parentheses start at a token, this vector stores them in |
260 | /// reverse order, i.e. inner fake parenthesis first. |
261 | SmallVector<prec::Level, 4> FakeLParens; |
262 | /// Insert this many fake ) after this token for correct indentation. |
263 | unsigned FakeRParens = 0; |
264 | |
265 | /// \c true if this token starts a binary expression, i.e. has at least |
266 | /// one fake l_paren with a precedence greater than prec::Unknown. |
267 | bool StartsBinaryExpression = false; |
268 | /// \c true if this token ends a binary expression. |
269 | bool EndsBinaryExpression = false; |
270 | |
271 | /// If this is an operator (or "."/"->") in a sequence of operators |
272 | /// with the same precedence, contains the 0-based operator index. |
273 | unsigned OperatorIndex = 0; |
274 | |
275 | /// If this is an operator (or "."/"->") in a sequence of operators |
276 | /// with the same precedence, points to the next operator. |
277 | FormatToken *NextOperator = nullptr; |
278 | |
279 | /// Is this token part of a \c DeclStmt defining multiple variables? |
280 | /// |
281 | /// Only set if \c Type == \c TT_StartOfName. |
282 | bool PartOfMultiVariableDeclStmt = false; |
283 | |
284 | /// Does this line comment continue a line comment section? |
285 | /// |
286 | /// Only set to true if \c Type == \c TT_LineComment. |
287 | bool ContinuesLineCommentSection = false; |
288 | |
289 | /// If this is a bracket, this points to the matching one. |
290 | FormatToken *MatchingParen = nullptr; |
291 | |
292 | /// The previous token in the unwrapped line. |
293 | FormatToken *Previous = nullptr; |
294 | |
295 | /// The next token in the unwrapped line. |
296 | FormatToken *Next = nullptr; |
297 | |
298 | /// If this token starts a block, this contains all the unwrapped lines |
299 | /// in it. |
300 | SmallVector<AnnotatedLine *, 1> Children; |
301 | |
302 | /// Stores the formatting decision for the token once it was made. |
303 | FormatDecision Decision = FD_Unformatted; |
304 | |
305 | /// If \c true, this token has been fully formatted (indented and |
306 | /// potentially re-formatted inside), and we do not allow further formatting |
307 | /// changes. |
308 | bool Finalized = false; |
309 | |
310 | bool is(tok::TokenKind Kind) const { return Tok.is(Kind); } |
311 | bool is(TokenType TT) const { return Type == TT; } |
312 | bool is(const IdentifierInfo *II) const { |
313 | return II && II == Tok.getIdentifierInfo(); |
314 | } |
315 | bool is(tok::PPKeywordKind Kind) const { |
316 | return Tok.getIdentifierInfo() && |
317 | Tok.getIdentifierInfo()->getPPKeywordID() == Kind; |
318 | } |
319 | template <typename A, typename B> bool isOneOf(A K1, B K2) const { |
320 | return is(K1) || is(K2); |
321 | } |
322 | template <typename A, typename B, typename... Ts> |
323 | bool isOneOf(A K1, B K2, Ts... Ks) const { |
324 | return is(K1) || isOneOf(K2, Ks...); |
325 | } |
326 | template <typename T> bool isNot(T Kind) const { return !is(Kind); } |
327 | |
328 | bool closesScopeAfterBlock() const { |
329 | if (BlockKind == BK_Block) |
330 | return true; |
331 | if (closesScope()) |
332 | return Previous->closesScopeAfterBlock(); |
333 | return false; |
334 | } |
335 | |
336 | /// \c true if this token starts a sequence with the given tokens in order, |
337 | /// following the ``Next`` pointers, ignoring comments. |
338 | template <typename A, typename... Ts> |
339 | bool startsSequence(A K1, Ts... Tokens) const { |
340 | return startsSequenceInternal(K1, Tokens...); |
341 | } |
342 | |
343 | /// \c true if this token ends a sequence with the given tokens in order, |
344 | /// following the ``Previous`` pointers, ignoring comments. |
345 | template <typename A, typename... Ts> |
346 | bool endsSequence(A K1, Ts... Tokens) const { |
347 | return endsSequenceInternal(K1, Tokens...); |
348 | } |
349 | |
350 | bool isStringLiteral() const { return tok::isStringLiteral(Tok.getKind()); } |
351 | |
352 | bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const { |
353 | return Tok.isObjCAtKeyword(Kind); |
354 | } |
355 | |
356 | bool isAccessSpecifier(bool ColonRequired = true) const { |
357 | return isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private) && |
358 | (!ColonRequired || (Next && Next->is(tok::colon))); |
359 | } |
360 | |
361 | /// Determine whether the token is a simple-type-specifier. |
362 | bool isSimpleTypeSpecifier() const; |
363 | |
364 | bool isObjCAccessSpecifier() const { |
365 | return is(tok::at) && Next && |
366 | (Next->isObjCAtKeyword(tok::objc_public) || |
367 | Next->isObjCAtKeyword(tok::objc_protected) || |
368 | Next->isObjCAtKeyword(tok::objc_package) || |
369 | Next->isObjCAtKeyword(tok::objc_private)); |
370 | } |
371 | |
372 | /// Returns whether \p Tok is ([{ or an opening < of a template or in |
373 | /// protos. |
374 | bool opensScope() const { |
375 | if (is(TT_TemplateString) && TokenText.endswith("${")) |
376 | return true; |
377 | if (is(TT_DictLiteral) && is(tok::less)) |
378 | return true; |
379 | return isOneOf(tok::l_paren, tok::l_brace, tok::l_square, |
380 | TT_TemplateOpener); |
381 | } |
382 | /// Returns whether \p Tok is )]} or a closing > of a template or in |
383 | /// protos. |
384 | bool closesScope() const { |
385 | if (is(TT_TemplateString) && TokenText.startswith("}")) |
386 | return true; |
387 | if (is(TT_DictLiteral) && is(tok::greater)) |
388 | return true; |
389 | return isOneOf(tok::r_paren, tok::r_brace, tok::r_square, |
390 | TT_TemplateCloser); |
391 | } |
392 | |
393 | /// Returns \c true if this is a "." or "->" accessing a member. |
394 | bool isMemberAccess() const { |
395 | return isOneOf(tok::arrow, tok::period, tok::arrowstar) && |
396 | !isOneOf(TT_DesignatedInitializerPeriod, TT_TrailingReturnArrow, |
397 | TT_LambdaArrow); |
398 | } |
399 | |
400 | bool isUnaryOperator() const { |
401 | switch (Tok.getKind()) { |
402 | case tok::plus: |
403 | case tok::plusplus: |
404 | case tok::minus: |
405 | case tok::minusminus: |
406 | case tok::exclaim: |
407 | case tok::tilde: |
408 | case tok::kw_sizeof: |
409 | case tok::kw_alignof: |
410 | return true; |
411 | default: |
412 | return false; |
413 | } |
414 | } |
415 | |
416 | bool isBinaryOperator() const { |
417 | // Comma is a binary operator, but does not behave as such wrt. formatting. |
418 | return getPrecedence() > prec::Comma; |
419 | } |
420 | |
421 | bool isTrailingComment() const { |
422 | return is(tok::comment) && |
423 | (is(TT_LineComment) || !Next || Next->NewlinesBefore > 0); |
424 | } |
425 | |
426 | /// Returns \c true if this is a keyword that can be used |
427 | /// like a function call (e.g. sizeof, typeid, ...). |
428 | bool isFunctionLikeKeyword() const { |
429 | switch (Tok.getKind()) { |
430 | case tok::kw_throw: |
431 | case tok::kw_typeid: |
432 | case tok::kw_return: |
433 | case tok::kw_sizeof: |
434 | case tok::kw_alignof: |
435 | case tok::kw_alignas: |
436 | case tok::kw_decltype: |
437 | case tok::kw_noexcept: |
438 | case tok::kw_static_assert: |
439 | case tok::kw___attribute: |
440 | return true; |
441 | default: |
442 | return false; |
443 | } |
444 | } |
445 | |
446 | /// Returns \c true if this is a string literal that's like a label, |
447 | /// e.g. ends with "=" or ":". |
448 | bool isLabelString() const { |
449 | if (!is(tok::string_literal)) |
450 | return false; |
451 | StringRef Content = TokenText; |
452 | if (Content.startswith("\"") || Content.startswith("'")) |
453 | Content = Content.drop_front(1); |
454 | if (Content.endswith("\"") || Content.endswith("'")) |
455 | Content = Content.drop_back(1); |
456 | Content = Content.trim(); |
457 | return Content.size() > 1 && |
458 | (Content.back() == ':' || Content.back() == '='); |
459 | } |
460 | |
461 | /// Returns actual token start location without leading escaped |
462 | /// newlines and whitespace. |
463 | /// |
464 | /// This can be different to Tok.getLocation(), which includes leading escaped |
465 | /// newlines. |
466 | SourceLocation getStartOfNonWhitespace() const { |
467 | return WhitespaceRange.getEnd(); |
468 | } |
469 | |
470 | prec::Level getPrecedence() const { |
471 | return getBinOpPrecedence(Tok.getKind(), /*GreaterThanIsOperator=*/true, |
472 | /*CPlusPlus11=*/true); |
473 | } |
474 | |
475 | /// Returns the previous token ignoring comments. |
476 | FormatToken *getPreviousNonComment() const { |
477 | FormatToken *Tok = Previous; |
478 | while (Tok && Tok->is(tok::comment)) |
479 | Tok = Tok->Previous; |
480 | return Tok; |
481 | } |
482 | |
483 | /// Returns the next token ignoring comments. |
484 | const FormatToken *getNextNonComment() const { |
485 | const FormatToken *Tok = Next; |
486 | while (Tok && Tok->is(tok::comment)) |
487 | Tok = Tok->Next; |
488 | return Tok; |
489 | } |
490 | |
491 | /// Returns \c true if this tokens starts a block-type list, i.e. a |
492 | /// list that should be indented with a block indent. |
493 | bool opensBlockOrBlockTypeList(const FormatStyle &Style) const { |
494 | if (is(TT_TemplateString) && opensScope()) |
495 | return true; |
496 | return is(TT_ArrayInitializerLSquare) || is(TT_ProtoExtensionLSquare) || |
497 | (is(tok::l_brace) && |
498 | (BlockKind == BK_Block || is(TT_DictLiteral) || |
499 | (!Style.Cpp11BracedListStyle && NestingLevel == 0))) || |
500 | (is(tok::less) && (Style.Language == FormatStyle::LK_Proto || |
501 | Style.Language == FormatStyle::LK_TextProto)); |
502 | } |
503 | |
504 | /// Returns whether the token is the left square bracket of a C++ |
505 | /// structured binding declaration. |
506 | bool isCppStructuredBinding(const FormatStyle &Style) const { |
507 | if (!Style.isCpp() || isNot(tok::l_square)) |
508 | return false; |
509 | const FormatToken *T = this; |
510 | do { |
511 | T = T->getPreviousNonComment(); |
512 | } while (T && T->isOneOf(tok::kw_const, tok::kw_volatile, tok::amp, |
513 | tok::ampamp)); |
514 | return T && T->is(tok::kw_auto); |
515 | } |
516 | |
517 | /// Same as opensBlockOrBlockTypeList, but for the closing token. |
518 | bool closesBlockOrBlockTypeList(const FormatStyle &Style) const { |
519 | if (is(TT_TemplateString) && closesScope()) |
520 | return true; |
521 | return MatchingParen && MatchingParen->opensBlockOrBlockTypeList(Style); |
522 | } |
523 | |
524 | /// Return the actual namespace token, if this token starts a namespace |
525 | /// block. |
526 | const FormatToken *getNamespaceToken() const { |
527 | const FormatToken *NamespaceTok = this; |
528 | if (is(tok::comment)) |
529 | NamespaceTok = NamespaceTok->getNextNonComment(); |
530 | // Detect "(inline|export)? namespace" in the beginning of a line. |
531 | if (NamespaceTok && NamespaceTok->isOneOf(tok::kw_inline, tok::kw_export)) |
532 | NamespaceTok = NamespaceTok->getNextNonComment(); |
533 | return NamespaceTok && NamespaceTok->is(tok::kw_namespace) ? NamespaceTok |
534 | : nullptr; |
535 | } |
536 | |
537 | private: |
538 | // Disallow copying. |
539 | FormatToken(const FormatToken &) = delete; |
540 | void operator=(const FormatToken &) = delete; |
541 | |
542 | template <typename A, typename... Ts> |
543 | bool startsSequenceInternal(A K1, Ts... Tokens) const { |
544 | if (is(tok::comment) && Next) |
545 | return Next->startsSequenceInternal(K1, Tokens...); |
546 | return is(K1) && Next && Next->startsSequenceInternal(Tokens...); |
547 | } |
548 | |
549 | template <typename A> bool startsSequenceInternal(A K1) const { |
550 | if (is(tok::comment) && Next) |
551 | return Next->startsSequenceInternal(K1); |
552 | return is(K1); |
553 | } |
554 | |
555 | template <typename A, typename... Ts> bool endsSequenceInternal(A K1) const { |
556 | if (is(tok::comment) && Previous) |
557 | return Previous->endsSequenceInternal(K1); |
558 | return is(K1); |
559 | } |
560 | |
561 | template <typename A, typename... Ts> |
562 | bool endsSequenceInternal(A K1, Ts... Tokens) const { |
563 | if (is(tok::comment) && Previous) |
564 | return Previous->endsSequenceInternal(K1, Tokens...); |
565 | return is(K1) && Previous && Previous->endsSequenceInternal(Tokens...); |
566 | } |
567 | }; |
568 | |
569 | class ContinuationIndenter; |
570 | struct LineState; |
571 | |
572 | class TokenRole { |
573 | public: |
574 | TokenRole(const FormatStyle &Style) : Style(Style) {} |
575 | virtual ~TokenRole(); |
576 | |
577 | /// After the \c TokenAnnotator has finished annotating all the tokens, |
578 | /// this function precomputes required information for formatting. |
579 | virtual void precomputeFormattingInfos(const FormatToken *Token); |
580 | |
581 | /// Apply the special formatting that the given role demands. |
582 | /// |
583 | /// Assumes that the token having this role is already formatted. |
584 | /// |
585 | /// Continues formatting from \p State leaving indentation to \p Indenter and |
586 | /// returns the total penalty that this formatting incurs. |
587 | virtual unsigned formatFromToken(LineState &State, |
588 | ContinuationIndenter *Indenter, |
589 | bool DryRun) { |
590 | return 0; |
591 | } |
592 | |
593 | /// Same as \c formatFromToken, but assumes that the first token has |
594 | /// already been set thereby deciding on the first line break. |
595 | virtual unsigned formatAfterToken(LineState &State, |
596 | ContinuationIndenter *Indenter, |
597 | bool DryRun) { |
598 | return 0; |
599 | } |
600 | |
601 | /// Notifies the \c Role that a comma was found. |
602 | virtual void CommaFound(const FormatToken *Token) {} |
603 | |
604 | virtual const FormatToken *lastComma() { return nullptr; } |
605 | |
606 | protected: |
607 | const FormatStyle &Style; |
608 | }; |
609 | |
610 | class CommaSeparatedList : public TokenRole { |
611 | public: |
612 | CommaSeparatedList(const FormatStyle &Style) |
613 | : TokenRole(Style), HasNestedBracedList(false) {} |
614 | |
615 | void precomputeFormattingInfos(const FormatToken *Token) override; |
616 | |
617 | unsigned formatAfterToken(LineState &State, ContinuationIndenter *Indenter, |
618 | bool DryRun) override; |
619 | |
620 | unsigned formatFromToken(LineState &State, ContinuationIndenter *Indenter, |
621 | bool DryRun) override; |
622 | |
623 | /// Adds \p Token as the next comma to the \c CommaSeparated list. |
624 | void CommaFound(const FormatToken *Token) override { |
625 | Commas.push_back(Token); |
626 | } |
627 | |
628 | const FormatToken *lastComma() override { |
629 | if (Commas.empty()) |
630 | return nullptr; |
631 | return Commas.back(); |
632 | } |
633 | |
634 | private: |
635 | /// A struct that holds information on how to format a given list with |
636 | /// a specific number of columns. |
637 | struct ColumnFormat { |
638 | /// The number of columns to use. |
639 | unsigned Columns; |
640 | |
641 | /// The total width in characters. |
642 | unsigned TotalWidth; |
643 | |
644 | /// The number of lines required for this format. |
645 | unsigned LineCount; |
646 | |
647 | /// The size of each column in characters. |
648 | SmallVector<unsigned, 8> ColumnSizes; |
649 | }; |
650 | |
651 | /// Calculate which \c ColumnFormat fits best into |
652 | /// \p RemainingCharacters. |
653 | const ColumnFormat *getColumnFormat(unsigned RemainingCharacters) const; |
654 | |
655 | /// The ordered \c FormatTokens making up the commas of this list. |
656 | SmallVector<const FormatToken *, 8> Commas; |
657 | |
658 | /// The length of each of the list's items in characters including the |
659 | /// trailing comma. |
660 | SmallVector<unsigned, 8> ItemLengths; |
661 | |
662 | /// Precomputed formats that can be used for this list. |
663 | SmallVector<ColumnFormat, 4> Formats; |
664 | |
665 | bool HasNestedBracedList; |
666 | }; |
667 | |
668 | /// Encapsulates keywords that are context sensitive or for languages not |
669 | /// properly supported by Clang's lexer. |
670 | struct AdditionalKeywords { |
671 | AdditionalKeywords(IdentifierTable &IdentTable) { |
672 | kw_final = &IdentTable.get("final"); |
673 | kw_override = &IdentTable.get("override"); |
674 | kw_in = &IdentTable.get("in"); |
675 | kw_of = &IdentTable.get("of"); |
676 | kw_CF_ENUM = &IdentTable.get("CF_ENUM"); |
677 | kw_CF_OPTIONS = &IdentTable.get("CF_OPTIONS"); |
678 | kw_NS_ENUM = &IdentTable.get("NS_ENUM"); |
679 | kw_NS_OPTIONS = &IdentTable.get("NS_OPTIONS"); |
680 | |
681 | kw_as = &IdentTable.get("as"); |
682 | kw_async = &IdentTable.get("async"); |
683 | kw_await = &IdentTable.get("await"); |
684 | kw_declare = &IdentTable.get("declare"); |
685 | kw_finally = &IdentTable.get("finally"); |
686 | kw_from = &IdentTable.get("from"); |
687 | kw_function = &IdentTable.get("function"); |
688 | kw_get = &IdentTable.get("get"); |
689 | kw_import = &IdentTable.get("import"); |
690 | kw_infer = &IdentTable.get("infer"); |
691 | kw_is = &IdentTable.get("is"); |
692 | kw_let = &IdentTable.get("let"); |
693 | kw_module = &IdentTable.get("module"); |
694 | kw_readonly = &IdentTable.get("readonly"); |
695 | kw_set = &IdentTable.get("set"); |
696 | kw_type = &IdentTable.get("type"); |
697 | kw_typeof = &IdentTable.get("typeof"); |
698 | kw_var = &IdentTable.get("var"); |
699 | kw_yield = &IdentTable.get("yield"); |
700 | |
701 | kw_abstract = &IdentTable.get("abstract"); |
702 | kw_assert = &IdentTable.get("assert"); |
703 | kw_extends = &IdentTable.get("extends"); |
704 | kw_implements = &IdentTable.get("implements"); |
705 | kw_instanceof = &IdentTable.get("instanceof"); |
706 | kw_interface = &IdentTable.get("interface"); |
707 | kw_native = &IdentTable.get("native"); |
708 | kw_package = &IdentTable.get("package"); |
709 | kw_synchronized = &IdentTable.get("synchronized"); |
710 | kw_throws = &IdentTable.get("throws"); |
711 | kw___except = &IdentTable.get("__except"); |
712 | kw___has_include = &IdentTable.get("__has_include"); |
713 | kw___has_include_next = &IdentTable.get("__has_include_next"); |
714 | |
715 | kw_mark = &IdentTable.get("mark"); |
716 | |
717 | kw_extend = &IdentTable.get("extend"); |
718 | kw_option = &IdentTable.get("option"); |
719 | kw_optional = &IdentTable.get("optional"); |
720 | kw_repeated = &IdentTable.get("repeated"); |
721 | kw_required = &IdentTable.get("required"); |
722 | kw_returns = &IdentTable.get("returns"); |
723 | |
724 | kw_signals = &IdentTable.get("signals"); |
725 | kw_qsignals = &IdentTable.get("Q_SIGNALS"); |
726 | kw_slots = &IdentTable.get("slots"); |
727 | kw_qslots = &IdentTable.get("Q_SLOTS"); |
728 | |
729 | // C# keywords |
730 | kw_dollar = &IdentTable.get("dollar"); |
731 | kw_base = &IdentTable.get("base"); |
732 | kw_byte = &IdentTable.get("byte"); |
733 | kw_checked = &IdentTable.get("checked"); |
734 | kw_decimal = &IdentTable.get("decimal"); |
735 | kw_delegate = &IdentTable.get("delegate"); |
736 | kw_event = &IdentTable.get("event"); |
737 | kw_fixed = &IdentTable.get("fixed"); |
738 | kw_foreach = &IdentTable.get("foreach"); |
739 | kw_implicit = &IdentTable.get("implicit"); |
740 | kw_internal = &IdentTable.get("internal"); |
741 | kw_lock = &IdentTable.get("lock"); |
742 | kw_null = &IdentTable.get("null"); |
743 | kw_object = &IdentTable.get("object"); |
744 | kw_out = &IdentTable.get("out"); |
745 | kw_params = &IdentTable.get("params"); |
746 | kw_ref = &IdentTable.get("ref"); |
747 | kw_string = &IdentTable.get("string"); |
748 | kw_stackalloc = &IdentTable.get("stackalloc"); |
749 | kw_sbyte = &IdentTable.get("sbyte"); |
750 | kw_sealed = &IdentTable.get("sealed"); |
751 | kw_uint = &IdentTable.get("uint"); |
752 | kw_ulong = &IdentTable.get("ulong"); |
753 | kw_unchecked = &IdentTable.get("unchecked"); |
754 | kw_unsafe = &IdentTable.get("unsafe"); |
755 | kw_ushort = &IdentTable.get("ushort"); |
756 | |
757 | // Keep this at the end of the constructor to make sure everything here |
758 | // is |
759 | // already initialized. |
760 | JsExtraKeywords = std::unordered_set<IdentifierInfo *>( |
761 | {kw_as, kw_async, kw_await, kw_declare, kw_finally, kw_from, |
762 | kw_function, kw_get, kw_import, kw_is, kw_let, kw_module, kw_readonly, |
763 | kw_set, kw_type, kw_typeof, kw_var, kw_yield, |
764 | // Keywords from the Java section. |
765 | kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface}); |
766 | |
767 | CSharpExtraKeywords = std::unordered_set<IdentifierInfo *>( |
768 | {kw_base, kw_byte, kw_checked, kw_decimal, kw_delegate, kw_event, |
769 | kw_fixed, kw_foreach, kw_implicit, kw_in, kw_interface, kw_internal, |
770 | kw_is, kw_lock, kw_null, kw_object, kw_out, kw_override, kw_params, |
771 | kw_readonly, kw_ref, kw_string, kw_stackalloc, kw_sbyte, kw_sealed, |
772 | kw_uint, kw_ulong, kw_unchecked, kw_unsafe, kw_ushort, |
773 | // Keywords from the JavaScript section. |
774 | kw_as, kw_async, kw_await, kw_declare, kw_finally, kw_from, |
775 | kw_function, kw_get, kw_import, kw_is, kw_let, kw_module, kw_readonly, |
776 | kw_set, kw_type, kw_typeof, kw_var, kw_yield, |
777 | // Keywords from the Java section. |
778 | kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface}); |
779 | } |
780 | |
781 | // Context sensitive keywords. |
782 | IdentifierInfo *kw_final; |
783 | IdentifierInfo *kw_override; |
784 | IdentifierInfo *kw_in; |
785 | IdentifierInfo *kw_of; |
786 | IdentifierInfo *kw_CF_ENUM; |
787 | IdentifierInfo *kw_CF_OPTIONS; |
788 | IdentifierInfo *kw_NS_ENUM; |
789 | IdentifierInfo *kw_NS_OPTIONS; |
790 | IdentifierInfo *kw___except; |
791 | IdentifierInfo *kw___has_include; |
792 | IdentifierInfo *kw___has_include_next; |
793 | |
794 | // JavaScript keywords. |
795 | IdentifierInfo *kw_as; |
796 | IdentifierInfo *kw_async; |
797 | IdentifierInfo *kw_await; |
798 | IdentifierInfo *kw_declare; |
799 | IdentifierInfo *kw_finally; |
800 | IdentifierInfo *kw_from; |
801 | IdentifierInfo *kw_function; |
802 | IdentifierInfo *kw_get; |
803 | IdentifierInfo *kw_import; |
804 | IdentifierInfo *kw_infer; |
805 | IdentifierInfo *kw_is; |
806 | IdentifierInfo *kw_let; |
807 | IdentifierInfo *kw_module; |
808 | IdentifierInfo *kw_readonly; |
809 | IdentifierInfo *kw_set; |
810 | IdentifierInfo *kw_type; |
811 | IdentifierInfo *kw_typeof; |
812 | IdentifierInfo *kw_var; |
813 | IdentifierInfo *kw_yield; |
814 | |
815 | // Java keywords. |
816 | IdentifierInfo *kw_abstract; |
817 | IdentifierInfo *kw_assert; |
818 | IdentifierInfo *kw_extends; |
819 | IdentifierInfo *kw_implements; |
820 | IdentifierInfo *kw_instanceof; |
821 | IdentifierInfo *kw_interface; |
822 | IdentifierInfo *kw_native; |
823 | IdentifierInfo *kw_package; |
824 | IdentifierInfo *kw_synchronized; |
825 | IdentifierInfo *kw_throws; |
826 | |
827 | // Pragma keywords. |
828 | IdentifierInfo *kw_mark; |
829 | |
830 | // Proto keywords. |
831 | IdentifierInfo *kw_extend; |
832 | IdentifierInfo *kw_option; |
833 | IdentifierInfo *kw_optional; |
834 | IdentifierInfo *kw_repeated; |
835 | IdentifierInfo *kw_required; |
836 | IdentifierInfo *kw_returns; |
837 | |
838 | // QT keywords. |
839 | IdentifierInfo *kw_signals; |
840 | IdentifierInfo *kw_qsignals; |
841 | IdentifierInfo *kw_slots; |
842 | IdentifierInfo *kw_qslots; |
843 | |
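// C# keywords. kw_in, kw_interface, kw_is, kw_override and kw_readonly are
// shared with C# as well, but are declared in the sections above.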
845 | IdentifierInfo *kw_dollar; |
846 | IdentifierInfo *kw_base; |
847 | IdentifierInfo *kw_byte; |
848 | IdentifierInfo *kw_checked; |
849 | IdentifierInfo *kw_decimal; |
850 | IdentifierInfo *kw_delegate; |
851 | IdentifierInfo *kw_event; |
852 | IdentifierInfo *kw_fixed; |
853 | IdentifierInfo *kw_foreach; |
854 | IdentifierInfo *kw_implicit; |
855 | IdentifierInfo *kw_internal; |
856 | |
857 | IdentifierInfo *kw_lock; |
858 | IdentifierInfo *kw_null; |
859 | IdentifierInfo *kw_object; |
860 | IdentifierInfo *kw_out; |
861 | |
862 | IdentifierInfo *kw_params; |
863 | |
864 | IdentifierInfo *kw_ref; |
865 | IdentifierInfo *kw_string; |
866 | IdentifierInfo *kw_stackalloc; |
867 | IdentifierInfo *kw_sbyte; |
868 | IdentifierInfo *kw_sealed; |
869 | IdentifierInfo *kw_uint; |
870 | IdentifierInfo *kw_ulong; |
871 | IdentifierInfo *kw_unchecked; |
872 | IdentifierInfo *kw_unsafe; |
873 | IdentifierInfo *kw_ushort; |
874 | |
875 | /// Returns \c true if \p Tok is a true JavaScript identifier, returns |
876 | /// \c false if it is a keyword or a pseudo keyword. |
877 | bool IsJavaScriptIdentifier(const FormatToken &Tok) const { |
878 | return Tok.is(tok::identifier) && |
879 | JsExtraKeywords.find(Tok.Tok.getIdentifierInfo()) == |
880 | JsExtraKeywords.end(); |
881 | } |
882 | |
883 | /// Returns \c true if \p Tok is a C# keyword, returns |
884 | /// \c false if it is a anything else. |
885 | bool isCSharpKeyword(const FormatToken &Tok) const { |
886 | switch (Tok.Tok.getKind()) { |
887 | case tok::kw_bool: |
888 | case tok::kw_break: |
889 | case tok::kw_case: |
890 | case tok::kw_catch: |
891 | case tok::kw_char: |
892 | case tok::kw_class: |
893 | case tok::kw_const: |
894 | case tok::kw_continue: |
895 | case tok::kw_default: |
896 | case tok::kw_do: |
897 | case tok::kw_double: |
898 | case tok::kw_else: |
899 | case tok::kw_enum: |
900 | case tok::kw_explicit: |
901 | case tok::kw_extern: |
902 | case tok::kw_false: |
903 | case tok::kw_float: |
904 | case tok::kw_for: |
905 | case tok::kw_goto: |
906 | case tok::kw_if: |
907 | case tok::kw_int: |
908 | case tok::kw_long: |
909 | case tok::kw_namespace: |
910 | case tok::kw_new: |
911 | case tok::kw_operator: |
912 | case tok::kw_private: |
913 | case tok::kw_protected: |
914 | case tok::kw_public: |
915 | case tok::kw_return: |
916 | case tok::kw_short: |
917 | case tok::kw_sizeof: |
918 | case tok::kw_static: |
919 | case tok::kw_struct: |
920 | case tok::kw_switch: |
921 | case tok::kw_this: |
922 | case tok::kw_throw: |
923 | case tok::kw_true: |
924 | case tok::kw_try: |
925 | case tok::kw_typeof: |
926 | case tok::kw_using: |
927 | case tok::kw_virtual: |
928 | case tok::kw_void: |
929 | case tok::kw_volatile: |
930 | case tok::kw_while: |
931 | return true; |
932 | default: |
933 | return Tok.is(tok::identifier) && |
934 | CSharpExtraKeywords.find(Tok.Tok.getIdentifierInfo()) == |
935 | CSharpExtraKeywords.end(); |
936 | } |
937 | } |
938 | |
939 | private: |
940 | /// The JavaScript keywords beyond the C++ keyword set. |
941 | std::unordered_set<IdentifierInfo *> JsExtraKeywords; |
942 | |
943 | /// The C# keywords beyond the C++ keyword set |
944 | std::unordered_set<IdentifierInfo *> CSharpExtraKeywords; |
945 | }; |
946 | |
947 | } // namespace format |
948 | } // namespace clang |
949 | |
950 | #endif |
951 | |