UnwrappedLineParser.h source code [clang_source_code/lib/Format/UnwrappedLineParser.h]

1	//===--- UnwrappedLineParser.h - Format C++ code ----------------*- C++ -*-===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	///
9	/// \file
10	/// This file contains the declaration of the UnwrappedLineParser,
11	/// which turns a stream of tokens into UnwrappedLines.
12	///
13	//===----------------------------------------------------------------------===//
14
15	#ifndef LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H
16	#define LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H
17
18	#include "FormatToken.h"
19	#include "clang/Basic/IdentifierTable.h"
20	#include "clang/Format/Format.h"
21	#include "llvm/Support/Regex.h"
22	#include <list>
23	#include <stack>
24
25	namespace clang {
26	namespace format {
27
28	struct UnwrappedLineNode;
29
30	/// An unwrapped line is a sequence of \c Token, that we would like to
31	/// put on a single line if there was no column limit.
32	///
33	/// This is used as a main interface between the \c UnwrappedLineParser and the
34	/// \c UnwrappedLineFormatter. The key property is that changing the formatting
35	/// within an unwrapped line does not affect any other unwrapped lines.
36	struct UnwrappedLine {
	/// Creates an empty line. The definition is at the end of this header (after
	/// \c UnwrappedLineNode); it sets \c Level to 0 and both flags to false.
37	UnwrappedLine();
38
39	// FIXME: Don't use std::list here.
40	/// The \c Tokens comprising this \c UnwrappedLine.
41	std::list<UnwrappedLineNode> Tokens;
42
43	/// The indent level of the \c UnwrappedLine.
44	unsigned Level;
45
46	/// Whether this \c UnwrappedLine is part of a preprocessor directive.
47	bool InPPDirective;
48
	/// Whether this \c UnwrappedLine must be a declaration.
	/// NOTE(review): presumably derived from the enclosing scope, cf.
	/// \c UnwrappedLineParser::DeclarationScopeStack - confirm in the parser
	/// implementation.
49	bool MustBeDeclaration;
50
51	/// If this \c UnwrappedLine closes a block in a sequence of lines,
52	/// \c MatchingOpeningBlockLineIndex stores the index of the corresponding
53	/// opening line. Otherwise, \c MatchingOpeningBlockLineIndex must be
54	/// \c kInvalidIndex.
55	size_t MatchingOpeningBlockLineIndex = kInvalidIndex;
56
57	/// If this \c UnwrappedLine opens a block, stores the index of the
58	/// line with the corresponding closing brace. Defaults to \c kInvalidIndex.
59	size_t MatchingClosingBlockLineIndex = kInvalidIndex;
60
	/// Sentinel used by the two block-index members above. The conversion of -1
	/// to \c size_t yields the largest value that type can represent.
61	static const size_t kInvalidIndex = -1;
62
	/// Defaults to 0.
	/// NOTE(review): presumably the column at which the first token of this line
	/// starts, mirroring \c UnwrappedLineParser::FirstStartColumn - confirm.
63	unsigned FirstStartColumn = 0;
64	};
65
	/// Callback interface handed to \c UnwrappedLineParser, whose constructor
	/// takes an \c UnwrappedLineConsumer by reference.
	///
	/// NOTE(review): when and how often the parser invokes each callback is
	/// decided by the parser implementation, which is not part of this header.
66	class UnwrappedLineConsumer {
67	public:
	// Defaulted instead of an empty body. It stays virtual so that an
	// implementation can be destroyed through a pointer to this interface.
68	virtual ~UnwrappedLineConsumer() = default;
	/// Receives one \c UnwrappedLine. \p Line is passed by const reference, so an
	/// implementation that wants to keep it has to copy it.
69	virtual void consumeUnwrappedLine(const UnwrappedLine &Line) = 0;
	/// NOTE(review): presumably signals that one parsing run is complete -
	/// confirm against the callers.
70	virtual void finishRun() = 0;
71	};
72
73	class FormatTokenSource;
74
	/// Turns a stream of tokens into \c UnwrappedLine objects (see the file
	/// comment). Construction and \c parse() are the only public operations;
	/// everything else is parser-internal state and helpers.
75	class UnwrappedLineParser {
76	public:
	/// The class has reference members for \p Style, \p Keywords and \p Callback,
	/// so the objects passed here presumably have to outlive the parser.
	/// \p FirstStartColumn is described on the member of the same name below.
77	UnwrappedLineParser(const FormatStyle &Style,
78	const AdditionalKeywords &Keywords,
79	unsigned FirstStartColumn, ArrayRef<FormatToken *> Tokens,
80	UnwrappedLineConsumer &Callback);
81
	/// The only public entry point of the parser.
82	void parse();
83
84	private:
	// Parsing helpers. Their definitions live outside this header, so the notes
	// below are limited to what the declarations and existing comments state.
85	void reset();
86	void parseFile();
87	void parseLevel(bool HasOpeningBrace);
88	void parseBlock(bool MustBeDeclaration, bool AddLevel = true,
89	bool MunchSemi = true);
90	void parseChildBlock();
91	void parsePPDirective();
92	void parsePPDefine();
93	void parsePPIf(bool IfDef);
94	void parsePPElIf();
95	void parsePPElse();
96	void parsePPEndIf();
97	void parsePPUnknown();
98	void readTokenWithJavaScriptASI();
99	void parseStructuralElement();
100	bool tryToParseBracedList();
101	bool parseBracedList(bool ContinueOnSemicolons = false,
102	tok::TokenKind ClosingBraceKind = tok::r_brace);
103	void parseParens();
104	void parseSquare(bool LambdaIntroducer = false);
105	void parseIfThenElse();
106	void parseTryCatch();
107	void parseForOrWhileLoop();
108	void parseDoWhile();
109	void parseLabel();
110	void parseCaseLabel();
111	void parseSwitch();
112	void parseNamespace();
113	void parseNew();
114	void parseAccessSpecifier();
115	bool parseEnum();
116	void parseJavaEnumBody();
117	// Parses a record (aka class) as a top level element. If ParseAsExpr is true,
118	// parses the record as a child block, i.e. if the class declaration is an
119	// expression.
120	void parseRecord(bool ParseAsExpr = false);
121	void parseObjCMethod();
122	void parseObjCProtocolList();
123	void parseObjCUntilAtEnd();
124	void parseObjCInterfaceOrImplementation();
125	bool parseObjCProtocol();
126	void parseJavaScriptEs6ImportExport();
127	void parseStatementMacro();
128	bool tryToParseLambda();
129	bool tryToParseLambdaIntroducer();
130	void tryToParseJSFunction();
131	void addUnwrappedLine();
132	bool eof() const;
133	// LevelDifference is the difference of levels after and before the current
134	// token. For example:
135	// - if the token is '{' and opens a block, LevelDifference is 1.
136	// - if the token is '}' and closes a block, LevelDifference is -1.
137	void nextToken(int LevelDifference = 0);
138	void readToken(int LevelDifference = 0);
139
140	// Decides which comment tokens should be added to the current line and which
141	// should be added as comments before the next token.
142	//
143	// Comments specifies the sequence of comment tokens to analyze. They get
144	// either pushed to the current line or added to the comments before the next
145	// token.
146	//
147	// NextTok specifies the next token. A null pointer NextTok is supported, and
148	// signifies either the absence of a next token, or that the next token
149	// shouldn't be taken into account for the analysis.
150	void distributeComments(const SmallVectorImpl<FormatToken *> &Comments,
151	const FormatToken *NextTok);
152
153	// Adds the comment preceding the next token to unwrapped lines.
154	void flushComments(bool NewlineBeforeNext);
155	void pushToken(FormatToken *Tok);
156	void calculateBraceTypes(bool ExpectClassBody = false);
157
158	// Marks a conditional compilation edge (for example, an '#if', '#ifdef',
159	// '#else' or merge conflict marker). If 'Unreachable' is true, assumes
160	// this branch either cannot be taken (for example '#if false'), or should
161	// not be taken in this round.
162	void conditionalCompilationCondition(bool Unreachable);
163	void conditionalCompilationStart(bool Unreachable);
164	void conditionalCompilationAlternative();
165	void conditionalCompilationEnd();
166
167	bool isOnNewLine(const FormatToken &FormatTok);
168
169	// Compute hash of the current preprocessor branch.
170	// This is used to identify the different branches, and thus track if block
171	// open and close in the same branch.
172	size_t computePPHash() const;
173
174	// FIXME: We are constantly running into bugs where Line.Level is incorrectly
175	// subtracted from beyond 0. Introduce a method to subtract from Line.Level
176	// and use that everywhere in the Parser.
	// NOTE(review): presumably the unwrapped line currently being assembled -
	// confirm in the implementation.
177	std::unique_ptr<UnwrappedLine> Line;
178
179	// Comments are sorted into unwrapped lines by whether they are in the same
180	// line as the previous token, or not. If not, they belong to the next token.
181	// Since the next token might already be in a new unwrapped line, we need to
182	// store the comments belonging to that token.
183	SmallVector<FormatToken *, 1> CommentsBeforeNextToken;
	// NOTE(review): FormatTok is presumably the token currently being looked at
	// and MustBreakBeforeNextToken a pending line-break request - neither is
	// described in this header, confirm in the implementation. Neither member
	// has an in-class initializer.
184	FormatToken *FormatTok;
185	bool MustBreakBeforeNextToken;
186
187	// The parsed lines. Only added to through \c CurrentLines.
188	SmallVector<UnwrappedLine, 8> Lines;
189
190	// Preprocessor directives are parsed out-of-order from other unwrapped lines.
191	// Thus, we need to keep a list of preprocessor directives to be reported
192	// after an unwrapped line that has been started was finished.
193	SmallVector<UnwrappedLine, 4> PreprocessorDirectives;
194
195	// New unwrapped lines are added via CurrentLines.
196	// Usually points to \c &Lines. While parsing a preprocessor directive when
197	// there is an unfinished previous unwrapped line, will point to
198	// \c &PreprocessorDirectives.
199	SmallVectorImpl<UnwrappedLine> *CurrentLines;
200
201	// We store for each line whether it must be a declaration depending on
202	// whether we are in a compound statement or not.
203	std::vector<bool> DeclarationScopeStack;
204
	// Held by reference, not copied.
205	const FormatStyle &Style;
206	const AdditionalKeywords &Keywords;
207
208	llvm::Regex CommentPragmasRegex;
209
	// FormatTokenSource is only forward-declared in this header.
	// NOTE(review): presumably the stream the parser pulls its tokens from -
	// confirm in the implementation.
210	FormatTokenSource *Tokens;
	// Consumer passed to the constructor; held by reference.
211	UnwrappedLineConsumer &Callback;
212
213	// FIXME: This is a temporary measure until we have reworked the ownership
214	// of the format tokens. The goal is to have the actual tokens created and
215	// owned outside of and handed into the UnwrappedLineParser.
216	ArrayRef<FormatToken *> AllTokens;
217
218	// Represents preprocessor branch type, so we can find matching
219	// #if/#else/#endif directives.
220	enum PPBranchKind {
221	PP_Conditional, // Any #if, #ifdef, #ifndef, #elif, block outside #if 0
222	PP_Unreachable // #if 0 or a conditional preprocessor block inside #if 0
223	};
224
	// One entry of \c PPStack: the kind of a preprocessor branch plus a
	// \c size_t named Line.
	// NOTE(review): Line is presumably an index identifying where the branch
	// starts - confirm which index space it uses.
225	struct PPBranch {
226	PPBranch(PPBranchKind Kind, size_t Line) : Kind(Kind), Line(Line) {}
227	PPBranchKind Kind;
228	size_t Line;
229	};
230
231	// Keeps a stack of currently active preprocessor branching directives.
232	SmallVector<PPBranch, 16> PPStack;
233
234	// The \c UnwrappedLineParser re-parses the code for each combination
235	// of preprocessor branches that can be taken.
236	// To that end, we take the same branch (#if, #else, or one of the #elif
237	// branches) for each nesting level of preprocessor branches.
238	// \c PPBranchLevel stores the current nesting level of preprocessor
239	// branches during one pass over the code.
240	int PPBranchLevel;
241
242	// Contains the current branch (#if, #else or one of the #elif branches)
243	// for each nesting level.
244	SmallVector<int, 8> PPLevelBranchIndex;
245
246	// Contains the maximum number of branches at each nesting level.
247	SmallVector<int, 8> PPLevelBranchCount;
248
249	// Contains the number of branches per nesting level we are currently
250	// in while parsing a preprocessor branch sequence.
251	// This is used to update PPLevelBranchCount at the end of a branch
252	// sequence.
253	std::stack<int> PPChainBranchIndex;
254
255	// Include guard search state. Used to fixup preprocessor indent levels
256	// so that include guards do not participate in indentation.
257	enum IncludeGuardState {
258	IG_Inited, // Search started, looking for #ifndef.
259	IG_IfNdefed, // #ifndef found, IncludeGuardToken points to condition.
260	IG_Defined, // Matching #define found, checking other requirements.
261	IG_Found, // All requirements met, need to fix indents.
262	IG_Rejected, // Search failed or never started.
263	};
264
265	// Current state of include guard search.
266	IncludeGuardState IncludeGuard;
267
268	// Points to the #ifndef condition for a potential include guard. Null unless
269	// the search state (\c IncludeGuard above) is IG_IfNdefed.
270	FormatToken *IncludeGuardToken;
271
272	// Contains the first start column where the source begins. This is zero for
273	// normal source code and may be nonzero when formatting a code fragment that
274	// does not start at the beginning of the file.
275	unsigned FirstStartColumn;
276
	// Both classes are defined outside this header; friendship gives them access
	// to the private parser state above.
277	friend class ScopedLineState;
278	friend class CompoundStatementIndenter;
279	};
280
	/// Element type of \c UnwrappedLine::Tokens: one token pointer plus the
	/// \c UnwrappedLine children stored alongside it.
281	struct UnwrappedLineNode {
	/// Creates a node without a token; \c Tok is null and \c Children is empty.
282	UnwrappedLineNode() = default;
	/// Creates a node for \p Tok. Only the pointer is stored; this struct
	/// declares no destructor, so it never deletes the token.
283	UnwrappedLineNode(FormatToken *Tok) : Tok(Tok) {}
284
	/// The token of this node, or null for a default-constructed node.
285	FormatToken *Tok = nullptr;
	/// Lines nested under this token.
286	SmallVector<UnwrappedLine, 0> Children;
287	};
288
	// Defined here rather than inside the struct because \c UnwrappedLineNode,
	// the element type of \c Tokens, is only complete at this point.
	//
	// Only the members without a default member initializer are listed.
	// \c MatchingOpeningBlockLineIndex, \c MatchingClosingBlockLineIndex and
	// \c FirstStartColumn take the defaults given in the struct definition
	// (\c kInvalidIndex, \c kInvalidIndex and 0), so repeating any of them here
	// would be redundant.
289	inline UnwrappedLine::UnwrappedLine()
290	: Level(0), InPPDirective(false), MustBeDeclaration(false) {}
292
293	} // end namespace format
294	} // end namespace clang
295
296	#endif
297

Clang Project