FormatTokenLexer.h source code [clang_source_code/lib/Format/FormatTokenLexer.h]

1	//===--- FormatTokenLexer.h - Format C++ code ----------------- C++ -----===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	///
9	/// \file
10	/// This file contains FormatTokenLexer, which tokenizes a source file
11	/// into a token stream suitable for ClangFormat.
12	///
13	//===----------------------------------------------------------------------===//
14
15	#ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
16	#define LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H
17
18	#include "Encoding.h"
19	#include "FormatToken.h"
20	#include "clang/Basic/SourceLocation.h"
21	#include "clang/Basic/SourceManager.h"
22	#include "clang/Format/Format.h"
23	#include "llvm/ADT/MapVector.h"
24	#include "llvm/Support/Regex.h"
25
26	#include <stack>
27
28	namespace clang {
29	namespace format {
30
/// Lexing states for FormatTokenLexer, which keeps a std::stack of these in
/// its StateStack member.
/// NOTE(review): the per-enumerator notes below are inferred from the names and
/// from the comment on FormatTokenLexer::handleTemplateStrings(); the actual
/// state transitions are not visible in this header -- confirm them in the
/// implementation file.
31	enum LexerState {
32	NORMAL, // presumably ordinary lexing, outside any template string
33	TEMPLATE_STRING, // presumably lexing inside a JavaScript template string
34	TOKEN_STASHED, // presumably a token is held back for getStashedToken()
35	};
36
/// Tokenizes a source file into a stream of FormatToken objects suitable for
/// ClangFormat (see the \file comment at the top of this header).
///
/// Apart from getKeywords(), every member function is only declared here; the
/// definitions are not part of this header. Comments marked NOTE(review) are
/// therefore inferences from names and signatures that should be confirmed
/// against the implementation.
37	class FormatTokenLexer {
38	public:
// NOTE(review): presumably sets up lexing of the file \p ID owned by
// \p SourceMgr, with \p Column, \p Style and \p Encoding seeding the
// same-named data members below -- confirm in the constructor definition.
39	FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, unsigned Column,
40	const FormatStyle &Style, encoding::Encoding Encoding);
41
// Returns an ArrayRef (a non-owning view) of FormatToken pointers.
// NOTE(review): presumably lexes the whole file and returns a view over
// 'Tokens', in which case the result is only valid while this object is
// alive -- confirm.
42	ArrayRef<FormatToken *> lex();
43
// Returns a const reference to the Keywords member of this lexer.
44	const AdditionalKeywords &getKeywords() { return Keywords; }
45
46	private:
// NOTE(review): presumably the driver that attempts the tryMerge* helpers
// below on the most recently lexed tokens -- confirm.
47	void tryMergePreviousTokens();
48
// Language-specific merge helpers. Each returns bool.
// NOTE(review): presumably \c true means tokens were merged into one; the
// exact token sequences each helper recognizes are defined out of line.
49	bool tryMergeLessLess();
50	bool tryMergeNSStringLiteral();
51	bool tryMergeJSPrivateIdentifier();
52	bool tryMergeCSharpVerbatimStringLiteral();
53	bool tryMergeCSharpKeywordVariables();
54	bool tryMergeCSharpNullConditionals();
55	bool tryMergeCSharpDoubleQuestion();
56
// NOTE(review): presumably merges the trailing tokens when their kinds match
// \p Kinds and gives the merged token the type \p NewType -- confirm.
57	bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType);
58
59	// Returns \c true if \p Tok can only be followed by an operand in JavaScript.
60	bool precedesOperand(FormatToken *Tok);
61
// NOTE(review): presumably reports whether a JavaScript regex literal may
// start right after \p Prev -- confirm.
62	bool canPrecedeRegexLiteral(FormatToken *Prev);
63
64	// Tries to parse a JavaScript Regex literal starting at the current token,
65	// if that begins with a slash and is in a location where JavaScript allows
66	// regex literals. Changes the current token to a regex literal and updates
67	// its text if successful.
68	void tryParseJSRegexLiteral();
69
70	// Handles JavaScript template strings.
71	//
72	// JavaScript template strings use backticks ('`') as delimiters, and allow
73	// embedding expressions nested in ${expr-here}. Template strings can be
74	// nested recursively, i.e. expressions can contain template strings in turn.
75	//
76	// The code below parses starting from a backtick, up to a closing backtick or
77	// an opening ${. It also maintains a stack of lexing contexts to handle
78	// nested template parts by balancing curly braces.
79	void handleTemplateStrings();
80
// NOTE(review): presumably recognizes a Python-style '#' comment at the
// current token -- confirm which languages enable this.
81	void tryParsePythonComment();
82
// NOTE(review): presumably merges a _T("...") macro invocation into a single
// token -- confirm.
83	bool tryMerge_TMacro();
84
// NOTE(review): presumably detects version-control conflict markers in the
// token stream -- confirm.
85	bool tryMergeConflictMarkers();
86
// NOTE(review): presumably returns a token that was held back earlier (see
// LexerState::TOKEN_STASHED) -- confirm.
87	FormatToken *getStashedToken();
88
// NOTE(review): presumably produces the next FormatToken from the raw lexer
// -- confirm.
89	FormatToken *getNextToken();
90
// Data members. Non-static data members are initialized in the order they are
// declared here, regardless of the order used in the constructor's initializer
// list, so do not reorder them without checking the constructor.
// NOTE(review): the roles noted below are inferred from names and types.
91	FormatToken *FormatTok; // presumably the token currently being processed
92	bool IsFirstToken;
93	std::stack<LexerState> StateStack; // stack of lexing states (see LexerState)
94	unsigned Column;
95	unsigned TrailingWhitespace;
96	std::unique_ptr<Lexer> Lex; // owned lexer object
97	const SourceManager &SourceMgr; // reference member: must outlive this object
98	FileID ID;
99	const FormatStyle &Style; // reference member: must outlive this object
100	IdentifierTable IdentTable;
101	AdditionalKeywords Keywords; // exposed read-only through getKeywords()
102	encoding::Encoding Encoding;
103	llvm::SpecificBumpPtrAllocator<FormatToken> Allocator; // allocator for FormatToken objects
104	// Index (in 'Tokens') of the last token that starts a new line.
105	unsigned FirstInLineIndex;
106	SmallVector<FormatToken *, 16> Tokens;
107
// Maps identifiers to a TokenType, keeping insertion order (MapVector).
// NOTE(review): presumably filled from macro names configured in Style.
108	llvm::SmallMapVector<IdentifierInfo *, TokenType, 8> Macros;
109
110	bool FormattingDisabled;
111
// NOTE(review): presumably built from the style's macro-block begin/end
// patterns -- confirm in the constructor.
112	llvm::Regex MacroBlockBeginRegex;
113	llvm::Regex MacroBlockEndRegex;
114
// NOTE(review): presumably reads one raw token from 'Lex' into \p Tok.
115	void readRawToken(FormatToken &Tok);
116
// NOTE(review): presumably re-creates 'Lex' so that lexing continues at
// \p Offset within the file -- confirm.
117	void resetLexer(unsigned Offset);
118	};
119
120	} // namespace format
121	} // namespace clang
122
123	#endif
124

Clang Project