Token.h source code [clang_source_code/include/clang/Lex/Token.h]

1	//===--- Token.h - Token interface ------------------------------*- C++ -*-===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file defines the Token interface.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#ifndef LLVM_CLANG_LEX_TOKEN_H
14	#define LLVM_CLANG_LEX_TOKEN_H
15
16	#include "clang/Basic/SourceLocation.h"
17	#include "clang/Basic/TokenKinds.h"
18	#include "llvm/ADT/StringRef.h"
19	#include <cassert>
20
21	namespace clang {
22
23	class IdentifierInfo;
24
25	/// Token - This structure provides full information about a lexed token.
26	/// It is not intended to be space efficient, it is intended to return as much
27	/// information as possible about each returned token. This is expected to be
28	/// compressed into a smaller form if memory footprint is important.
29	///
30	/// The parser can create a special "annotation token" representing a stream of
31	/// tokens that were parsed and semantically resolved, e.g.: "foo::MyClass<int>"
32	/// can be represented by a single typename annotation token that carries
33	/// information about the SourceRange of the tokens and the type object.
34	class Token {
35	/// The location of the token. This is actually a SourceLocation.
36	unsigned Loc;
37
38	// Conceptually these next two fields could be in a union. However, this
39	// causes gcc 4.2 to pessimize LexTokenInternal, a very performance critical
40	// routine. Keeping as separate members with casts until a more beautiful fix
41	// presents itself.
42
43	/// UintData - This holds either the length of the token text, when
44	/// a normal token, or the end of the SourceRange when an annotation
45	/// token.
46	unsigned UintData;
47
48	/// PtrData - This is a union of four different pointer types, which depends
49	/// on what type of token this is:
50	/// Identifiers, keywords, etc:
51	/// This is an IdentifierInfo*, which contains the uniqued identifier
52	/// spelling.
53	/// Literals: isLiteral() returns true.
54	/// This is a pointer to the start of the token in a text buffer, which
55	/// may be dirty (have trigraphs / escaped newlines).
56	/// Annotations (resolved type names, C++ scopes, etc): isAnnotation().
57	/// This is a pointer to sema-specific data for the annotation token.
58	/// Eof:
59	// This is a pointer to a Decl.
60	/// Other:
61	/// This is null.
62	void *PtrData;
63
64	/// Kind - The actual flavor of token this is.
65	tok::TokenKind Kind;
66
67	/// Flags - Bits we track about this token, members of the TokenFlags enum.
68	unsigned short Flags;
69
70	public:
71	// Various flags set per token:
72	enum TokenFlags {
73	StartOfLine = 0x01, // At start of line or only after whitespace
74	// (considering the line after macro expansion).
75	LeadingSpace = 0x02, // Whitespace exists before this token (considering
76	// whitespace after macro expansion).
77	DisableExpand = 0x04, // This identifier may never be macro expanded.
78	NeedsCleaning = 0x08, // Contained an escaped newline or trigraph.
79	LeadingEmptyMacro = 0x10, // Empty macro exists before this token.
80	HasUDSuffix = 0x20, // This string or character literal has a ud-suffix.
81	HasUCN = 0x40, // This identifier contains a UCN.
82	IgnoredComma = 0x80, // This comma is not a macro argument separator (MS).
83	StringifiedInMacro = 0x100, // This string or character literal is formed by
84	// macro stringizing or charizing operator.
85	CommaAfterElided = 0x200, // The comma following this token was elided (MS).
86	IsEditorPlaceholder = 0x400, // This identifier is a placeholder.
87	};
88
89	tok::TokenKind getKind() const { return Kind; }
90	void setKind(tok::TokenKind K) { Kind = K; }
91
92	/// is/isNot - Predicates to check if this token is a specific kind, as in
93	/// "if (Tok.is(tok::l_brace)) {...}".
94	bool is(tok::TokenKind K) const { return Kind == K; }
95	bool isNot(tok::TokenKind K) const { return Kind != K; }
96	bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const {
97	return is(K1) \|\| is(K2);
98	}
99	template <typename... Ts>
100	bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, Ts... Ks) const {
101	return is(K1) \|\| isOneOf(K2, Ks...);
102	}
103
104	/// Return true if this is a raw identifier (when lexing
105	/// in raw mode) or a non-keyword identifier (when lexing in non-raw mode).
106	bool isAnyIdentifier() const {
107	return tok::isAnyIdentifier(getKind());
108	}
109
110	/// Return true if this is a "literal", like a numeric
111	/// constant, string, etc.
112	bool isLiteral() const {
113	return tok::isLiteral(getKind());
114	}
115
116	/// Return true if this is any of tok::annot_* kind tokens.
117	bool isAnnotation() const {
118	return tok::isAnnotation(getKind());
119	}
120
121	/// Return a source location identifier for the specified
122	/// offset in the current file.
123	SourceLocation getLocation() const {
124	return SourceLocation::getFromRawEncoding(Loc);
125	}
126	unsigned getLength() const {
127	(0) . __assert_fail ("!isAnnotation() && \"Annotation tokens have no length field\"", "/home/seafit/code_projects/clang_source/clang/include/clang/Lex/Token.h", 127, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true">assert(!isAnnotation() && "Annotation tokens have no length field");
128	return UintData;
129	}
130
131	void setLocation(SourceLocation L) { Loc = L.getRawEncoding(); }
132	void setLength(unsigned Len) {
133	(0) . __assert_fail ("!isAnnotation() && \"Annotation tokens have no length field\"", "/home/seafit/code_projects/clang_source/clang/include/clang/Lex/Token.h", 133, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true">assert(!isAnnotation() && "Annotation tokens have no length field");
134	UintData = Len;
135	}
136
137	SourceLocation getAnnotationEndLoc() const {
138	(0) . __assert_fail ("isAnnotation() && \"Used AnnotEndLocID on non-annotation token\"", "/home/seafit/code_projects/clang_source/clang/include/clang/Lex/Token.h", 138, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true">assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token");
139	return SourceLocation::getFromRawEncoding(UintData ? UintData : Loc);
140	}
141	void setAnnotationEndLoc(SourceLocation L) {
142	(0) . __assert_fail ("isAnnotation() && \"Used AnnotEndLocID on non-annotation token\"", "/home/seafit/code_projects/clang_source/clang/include/clang/Lex/Token.h", 142, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true">assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token");
143	UintData = L.getRawEncoding();
144	}
145
146	SourceLocation getLastLoc() const {
147	return isAnnotation() ? getAnnotationEndLoc() : getLocation();
148	}
149
150	SourceLocation getEndLoc() const {
151	return isAnnotation() ? getAnnotationEndLoc()
152	: getLocation().getLocWithOffset(getLength());
153	}
154
155	/// SourceRange of the group of tokens that this annotation token
156	/// represents.
157	SourceRange getAnnotationRange() const {
158	return SourceRange(getLocation(), getAnnotationEndLoc());
159	}
160	void setAnnotationRange(SourceRange R) {
161	setLocation(R.getBegin());
162	setAnnotationEndLoc(R.getEnd());
163	}
164
165	const char *getName() const { return tok::getTokenName(Kind); }
166
167	/// Reset all flags to cleared.
168	void startToken() {
169	Kind = tok::unknown;
170	Flags = 0;
171	PtrData = nullptr;
172	UintData = 0;
173	Loc = SourceLocation().getRawEncoding();
174	}
175
176	IdentifierInfo *getIdentifierInfo() const {
177	(0) . __assert_fail ("isNot(tok..raw_identifier) && \"getIdentifierInfo() on a tok..raw_identifier token!\"", "/home/seafit/code_projects/clang_source/clang/include/clang/Lex/Token.h", 178, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true">assert(isNot(tok::raw_identifier) &&
178	(0) . __assert_fail ("isNot(tok..raw_identifier) && \"getIdentifierInfo() on a tok..raw_identifier token!\"", "/home/seafit/code_projects/clang_source/clang/include/clang/Lex/Token.h", 178, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true"> "getIdentifierInfo() on a tok::raw_identifier token!");
179	(0) . __assert_fail ("!isAnnotation() && \"getIdentifierInfo() on an annotation token!\"", "/home/seafit/code_projects/clang_source/clang/include/clang/Lex/Token.h", 180, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true">assert(!isAnnotation() &&
180	(0) . __assert_fail ("!isAnnotation() && \"getIdentifierInfo() on an annotation token!\"", "/home/seafit/code_projects/clang_source/clang/include/clang/Lex/Token.h", 180, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true"> "getIdentifierInfo() on an annotation token!");
181	if (isLiteral()) return nullptr;
182	if (is(tok::eof)) return nullptr;
183	return (IdentifierInfo*) PtrData;
184	}
185	void setIdentifierInfo(IdentifierInfo *II) {
186	PtrData = (void*) II;
187	}
188
189	const void *getEofData() const {
190	assert(is(tok::eof));
191	return reinterpret_cast<const void *>(PtrData);
192	}
193	void setEofData(const void *D) {
194	assert(is(tok::eof));
195	assert(!PtrData);
196	PtrData = const_cast<void *>(D);
197	}
198
199	/// getRawIdentifier - For a raw identifier token (i.e., an identifier
200	/// lexed in raw mode), returns a reference to the text substring in the
201	/// buffer if known.
202	StringRef getRawIdentifier() const {
203	assert(is(tok::raw_identifier));
204	return StringRef(reinterpret_cast<const char *>(PtrData), getLength());
205	}
206	void setRawIdentifierData(const char *Ptr) {
207	assert(is(tok::raw_identifier));
208	PtrData = const_cast<char*>(Ptr);
209	}
210
211	/// getLiteralData - For a literal token (numeric constant, string, etc), this
212	/// returns a pointer to the start of it in the text buffer if known, null
213	/// otherwise.
214	const char *getLiteralData() const {
215	(0) . __assert_fail ("isLiteral() && \"Cannot get literal data of non-literal\"", "/home/seafit/code_projects/clang_source/clang/include/clang/Lex/Token.h", 215, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true">assert(isLiteral() && "Cannot get literal data of non-literal");
216	return reinterpret_cast<const char*>(PtrData);
217	}
218	void setLiteralData(const char *Ptr) {
219	(0) . __assert_fail ("isLiteral() && \"Cannot set literal data of non-literal\"", "/home/seafit/code_projects/clang_source/clang/include/clang/Lex/Token.h", 219, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true">assert(isLiteral() && "Cannot set literal data of non-literal");
220	PtrData = const_cast<char*>(Ptr);
221	}
222
223	void *getAnnotationValue() const {
224	(0) . __assert_fail ("isAnnotation() && \"Used AnnotVal on non-annotation token\"", "/home/seafit/code_projects/clang_source/clang/include/clang/Lex/Token.h", 224, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true">assert(isAnnotation() && "Used AnnotVal on non-annotation token");
225	return PtrData;
226	}
227	void setAnnotationValue(void *val) {
228	(0) . __assert_fail ("isAnnotation() && \"Used AnnotVal on non-annotation token\"", "/home/seafit/code_projects/clang_source/clang/include/clang/Lex/Token.h", 228, __PRETTY_FUNCTION__))" file_link="../../../../include/assert.h.html#88" macro="true">assert(isAnnotation() && "Used AnnotVal on non-annotation token");
229	PtrData = val;
230	}
231
232	/// Set the specified flag.
233	void setFlag(TokenFlags Flag) {
234	Flags \|= Flag;
235	}
236
237	/// Get the specified flag.
238	bool getFlag(TokenFlags Flag) const {
239	return (Flags & Flag) != 0;
240	}
241
242	/// Unset the specified flag.
243	void clearFlag(TokenFlags Flag) {
244	Flags &= ~Flag;
245	}
246
247	/// Return the internal represtation of the flags.
248	///
249	/// This is only intended for low-level operations such as writing tokens to
250	/// disk.
251	unsigned getFlags() const {
252	return Flags;
253	}
254
255	/// Set a flag to either true or false.
256	void setFlagValue(TokenFlags Flag, bool Val) {
257	if (Val)
258	setFlag(Flag);
259	else
260	clearFlag(Flag);
261	}
262
263	/// isAtStartOfLine - Return true if this token is at the start of a line.
264	///
265	bool isAtStartOfLine() const { return getFlag(StartOfLine); }
266
267	/// Return true if this token has whitespace before it.
268	///
269	bool hasLeadingSpace() const { return getFlag(LeadingSpace); }
270
271	/// Return true if this identifier token should never
272	/// be expanded in the future, due to C99 6.10.3.4p2.
273	bool isExpandDisabled() const { return getFlag(DisableExpand); }
274
275	/// Return true if we have an ObjC keyword identifier.
276	bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const;
277
278	/// Return the ObjC keyword kind.
279	tok::ObjCKeywordKind getObjCKeywordID() const;
280
281	/// Return true if this token has trigraphs or escaped newlines in it.
282	bool needsCleaning() const { return getFlag(NeedsCleaning); }
283
284	/// Return true if this token has an empty macro before it.
285	///
286	bool hasLeadingEmptyMacro() const { return getFlag(LeadingEmptyMacro); }
287
288	/// Return true if this token is a string or character literal which
289	/// has a ud-suffix.
290	bool hasUDSuffix() const { return getFlag(HasUDSuffix); }
291
292	/// Returns true if this token contains a universal character name.
293	bool hasUCN() const { return getFlag(HasUCN); }
294
295	/// Returns true if this token is formed by macro by stringizing or charizing
296	/// operator.
297	bool stringifiedInMacro() const { return getFlag(StringifiedInMacro); }
298
299	/// Returns true if the comma after this token was elided.
300	bool commaAfterElided() const { return getFlag(CommaAfterElided); }
301
302	/// Returns true if this token is an editor placeholder.
303	///
304	/// Editor placeholders are produced by the code-completion engine and are
305	/// represented as characters between '<#' and '#>' in the source code. The
306	/// lexer uses identifier tokens to represent placeholders.
307	bool isEditorPlaceholder() const { return getFlag(IsEditorPlaceholder); }
308	};
309
310	/// Information about the conditional stack (\#if directives)
311	/// currently active.
312	struct PPConditionalInfo {
313	/// Location where the conditional started.
314	SourceLocation IfLoc;
315
316	/// True if this was contained in a skipping directive, e.g.,
317	/// in a "\#if 0" block.
318	bool WasSkipping;
319
320	/// True if we have emitted tokens already, and now we're in
321	/// an \#else block or something. Only useful in Skipping blocks.
322	bool FoundNonSkip;
323
324	/// True if we've seen a \#else in this block. If so,
325	/// \#elif/\#else directives are not allowed.
326	bool FoundElse;
327	};
328
329	} // end namespace clang
330
331	#endif // LLVM_CLANG_LEX_TOKEN_H
332