RawCommentList.cpp source code [clang_source_code/lib/AST/RawCommentList.cpp]

//===--- RawCommentList.cpp - Processing raw comments -----------*- C++ -*-===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "clang/AST/RawCommentList.h"
10	#include "clang/AST/ASTContext.h"
11	#include "clang/AST/Comment.h"
12	#include "clang/AST/CommentBriefParser.h"
13	#include "clang/AST/CommentCommandTraits.h"
14	#include "clang/AST/CommentLexer.h"
15	#include "clang/AST/CommentParser.h"
16	#include "clang/AST/CommentSema.h"
17	#include "clang/Basic/CharInfo.h"
18	#include "llvm/ADT/STLExtras.h"
19
20	using namespace clang;
21
22	namespace {
23	/// Get comment kind and bool describing if it is a trailing comment.
24	std::pair<RawComment::CommentKind, bool> getCommentKind(StringRef Comment,
25	bool ParseAllComments) {
26	const size_t MinCommentLength = ParseAllComments ? 2 : 3;
27	if ((Comment.size() < MinCommentLength) \|\| Comment[0] != '/')
28	return std::make_pair(RawComment::RCK_Invalid, false);
29
30	RawComment::CommentKind K;
31	if (Comment[1] == '/') {
32	if (Comment.size() < 3)
33	return std::make_pair(RawComment::RCK_OrdinaryBCPL, false);
34
35	if (Comment[2] == '/')
36	K = RawComment::RCK_BCPLSlash;
37	else if (Comment[2] == '!')
38	K = RawComment::RCK_BCPLExcl;
39	else
40	return std::make_pair(RawComment::RCK_OrdinaryBCPL, false);
41	} else {
42	= 4", "/home/seafit/code_projects/clang_source/clang/lib/AST/RawCommentList.cpp", 42, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(Comment.size() >= 4);
43
44	// Comment lexer does not understand escapes in comment markers, so pretend
45	// that this is not a comment.
46	if (Comment[1] != '*' \|\|
47	Comment[Comment.size() - 2] != '*' \|\|
48	Comment[Comment.size() - 1] != '/')
49	return std::make_pair(RawComment::RCK_Invalid, false);
50
51	if (Comment[2] == '*')
52	K = RawComment::RCK_JavaDoc;
53	else if (Comment[2] == '!')
54	K = RawComment::RCK_Qt;
55	else
56	return std::make_pair(RawComment::RCK_OrdinaryC, false);
57	}
58	const bool TrailingComment = (Comment.size() > 3) && (Comment[3] == '<');
59	return std::make_pair(K, TrailingComment);
60	}
61
62	bool mergedCommentIsTrailingComment(StringRef Comment) {
63	return (Comment.size() > 3) && (Comment[3] == '<');
64	}
65
66	/// Returns true if R1 and R2 both have valid locations that start on the same
67	/// column.
68	bool commentsStartOnSameColumn(const SourceManager &SM, const RawComment &R1,
69	const RawComment &R2) {
70	SourceLocation L1 = R1.getBeginLoc();
71	SourceLocation L2 = R2.getBeginLoc();
72	bool Invalid = false;
73	unsigned C1 = SM.getPresumedColumnNumber(L1, &Invalid);
74	if (!Invalid) {
75	unsigned C2 = SM.getPresumedColumnNumber(L2, &Invalid);
76	return !Invalid && (C1 == C2);
77	}
78	return false;
79	}
80	} // unnamed namespace
81
82	/// Determines whether there is only whitespace in `Buffer` between `P`
83	/// and the previous line.
84	/// \param Buffer The buffer to search in.
85	/// \param P The offset from the beginning of `Buffer` to start from.
86	/// \return true if all of the characters in `Buffer` ranging from the closest
87	/// line-ending character before `P` (or the beginning of `Buffer`) to `P - 1`
88	/// are whitespace.
89	static bool onlyWhitespaceOnLineBefore(const char *Buffer, unsigned P) {
90	// Search backwards until we see linefeed or carriage return.
91	for (unsigned I = P; I != 0; --I) {
92	char C = Buffer[I - 1];
93	if (isVerticalWhitespace(C))
94	return true;
95	if (!isHorizontalWhitespace(C))
96	return false;
97	}
98	// We hit the beginning of the buffer.
99	return true;
100	}
101
102	/// Returns whether `K` is an ordinary comment kind.
103	static bool isOrdinaryKind(RawComment::CommentKind K) {
104	return (K == RawComment::RCK_OrdinaryBCPL) \|\|
105	(K == RawComment::RCK_OrdinaryC);
106	}
107
108	RawComment::RawComment(const SourceManager &SourceMgr, SourceRange SR,
109	const CommentOptions &CommentOpts, bool Merged) :
110	Range(SR), RawTextValid(false), BriefTextValid(false),
111	IsAttached(false), IsTrailingComment(false),
112	IsAlmostTrailingComment(false) {
113	// Extract raw comment text, if possible.
114	if (SR.getBegin() == SR.getEnd() \|\| getRawText(SourceMgr).empty()) {
115	Kind = RCK_Invalid;
116	return;
117	}
118
119	// Guess comment kind.
120	std::pair<CommentKind, bool> K =
121	getCommentKind(RawText, CommentOpts.ParseAllComments);
122
123	// Guess whether an ordinary comment is trailing.
124	if (CommentOpts.ParseAllComments && isOrdinaryKind(K.first)) {
125	FileID BeginFileID;
126	unsigned BeginOffset;
127	std::tie(BeginFileID, BeginOffset) =
128	SourceMgr.getDecomposedLoc(Range.getBegin());
129	if (BeginOffset != 0) {
130	bool Invalid = false;
131	const char *Buffer =
132	SourceMgr.getBufferData(BeginFileID, &Invalid).data();
133	IsTrailingComment \|=
134	(!Invalid && !onlyWhitespaceOnLineBefore(Buffer, BeginOffset));
135	}
136	}
137
138	if (!Merged) {
139	Kind = K.first;
140	IsTrailingComment \|= K.second;
141
142	IsAlmostTrailingComment = RawText.startswith("//<") \|\|
143	RawText.startswith("/*<");
144	} else {
145	Kind = RCK_Merged;
146	IsTrailingComment =
147	IsTrailingComment \|\| mergedCommentIsTrailingComment(RawText);
148	}
149	}
150
151	StringRef RawComment::getRawTextSlow(const SourceManager &SourceMgr) const {
152	FileID BeginFileID;
153	FileID EndFileID;
154	unsigned BeginOffset;
155	unsigned EndOffset;
156
157	std::tie(BeginFileID, BeginOffset) =
158	SourceMgr.getDecomposedLoc(Range.getBegin());
159	std::tie(EndFileID, EndOffset) = SourceMgr.getDecomposedLoc(Range.getEnd());
160
161	const unsigned Length = EndOffset - BeginOffset;
162	if (Length < 2)
163	return StringRef();
164
165	// The comment can't begin in one file and end in another.
166	assert(BeginFileID == EndFileID);
167
168	bool Invalid = false;
169	const char *BufferStart = SourceMgr.getBufferData(BeginFileID,
170	&Invalid).data();
171	if (Invalid)
172	return StringRef();
173
174	return StringRef(BufferStart + BeginOffset, Length);
175	}
176
177	const char *RawComment::extractBriefText(const ASTContext &Context) const {
178	// Lazily initialize RawText using the accessor before using it.
179	(void)getRawText(Context.getSourceManager());
180
181	// Since we will be copying the resulting text, all allocations made during
182	// parsing are garbage after resulting string is formed. Thus we can use
183	// a separate allocator for all temporary stuff.
184	llvm::BumpPtrAllocator Allocator;
185
186	comments::Lexer L(Allocator, Context.getDiagnostics(),
187	Context.getCommentCommandTraits(),
188	Range.getBegin(),
189	RawText.begin(), RawText.end());
190	comments::BriefParser P(L, Context.getCommentCommandTraits());
191
192	const std::string Result = P.Parse();
193	const unsigned BriefTextLength = Result.size();
194	char *BriefTextPtr = new (Context) char[BriefTextLength + 1];
195	memcpy(BriefTextPtr, Result.c_str(), BriefTextLength + 1);
196	BriefText = BriefTextPtr;
197	BriefTextValid = true;
198
199	return BriefTextPtr;
200	}
201
202	comments::FullComment *RawComment::parse(const ASTContext &Context,
203	const Preprocessor *PP,
204	const Decl *D) const {
205	// Lazily initialize RawText using the accessor before using it.
206	(void)getRawText(Context.getSourceManager());
207
208	comments::Lexer L(Context.getAllocator(), Context.getDiagnostics(),
209	Context.getCommentCommandTraits(),
210	getSourceRange().getBegin(),
211	RawText.begin(), RawText.end());
212	comments::Sema S(Context.getAllocator(), Context.getSourceManager(),
213	Context.getDiagnostics(),
214	Context.getCommentCommandTraits(),
215	PP);
216	S.setDecl(D);
217	comments::Parser P(L, S, Context.getAllocator(), Context.getSourceManager(),
218	Context.getDiagnostics(),
219	Context.getCommentCommandTraits());
220
221	return P.parseFullComment();
222	}
223
224	static bool onlyWhitespaceBetween(SourceManager &SM,
225	SourceLocation Loc1, SourceLocation Loc2,
226	unsigned MaxNewlinesAllowed) {
227	std::pair<FileID, unsigned> Loc1Info = SM.getDecomposedLoc(Loc1);
228	std::pair<FileID, unsigned> Loc2Info = SM.getDecomposedLoc(Loc2);
229
230	// Question does not make sense if locations are in different files.
231	if (Loc1Info.first != Loc2Info.first)
232	return false;
233
234	bool Invalid = false;
235	const char *Buffer = SM.getBufferData(Loc1Info.first, &Invalid).data();
236	if (Invalid)
237	return false;
238
239	unsigned NumNewlines = 0;
240	(0) . __assert_fail ("Loc1Info.second <= Loc2Info.second && \"Loc1 after Loc2!\"", "/home/seafit/code_projects/clang_source/clang/lib/AST/RawCommentList.cpp", 240, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(Loc1Info.second <= Loc2Info.second && "Loc1 after Loc2!");
241	// Look for non-whitespace characters and remember any newlines seen.
242	for (unsigned I = Loc1Info.second; I != Loc2Info.second; ++I) {
243	switch (Buffer[I]) {
244	default:
245	return false;
246	case ' ':
247	case '\t':
248	case '\f':
249	case '\v':
250	break;
251	case '\r':
252	case '\n':
253	++NumNewlines;
254
255	// Check if we have found more than the maximum allowed number of
256	// newlines.
257	if (NumNewlines > MaxNewlinesAllowed)
258	return false;
259
260	// Collapse \r\n and \n\r into a single newline.
261	if (I + 1 != Loc2Info.second &&
262	(Buffer[I + 1] == '\n' \|\| Buffer[I + 1] == '\r') &&
263	Buffer[I] != Buffer[I + 1])
264	++I;
265	break;
266	}
267	}
268
269	return true;
270	}
271
272	void RawCommentList::addComment(const RawComment &RC,
273	const CommentOptions &CommentOpts,
274	llvm::BumpPtrAllocator &Allocator) {
275	if (RC.isInvalid())
276	return;
277
278	// Check if the comments are not in source order.
279	while (!Comments.empty() &&
280	!SourceMgr.isBeforeInTranslationUnit(Comments.back()->getBeginLoc(),
281	RC.getBeginLoc())) {
282	// If they are, just pop a few last comments that don't fit.
283	// This happens if an \#include directive contains comments.
284	Comments.pop_back();
285	}
286
287	// Ordinary comments are not interesting for us.
288	if (RC.isOrdinary() && !CommentOpts.ParseAllComments)
289	return;
290
291	// If this is the first Doxygen comment, save it (because there isn't
292	// anything to merge it with).
293	if (Comments.empty()) {
294	Comments.push_back(new (Allocator) RawComment(RC));
295	return;
296	}
297
298	const RawComment &C1 = *Comments.back();
299	const RawComment &C2 = RC;
300
301	// Merge comments only if there is only whitespace between them.
302	// Can't merge trailing and non-trailing comments unless the second is
303	// non-trailing ordinary in the same column, as in the case:
304	// int x; // documents x
305	// // more text
306	// versus:
307	// int x; // documents x
308	// int y; // documents y
309	// or:
310	// int x; // documents x
311	// // documents y
312	// int y;
313	// Merge comments if they are on same or consecutive lines.
314	if ((C1.isTrailingComment() == C2.isTrailingComment() \|\|
315	(C1.isTrailingComment() && !C2.isTrailingComment() &&
316	isOrdinaryKind(C2.getKind()) &&
317	commentsStartOnSameColumn(SourceMgr, C1, C2))) &&
318	onlyWhitespaceBetween(SourceMgr, C1.getEndLoc(), C2.getBeginLoc(),
319	/MaxNewlinesAllowed=/1)) {
320	SourceRange MergedRange(C1.getBeginLoc(), C2.getEndLoc());
321	*Comments.back() = RawComment(SourceMgr, MergedRange, CommentOpts, true);
322	} else {
323	Comments.push_back(new (Allocator) RawComment(RC));
324	}
325	}
326
327	void RawCommentList::addDeserializedComments(ArrayRef<RawComment *> DeserializedComments) {
328	std::vector<RawComment *> MergedComments;
329	MergedComments.reserve(Comments.size() + DeserializedComments.size());
330
331	std::merge(Comments.begin(), Comments.end(),
332	DeserializedComments.begin(), DeserializedComments.end(),
333	std::back_inserter(MergedComments),
334	BeforeThanCompare<RawComment>(SourceMgr));
335	std::swap(Comments, MergedComments);
336	}
337
338	std::string RawComment::getFormattedText(const SourceManager &SourceMgr,
339	DiagnosticsEngine &Diags) const {
340	llvm::StringRef CommentText = getRawText(SourceMgr);
341	if (CommentText.empty())
342	return "";
343
344	llvm::BumpPtrAllocator Allocator;
345	// We do not parse any commands, so CommentOptions are ignored by
346	// comments::Lexer. Therefore, we just use default-constructed options.
347	CommentOptions DefOpts;
348	comments::CommandTraits EmptyTraits(Allocator, DefOpts);
349	comments::Lexer L(Allocator, Diags, EmptyTraits, getSourceRange().getBegin(),
350	CommentText.begin(), CommentText.end(),
351	/ParseCommands=/false);
352
353	std::string Result;
354	// A column number of the first non-whitespace token in the comment text.
355	// We skip whitespace up to this column, but keep the whitespace after this
356	// column. IndentColumn is calculated when lexing the first line and reused
357	// for the rest of lines.
358	unsigned IndentColumn = 0;
359
360	// Processes one line of the comment and adds it to the result.
361	// Handles skipping the indent at the start of the line.
362	// Returns false when eof is reached and true otherwise.
363	auto LexLine = [&](bool IsFirstLine) -> bool {
364	comments::Token Tok;
365	// Lex the first token on the line. We handle it separately, because we to
366	// fix up its indentation.
367	L.lex(Tok);
368	if (Tok.is(comments::tok::eof))
369	return false;
370	if (Tok.is(comments::tok::newline)) {
371	Result += "\n";
372	return true;
373	}
374	llvm::StringRef TokText = L.getSpelling(Tok, SourceMgr);
375	bool LocInvalid = false;
376	unsigned TokColumn =
377	SourceMgr.getSpellingColumnNumber(Tok.getLocation(), &LocInvalid);
378	(0) . __assert_fail ("!LocInvalid && \"getFormattedText for invalid location\"", "/home/seafit/code_projects/clang_source/clang/lib/AST/RawCommentList.cpp", 378, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(!LocInvalid && "getFormattedText for invalid location");
379
380	// Amount of leading whitespace in TokText.
381	size_t WhitespaceLen = TokText.find_first_not_of(" \t");
382	if (WhitespaceLen == StringRef::npos)
383	WhitespaceLen = TokText.size();
384	// Remember the amount of whitespace we skipped in the first line to remove
385	// indent up to that column in the following lines.
386	if (IsFirstLine)
387	IndentColumn = TokColumn + WhitespaceLen;
388
389	// Amount of leading whitespace we actually want to skip.
390	// For the first line we skip all the whitespace.
391	// For the rest of the lines, we skip whitespace up to IndentColumn.
392	unsigned SkipLen =
393	IsFirstLine
394	? WhitespaceLen
395	: std::min<size_t>(
396	WhitespaceLen,
397	std::max<int>(static_cast<int>(IndentColumn) - TokColumn, 0));
398	llvm::StringRef Trimmed = TokText.drop_front(SkipLen);
399	Result += Trimmed;
400	// Lex all tokens in the rest of the line.
401	for (L.lex(Tok); Tok.isNot(comments::tok::eof); L.lex(Tok)) {
402	if (Tok.is(comments::tok::newline)) {
403	Result += "\n";
404	return true;
405	}
406	Result += L.getSpelling(Tok, SourceMgr);
407	}
408	// We've reached the end of file token.
409	return false;
410	};
411
412	auto DropTrailingNewLines = [](std::string &Str) {
413	while (Str.back() == '\n')
414	Str.pop_back();
415	};
416
417	// Process first line separately to remember indent for the following lines.
418	if (!LexLine(/IsFirstLine=/true)) {
419	DropTrailingNewLines(Result);
420	return Result;
421	}
422	// Process the rest of the lines.
423	while (LexLine(/IsFirstLine=/false))
424	;
425	DropTrailingNewLines(Result);
426	return Result;
427	}
428

clang::RawComment::getRawTextSlow

clang::RawComment::extractBriefText

clang::RawComment::parse

clang::RawCommentList::addComment

clang::RawCommentList::addDeserializedComments

clang::RawComment::getFormattedText

Clang Project