1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
9 | #include "clang/AST/CommentLexer.h" |
10 | #include "clang/AST/CommentCommandTraits.h" |
11 | #include "clang/AST/CommentDiagnostic.h" |
12 | #include "clang/Basic/CharInfo.h" |
13 | #include "llvm/ADT/StringExtras.h" |
14 | #include "llvm/ADT/StringSwitch.h" |
15 | #include "llvm/Support/ConvertUTF.h" |
16 | #include "llvm/Support/ErrorHandling.h" |
17 | |
18 | namespace clang { |
19 | namespace comments { |
20 | |
21 | void Token::(const Lexer &L, const SourceManager &SM) const { |
22 | llvm::errs() << "comments::Token Kind=" << Kind << " "; |
23 | Loc.print(llvm::errs(), SM); |
24 | llvm::errs() << " " << Length << " \"" << L.getSpelling(*this, SM) << "\"\n"; |
25 | } |
26 | |
27 | static inline bool (char C) { |
28 | return isLetter(C); |
29 | } |
30 | |
31 | static inline bool (char C) { |
32 | return isDigit(C); |
33 | } |
34 | |
35 | static inline bool (char C) { |
36 | return isHexDigit(C); |
37 | } |
38 | |
39 | static inline StringRef ( |
40 | llvm::BumpPtrAllocator &Allocator, |
41 | unsigned CodePoint) { |
42 | char *Resolved = Allocator.Allocate<char>(UNI_MAX_UTF8_BYTES_PER_CODE_POINT); |
43 | char *ResolvedPtr = Resolved; |
44 | if (llvm::ConvertCodePointToUTF8(CodePoint, ResolvedPtr)) |
45 | return StringRef(Resolved, ResolvedPtr - Resolved); |
46 | else |
47 | return StringRef(); |
48 | } |
49 | |
50 | namespace { |
51 | |
52 | #include "clang/AST/CommentHTMLTags.inc" |
53 | #include "clang/AST/CommentHTMLNamedCharacterReferences.inc" |
54 | |
55 | } |
56 | |
57 | StringRef Lexer::(StringRef Name) const { |
58 | |
59 | return llvm::StringSwitch<StringRef>(Name) |
60 | .Case("amp", "&") |
61 | .Case("lt", "<") |
62 | .Case("gt", ">") |
63 | .Case("quot", "\"") |
64 | .Case("apos", "\'") |
65 | |
66 | .Default(translateHTMLNamedCharacterReferenceToUTF8(Name)); |
67 | } |
68 | |
69 | StringRef Lexer::(StringRef Name) const { |
70 | unsigned CodePoint = 0; |
71 | for (unsigned i = 0, e = Name.size(); i != e; ++i) { |
72 | assert(isHTMLDecimalCharacterReferenceCharacter(Name[i])); |
73 | CodePoint *= 10; |
74 | CodePoint += Name[i] - '0'; |
75 | } |
76 | return convertCodePointToUTF8(Allocator, CodePoint); |
77 | } |
78 | |
79 | StringRef Lexer::(StringRef Name) const { |
80 | unsigned CodePoint = 0; |
81 | for (unsigned i = 0, e = Name.size(); i != e; ++i) { |
82 | CodePoint *= 16; |
83 | const char C = Name[i]; |
84 | assert(isHTMLHexCharacterReferenceCharacter(C)); |
85 | CodePoint += llvm::hexDigitValue(C); |
86 | } |
87 | return convertCodePointToUTF8(Allocator, CodePoint); |
88 | } |
89 | |
90 | void Lexer::() { |
91 | |
92 | assert(CommentState == LCS_InsideCComment); |
93 | |
94 | if (BufferPtr == CommentEnd) |
95 | return; |
96 | |
97 | switch (*BufferPtr) { |
98 | case ' ': |
99 | case '\t': |
100 | case '\f': |
101 | case '\v': { |
102 | const char *NewBufferPtr = BufferPtr; |
103 | NewBufferPtr++; |
104 | if (NewBufferPtr == CommentEnd) |
105 | return; |
106 | |
107 | char C = *NewBufferPtr; |
108 | while (isHorizontalWhitespace(C)) { |
109 | NewBufferPtr++; |
110 | if (NewBufferPtr == CommentEnd) |
111 | return; |
112 | C = *NewBufferPtr; |
113 | } |
114 | if (C == '*') |
115 | BufferPtr = NewBufferPtr + 1; |
116 | break; |
117 | } |
118 | case '*': |
119 | BufferPtr++; |
120 | break; |
121 | } |
122 | } |
123 | |
124 | namespace { |
125 | |
126 | const char *(const char *BufferPtr, const char *BufferEnd) { |
127 | for ( ; BufferPtr != BufferEnd; ++BufferPtr) { |
128 | if (isVerticalWhitespace(*BufferPtr)) |
129 | return BufferPtr; |
130 | } |
131 | return BufferEnd; |
132 | } |
133 | |
/// Advance past one line terminator ("\n", "\r" or "\r\n") at \p BufferPtr.
///
/// \pre Either BufferPtr == BufferEnd, or *BufferPtr is '\n' or '\r'.
/// \returns the position just after the terminator, or \p BufferPtr unchanged
/// if the buffer is already exhausted.
const char *skipNewline(const char *BufferPtr, const char *BufferEnd) {
  if (BufferPtr == BufferEnd)
    return BufferPtr;

  if (*BufferPtr == '\n')
    BufferPtr++;
  else {
    assert(*BufferPtr == '\r');
    BufferPtr++;
    // Treat "\r\n" as a single terminator.
    if (BufferPtr != BufferEnd && *BufferPtr == '\n')
      BufferPtr++;
  }
  return BufferPtr;
}
148 | |
149 | const char *(const char *BufferPtr, |
150 | const char *BufferEnd) { |
151 | for ( ; BufferPtr != BufferEnd; ++BufferPtr) { |
152 | if (!isHTMLNamedCharacterReferenceCharacter(*BufferPtr)) |
153 | return BufferPtr; |
154 | } |
155 | return BufferEnd; |
156 | } |
157 | |
158 | const char *(const char *BufferPtr, |
159 | const char *BufferEnd) { |
160 | for ( ; BufferPtr != BufferEnd; ++BufferPtr) { |
161 | if (!isHTMLDecimalCharacterReferenceCharacter(*BufferPtr)) |
162 | return BufferPtr; |
163 | } |
164 | return BufferEnd; |
165 | } |
166 | |
167 | const char *(const char *BufferPtr, |
168 | const char *BufferEnd) { |
169 | for ( ; BufferPtr != BufferEnd; ++BufferPtr) { |
170 | if (!isHTMLHexCharacterReferenceCharacter(*BufferPtr)) |
171 | return BufferPtr; |
172 | } |
173 | return BufferEnd; |
174 | } |
175 | |
176 | bool (char C) { |
177 | return isLetter(C); |
178 | } |
179 | |
180 | bool (char C) { |
181 | return isAlphanumeric(C); |
182 | } |
183 | |
184 | const char *(const char *BufferPtr, const char *BufferEnd) { |
185 | for ( ; BufferPtr != BufferEnd; ++BufferPtr) { |
186 | if (!isHTMLIdentifierCharacter(*BufferPtr)) |
187 | return BufferPtr; |
188 | } |
189 | return BufferEnd; |
190 | } |
191 | |
192 | |
193 | |
194 | |
195 | |
/// Skip an HTML string quoted in single or double quotes.
///
/// \pre *BufferPtr is the opening quote character (' or ").
/// \returns a pointer to the matching closing quote, or \p BufferEnd if the
/// string is not terminated.  A quote directly preceded by a backslash does
/// not terminate the string.
const char *skipHTMLQuotedString(const char *BufferPtr, const char *BufferEnd)
{
  const char Quote = *BufferPtr;
  assert(Quote == '\"' || Quote == '\'');

  BufferPtr++;
  for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
    const char C = *BufferPtr;
    if (C == Quote && BufferPtr[-1] != '\\')
      return BufferPtr;
  }
  return BufferEnd;
}
209 | |
210 | const char *(const char *BufferPtr, const char *BufferEnd) { |
211 | for ( ; BufferPtr != BufferEnd; ++BufferPtr) { |
212 | if (!isWhitespace(*BufferPtr)) |
213 | return BufferPtr; |
214 | } |
215 | return BufferEnd; |
216 | } |
217 | |
218 | bool (const char *BufferPtr, const char *BufferEnd) { |
219 | return skipWhitespace(BufferPtr, BufferEnd) == BufferEnd; |
220 | } |
221 | |
222 | bool isCommandNameStartCharacter(char C) { |
223 | return isLetter(C); |
224 | } |
225 | |
226 | bool isCommandNameCharacter(char C) { |
227 | return isAlphanumeric(C); |
228 | } |
229 | |
230 | const char *skipCommandName(const char *BufferPtr, const char *BufferEnd) { |
231 | for ( ; BufferPtr != BufferEnd; ++BufferPtr) { |
232 | if (!isCommandNameCharacter(*BufferPtr)) |
233 | return BufferPtr; |
234 | } |
235 | return BufferEnd; |
236 | } |
237 | |
238 | |
239 | |
240 | const char *(const char *BufferPtr, const char *BufferEnd) { |
241 | const char *CurPtr = BufferPtr; |
242 | while (CurPtr != BufferEnd) { |
243 | while (!isVerticalWhitespace(*CurPtr)) { |
244 | CurPtr++; |
245 | if (CurPtr == BufferEnd) |
246 | return BufferEnd; |
247 | } |
248 | |
249 | const char *EscapePtr = CurPtr - 1; |
250 | while(isHorizontalWhitespace(*EscapePtr)) |
251 | EscapePtr--; |
252 | |
253 | if (*EscapePtr == '\\' || |
254 | (EscapePtr - 2 >= BufferPtr && EscapePtr[0] == '/' && |
255 | EscapePtr[-1] == '?' && EscapePtr[-2] == '?')) { |
256 | |
257 | CurPtr = skipNewline(CurPtr, BufferEnd); |
258 | } else |
259 | return CurPtr; |
260 | } |
261 | return BufferEnd; |
262 | } |
263 | |
264 | |
265 | |
266 | const char *(const char *BufferPtr, const char *BufferEnd) { |
267 | for ( ; BufferPtr != BufferEnd; ++BufferPtr) { |
268 | if (*BufferPtr == '*') { |
269 | assert(BufferPtr + 1 != BufferEnd); |
270 | if (*(BufferPtr + 1) == '/') |
271 | return BufferPtr; |
272 | } |
273 | } |
274 | llvm_unreachable("buffer end hit before '*/' was seen"); |
275 | } |
276 | |
277 | } |
278 | |
279 | void Lexer::(Token &Result, const char *TokEnd, |
280 | tok::TokenKind Kind) { |
281 | const unsigned TokLen = TokEnd - BufferPtr; |
282 | Result.setLocation(getSourceLocation(BufferPtr)); |
283 | Result.setKind(Kind); |
284 | Result.setLength(TokLen); |
285 | #ifndef NDEBUG |
286 | Result.TextPtr = "<UNSET>"; |
287 | Result.IntVal = 7; |
288 | #endif |
289 | BufferPtr = TokEnd; |
290 | } |
291 | |
292 | void Lexer::(Token &T) { |
293 | assert(CommentState == LCS_InsideBCPLComment || |
294 | CommentState == LCS_InsideCComment); |
295 | |
296 | |
297 | auto HandleNonCommandToken = [&]() -> void { |
298 | assert(State == LS_Normal); |
299 | |
300 | const char *TokenPtr = BufferPtr; |
301 | assert(TokenPtr < CommentEnd); |
302 | switch (*TokenPtr) { |
303 | case '\n': |
304 | case '\r': |
305 | TokenPtr = skipNewline(TokenPtr, CommentEnd); |
306 | formTokenWithChars(T, TokenPtr, tok::newline); |
307 | |
308 | if (CommentState == LCS_InsideCComment) |
309 | skipLineStartingDecorations(); |
310 | return; |
311 | |
312 | default: { |
313 | StringRef TokStartSymbols = ParseCommands ? "\n\r\\@&<" : "\n\r"; |
314 | size_t End = StringRef(TokenPtr, CommentEnd - TokenPtr) |
315 | .find_first_of(TokStartSymbols); |
316 | if (End != StringRef::npos) |
317 | TokenPtr += End; |
318 | else |
319 | TokenPtr = CommentEnd; |
320 | formTextToken(T, TokenPtr); |
321 | return; |
322 | } |
323 | } |
324 | }; |
325 | |
326 | if (!ParseCommands) |
327 | return HandleNonCommandToken(); |
328 | |
329 | switch (State) { |
330 | case LS_Normal: |
331 | break; |
332 | case LS_VerbatimBlockFirstLine: |
333 | lexVerbatimBlockFirstLine(T); |
334 | return; |
335 | case LS_VerbatimBlockBody: |
336 | lexVerbatimBlockBody(T); |
337 | return; |
338 | case LS_VerbatimLineText: |
339 | lexVerbatimLineText(T); |
340 | return; |
341 | case LS_HTMLStartTag: |
342 | lexHTMLStartTag(T); |
343 | return; |
344 | case LS_HTMLEndTag: |
345 | lexHTMLEndTag(T); |
346 | return; |
347 | } |
348 | |
349 | assert(State == LS_Normal); |
350 | const char *TokenPtr = BufferPtr; |
351 | assert(TokenPtr < CommentEnd); |
352 | switch(*TokenPtr) { |
353 | case '\\': |
354 | case '@': { |
355 | |
356 | |
357 | |
358 | tok::TokenKind CommandKind = |
359 | (*TokenPtr == '@') ? tok::at_command : tok::backslash_command; |
360 | TokenPtr++; |
361 | if (TokenPtr == CommentEnd) { |
362 | formTextToken(T, TokenPtr); |
363 | return; |
364 | } |
365 | char C = *TokenPtr; |
366 | switch (C) { |
367 | default: |
368 | break; |
369 | |
370 | case '\\': case '@': case '&': case '$': |
371 | case '#': case '<': case '>': case '%': |
372 | case '\"': case '.': case ':': |
373 | |
374 | TokenPtr++; |
375 | if (C == ':' && TokenPtr != CommentEnd && *TokenPtr == ':') { |
376 | |
377 | TokenPtr++; |
378 | } |
379 | StringRef UnescapedText(BufferPtr + 1, TokenPtr - (BufferPtr + 1)); |
380 | formTokenWithChars(T, TokenPtr, tok::text); |
381 | T.setText(UnescapedText); |
382 | return; |
383 | } |
384 | |
385 | |
386 | if (!isCommandNameStartCharacter(*TokenPtr)) { |
387 | formTextToken(T, TokenPtr); |
388 | return; |
389 | } |
390 | |
391 | TokenPtr = skipCommandName(TokenPtr, CommentEnd); |
392 | unsigned Length = TokenPtr - (BufferPtr + 1); |
393 | |
394 | |
395 | |
396 | if (Length == 1 && TokenPtr[-1] == 'f' && TokenPtr != CommentEnd) { |
397 | C = *TokenPtr; |
398 | if (C == '$' || C == '[' || C == ']' || C == '{' || C == '}') { |
399 | TokenPtr++; |
400 | Length++; |
401 | } |
402 | } |
403 | |
404 | StringRef CommandName(BufferPtr + 1, Length); |
405 | |
406 | const CommandInfo *Info = Traits.getCommandInfoOrNULL(CommandName); |
407 | if (!Info) { |
408 | if ((Info = Traits.getTypoCorrectCommandInfo(CommandName))) { |
409 | StringRef CorrectedName = Info->Name; |
410 | SourceLocation Loc = getSourceLocation(BufferPtr); |
411 | SourceLocation EndLoc = getSourceLocation(TokenPtr); |
412 | SourceRange FullRange = SourceRange(Loc, EndLoc); |
413 | SourceRange CommandRange(Loc.getLocWithOffset(1), EndLoc); |
414 | Diag(Loc, diag::warn_correct_comment_command_name) |
415 | << FullRange << CommandName << CorrectedName |
416 | << FixItHint::CreateReplacement(CommandRange, CorrectedName); |
417 | } else { |
418 | formTokenWithChars(T, TokenPtr, tok::unknown_command); |
419 | T.setUnknownCommandName(CommandName); |
420 | Diag(T.getLocation(), diag::warn_unknown_comment_command_name) |
421 | << SourceRange(T.getLocation(), T.getEndLocation()); |
422 | return; |
423 | } |
424 | } |
425 | if (Info->IsVerbatimBlockCommand) { |
426 | setupAndLexVerbatimBlock(T, TokenPtr, *BufferPtr, Info); |
427 | return; |
428 | } |
429 | if (Info->IsVerbatimLineCommand) { |
430 | setupAndLexVerbatimLine(T, TokenPtr, Info); |
431 | return; |
432 | } |
433 | formTokenWithChars(T, TokenPtr, CommandKind); |
434 | T.setCommandID(Info->getID()); |
435 | return; |
436 | } |
437 | |
438 | case '&': |
439 | lexHTMLCharacterReference(T); |
440 | return; |
441 | |
442 | case '<': { |
443 | TokenPtr++; |
444 | if (TokenPtr == CommentEnd) { |
445 | formTextToken(T, TokenPtr); |
446 | return; |
447 | } |
448 | const char C = *TokenPtr; |
449 | if (isHTMLIdentifierStartingCharacter(C)) |
450 | setupAndLexHTMLStartTag(T); |
451 | else if (C == '/') |
452 | setupAndLexHTMLEndTag(T); |
453 | else |
454 | formTextToken(T, TokenPtr); |
455 | return; |
456 | } |
457 | |
458 | default: |
459 | return HandleNonCommandToken(); |
460 | } |
461 | } |
462 | |
463 | void Lexer::setupAndLexVerbatimBlock(Token &T, |
464 | const char *TextBegin, |
465 | char Marker, const CommandInfo *Info) { |
466 | IsVerbatimBlockCommand", "/home/seafit/code_projects/clang_source/clang/lib/AST/CommentLexer.cpp", 466, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(Info->IsVerbatimBlockCommand); |
467 | |
468 | VerbatimBlockEndCommandName.clear(); |
469 | VerbatimBlockEndCommandName.append(Marker == '\\' ? "\\" : "@"); |
470 | VerbatimBlockEndCommandName.append(Info->EndCommandName); |
471 | |
472 | formTokenWithChars(T, TextBegin, tok::verbatim_block_begin); |
473 | T.setVerbatimBlockID(Info->getID()); |
474 | |
475 | |
476 | |
477 | |
478 | if (BufferPtr != CommentEnd && |
479 | isVerticalWhitespace(*BufferPtr)) { |
480 | BufferPtr = skipNewline(BufferPtr, CommentEnd); |
481 | State = LS_VerbatimBlockBody; |
482 | return; |
483 | } |
484 | |
485 | State = LS_VerbatimBlockFirstLine; |
486 | } |
487 | |
488 | void Lexer::(Token &T) { |
489 | again: |
490 | assert(BufferPtr < CommentEnd); |
491 | |
492 | |
493 | |
494 | |
495 | |
496 | const char *Newline = findNewline(BufferPtr, CommentEnd); |
497 | StringRef Line(BufferPtr, Newline - BufferPtr); |
498 | |
499 | |
500 | size_t Pos = Line.find(VerbatimBlockEndCommandName); |
501 | const char *TextEnd; |
502 | const char *NextLine; |
503 | if (Pos == StringRef::npos) { |
504 | |
505 | TextEnd = Newline; |
506 | NextLine = skipNewline(Newline, CommentEnd); |
507 | } else if (Pos == 0) { |
508 | |
509 | const char *End = BufferPtr + VerbatimBlockEndCommandName.size(); |
510 | StringRef Name(BufferPtr + 1, End - (BufferPtr + 1)); |
511 | formTokenWithChars(T, End, tok::verbatim_block_end); |
512 | T.setVerbatimBlockID(Traits.getCommandInfo(Name)->getID()); |
513 | State = LS_Normal; |
514 | return; |
515 | } else { |
516 | |
517 | TextEnd = BufferPtr + Pos; |
518 | NextLine = TextEnd; |
519 | |
520 | if (isWhitespace(BufferPtr, TextEnd)) { |
521 | BufferPtr = TextEnd; |
522 | goto again; |
523 | } |
524 | } |
525 | |
526 | StringRef Text(BufferPtr, TextEnd - BufferPtr); |
527 | formTokenWithChars(T, NextLine, tok::verbatim_block_line); |
528 | T.setVerbatimBlockText(Text); |
529 | |
530 | State = LS_VerbatimBlockBody; |
531 | } |
532 | |
533 | void Lexer::lexVerbatimBlockBody(Token &T) { |
534 | assert(State == LS_VerbatimBlockBody); |
535 | |
536 | if (CommentState == LCS_InsideCComment) |
537 | skipLineStartingDecorations(); |
538 | |
539 | if (BufferPtr == CommentEnd) { |
540 | formTokenWithChars(T, BufferPtr, tok::verbatim_block_line); |
541 | T.setVerbatimBlockText(""); |
542 | return; |
543 | } |
544 | |
545 | lexVerbatimBlockFirstLine(T); |
546 | } |
547 | |
548 | void Lexer::setupAndLexVerbatimLine(Token &T, const char *TextBegin, |
549 | const CommandInfo *Info) { |
550 | IsVerbatimLineCommand", "/home/seafit/code_projects/clang_source/clang/lib/AST/CommentLexer.cpp", 550, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(Info->IsVerbatimLineCommand); |
551 | formTokenWithChars(T, TextBegin, tok::verbatim_line_name); |
552 | T.setVerbatimLineID(Info->getID()); |
553 | |
554 | State = LS_VerbatimLineText; |
555 | } |
556 | |
557 | void Lexer::(Token &T) { |
558 | assert(State == LS_VerbatimLineText); |
559 | |
560 | |
561 | const char *Newline = findNewline(BufferPtr, CommentEnd); |
562 | StringRef Text(BufferPtr, Newline - BufferPtr); |
563 | formTokenWithChars(T, Newline, tok::verbatim_line_text); |
564 | T.setVerbatimLineText(Text); |
565 | |
566 | State = LS_Normal; |
567 | } |
568 | |
569 | void Lexer::(Token &T) { |
570 | const char *TokenPtr = BufferPtr; |
571 | assert(*TokenPtr == '&'); |
572 | TokenPtr++; |
573 | if (TokenPtr == CommentEnd) { |
574 | formTextToken(T, TokenPtr); |
575 | return; |
576 | } |
577 | const char *NamePtr; |
578 | bool isNamed = false; |
579 | bool isDecimal = false; |
580 | char C = *TokenPtr; |
581 | if (isHTMLNamedCharacterReferenceCharacter(C)) { |
582 | NamePtr = TokenPtr; |
583 | TokenPtr = skipNamedCharacterReference(TokenPtr, CommentEnd); |
584 | isNamed = true; |
585 | } else if (C == '#') { |
586 | TokenPtr++; |
587 | if (TokenPtr == CommentEnd) { |
588 | formTextToken(T, TokenPtr); |
589 | return; |
590 | } |
591 | C = *TokenPtr; |
592 | if (isHTMLDecimalCharacterReferenceCharacter(C)) { |
593 | NamePtr = TokenPtr; |
594 | TokenPtr = skipDecimalCharacterReference(TokenPtr, CommentEnd); |
595 | isDecimal = true; |
596 | } else if (C == 'x' || C == 'X') { |
597 | TokenPtr++; |
598 | NamePtr = TokenPtr; |
599 | TokenPtr = skipHexCharacterReference(TokenPtr, CommentEnd); |
600 | } else { |
601 | formTextToken(T, TokenPtr); |
602 | return; |
603 | } |
604 | } else { |
605 | formTextToken(T, TokenPtr); |
606 | return; |
607 | } |
608 | if (NamePtr == TokenPtr || TokenPtr == CommentEnd || |
609 | *TokenPtr != ';') { |
610 | formTextToken(T, TokenPtr); |
611 | return; |
612 | } |
613 | StringRef Name(NamePtr, TokenPtr - NamePtr); |
614 | TokenPtr++; |
615 | StringRef Resolved; |
616 | if (isNamed) |
617 | Resolved = resolveHTMLNamedCharacterReference(Name); |
618 | else if (isDecimal) |
619 | Resolved = resolveHTMLDecimalCharacterReference(Name); |
620 | else |
621 | Resolved = resolveHTMLHexCharacterReference(Name); |
622 | |
623 | if (Resolved.empty()) { |
624 | formTextToken(T, TokenPtr); |
625 | return; |
626 | } |
627 | formTokenWithChars(T, TokenPtr, tok::text); |
628 | T.setText(Resolved); |
629 | } |
630 | |
631 | void Lexer::setupAndLexHTMLStartTag(Token &T) { |
632 | assert(BufferPtr[0] == '<' && |
633 | isHTMLIdentifierStartingCharacter(BufferPtr[1])); |
634 | const char *TagNameEnd = skipHTMLIdentifier(BufferPtr + 2, CommentEnd); |
635 | StringRef Name(BufferPtr + 1, TagNameEnd - (BufferPtr + 1)); |
636 | if (!isHTMLTagName(Name)) { |
637 | formTextToken(T, TagNameEnd); |
638 | return; |
639 | } |
640 | |
641 | formTokenWithChars(T, TagNameEnd, tok::html_start_tag); |
642 | T.setHTMLTagStartName(Name); |
643 | |
644 | BufferPtr = skipWhitespace(BufferPtr, CommentEnd); |
645 | |
646 | const char C = *BufferPtr; |
647 | if (BufferPtr != CommentEnd && |
648 | (C == '>' || C == '/' || isHTMLIdentifierStartingCharacter(C))) |
649 | State = LS_HTMLStartTag; |
650 | } |
651 | |
652 | void Lexer::(Token &T) { |
653 | assert(State == LS_HTMLStartTag); |
654 | |
655 | const char *TokenPtr = BufferPtr; |
656 | char C = *TokenPtr; |
657 | if (isHTMLIdentifierCharacter(C)) { |
658 | TokenPtr = skipHTMLIdentifier(TokenPtr, CommentEnd); |
659 | StringRef Ident(BufferPtr, TokenPtr - BufferPtr); |
660 | formTokenWithChars(T, TokenPtr, tok::html_ident); |
661 | T.setHTMLIdent(Ident); |
662 | } else { |
663 | switch (C) { |
664 | case '=': |
665 | TokenPtr++; |
666 | formTokenWithChars(T, TokenPtr, tok::html_equals); |
667 | break; |
668 | case '\"': |
669 | case '\'': { |
670 | const char *OpenQuote = TokenPtr; |
671 | TokenPtr = skipHTMLQuotedString(TokenPtr, CommentEnd); |
672 | const char *ClosingQuote = TokenPtr; |
673 | if (TokenPtr != CommentEnd) |
674 | TokenPtr++; |
675 | formTokenWithChars(T, TokenPtr, tok::html_quoted_string); |
676 | T.setHTMLQuotedString(StringRef(OpenQuote + 1, |
677 | ClosingQuote - (OpenQuote + 1))); |
678 | break; |
679 | } |
680 | case '>': |
681 | TokenPtr++; |
682 | formTokenWithChars(T, TokenPtr, tok::html_greater); |
683 | State = LS_Normal; |
684 | return; |
685 | case '/': |
686 | TokenPtr++; |
687 | if (TokenPtr != CommentEnd && *TokenPtr == '>') { |
688 | TokenPtr++; |
689 | formTokenWithChars(T, TokenPtr, tok::html_slash_greater); |
690 | } else |
691 | formTextToken(T, TokenPtr); |
692 | |
693 | State = LS_Normal; |
694 | return; |
695 | } |
696 | } |
697 | |
698 | |
699 | |
700 | BufferPtr = skipWhitespace(BufferPtr, CommentEnd); |
701 | if (BufferPtr == CommentEnd) { |
702 | State = LS_Normal; |
703 | return; |
704 | } |
705 | |
706 | C = *BufferPtr; |
707 | if (!isHTMLIdentifierStartingCharacter(C) && |
708 | C != '=' && C != '\"' && C != '\'' && C != '>') { |
709 | State = LS_Normal; |
710 | return; |
711 | } |
712 | } |
713 | |
714 | void Lexer::setupAndLexHTMLEndTag(Token &T) { |
715 | assert(BufferPtr[0] == '<' && BufferPtr[1] == '/'); |
716 | |
717 | const char *TagNameBegin = skipWhitespace(BufferPtr + 2, CommentEnd); |
718 | const char *TagNameEnd = skipHTMLIdentifier(TagNameBegin, CommentEnd); |
719 | StringRef Name(TagNameBegin, TagNameEnd - TagNameBegin); |
720 | if (!isHTMLTagName(Name)) { |
721 | formTextToken(T, TagNameEnd); |
722 | return; |
723 | } |
724 | |
725 | const char *End = skipWhitespace(TagNameEnd, CommentEnd); |
726 | |
727 | formTokenWithChars(T, End, tok::html_end_tag); |
728 | T.setHTMLTagEndName(Name); |
729 | |
730 | if (BufferPtr != CommentEnd && *BufferPtr == '>') |
731 | State = LS_HTMLEndTag; |
732 | } |
733 | |
734 | void Lexer::(Token &T) { |
735 | '", "/home/seafit/code_projects/clang_source/clang/lib/AST/CommentLexer.cpp", 735, __PRETTY_FUNCTION__))" file_link="../../../include/assert.h.html#88" macro="true">assert(BufferPtr != CommentEnd && *BufferPtr == '>'); |
736 | |
737 | formTokenWithChars(T, BufferPtr + 1, tok::html_greater); |
738 | State = LS_Normal; |
739 | } |
740 | |
741 | Lexer::Lexer(llvm::BumpPtrAllocator &Allocator, DiagnosticsEngine &Diags, |
742 | const CommandTraits &Traits, SourceLocation FileLoc, |
743 | const char *BufferStart, const char *BufferEnd, |
744 | bool ParseCommands) |
745 | : Allocator(Allocator), Diags(Diags), Traits(Traits), |
746 | BufferStart(BufferStart), BufferEnd(BufferEnd), FileLoc(FileLoc), |
747 | BufferPtr(BufferStart), CommentState(LCS_BeforeComment), State(LS_Normal), |
748 | ParseCommands(ParseCommands) {} |
749 | |
750 | void Lexer::(Token &T) { |
751 | again: |
752 | switch (CommentState) { |
753 | case LCS_BeforeComment: |
754 | if (BufferPtr == BufferEnd) { |
755 | formTokenWithChars(T, BufferPtr, tok::eof); |
756 | return; |
757 | } |
758 | |
759 | assert(*BufferPtr == '/'); |
760 | BufferPtr++; |
761 | switch(*BufferPtr) { |
762 | case '/': { |
763 | BufferPtr++; |
764 | |
765 | if (BufferPtr != BufferEnd) { |
766 | |
767 | |
768 | |
769 | |
770 | const char C = *BufferPtr; |
771 | if (C == '/' || C == '!') |
772 | BufferPtr++; |
773 | } |
774 | |
775 | |
776 | |
777 | |
778 | if (BufferPtr != BufferEnd && *BufferPtr == '<') |
779 | BufferPtr++; |
780 | |
781 | CommentState = LCS_InsideBCPLComment; |
782 | if (State != LS_VerbatimBlockBody && State != LS_VerbatimBlockFirstLine) |
783 | State = LS_Normal; |
784 | CommentEnd = findBCPLCommentEnd(BufferPtr, BufferEnd); |
785 | goto again; |
786 | } |
787 | case '*': { |
788 | BufferPtr++; |
789 | |
790 | |
791 | const char C = *BufferPtr; |
792 | if ((C == '*' && *(BufferPtr + 1) != '/') || C == '!') |
793 | BufferPtr++; |
794 | |
795 | |
796 | if (BufferPtr != BufferEnd && *BufferPtr == '<') |
797 | BufferPtr++; |
798 | |
799 | CommentState = LCS_InsideCComment; |
800 | State = LS_Normal; |
801 | CommentEnd = findCCommentEnd(BufferPtr, BufferEnd); |
802 | goto again; |
803 | } |
804 | default: |
805 | llvm_unreachable("second character of comment should be '/' or '*'"); |
806 | } |
807 | |
808 | case LCS_BetweenComments: { |
809 | |
810 | |
811 | const char *EndWhitespace = BufferPtr; |
812 | while(EndWhitespace != BufferEnd && *EndWhitespace != '/') |
813 | EndWhitespace++; |
814 | |
815 | |
816 | |
817 | |
818 | |
819 | formTokenWithChars(T, EndWhitespace, tok::newline); |
820 | |
821 | CommentState = LCS_BeforeComment; |
822 | break; |
823 | } |
824 | |
825 | case LCS_InsideBCPLComment: |
826 | case LCS_InsideCComment: |
827 | if (BufferPtr != CommentEnd) { |
828 | lexCommentText(T); |
829 | break; |
830 | } else { |
831 | |
832 | if (CommentState == LCS_InsideCComment) { |
833 | assert(BufferPtr[0] == '*' && BufferPtr[1] == '/'); |
834 | BufferPtr += 2; |
835 | assert(BufferPtr <= BufferEnd); |
836 | |
837 | |
838 | |
839 | formTokenWithChars(T, BufferPtr, tok::newline); |
840 | |
841 | CommentState = LCS_BetweenComments; |
842 | break; |
843 | } else { |
844 | |
845 | CommentState = LCS_BetweenComments; |
846 | goto again; |
847 | } |
848 | } |
849 | } |
850 | } |
851 | |
852 | StringRef Lexer::(const Token &Tok, |
853 | const SourceManager &SourceMgr, |
854 | bool *Invalid) const { |
855 | SourceLocation Loc = Tok.getLocation(); |
856 | std::pair<FileID, unsigned> LocInfo = SourceMgr.getDecomposedLoc(Loc); |
857 | |
858 | bool InvalidTemp = false; |
859 | StringRef File = SourceMgr.getBufferData(LocInfo.first, &InvalidTemp); |
860 | if (InvalidTemp) { |
861 | *Invalid = true; |
862 | return StringRef(); |
863 | } |
864 | |
865 | const char *Begin = File.data() + LocInfo.second; |
866 | return StringRef(Begin, Tok.getLength()); |
867 | } |
868 | |
869 | } |
870 | } |
871 | |