Clang Project

clang_source_code/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp
//===--- ClangCommentHTMLNamedCharacterReferenceEmitter.cpp -----------------=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This tablegen backend emits an efficient function to translate HTML named
// character references to UTF-8 sequences.
//
//===----------------------------------------------------------------------===//
13
14#include "llvm/ADT/SmallString.h"
15#include "llvm/Support/ConvertUTF.h"
16#include "llvm/TableGen/Error.h"
17#include "llvm/TableGen/Record.h"
18#include "llvm/TableGen/StringMatcher.h"
19#include "llvm/TableGen/TableGenBackend.h"
20#include <vector>
21
22using namespace llvm;
23
24/// Convert a code point to the corresponding UTF-8 sequence represented
25/// as a C string literal.
26///
27/// \returns true on success.
28static bool translateCodePointToUTF8(unsigned CodePoint,
29                                     SmallVectorImpl<char> &CLiteral) {
30  char Translated[UNI_MAX_UTF8_BYTES_PER_CODE_POINT];
31  char *TranslatedPtr = Translated;
32  if (!ConvertCodePointToUTF8(CodePointTranslatedPtr))
33    return false;
34
35  StringRef UTF8(Translated, TranslatedPtr - Translated);
36
37  raw_svector_ostream OS(CLiteral);
38  OS << "\"";
39  for (size_t i = 0e = UTF8.size(); i != e; ++i) {
40    OS << "\\x";
41    OS.write_hex(static_cast<unsigned char>(UTF8[i]));
42  }
43  OS << "\"";
44
45  return true;
46}
47
48namespace clang {
49void EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper &Records,
50                                                  raw_ostream &OS) {
51  std::vector<Record *> Tags = Records.getAllDerivedDefinitions("NCR");
52  std::vector<StringMatcher::StringPair> NameToUTF8;
53  SmallString<32CLiteral;
54  for (std::vector<Record *>::iterator I = Tags.begin(), E = Tags.end();
55       I != E; ++I) {
56    Record &Tag = **I;
57    std::string Spelling = Tag.getValueAsString("Spelling");
58    uint64_t CodePoint = Tag.getValueAsInt("CodePoint");
59    CLiteral.clear();
60    CLiteral.append("return ");
61    if (!translateCodePointToUTF8(CodePoint, CLiteral)) {
62      SrcMgr.PrintMessage(Tag.getLoc().front(),
63                          SourceMgr::DK_Error,
64                          Twine("invalid code point"));
65      continue;
66    }
67    CLiteral.append(";");
68
69    StringMatcher::StringPair Match(Spelling, CLiteral.str());
70    NameToUTF8.push_back(Match);
71  }
72
73  emitSourceFileHeader("HTML named character reference to UTF-8 "
74                       "translation", OS);
75
76  OS << "StringRef translateHTMLNamedCharacterReferenceToUTF8(\n"
77        "                                             StringRef Name) {\n";
78  StringMatcher("Name", NameToUTF8, OS).Emit();
79  OS << "  return StringRef();\n"
80     << "}\n\n";
81}
82
83// end namespace clang
84
85