1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
9 | |
10 | |
11 | |
12 | |
13 | |
14 | #include "llvm/ADT/SmallString.h" |
15 | #include "llvm/Support/ConvertUTF.h" |
16 | #include "llvm/TableGen/Error.h" |
17 | #include "llvm/TableGen/Record.h" |
18 | #include "llvm/TableGen/StringMatcher.h" |
19 | #include "llvm/TableGen/TableGenBackend.h" |
20 | #include <vector> |
21 | |
22 | using namespace llvm; |
23 | |
24 | |
25 | |
26 | |
27 | |
28 | static bool translateCodePointToUTF8(unsigned CodePoint, |
29 | SmallVectorImpl<char> &CLiteral) { |
30 | char Translated[UNI_MAX_UTF8_BYTES_PER_CODE_POINT]; |
31 | char *TranslatedPtr = Translated; |
32 | if (!ConvertCodePointToUTF8(CodePoint, TranslatedPtr)) |
33 | return false; |
34 | |
35 | StringRef UTF8(Translated, TranslatedPtr - Translated); |
36 | |
37 | raw_svector_ostream OS(CLiteral); |
38 | OS << "\""; |
39 | for (size_t i = 0, e = UTF8.size(); i != e; ++i) { |
40 | OS << "\\x"; |
41 | OS.write_hex(static_cast<unsigned char>(UTF8[i])); |
42 | } |
43 | OS << "\""; |
44 | |
45 | return true; |
46 | } |
47 | |
48 | namespace clang { |
49 | void (RecordKeeper &Records, |
50 | raw_ostream &OS) { |
51 | std::vector<Record *> Tags = Records.getAllDerivedDefinitions("NCR"); |
52 | std::vector<StringMatcher::StringPair> NameToUTF8; |
53 | SmallString<32> CLiteral; |
54 | for (std::vector<Record *>::iterator I = Tags.begin(), E = Tags.end(); |
55 | I != E; ++I) { |
56 | Record &Tag = **I; |
57 | std::string Spelling = Tag.getValueAsString("Spelling"); |
58 | uint64_t CodePoint = Tag.getValueAsInt("CodePoint"); |
59 | CLiteral.clear(); |
60 | CLiteral.append("return "); |
61 | if (!translateCodePointToUTF8(CodePoint, CLiteral)) { |
62 | SrcMgr.PrintMessage(Tag.getLoc().front(), |
63 | SourceMgr::DK_Error, |
64 | Twine("invalid code point")); |
65 | continue; |
66 | } |
67 | CLiteral.append(";"); |
68 | |
69 | StringMatcher::StringPair Match(Spelling, CLiteral.str()); |
70 | NameToUTF8.push_back(Match); |
71 | } |
72 | |
73 | emitSourceFileHeader("HTML named character reference to UTF-8 " |
74 | "translation", OS); |
75 | |
76 | OS << "StringRef translateHTMLNamedCharacterReferenceToUTF8(\n" |
77 | " StringRef Name) {\n"; |
78 | StringMatcher("Name", NameToUTF8, OS).Emit(); |
79 | OS << " return StringRef();\n" |
80 | << "}\n\n"; |
81 | } |
82 | |
83 | } |
84 | |
85 | |