1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
9 | |
10 | |
11 | |
12 | |
13 | |
14 | |
15 | #ifndef LLVM_CLANG_LIB_FORMAT_ENCODING_H |
16 | #define LLVM_CLANG_LIB_FORMAT_ENCODING_H |
17 | |
18 | #include "clang/Basic/LLVM.h" |
19 | #include "llvm/ADT/StringRef.h" |
20 | #include "llvm/Support/ConvertUTF.h" |
21 | #include "llvm/Support/Unicode.h" |
22 | |
23 | namespace clang { |
24 | namespace format { |
25 | namespace encoding { |
26 | |
/// Text encodings distinguished by the helpers in this header.
enum Encoding {
  /// The text was validated as legal UTF-8 (see detectEncoding).
  Encoding_UTF8 = 0,
  /// Anything else; the helpers here then count one column / one code point
  /// per byte.
  Encoding_Unknown = 1
};
31 | |
32 | |
33 | |
34 | inline Encoding detectEncoding(StringRef Text) { |
35 | const llvm::UTF8 *Ptr = reinterpret_cast<const llvm::UTF8 *>(Text.begin()); |
36 | const llvm::UTF8 *BufEnd = reinterpret_cast<const llvm::UTF8 *>(Text.end()); |
37 | if (llvm::isLegalUTF8String(&Ptr, BufEnd)) |
38 | return Encoding_UTF8; |
39 | return Encoding_Unknown; |
40 | } |
41 | |
42 | |
43 | |
44 | |
45 | inline unsigned columnWidth(StringRef Text, Encoding Encoding) { |
46 | if (Encoding == Encoding_UTF8) { |
47 | int ContentWidth = llvm::sys::unicode::columnWidthUTF8(Text); |
48 | |
49 | |
50 | |
51 | |
52 | if (ContentWidth >= 0) |
53 | return ContentWidth; |
54 | } |
55 | return Text.size(); |
56 | } |
57 | |
58 | |
59 | |
60 | |
61 | inline unsigned columnWidthWithTabs(StringRef Text, unsigned StartColumn, |
62 | unsigned TabWidth, Encoding Encoding) { |
63 | unsigned TotalWidth = 0; |
64 | StringRef Tail = Text; |
65 | for (;;) { |
66 | StringRef::size_type TabPos = Tail.find('\t'); |
67 | if (TabPos == StringRef::npos) |
68 | return TotalWidth + columnWidth(Tail, Encoding); |
69 | TotalWidth += columnWidth(Tail.substr(0, TabPos), Encoding); |
70 | TotalWidth += TabWidth - (TotalWidth + StartColumn) % TabWidth; |
71 | Tail = Tail.substr(TabPos + 1); |
72 | } |
73 | } |
74 | |
75 | |
76 | |
77 | inline unsigned getCodePointNumBytes(char FirstChar, Encoding Encoding) { |
78 | switch (Encoding) { |
79 | case Encoding_UTF8: |
80 | return llvm::getNumBytesForUTF8(FirstChar); |
81 | default: |
82 | return 1; |
83 | } |
84 | } |
85 | |
/// Returns true if \p c is one of the octal digits '0' through '7'.
inline bool isOctDigit(char c) { return !(c < '0' || c > '7'); }
87 | |
/// Returns true if \p c is a hexadecimal digit: '0'-'9', 'a'-'f' or 'A'-'F'.
inline bool isHexDigit(char c) {
  if (c >= '0' && c <= '9')
    return true;
  if (c >= 'a' && c <= 'f')
    return true;
  return c >= 'A' && c <= 'F';
}
92 | |
93 | |
94 | |
95 | |
96 | inline unsigned getEscapeSequenceLength(StringRef Text) { |
97 | assert(Text[0] == '\\'); |
98 | if (Text.size() < 2) |
99 | return 1; |
100 | |
101 | switch (Text[1]) { |
102 | case 'u': |
103 | return 6; |
104 | case 'U': |
105 | return 10; |
106 | case 'x': { |
107 | unsigned I = 2; |
108 | while (I < Text.size() && isHexDigit(Text[I])) |
109 | ++I; |
110 | return I; |
111 | } |
112 | default: |
113 | if (isOctDigit(Text[1])) { |
114 | unsigned I = 1; |
115 | while (I < Text.size() && I < 4 && isOctDigit(Text[I])) |
116 | ++I; |
117 | return I; |
118 | } |
119 | return 1 + llvm::getNumBytesForUTF8(Text[1]); |
120 | } |
121 | } |
122 | |
123 | } |
124 | } |
125 | } |
126 | |
127 | #endif |
128 | |