Clang Project

clang_source_code/unittests/Basic/CharInfoTest.cpp
//===- unittests/Basic/CharInfoTest.cpp -- ASCII classification tests -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8
9#include "clang/Basic/CharInfo.h"
10#include "gtest/gtest.h"
11
12using namespace llvm;
13using namespace clang;
14
15// Check that the CharInfo table has been constructed reasonably.
16TEST(CharInfoTest, validateInfoTable) {
17  using namespace charinfo;
18  EXPECT_EQ((unsigned)CHAR_SPACE,   InfoTable[(unsigned)' ']);
19  EXPECT_EQ((unsigned)CHAR_HORZ_WS, InfoTable[(unsigned)'\t']);
20  EXPECT_EQ((unsigned)CHAR_HORZ_WS, InfoTable[(unsigned)'\f']); // ??
21  EXPECT_EQ((unsigned)CHAR_HORZ_WS, InfoTable[(unsigned)'\v']); // ??
22  EXPECT_EQ((unsigned)CHAR_VERT_WS, InfoTable[(unsigned)'\n']);
23  EXPECT_EQ((unsigned)CHAR_VERT_WS, InfoTable[(unsigned)'\r']);
24  EXPECT_EQ((unsigned)CHAR_UNDER,   InfoTable[(unsigned)'_']);
25  EXPECT_EQ((unsigned)CHAR_PERIOD,  InfoTable[(unsigned)'.']);
26
27  for (unsigned i = 'a'i <= 'f'; ++i) {
28    EXPECT_EQ((unsigned)CHAR_XLOWER, InfoTable[i]);
29    EXPECT_EQ((unsigned)CHAR_XUPPER, InfoTable[i+'A'-'a']);
30  }
31
32  for (unsigned i = 'g'i <= 'z'; ++i) {
33    EXPECT_EQ((unsigned)CHAR_LOWER, InfoTable[i]);
34    EXPECT_EQ((unsigned)CHAR_UPPER, InfoTable[i+'A'-'a']);
35  }
36
37  for (unsigned i = '0'; i <= '9'; ++i)
38    EXPECT_EQ((unsigned)CHAR_DIGIT, InfoTable[i]);
39}
40
41// Check various predicates.
42TEST(CharInfoTest, isASCII) {
43  EXPECT_TRUE(isASCII('\0'));
44  EXPECT_TRUE(isASCII('\n'));
45  EXPECT_TRUE(isASCII(' '));
46  EXPECT_TRUE(isASCII('a'));
47  EXPECT_TRUE(isASCII('\x7f'));
48  EXPECT_FALSE(isASCII('\x80'));
49  EXPECT_FALSE(isASCII('\xc2'));
50  EXPECT_FALSE(isASCII('\xff'));
51}
52
53TEST(CharInfoTest, isIdentifierHead) {
54  EXPECT_TRUE(isIdentifierHead('a'));
55  EXPECT_TRUE(isIdentifierHead('A'));
56  EXPECT_TRUE(isIdentifierHead('z'));
57  EXPECT_TRUE(isIdentifierHead('Z'));
58  EXPECT_TRUE(isIdentifierHead('_'));
59
60  EXPECT_FALSE(isIdentifierHead('0'));
61  EXPECT_FALSE(isIdentifierHead('.'));
62  EXPECT_FALSE(isIdentifierHead('`'));
63  EXPECT_FALSE(isIdentifierHead('\0'));
64
65  EXPECT_FALSE(isIdentifierHead('$'));
66  EXPECT_TRUE(isIdentifierHead('$'/*AllowDollar=*/true));
67
68  EXPECT_FALSE(isIdentifierHead('\x80'));
69  EXPECT_FALSE(isIdentifierHead('\xc2'));
70  EXPECT_FALSE(isIdentifierHead('\xff'));
71}
72
73TEST(CharInfoTest, isIdentifierBody) {
74  EXPECT_TRUE(isIdentifierBody('a'));
75  EXPECT_TRUE(isIdentifierBody('A'));
76  EXPECT_TRUE(isIdentifierBody('z'));
77  EXPECT_TRUE(isIdentifierBody('Z'));
78  EXPECT_TRUE(isIdentifierBody('_'));
79
80  EXPECT_TRUE(isIdentifierBody('0'));
81  EXPECT_FALSE(isIdentifierBody('.'));
82  EXPECT_FALSE(isIdentifierBody('`'));
83  EXPECT_FALSE(isIdentifierBody('\0'));
84
85  EXPECT_FALSE(isIdentifierBody('$'));
86  EXPECT_TRUE(isIdentifierBody('$'/*AllowDollar=*/true));
87
88  EXPECT_FALSE(isIdentifierBody('\x80'));
89  EXPECT_FALSE(isIdentifierBody('\xc2'));
90  EXPECT_FALSE(isIdentifierBody('\xff'));
91}
92
93TEST(CharInfoTest, isHorizontalWhitespace) {
94  EXPECT_FALSE(isHorizontalWhitespace('a'));
95  EXPECT_FALSE(isHorizontalWhitespace('_'));
96  EXPECT_FALSE(isHorizontalWhitespace('0'));
97  EXPECT_FALSE(isHorizontalWhitespace('.'));
98  EXPECT_FALSE(isHorizontalWhitespace('`'));
99  EXPECT_FALSE(isHorizontalWhitespace('\0'));
100  EXPECT_FALSE(isHorizontalWhitespace('\x7f'));
101
102  EXPECT_TRUE(isHorizontalWhitespace(' '));
103  EXPECT_TRUE(isHorizontalWhitespace('\t'));
104  EXPECT_TRUE(isHorizontalWhitespace('\f')); // ??
105  EXPECT_TRUE(isHorizontalWhitespace('\v')); // ??
106
107  EXPECT_FALSE(isHorizontalWhitespace('\n'));
108  EXPECT_FALSE(isHorizontalWhitespace('\r'));
109
110  EXPECT_FALSE(isHorizontalWhitespace('\x80'));
111  EXPECT_FALSE(isHorizontalWhitespace('\xc2'));
112  EXPECT_FALSE(isHorizontalWhitespace('\xff'));
113}
114
115TEST(CharInfoTest, isVerticalWhitespace) {
116  EXPECT_FALSE(isVerticalWhitespace('a'));
117  EXPECT_FALSE(isVerticalWhitespace('_'));
118  EXPECT_FALSE(isVerticalWhitespace('0'));
119  EXPECT_FALSE(isVerticalWhitespace('.'));
120  EXPECT_FALSE(isVerticalWhitespace('`'));
121  EXPECT_FALSE(isVerticalWhitespace('\0'));
122  EXPECT_FALSE(isVerticalWhitespace('\x7f'));
123
124  EXPECT_FALSE(isVerticalWhitespace(' '));
125  EXPECT_FALSE(isVerticalWhitespace('\t'));
126  EXPECT_FALSE(isVerticalWhitespace('\f')); // ??
127  EXPECT_FALSE(isVerticalWhitespace('\v')); // ??
128
129  EXPECT_TRUE(isVerticalWhitespace('\n'));
130  EXPECT_TRUE(isVerticalWhitespace('\r'));
131
132  EXPECT_FALSE(isVerticalWhitespace('\x80'));
133  EXPECT_FALSE(isVerticalWhitespace('\xc2'));
134  EXPECT_FALSE(isVerticalWhitespace('\xff'));
135}
136
137TEST(CharInfoTest, isWhitespace) {
138  EXPECT_FALSE(isWhitespace('a'));
139  EXPECT_FALSE(isWhitespace('_'));
140  EXPECT_FALSE(isWhitespace('0'));
141  EXPECT_FALSE(isWhitespace('.'));
142  EXPECT_FALSE(isWhitespace('`'));
143  EXPECT_FALSE(isWhitespace('\0'));
144  EXPECT_FALSE(isWhitespace('\x7f'));
145
146  EXPECT_TRUE(isWhitespace(' '));
147  EXPECT_TRUE(isWhitespace('\t'));
148  EXPECT_TRUE(isWhitespace('\f'));
149  EXPECT_TRUE(isWhitespace('\v'));
150
151  EXPECT_TRUE(isWhitespace('\n'));
152  EXPECT_TRUE(isWhitespace('\r'));
153
154  EXPECT_FALSE(isWhitespace('\x80'));
155  EXPECT_FALSE(isWhitespace('\xc2'));
156  EXPECT_FALSE(isWhitespace('\xff'));
157}
158
159TEST(CharInfoTest, isDigit) {
160  EXPECT_TRUE(isDigit('0'));
161  EXPECT_TRUE(isDigit('9'));
162
163  EXPECT_FALSE(isDigit('a'));
164  EXPECT_FALSE(isDigit('A'));
165
166  EXPECT_FALSE(isDigit('z'));
167  EXPECT_FALSE(isDigit('Z'));
168  
169  EXPECT_FALSE(isDigit('.'));
170  EXPECT_FALSE(isDigit('_'));
171
172  EXPECT_FALSE(isDigit('/'));
173  EXPECT_FALSE(isDigit('\0'));
174
175  EXPECT_FALSE(isDigit('\x80'));
176  EXPECT_FALSE(isDigit('\xc2'));
177  EXPECT_FALSE(isDigit('\xff'));
178}
179
180TEST(CharInfoTest, isHexDigit) {
181  EXPECT_TRUE(isHexDigit('0'));
182  EXPECT_TRUE(isHexDigit('9'));
183
184  EXPECT_TRUE(isHexDigit('a'));
185  EXPECT_TRUE(isHexDigit('A'));
186
187  EXPECT_FALSE(isHexDigit('z'));
188  EXPECT_FALSE(isHexDigit('Z'));
189  
190  EXPECT_FALSE(isHexDigit('.'));
191  EXPECT_FALSE(isHexDigit('_'));
192
193  EXPECT_FALSE(isHexDigit('/'));
194  EXPECT_FALSE(isHexDigit('\0'));
195
196  EXPECT_FALSE(isHexDigit('\x80'));
197  EXPECT_FALSE(isHexDigit('\xc2'));
198  EXPECT_FALSE(isHexDigit('\xff'));
199}
200
201TEST(CharInfoTest, isLetter) {
202  EXPECT_FALSE(isLetter('0'));
203  EXPECT_FALSE(isLetter('9'));
204
205  EXPECT_TRUE(isLetter('a'));
206  EXPECT_TRUE(isLetter('A'));
207
208  EXPECT_TRUE(isLetter('z'));
209  EXPECT_TRUE(isLetter('Z'));
210  
211  EXPECT_FALSE(isLetter('.'));
212  EXPECT_FALSE(isLetter('_'));
213
214  EXPECT_FALSE(isLetter('/'));
215  EXPECT_FALSE(isLetter('('));
216  EXPECT_FALSE(isLetter('\0'));
217
218  EXPECT_FALSE(isLetter('\x80'));
219  EXPECT_FALSE(isLetter('\xc2'));
220  EXPECT_FALSE(isLetter('\xff'));
221}
222
223TEST(CharInfoTest, isLowercase) {
224  EXPECT_FALSE(isLowercase('0'));
225  EXPECT_FALSE(isLowercase('9'));
226
227  EXPECT_TRUE(isLowercase('a'));
228  EXPECT_FALSE(isLowercase('A'));
229
230  EXPECT_TRUE(isLowercase('z'));
231  EXPECT_FALSE(isLowercase('Z'));
232  
233  EXPECT_FALSE(isLowercase('.'));
234  EXPECT_FALSE(isLowercase('_'));
235
236  EXPECT_FALSE(isLowercase('/'));
237  EXPECT_FALSE(isLowercase('('));
238  EXPECT_FALSE(isLowercase('\0'));
239
240  EXPECT_FALSE(isLowercase('\x80'));
241  EXPECT_FALSE(isLowercase('\xc2'));
242  EXPECT_FALSE(isLowercase('\xff'));
243}
244
245TEST(CharInfoTest, isUppercase) {
246  EXPECT_FALSE(isUppercase('0'));
247  EXPECT_FALSE(isUppercase('9'));
248
249  EXPECT_FALSE(isUppercase('a'));
250  EXPECT_TRUE(isUppercase('A'));
251
252  EXPECT_FALSE(isUppercase('z'));
253  EXPECT_TRUE(isUppercase('Z'));
254
255  EXPECT_FALSE(isUppercase('.'));
256  EXPECT_FALSE(isUppercase('_'));
257
258  EXPECT_FALSE(isUppercase('/'));
259  EXPECT_FALSE(isUppercase('('));
260  EXPECT_FALSE(isUppercase('\0'));
261
262  EXPECT_FALSE(isUppercase('\x80'));
263  EXPECT_FALSE(isUppercase('\xc2'));
264  EXPECT_FALSE(isUppercase('\xff'));
265}
266
267TEST(CharInfoTest, isAlphanumeric) {
268  EXPECT_TRUE(isAlphanumeric('0'));
269  EXPECT_TRUE(isAlphanumeric('9'));
270
271  EXPECT_TRUE(isAlphanumeric('a'));
272  EXPECT_TRUE(isAlphanumeric('A'));
273
274  EXPECT_TRUE(isAlphanumeric('z'));
275  EXPECT_TRUE(isAlphanumeric('Z'));
276
277  EXPECT_FALSE(isAlphanumeric('.'));
278  EXPECT_FALSE(isAlphanumeric('_'));
279
280  EXPECT_FALSE(isAlphanumeric('/'));
281  EXPECT_FALSE(isAlphanumeric('('));
282  EXPECT_FALSE(isAlphanumeric('\0'));
283
284  EXPECT_FALSE(isAlphanumeric('\x80'));
285  EXPECT_FALSE(isAlphanumeric('\xc2'));
286  EXPECT_FALSE(isAlphanumeric('\xff'));
287}
288
289TEST(CharInfoTest, isPunctuation) {
290  EXPECT_FALSE(isPunctuation('0'));
291  EXPECT_FALSE(isPunctuation('9'));
292
293  EXPECT_FALSE(isPunctuation('a'));
294  EXPECT_FALSE(isPunctuation('A'));
295
296  EXPECT_FALSE(isPunctuation('z'));
297  EXPECT_FALSE(isPunctuation('Z'));
298
299  EXPECT_TRUE(isPunctuation('.'));
300  EXPECT_TRUE(isPunctuation('_'));
301
302  EXPECT_TRUE(isPunctuation('/'));
303  EXPECT_TRUE(isPunctuation('('));
304
305  EXPECT_FALSE(isPunctuation(' '));
306  EXPECT_FALSE(isPunctuation('\n'));
307  EXPECT_FALSE(isPunctuation('\0'));
308
309  EXPECT_FALSE(isPunctuation('\x80'));
310  EXPECT_FALSE(isPunctuation('\xc2'));
311  EXPECT_FALSE(isPunctuation('\xff'));
312}
313
314TEST(CharInfoTest, isPrintable) {
315  EXPECT_TRUE(isPrintable('0'));
316  EXPECT_TRUE(isPrintable('9'));
317
318  EXPECT_TRUE(isPrintable('a'));
319  EXPECT_TRUE(isPrintable('A'));
320
321  EXPECT_TRUE(isPrintable('z'));
322  EXPECT_TRUE(isPrintable('Z'));
323
324  EXPECT_TRUE(isPrintable('.'));
325  EXPECT_TRUE(isPrintable('_'));
326
327  EXPECT_TRUE(isPrintable('/'));
328  EXPECT_TRUE(isPrintable('('));
329
330  EXPECT_TRUE(isPrintable(' '));
331  EXPECT_FALSE(isPrintable('\t'));
332  EXPECT_FALSE(isPrintable('\n'));
333  EXPECT_FALSE(isPrintable('\0'));
334
335  EXPECT_FALSE(isPrintable('\x80'));
336  EXPECT_FALSE(isPrintable('\xc2'));
337  EXPECT_FALSE(isPrintable('\xff'));
338}
339
340TEST(CharInfoTest, isPreprocessingNumberBody) {
341  EXPECT_TRUE(isPreprocessingNumberBody('0'));
342  EXPECT_TRUE(isPreprocessingNumberBody('9'));
343
344  EXPECT_TRUE(isPreprocessingNumberBody('a'));
345  EXPECT_TRUE(isPreprocessingNumberBody('A'));
346
347  EXPECT_TRUE(isPreprocessingNumberBody('z'));
348  EXPECT_TRUE(isPreprocessingNumberBody('Z'));
349  EXPECT_TRUE(isPreprocessingNumberBody('.'));
350  EXPECT_TRUE(isPreprocessingNumberBody('_'));
351
352  EXPECT_FALSE(isPreprocessingNumberBody('/'));
353  EXPECT_FALSE(isPreprocessingNumberBody('('));
354  EXPECT_FALSE(isPreprocessingNumberBody('\0'));
355
356  EXPECT_FALSE(isPreprocessingNumberBody('\x80'));
357  EXPECT_FALSE(isPreprocessingNumberBody('\xc2'));
358  EXPECT_FALSE(isPreprocessingNumberBody('\xff'));
359}
360
361TEST(CharInfoTest, isRawStringDelimBody) {
362  EXPECT_TRUE(isRawStringDelimBody('0'));
363  EXPECT_TRUE(isRawStringDelimBody('9'));
364
365  EXPECT_TRUE(isRawStringDelimBody('a'));
366  EXPECT_TRUE(isRawStringDelimBody('A'));
367
368  EXPECT_TRUE(isRawStringDelimBody('z'));
369  EXPECT_TRUE(isRawStringDelimBody('Z'));
370  EXPECT_TRUE(isRawStringDelimBody('.'));
371  EXPECT_TRUE(isRawStringDelimBody('_'));
372
373  EXPECT_TRUE(isRawStringDelimBody('/'));
374  EXPECT_FALSE(isRawStringDelimBody('('));
375  EXPECT_FALSE(isRawStringDelimBody('\0'));
376
377  EXPECT_FALSE(isRawStringDelimBody('\x80'));
378  EXPECT_FALSE(isRawStringDelimBody('\xc2'));
379  EXPECT_FALSE(isRawStringDelimBody('\xff'));
380}
381
382TEST(CharInfoTest, toLowercase) {
383  EXPECT_EQ('0', toLowercase('0'));
384  EXPECT_EQ('9', toLowercase('9'));
385
386  EXPECT_EQ('a', toLowercase('a'));
387  EXPECT_EQ('a', toLowercase('A'));
388
389  EXPECT_EQ('z', toLowercase('z'));
390  EXPECT_EQ('z', toLowercase('Z'));
391
392  EXPECT_EQ('.', toLowercase('.'));
393  EXPECT_EQ('_', toLowercase('_'));
394
395  EXPECT_EQ('/', toLowercase('/'));
396  EXPECT_EQ('\0', toLowercase('\0'));
397}
398
399TEST(CharInfoTest, toUppercase) {
400  EXPECT_EQ('0', toUppercase('0'));
401  EXPECT_EQ('9', toUppercase('9'));
402
403  EXPECT_EQ('A', toUppercase('a'));
404  EXPECT_EQ('A', toUppercase('A'));
405
406  EXPECT_EQ('Z', toUppercase('z'));
407  EXPECT_EQ('Z', toUppercase('Z'));
408
409  EXPECT_EQ('.', toUppercase('.'));
410  EXPECT_EQ('_', toUppercase('_'));
411
412  EXPECT_EQ('/', toUppercase('/'));
413  EXPECT_EQ('\0', toUppercase('\0'));
414}
415
416TEST(CharInfoTest, isValidIdentifier) {
417  EXPECT_FALSE(isValidIdentifier(""));
418
419  // 1 character
420  EXPECT_FALSE(isValidIdentifier("."));
421  EXPECT_FALSE(isValidIdentifier("\n"));
422  EXPECT_FALSE(isValidIdentifier(" "));
423  EXPECT_FALSE(isValidIdentifier("\x80"));
424  EXPECT_FALSE(isValidIdentifier("\xc2"));
425  EXPECT_FALSE(isValidIdentifier("\xff"));
426  EXPECT_FALSE(isValidIdentifier("$"));
427  EXPECT_FALSE(isValidIdentifier("1"));
428
429  EXPECT_TRUE(isValidIdentifier("_"));
430  EXPECT_TRUE(isValidIdentifier("a"));
431  EXPECT_TRUE(isValidIdentifier("z"));
432  EXPECT_TRUE(isValidIdentifier("A"));
433  EXPECT_TRUE(isValidIdentifier("Z"));
434  EXPECT_TRUE(isValidIdentifier("$"/*AllowDollar=*/true));
435
436  // 2 characters, '_' suffix
437  EXPECT_FALSE(isValidIdentifier("._"));
438  EXPECT_FALSE(isValidIdentifier("\n_"));
439  EXPECT_FALSE(isValidIdentifier(" _"));
440  EXPECT_FALSE(isValidIdentifier("\x80_"));
441  EXPECT_FALSE(isValidIdentifier("\xc2_"));
442  EXPECT_FALSE(isValidIdentifier("\xff_"));
443  EXPECT_FALSE(isValidIdentifier("$_"));
444  EXPECT_FALSE(isValidIdentifier("1_"));
445
446  EXPECT_TRUE(isValidIdentifier("__"));
447  EXPECT_TRUE(isValidIdentifier("a_"));
448  EXPECT_TRUE(isValidIdentifier("z_"));
449  EXPECT_TRUE(isValidIdentifier("A_"));
450  EXPECT_TRUE(isValidIdentifier("Z_"));
451  EXPECT_TRUE(isValidIdentifier("$_"/*AllowDollar=*/true));
452
453  // 2 characters, '_' prefix
454  EXPECT_FALSE(isValidIdentifier("_."));
455  EXPECT_FALSE(isValidIdentifier("_\n"));
456  EXPECT_FALSE(isValidIdentifier("_ "));
457  EXPECT_FALSE(isValidIdentifier("_\x80"));
458  EXPECT_FALSE(isValidIdentifier("_\xc2"));
459  EXPECT_FALSE(isValidIdentifier("_\xff"));
460  EXPECT_FALSE(isValidIdentifier("_$"));
461  EXPECT_TRUE(isValidIdentifier("_1"));
462
463  EXPECT_TRUE(isValidIdentifier("__"));
464  EXPECT_TRUE(isValidIdentifier("_a"));
465  EXPECT_TRUE(isValidIdentifier("_z"));
466  EXPECT_TRUE(isValidIdentifier("_A"));
467  EXPECT_TRUE(isValidIdentifier("_Z"));
468  EXPECT_TRUE(isValidIdentifier("_$"/*AllowDollar=*/true));
469
470  // 3 characters, '__' prefix
471  EXPECT_FALSE(isValidIdentifier("__."));
472  EXPECT_FALSE(isValidIdentifier("__\n"));
473  EXPECT_FALSE(isValidIdentifier("__ "));
474  EXPECT_FALSE(isValidIdentifier("__\x80"));
475  EXPECT_FALSE(isValidIdentifier("__\xc2"));
476  EXPECT_FALSE(isValidIdentifier("__\xff"));
477  EXPECT_FALSE(isValidIdentifier("__$"));
478  EXPECT_TRUE(isValidIdentifier("__1"));
479
480  EXPECT_TRUE(isValidIdentifier("___"));
481  EXPECT_TRUE(isValidIdentifier("__a"));
482  EXPECT_TRUE(isValidIdentifier("__z"));
483  EXPECT_TRUE(isValidIdentifier("__A"));
484  EXPECT_TRUE(isValidIdentifier("__Z"));
485  EXPECT_TRUE(isValidIdentifier("__$"/*AllowDollar=*/true));
486
487  // 3 characters, '_' prefix and suffix
488  EXPECT_FALSE(isValidIdentifier("_._"));
489  EXPECT_FALSE(isValidIdentifier("_\n_"));
490  EXPECT_FALSE(isValidIdentifier("_ _"));
491  EXPECT_FALSE(isValidIdentifier("_\x80_"));
492  EXPECT_FALSE(isValidIdentifier("_\xc2_"));
493  EXPECT_FALSE(isValidIdentifier("_\xff_"));
494  EXPECT_FALSE(isValidIdentifier("_$_"));
495  EXPECT_TRUE(isValidIdentifier("_1_"));
496
497  EXPECT_TRUE(isValidIdentifier("___"));
498  EXPECT_TRUE(isValidIdentifier("_a_"));
499  EXPECT_TRUE(isValidIdentifier("_z_"));
500  EXPECT_TRUE(isValidIdentifier("_A_"));
501  EXPECT_TRUE(isValidIdentifier("_Z_"));
502  EXPECT_TRUE(isValidIdentifier("_$_"/*AllowDollar=*/true));
503}
504