//===---------- Support/UnicodeCaseFold.cpp -------------------------------===//
//
// This file was generated by utils/unicode-case-fold.py from the Unicode
// case folding database at
//    http://www.unicode.org/Public/9.0.0/ucd/CaseFolding.txt
//
// To regenerate this file, run:
//   utils/unicode-case-fold.py \
//     "http://www.unicode.org/Public/9.0.0/ucd/CaseFolding.txt" \
//     > lib/Support/UnicodeCaseFold.cpp
//
//===----------------------------------------------------------------------===//
13
14 #include "llvm/Support/Unicode.h"
15
/// Map a single code point to its simple case-folded form.
///
/// The body is a generated chain of checks in ascending code point order,
/// built from the Unicode 9.0.0 CaseFolding.txt data. Every `C < X` guard
/// returns early for code points that sit in a gap between mapped ranges, so
/// the order of the checks is significant and must not be rearranged.
///
/// Patterns used by the range checks:
///  - `C + K`: every code point of a contiguous run is shifted by the same
///    constant K (K may be negative).
///  - `C | 1`: alternating pairs in which an even code point maps to the odd
///    one right after it; odd code points in the run are returned unchanged.
///  - `C % M == R` before a shift: only the code points of the run with that
///    remainder are mapped; the others fall through to the next guard.
///
/// \param C the code point to fold. Values with no mapping in this table
///          (including anything below 0x41, negative values and anything
///          above 0x1e921) are returned as is.
/// \returns the folded code point, or \p C itself when no mapping applies.
int llvm::sys::unicode::foldCharSimple(int C) {
  if (C < 0x0041)
    return C;
  // 26 characters
  if (C <= 0x005a)
    return C + 32;
  // MICRO SIGN
  if (C == 0x00b5)
    return 0x03bc;
  if (C < 0x00c0)
    return C;
  // 23 characters
  if (C <= 0x00d6)
    return C + 32;
  if (C < 0x00d8)
    return C;
  // 7 characters
  if (C <= 0x00de)
    return C + 32;
  if (C < 0x0100)
    return C;
  // 24 characters
  // Even code points map to the following odd one; odd ones stay as they are.
  if (C <= 0x012e)
    return C | 1;
  if (C < 0x0132)
    return C;
  // 3 characters
  if (C <= 0x0136)
    return C | 1;
  if (C < 0x0139)
    return C;
  // 8 characters
  // Here the mapped code points are the odd ones; even ones fall through.
  if (C <= 0x0147 && C % 2 == 1)
    return C + 1;
  if (C < 0x014a)
    return C;
  // 23 characters
  if (C <= 0x0176)
    return C | 1;
  // LATIN CAPITAL LETTER Y WITH DIAERESIS
  if (C == 0x0178)
    return 0x00ff;
  if (C < 0x0179)
    return C;
  // 3 characters
  if (C <= 0x017d && C % 2 == 1)
    return C + 1;
  // LATIN SMALL LETTER LONG S
  if (C == 0x017f)
    return 0x0073;
  // LATIN CAPITAL LETTER B WITH HOOK
  if (C == 0x0181)
    return 0x0253;
  if (C < 0x0182)
    return C;
  // 2 characters
  if (C <= 0x0184)
    return C | 1;
  // LATIN CAPITAL LETTER OPEN O
  if (C == 0x0186)
    return 0x0254;
  // LATIN CAPITAL LETTER C WITH HOOK
  if (C == 0x0187)
    return 0x0188;
  if (C < 0x0189)
    return C;
  // 2 characters
  if (C <= 0x018a)
    return C + 205;
  // LATIN CAPITAL LETTER D WITH TOPBAR
  if (C == 0x018b)
    return 0x018c;
  // LATIN CAPITAL LETTER REVERSED E
  if (C == 0x018e)
    return 0x01dd;
  // LATIN CAPITAL LETTER SCHWA
  if (C == 0x018f)
    return 0x0259;
  // LATIN CAPITAL LETTER OPEN E
  if (C == 0x0190)
    return 0x025b;
  // LATIN CAPITAL LETTER F WITH HOOK
  if (C == 0x0191)
    return 0x0192;
  // LATIN CAPITAL LETTER G WITH HOOK
  if (C == 0x0193)
    return 0x0260;
  // LATIN CAPITAL LETTER GAMMA
  if (C == 0x0194)
    return 0x0263;
  // LATIN CAPITAL LETTER IOTA
  if (C == 0x0196)
    return 0x0269;
  // LATIN CAPITAL LETTER I WITH STROKE
  if (C == 0x0197)
    return 0x0268;
  // LATIN CAPITAL LETTER K WITH HOOK
  if (C == 0x0198)
    return 0x0199;
  // LATIN CAPITAL LETTER TURNED M
  if (C == 0x019c)
    return 0x026f;
  // LATIN CAPITAL LETTER N WITH LEFT HOOK
  if (C == 0x019d)
    return 0x0272;
  // LATIN CAPITAL LETTER O WITH MIDDLE TILDE
  if (C == 0x019f)
    return 0x0275;
  if (C < 0x01a0)
    return C;
  // 3 characters
  if (C <= 0x01a4)
    return C | 1;
  // LATIN LETTER YR
  if (C == 0x01a6)
    return 0x0280;
  // LATIN CAPITAL LETTER TONE TWO
  if (C == 0x01a7)
    return 0x01a8;
  // LATIN CAPITAL LETTER ESH
  if (C == 0x01a9)
    return 0x0283;
  // LATIN CAPITAL LETTER T WITH HOOK
  if (C == 0x01ac)
    return 0x01ad;
  // LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
  if (C == 0x01ae)
    return 0x0288;
  // LATIN CAPITAL LETTER U WITH HORN
  if (C == 0x01af)
    return 0x01b0;
  if (C < 0x01b1)
    return C;
  // 2 characters
  if (C <= 0x01b2)
    return C + 217;
  if (C < 0x01b3)
    return C;
  // 2 characters
  if (C <= 0x01b5 && C % 2 == 1)
    return C + 1;
  // LATIN CAPITAL LETTER EZH
  if (C == 0x01b7)
    return 0x0292;
  if (C < 0x01b8)
    return C;
  // 2 characters
  if (C <= 0x01bc && C % 4 == 0)
    return C + 1;
  // LATIN CAPITAL LETTER DZ WITH CARON
  if (C == 0x01c4)
    return 0x01c6;
  // LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
  if (C == 0x01c5)
    return 0x01c6;
  // LATIN CAPITAL LETTER LJ
  if (C == 0x01c7)
    return 0x01c9;
  // LATIN CAPITAL LETTER L WITH SMALL LETTER J
  if (C == 0x01c8)
    return 0x01c9;
  // LATIN CAPITAL LETTER NJ
  if (C == 0x01ca)
    return 0x01cc;
  if (C < 0x01cb)
    return C;
  // 9 characters
  if (C <= 0x01db && C % 2 == 1)
    return C + 1;
  if (C < 0x01de)
    return C;
  // 9 characters
  if (C <= 0x01ee)
    return C | 1;
  // LATIN CAPITAL LETTER DZ
  if (C == 0x01f1)
    return 0x01f3;
  if (C < 0x01f2)
    return C;
  // 2 characters
  if (C <= 0x01f4)
    return C | 1;
  // LATIN CAPITAL LETTER HWAIR
  if (C == 0x01f6)
    return 0x0195;
  // LATIN CAPITAL LETTER WYNN
  if (C == 0x01f7)
    return 0x01bf;
  if (C < 0x01f8)
    return C;
  // 20 characters
  if (C <= 0x021e)
    return C | 1;
  // LATIN CAPITAL LETTER N WITH LONG RIGHT LEG
  if (C == 0x0220)
    return 0x019e;
  if (C < 0x0222)
    return C;
  // 9 characters
  if (C <= 0x0232)
    return C | 1;
  // LATIN CAPITAL LETTER A WITH STROKE
  if (C == 0x023a)
    return 0x2c65;
  // LATIN CAPITAL LETTER C WITH STROKE
  if (C == 0x023b)
    return 0x023c;
  // LATIN CAPITAL LETTER L WITH BAR
  if (C == 0x023d)
    return 0x019a;
  // LATIN CAPITAL LETTER T WITH DIAGONAL STROKE
  if (C == 0x023e)
    return 0x2c66;
  // LATIN CAPITAL LETTER GLOTTAL STOP
  if (C == 0x0241)
    return 0x0242;
  // LATIN CAPITAL LETTER B WITH STROKE
  if (C == 0x0243)
    return 0x0180;
  // LATIN CAPITAL LETTER U BAR
  if (C == 0x0244)
    return 0x0289;
  // LATIN CAPITAL LETTER TURNED V
  if (C == 0x0245)
    return 0x028c;
  if (C < 0x0246)
    return C;
  // 5 characters
  if (C <= 0x024e)
    return C | 1;
  // COMBINING GREEK YPOGEGRAMMENI
  if (C == 0x0345)
    return 0x03b9;
  if (C < 0x0370)
    return C;
  // 2 characters
  if (C <= 0x0372)
    return C | 1;
  // GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA
  if (C == 0x0376)
    return 0x0377;
  // GREEK CAPITAL LETTER YOT
  if (C == 0x037f)
    return 0x03f3;
  // GREEK CAPITAL LETTER ALPHA WITH TONOS
  if (C == 0x0386)
    return 0x03ac;
  if (C < 0x0388)
    return C;
  // 3 characters
  if (C <= 0x038a)
    return C + 37;
  // GREEK CAPITAL LETTER OMICRON WITH TONOS
  if (C == 0x038c)
    return 0x03cc;
  if (C < 0x038e)
    return C;
  // 2 characters
  if (C <= 0x038f)
    return C + 63;
  if (C < 0x0391)
    return C;
  // 17 characters
  if (C <= 0x03a1)
    return C + 32;
  if (C < 0x03a3)
    return C;
  // 9 characters
  if (C <= 0x03ab)
    return C + 32;
  // GREEK SMALL LETTER FINAL SIGMA
  if (C == 0x03c2)
    return 0x03c3;
  // GREEK CAPITAL KAI SYMBOL
  if (C == 0x03cf)
    return 0x03d7;
  // GREEK BETA SYMBOL
  if (C == 0x03d0)
    return 0x03b2;
  // GREEK THETA SYMBOL
  if (C == 0x03d1)
    return 0x03b8;
  // GREEK PHI SYMBOL
  if (C == 0x03d5)
    return 0x03c6;
  // GREEK PI SYMBOL
  if (C == 0x03d6)
    return 0x03c0;
  if (C < 0x03d8)
    return C;
  // 12 characters
  if (C <= 0x03ee)
    return C | 1;
  // GREEK KAPPA SYMBOL
  if (C == 0x03f0)
    return 0x03ba;
  // GREEK RHO SYMBOL
  if (C == 0x03f1)
    return 0x03c1;
  // GREEK CAPITAL THETA SYMBOL
  if (C == 0x03f4)
    return 0x03b8;
  // GREEK LUNATE EPSILON SYMBOL
  if (C == 0x03f5)
    return 0x03b5;
  // GREEK CAPITAL LETTER SHO
  if (C == 0x03f7)
    return 0x03f8;
  // GREEK CAPITAL LUNATE SIGMA SYMBOL
  if (C == 0x03f9)
    return 0x03f2;
  // GREEK CAPITAL LETTER SAN
  if (C == 0x03fa)
    return 0x03fb;
  if (C < 0x03fd)
    return C;
  // 3 characters
  if (C <= 0x03ff)
    return C + -130;
  if (C < 0x0400)
    return C;
  // 16 characters
  if (C <= 0x040f)
    return C + 80;
  if (C < 0x0410)
    return C;
  // 32 characters
  if (C <= 0x042f)
    return C + 32;
  if (C < 0x0460)
    return C;
  // 17 characters
  if (C <= 0x0480)
    return C | 1;
  if (C < 0x048a)
    return C;
  // 27 characters
  if (C <= 0x04be)
    return C | 1;
  // CYRILLIC LETTER PALOCHKA
  if (C == 0x04c0)
    return 0x04cf;
  if (C < 0x04c1)
    return C;
  // 7 characters
  if (C <= 0x04cd && C % 2 == 1)
    return C + 1;
  if (C < 0x04d0)
    return C;
  // 48 characters
  if (C <= 0x052e)
    return C | 1;
  if (C < 0x0531)
    return C;
  // 38 characters
  if (C <= 0x0556)
    return C + 48;
  if (C < 0x10a0)
    return C;
  // 38 characters
  if (C <= 0x10c5)
    return C + 7264;
  if (C < 0x10c7)
    return C;
  // 2 characters
  if (C <= 0x10cd && C % 6 == 5)
    return C + 7264;
  if (C < 0x13f8)
    return C;
  // 6 characters
  if (C <= 0x13fd)
    return C + -8;
  // CYRILLIC SMALL LETTER ROUNDED VE
  if (C == 0x1c80)
    return 0x0432;
  // CYRILLIC SMALL LETTER LONG-LEGGED DE
  if (C == 0x1c81)
    return 0x0434;
  // CYRILLIC SMALL LETTER NARROW O
  if (C == 0x1c82)
    return 0x043e;
  if (C < 0x1c83)
    return C;
  // 2 characters
  if (C <= 0x1c84)
    return C + -6210;
  // CYRILLIC SMALL LETTER THREE-LEGGED TE
  if (C == 0x1c85)
    return 0x0442;
  // CYRILLIC SMALL LETTER TALL HARD SIGN
  if (C == 0x1c86)
    return 0x044a;
  // CYRILLIC SMALL LETTER TALL YAT
  if (C == 0x1c87)
    return 0x0463;
  // CYRILLIC SMALL LETTER UNBLENDED UK
  if (C == 0x1c88)
    return 0xa64b;
  if (C < 0x1e00)
    return C;
  // 75 characters
  if (C <= 0x1e94)
    return C | 1;
  // LATIN SMALL LETTER LONG S WITH DOT ABOVE
  if (C == 0x1e9b)
    return 0x1e61;
  // LATIN CAPITAL LETTER SHARP S
  if (C == 0x1e9e)
    return 0x00df;
  if (C < 0x1ea0)
    return C;
  // 48 characters
  if (C <= 0x1efe)
    return C | 1;
  if (C < 0x1f08)
    return C;
  // 8 characters
  if (C <= 0x1f0f)
    return C + -8;
  if (C < 0x1f18)
    return C;
  // 6 characters
  if (C <= 0x1f1d)
    return C + -8;
  if (C < 0x1f28)
    return C;
  // 8 characters
  if (C <= 0x1f2f)
    return C + -8;
  if (C < 0x1f38)
    return C;
  // 8 characters
  if (C <= 0x1f3f)
    return C + -8;
  if (C < 0x1f48)
    return C;
  // 6 characters
  if (C <= 0x1f4d)
    return C + -8;
  if (C < 0x1f59)
    return C;
  // 4 characters
  if (C <= 0x1f5f && C % 2 == 1)
    return C + -8;
  if (C < 0x1f68)
    return C;
  // 8 characters
  if (C <= 0x1f6f)
    return C + -8;
  if (C < 0x1f88)
    return C;
  // 8 characters
  if (C <= 0x1f8f)
    return C + -8;
  if (C < 0x1f98)
    return C;
  // 8 characters
  if (C <= 0x1f9f)
    return C + -8;
  if (C < 0x1fa8)
    return C;
  // 8 characters
  if (C <= 0x1faf)
    return C + -8;
  if (C < 0x1fb8)
    return C;
  // 2 characters
  if (C <= 0x1fb9)
    return C + -8;
  if (C < 0x1fba)
    return C;
  // 2 characters
  if (C <= 0x1fbb)
    return C + -74;
  // GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
  if (C == 0x1fbc)
    return 0x1fb3;
  // GREEK PROSGEGRAMMENI
  if (C == 0x1fbe)
    return 0x03b9;
  if (C < 0x1fc8)
    return C;
  // 4 characters
  if (C <= 0x1fcb)
    return C + -86;
  // GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
  if (C == 0x1fcc)
    return 0x1fc3;
  if (C < 0x1fd8)
    return C;
  // 2 characters
  if (C <= 0x1fd9)
    return C + -8;
  if (C < 0x1fda)
    return C;
  // 2 characters
  if (C <= 0x1fdb)
    return C + -100;
  if (C < 0x1fe8)
    return C;
  // 2 characters
  if (C <= 0x1fe9)
    return C + -8;
  if (C < 0x1fea)
    return C;
  // 2 characters
  if (C <= 0x1feb)
    return C + -112;
  // GREEK CAPITAL LETTER RHO WITH DASIA
  if (C == 0x1fec)
    return 0x1fe5;
  if (C < 0x1ff8)
    return C;
  // 2 characters
  if (C <= 0x1ff9)
    return C + -128;
  if (C < 0x1ffa)
    return C;
  // 2 characters
  if (C <= 0x1ffb)
    return C + -126;
  // GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
  if (C == 0x1ffc)
    return 0x1ff3;
  // OHM SIGN
  if (C == 0x2126)
    return 0x03c9;
  // KELVIN SIGN
  if (C == 0x212a)
    return 0x006b;
  // ANGSTROM SIGN
  if (C == 0x212b)
    return 0x00e5;
  // TURNED CAPITAL F
  if (C == 0x2132)
    return 0x214e;
  if (C < 0x2160)
    return C;
  // 16 characters
  if (C <= 0x216f)
    return C + 16;
  // ROMAN NUMERAL REVERSED ONE HUNDRED
  if (C == 0x2183)
    return 0x2184;
  if (C < 0x24b6)
    return C;
  // 26 characters
  if (C <= 0x24cf)
    return C + 26;
  if (C < 0x2c00)
    return C;
  // 47 characters
  if (C <= 0x2c2e)
    return C + 48;
  // LATIN CAPITAL LETTER L WITH DOUBLE BAR
  if (C == 0x2c60)
    return 0x2c61;
  // LATIN CAPITAL LETTER L WITH MIDDLE TILDE
  if (C == 0x2c62)
    return 0x026b;
  // LATIN CAPITAL LETTER P WITH STROKE
  if (C == 0x2c63)
    return 0x1d7d;
  // LATIN CAPITAL LETTER R WITH TAIL
  if (C == 0x2c64)
    return 0x027d;
  if (C < 0x2c67)
    return C;
  // 3 characters
  if (C <= 0x2c6b && C % 2 == 1)
    return C + 1;
  // LATIN CAPITAL LETTER ALPHA
  if (C == 0x2c6d)
    return 0x0251;
  // LATIN CAPITAL LETTER M WITH HOOK
  if (C == 0x2c6e)
    return 0x0271;
  // LATIN CAPITAL LETTER TURNED A
  if (C == 0x2c6f)
    return 0x0250;
  // LATIN CAPITAL LETTER TURNED ALPHA
  if (C == 0x2c70)
    return 0x0252;
  if (C < 0x2c72)
    return C;
  // 2 characters
  if (C <= 0x2c75 && C % 3 == 2)
    return C + 1;
  if (C < 0x2c7e)
    return C;
  // 2 characters
  if (C <= 0x2c7f)
    return C + -10815;
  if (C < 0x2c80)
    return C;
  // 50 characters
  if (C <= 0x2ce2)
    return C | 1;
  if (C < 0x2ceb)
    return C;
  // 2 characters
  if (C <= 0x2ced && C % 2 == 1)
    return C + 1;
  if (C < 0x2cf2)
    return C;
  // 2 characters
  // 31054 is 0xa640 - 0x2cf2 and 11506 is 0x2cf2, so within this range the
  // test holds only for the two endpoints, 0x2cf2 and 0xa640. Everything in
  // between is returned unchanged by the guard that follows.
  if (C <= 0xa640 && C % 31054 == 11506)
    return C + 1;
  if (C < 0xa642)
    return C;
  // 22 characters
  if (C <= 0xa66c)
    return C | 1;
  if (C < 0xa680)
    return C;
  // 14 characters
  if (C <= 0xa69a)
    return C | 1;
  if (C < 0xa722)
    return C;
  // 7 characters
  if (C <= 0xa72e)
    return C | 1;
  if (C < 0xa732)
    return C;
  // 31 characters
  if (C <= 0xa76e)
    return C | 1;
  if (C < 0xa779)
    return C;
  // 2 characters
  if (C <= 0xa77b && C % 2 == 1)
    return C + 1;
  // LATIN CAPITAL LETTER INSULAR G
  if (C == 0xa77d)
    return 0x1d79;
  if (C < 0xa77e)
    return C;
  // 5 characters
  if (C <= 0xa786)
    return C | 1;
  // LATIN CAPITAL LETTER SALTILLO
  if (C == 0xa78b)
    return 0xa78c;
  // LATIN CAPITAL LETTER TURNED H
  if (C == 0xa78d)
    return 0x0265;
  if (C < 0xa790)
    return C;
  // 2 characters
  if (C <= 0xa792)
    return C | 1;
  if (C < 0xa796)
    return C;
  // 10 characters
  if (C <= 0xa7a8)
    return C | 1;
  // LATIN CAPITAL LETTER H WITH HOOK
  if (C == 0xa7aa)
    return 0x0266;
  // LATIN CAPITAL LETTER REVERSED OPEN E
  if (C == 0xa7ab)
    return 0x025c;
  // LATIN CAPITAL LETTER SCRIPT G
  if (C == 0xa7ac)
    return 0x0261;
  // LATIN CAPITAL LETTER L WITH BELT
  if (C == 0xa7ad)
    return 0x026c;
  // LATIN CAPITAL LETTER SMALL CAPITAL I
  if (C == 0xa7ae)
    return 0x026a;
  // LATIN CAPITAL LETTER TURNED K
  if (C == 0xa7b0)
    return 0x029e;
  // LATIN CAPITAL LETTER TURNED T
  if (C == 0xa7b1)
    return 0x0287;
  // LATIN CAPITAL LETTER J WITH CROSSED-TAIL
  if (C == 0xa7b2)
    return 0x029d;
  // LATIN CAPITAL LETTER CHI
  if (C == 0xa7b3)
    return 0xab53;
  if (C < 0xa7b4)
    return C;
  // 2 characters
  if (C <= 0xa7b6)
    return C | 1;
  if (C < 0xab70)
    return C;
  // 80 characters
  if (C <= 0xabbf)
    return C + -38864;
  if (C < 0xff21)
    return C;
  // 26 characters
  if (C <= 0xff3a)
    return C + 32;
  if (C < 0x10400)
    return C;
  // 40 characters
  if (C <= 0x10427)
    return C + 40;
  if (C < 0x104b0)
    return C;
  // 36 characters
  if (C <= 0x104d3)
    return C + 40;
  if (C < 0x10c80)
    return C;
  // 51 characters
  if (C <= 0x10cb2)
    return C + 64;
  if (C < 0x118a0)
    return C;
  // 32 characters
  if (C <= 0x118bf)
    return C + 32;
  if (C < 0x1e900)
    return C;
  // 34 characters
  if (C <= 0x1e921)
    return C + 34;

  return C;
}
743