1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html#License
3 /**
4  *******************************************************************************
5  * Copyright (C) 2001-2016 International Business Machines Corporation and
6  * others. All Rights Reserved.
7  *******************************************************************************
8  */
9 
10 package com.ibm.icu.lang;
11 
12 import java.util.BitSet;
13 import java.util.Locale;
14 
15 import com.ibm.icu.impl.UCharacterProperty;
16 import com.ibm.icu.util.ULocale;
17 
18 /**
19  * Constants for ISO 15924 script codes, and related functions.
20  *
21  * <p>The current set of script code constants supports at least all scripts
22  * that are encoded in the version of Unicode which ICU currently supports.
23  * The names of the constants are usually derived from the
24  * Unicode script property value aliases.
25  * See UAX #24 Unicode Script Property (http://www.unicode.org/reports/tr24/)
26  * and http://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt .
27  *
28  * <p>In addition, constants for many ISO 15924 script codes
29  * are included, for use with language tags, CLDR data, and similar.
30  * Some of those codes are not used in the Unicode Character Database (UCD).
31  * For example, there are no characters that have a UCD script property value of
32  * Hans or Hant. All Han ideographs have the Hani script property value in Unicode.
33  *
34  * <p>Private-use codes Qaaa..Qabx are not included, except as used in the UCD or in CLDR.
35  *
36  * <p>Starting with ICU 55, script codes are only added when their scripts
37  * have been or will certainly be encoded in Unicode,
38  * and have been assigned Unicode script property value aliases,
39  * to ensure that their script names are stable and match the names of the constants.
40  * Script codes like Latf and Aran that are not subject to separate encoding
41  * may be added at any time.
42  *
43  * @stable ICU 2.4
44  */
45 public final class UScript {
46     /**
47      * Invalid code
48      * @stable ICU 2.4
49      */
50     public static final int INVALID_CODE = -1;
51     /**
52      * Common
53      * @stable ICU 2.4
54      */
55     public static final int COMMON       =  0;  /* Zyyy */
56     /**
57      * Inherited
58      * @stable ICU 2.4
59      */
60     public static final int INHERITED    =  1;  /* Zinh */ /* "Code for inherited script", for non-spacing combining marks; also Qaai */
61     /**
62      * Arabic
63      * @stable ICU 2.4
64      */
65     public static final int ARABIC       =  2;  /* Arab */
66     /**
67      * Armenian
68      * @stable ICU 2.4
69      */
70     public static final int ARMENIAN     =  3;  /* Armn */
71     /**
72      * Bengali
73      * @stable ICU 2.4
74      */
75     public static final int BENGALI      =  4;  /* Beng */
76     /**
77      * Bopomofo
78      * @stable ICU 2.4
79      */
80     public static final int BOPOMOFO     =  5;  /* Bopo */
81     /**
82      * Cherokee
83      * @stable ICU 2.4
84      */
85     public static final int CHEROKEE     =  6;  /* Cher */
86     /**
87      * Coptic
88      * @stable ICU 2.4
89      */
90     public static final int COPTIC       =  7;  /* Qaac */
91     /**
92      * Cyrillic
93      * @stable ICU 2.4
94      */
95     public static final int CYRILLIC     =  8;  /* Cyrl (Cyrs) */
96     /**
97      * Deseret
98      * @stable ICU 2.4
99      */
100     public static final int DESERET      =  9;  /* Dsrt */
101     /**
102      * Devanagari
103      * @stable ICU 2.4
104      */
105     public static final int DEVANAGARI   = 10;  /* Deva */
106     /**
107      * Ethiopic
108      * @stable ICU 2.4
109      */
110     public static final int ETHIOPIC     = 11;  /* Ethi */
111     /**
112      * Georgian
113      * @stable ICU 2.4
114      */
115     public static final int GEORGIAN     = 12;  /* Geor (Geon; Geoa) */
116     /**
117      * Gothic
118      * @stable ICU 2.4
119      */
120     public static final int GOTHIC       = 13;  /* Goth */
121     /**
122      * Greek
123      * @stable ICU 2.4
124      */
125     public static final int GREEK        = 14;  /* Grek */
126     /**
127      * Gujarati
128      * @stable ICU 2.4
129      */
130     public static final int GUJARATI     = 15;  /* Gujr */
131     /**
132      * Gurmukhi
133      * @stable ICU 2.4
134      */
135     public static final int GURMUKHI     = 16;  /* Guru */
136     /**
137      * Han
138      * @stable ICU 2.4
139      */
140     public static final int HAN          = 17;  /* Hani */
141     /**
142      * Hangul
143      * @stable ICU 2.4
144      */
145     public static final int HANGUL       = 18;  /* Hang */
146     /**
147      * Hebrew
148      * @stable ICU 2.4
149      */
150     public static final int HEBREW       = 19;  /* Hebr */
151     /**
152      * Hiragana
153      * @stable ICU 2.4
154      */
155     public static final int HIRAGANA     = 20;  /* Hira */
156     /**
157      * Kannada
158      * @stable ICU 2.4
159      */
160     public static final int KANNADA      = 21;  /* Knda */
161     /**
162      * Katakana
163      * @stable ICU 2.4
164      */
165     public static final int KATAKANA     = 22;  /* Kana */
166     /**
167      * Khmer
168      * @stable ICU 2.4
169      */
170     public static final int KHMER        = 23;  /* Khmr */
171     /**
172      * Lao
173      * @stable ICU 2.4
174      */
175     public static final int LAO          = 24;  /* Laoo */
176     /**
177      * Latin
178      * @stable ICU 2.4
179      */
180     public static final int LATIN        = 25;  /* Latn (Latf; Latg) */
181     /**
182      * Malayalam
183      * @stable ICU 2.4
184      */
185     public static final int MALAYALAM    = 26;  /* Mlym */
186     /**
     * Mongolian
188      * @stable ICU 2.4
189      */
190     public static final int MONGOLIAN    = 27;  /* Mong */
191     /**
     * Myanmar
193      * @stable ICU 2.4
194      */
195     public static final int MYANMAR      = 28;  /* Mymr */
196     /**
197      * Ogham
198      * @stable ICU 2.4
199      */
200     public static final int OGHAM        = 29;  /* Ogam */
201     /**
     * Old Italic
203      * @stable ICU 2.4
204      */
205     public static final int OLD_ITALIC   = 30;  /* Ital */
206     /**
207      * Oriya
208      * @stable ICU 2.4
209      */
210     public static final int ORIYA        = 31;  /* Orya */
211     /**
212      * Runic
213      * @stable ICU 2.4
214      */
215     public static final int RUNIC        = 32;  /* Runr */
216     /**
217      * Sinhala
218      * @stable ICU 2.4
219      */
220     public static final int SINHALA      = 33;  /* Sinh */
221     /**
222      * Syriac
223      * @stable ICU 2.4
224      */
225     public static final int SYRIAC       = 34;  /* Syrc (Syrj; Syrn; Syre) */
226     /**
227      * Tamil
228      * @stable ICU 2.4
229      */
230     public static final int TAMIL        = 35;  /* Taml */
231     /**
232      * Telugu
233      * @stable ICU 2.4
234      */
235     public static final int TELUGU       = 36;  /* Telu */
236     /**
     * Thaana
238      * @stable ICU 2.4
239      */
240     public static final int THAANA       = 37;  /* Thaa */
241     /**
242      * Thai
243      * @stable ICU 2.4
244      */
245     public static final int THAI         = 38;  /* Thai */
246     /**
247      * Tibetan
248      * @stable ICU 2.4
249      */
250     public static final int TIBETAN      = 39;  /* Tibt */
251     /**
     * Unified Canadian Aboriginal Syllabics
253      * @stable ICU 2.6
254      */
255     public static final int CANADIAN_ABORIGINAL = 40;  /* Cans */
256     /**
     * Unified Canadian Aboriginal Syllabics (alias)
258      * @stable ICU 2.4
259      */
260     public static final int UCAS         = CANADIAN_ABORIGINAL;  /* Cans */
261     /**
262      * Yi syllables
263      * @stable ICU 2.4
264      */
265     public static final int YI           = 41;  /* Yiii */
266     /**
267      * Tagalog
268      * @stable ICU 2.4
269      */
270     public static final int TAGALOG      = 42;  /* Tglg */
271     /**
     * Hanunoo
273      * @stable ICU 2.4
274      */
275     public static final int HANUNOO      = 43;  /* Hano */
276     /**
277      * Buhid
278      * @stable ICU 2.4
279      */
280     public static final int BUHID        = 44;  /* Buhd */
281     /**
282      * Tagbanwa
283      * @stable ICU 2.4
284      */
285     public static final int TAGBANWA     = 45;  /* Tagb */
286     /**
287      * Braille
288      * Script in Unicode 4
289      * @stable ICU 2.6
290      *
291      */
292     public static final int BRAILLE      = 46;  /* Brai */
293     /**
294      * Cypriot
295      * Script in Unicode 4
296      * @stable ICU 2.6
297      *
298      */
299     public static final int CYPRIOT              = 47;  /* Cprt */
300     /**
301      * Limbu
302      * Script in Unicode 4
303      * @stable ICU 2.6
304      *
305      */
306     public static final int LIMBU                = 48;  /* Limb */
307     /**
308      * Linear B
309      * Script in Unicode 4
310      * @stable ICU 2.6
311      *
312      */
313     public static final int LINEAR_B     = 49;  /* Linb */
314     /**
315      * Osmanya
316      * Script in Unicode 4
317      * @stable ICU 2.6
318      *
319      */
320     public static final int OSMANYA              = 50;  /* Osma */
321     /**
322      * Shavian
323      * Script in Unicode 4
324      * @stable ICU 2.6
325      *
326      */
327     public static final int SHAVIAN              = 51;  /* Shaw */
328     /**
329      * Tai Le
330      * Script in Unicode 4
331      * @stable ICU 2.6
332      *
333      */
334     public static final int TAI_LE               = 52;  /* Tale */
335     /**
336      * Ugaritic
337      * Script in Unicode 4
338      * @stable ICU 2.6
339      *
340      */
341     public static final int UGARITIC     = 53;  /* Ugar */
342     /**
343      * Script in Unicode 4.0.1
344      * @stable ICU 3.0
345      */
346     public static final int KATAKANA_OR_HIRAGANA = 54;  /*Hrkt */
347 
348     /**
349      * Script in Unicode 4.1
350      * @stable ICU 3.4
351      */
352     public static final int BUGINESE = 55;           /* Bugi */
353     /**
354      * Script in Unicode 4.1
355      * @stable ICU 3.4
356      */
357     public static final int GLAGOLITIC = 56;         /* Glag */
358     /**
359      * Script in Unicode 4.1
360      * @stable ICU 3.4
361      */
362     public static final int KHAROSHTHI = 57;         /* Khar */
363     /**
364      * Script in Unicode 4.1
365      * @stable ICU 3.4
366      */
367     public static final int SYLOTI_NAGRI = 58;       /* Sylo */
368     /**
369      * Script in Unicode 4.1
370      * @stable ICU 3.4
371      */
372     public static final int NEW_TAI_LUE = 59;        /* Talu */
373     /**
374      * Script in Unicode 4.1
375      * @stable ICU 3.4
376      */
377     public static final int TIFINAGH = 60;           /* Tfng */
378     /**
379      * Script in Unicode 4.1
380      * @stable ICU 3.4
381      */
382     public static final int OLD_PERSIAN = 61;        /* Xpeo */
383 
384 
385     /**
386      * ISO 15924 script code
387      * @stable ICU 3.6
388      */
389     public static final int BALINESE                      = 62; /* Bali */
390     /**
391      * ISO 15924 script code
392      * @stable ICU 3.6
393      */
394     public static final int BATAK                         = 63; /* Batk */
395     /**
396      * ISO 15924 script code
397      * @stable ICU 3.6
398      */
399     public static final int BLISSYMBOLS                   = 64; /* Blis */
400     /**
401      * ISO 15924 script code
402      * @stable ICU 3.6
403      */
404     public static final int BRAHMI                        = 65; /* Brah */
405     /**
406      * ISO 15924 script code
407      * @stable ICU 3.6
408      */
409     public static final int CHAM                          = 66; /* Cham */
410     /**
411      * ISO 15924 script code
412      * @stable ICU 3.6
413      */
414     public static final int CIRTH                         = 67; /* Cirt */
415     /**
416      * ISO 15924 script code
417      * @stable ICU 3.6
418      */
419     public static final int OLD_CHURCH_SLAVONIC_CYRILLIC  = 68; /* Cyrs */
420     /**
421      * ISO 15924 script code
422      * @stable ICU 3.6
423      */
424     public static final int DEMOTIC_EGYPTIAN              = 69; /* Egyd */
425     /**
426      * ISO 15924 script code
427      * @stable ICU 3.6
428      */
429     public static final int HIERATIC_EGYPTIAN             = 70; /* Egyh */
430     /**
431      * ISO 15924 script code
432      * @stable ICU 3.6
433      */
434     public static final int EGYPTIAN_HIEROGLYPHS          = 71; /* Egyp */
435     /**
436      * ISO 15924 script code
437      * @stable ICU 3.6
438      */
439     public static final int KHUTSURI                      = 72; /* Geok */
440     /**
441      * ISO 15924 script code
442      * @stable ICU 3.6
443      */
444     public static final int SIMPLIFIED_HAN                = 73; /* Hans */
445     /**
446      * ISO 15924 script code
447      * @stable ICU 3.6
448      */
449     public static final int TRADITIONAL_HAN               = 74; /* Hant */
450     /**
451      * ISO 15924 script code
452      * @stable ICU 3.6
453      */
454     public static final int PAHAWH_HMONG                  = 75; /* Hmng */
455     /**
456      * ISO 15924 script code
457      * @stable ICU 3.6
458      */
459     public static final int OLD_HUNGARIAN                 = 76; /* Hung */
460     /**
461      * ISO 15924 script code
462      * @stable ICU 3.6
463      */
464     public static final int HARAPPAN_INDUS                = 77; /* Inds */
465     /**
466      * ISO 15924 script code
467      * @stable ICU 3.6
468      */
469     public static final int JAVANESE                      = 78; /* Java */
470     /**
471      * ISO 15924 script code
472      * @stable ICU 3.6
473      */
474     public static final int KAYAH_LI                      = 79; /* Kali */
475     /**
476      * ISO 15924 script code
477      * @stable ICU 3.6
478      */
479     public static final int LATIN_FRAKTUR                 = 80; /* Latf */
480     /**
481      * ISO 15924 script code
482      * @stable ICU 3.6
483      */
484     public static final int LATIN_GAELIC                  = 81; /* Latg */
485     /**
486      * ISO 15924 script code
487      * @stable ICU 3.6
488      */
489     public static final int LEPCHA                        = 82; /* Lepc */
490     /**
491      * ISO 15924 script code
492      * @stable ICU 3.6
493      */
494     public static final int LINEAR_A                      = 83; /* Lina */
495     /**
496      * ISO 15924 script code
497      * @stable ICU 4.6
498      */
499     public static final int MANDAIC                       = 84; /* Mand */
500     /**
501      * ISO 15924 script code
502      * @stable ICU 3.6
503      */
504     public static final int MANDAEAN                      = MANDAIC;
505     /**
506      * ISO 15924 script code
507      * @stable ICU 3.6
508      */
509     public static final int MAYAN_HIEROGLYPHS             = 85; /* Maya */
510     /**
511      * ISO 15924 script code
512      * @stable ICU 4.6
513      */
514     public static final int MEROITIC_HIEROGLYPHS          = 86; /* Mero */
515     /**
516      * ISO 15924 script code
517      * @stable ICU 3.6
518      */
519     public static final int MEROITIC                      = MEROITIC_HIEROGLYPHS;
520     /**
521      * ISO 15924 script code
522      * @stable ICU 3.6
523      */
524     public static final int NKO                           = 87; /* Nkoo */
525     /**
526      * ISO 15924 script code
527      * @stable ICU 3.6
528      */
529     public static final int ORKHON                        = 88; /* Orkh */
530     /**
531      * ISO 15924 script code
532      * @stable ICU 3.6
533      */
534     public static final int OLD_PERMIC                    = 89; /* Perm */
535     /**
536      * ISO 15924 script code
537      * @stable ICU 3.6
538      */
539     public static final int PHAGS_PA                      = 90; /* Phag */
540     /**
541      * ISO 15924 script code
542      * @stable ICU 3.6
543      */
544     public static final int PHOENICIAN                    = 91; /* Phnx */
545     /**
546      * ISO 15924 script code
547      * @stable ICU 52
548      */
549     public static final int MIAO                          = 92; /* Plrd */
550     /**
551      * ISO 15924 script code
552      * @stable ICU 3.6
553      */
554     public static final int PHONETIC_POLLARD              = MIAO;
555     /**
556      * ISO 15924 script code
557      * @stable ICU 3.6
558      */
559     public static final int RONGORONGO                    = 93; /* Roro */
560     /**
561      * ISO 15924 script code
562      * @stable ICU 3.6
563      */
564     public static final int SARATI                        = 94; /* Sara */
565     /**
566      * ISO 15924 script code
567      * @stable ICU 3.6
568      */
569     public static final int ESTRANGELO_SYRIAC             = 95; /* Syre */
570     /**
571      * ISO 15924 script code
572      * @stable ICU 3.6
573      */
574     public static final int WESTERN_SYRIAC                = 96; /* Syrj */
575     /**
576      * ISO 15924 script code
577      * @stable ICU 3.6
578      */
579     public static final int EASTERN_SYRIAC                = 97; /* Syrn */
580     /**
581      * ISO 15924 script code
582      * @stable ICU 3.6
583      */
584     public static final int TENGWAR                       = 98; /* Teng */
585     /**
586      * ISO 15924 script code
587      * @stable ICU 3.6
588      */
589     public static final int VAI                           = 99; /* Vaii */
590     /**
591      * ISO 15924 script code
592      * @stable ICU 3.6
593      */
594     public static final int VISIBLE_SPEECH                = 100;/* Visp */
595     /**
596      * ISO 15924 script code
597      * @stable ICU 3.6
598      */
599     public static final int CUNEIFORM                     = 101;/* Xsux */
600     /**
601      * ISO 15924 script code
602      * @stable ICU 3.6
603      */
604     public static final int UNWRITTEN_LANGUAGES           = 102;/* Zxxx */
605     /**
606      * ISO 15924 script code
607      * @stable ICU 3.6
608      */
609     public static final int UNKNOWN                       = 103;/* Zzzz */ /* Unknown="Code for uncoded script", for unassigned code points */
610 
611     /**
612      * ISO 15924 script code
613      * @stable ICU 3.8
614      */
615     public static final int CARIAN                        = 104;/* Cari */
616     /**
617      * ISO 15924 script code
618      * @stable ICU 3.8
619      */
620     public static final int JAPANESE                      = 105;/* Jpan */
621     /**
622      * ISO 15924 script code
623      * @stable ICU 3.8
624      */
625     public static final int LANNA                         = 106;/* Lana */
626     /**
627      * ISO 15924 script code
628      * @stable ICU 3.8
629      */
630     public static final int LYCIAN                        = 107;/* Lyci */
631     /**
632      * ISO 15924 script code
633      * @stable ICU 3.8
634      */
635     public static final int LYDIAN                        = 108;/* Lydi */
636     /**
637      * ISO 15924 script code
638      * @stable ICU 3.8
639      */
640     public static final int OL_CHIKI                      = 109;/* Olck */
641     /**
642      * ISO 15924 script code
643      * @stable ICU 3.8
644      */
645     public static final int REJANG                        = 110;/* Rjng */
646     /**
647      * ISO 15924 script code
648      * @stable ICU 3.8
649      */
650     public static final int SAURASHTRA                    = 111;/* Saur */
651     /**
652      * ISO 15924 script code for Sutton SignWriting
653      * @stable ICU 3.8
654      */
655     public static final int SIGN_WRITING                  = 112;/* Sgnw */
656     /**
657      * ISO 15924 script code
658      * @stable ICU 3.8
659      */
660     public static final int SUNDANESE                     = 113;/* Sund */
661     /**
662      * ISO 15924 script code
663      * @stable ICU 3.8
664      */
665     public static final int MOON                          = 114;/* Moon */
666     /**
667      * ISO 15924 script code
668      * @stable ICU 3.8
669      */
670     public static final int MEITEI_MAYEK                  = 115;/* Mtei */
671 
672     /**
673      * ISO 15924 script code
674      * @stable ICU 4.0
675      */
676     public static final int IMPERIAL_ARAMAIC              = 116;/* Armi */
677 
678     /**
679      * ISO 15924 script code
680      * @stable ICU 4.0
681      */
682     public static final int AVESTAN                       = 117;/* Avst */
683 
684     /**
685      * ISO 15924 script code
686      * @stable ICU 4.0
687      */
688     public static final int CHAKMA                        = 118;/* Cakm */
689 
690     /**
691      * ISO 15924 script code
692      * @stable ICU 4.0
693      */
694     public static final int KOREAN                        = 119;/* Kore */
695 
696     /**
697      * ISO 15924 script code
698      * @stable ICU 4.0
699      */
700     public static final int KAITHI                        = 120;/* Kthi */
701 
702     /**
703      * ISO 15924 script code
704      * @stable ICU 4.0
705      */
706     public static final int MANICHAEAN                    = 121;/* Mani */
707 
708     /**
709      * ISO 15924 script code
710      * @stable ICU 4.0
711      */
712     public static final int INSCRIPTIONAL_PAHLAVI         = 122;/* Phli */
713 
714     /**
715      * ISO 15924 script code
716      * @stable ICU 4.0
717      */
718     public static final int PSALTER_PAHLAVI               = 123;/* Phlp */
719 
720     /**
721      * ISO 15924 script code
722      * @stable ICU 4.0
723      */
724     public static final int BOOK_PAHLAVI                  = 124;/* Phlv */
725 
726     /**
727      * ISO 15924 script code
728      * @stable ICU 4.0
729      */
730     public static final int INSCRIPTIONAL_PARTHIAN        = 125;/* Prti */
731 
732     /**
733      * ISO 15924 script code
734      * @stable ICU 4.0
735      */
736     public static final int SAMARITAN                     = 126;/* Samr */
737 
738     /**
739      * ISO 15924 script code
740      * @stable ICU 4.0
741      */
742     public static final int TAI_VIET                      = 127;/* Tavt */
743 
744     /**
745      * ISO 15924 script code
746      * @stable ICU 4.0
747      */
748     public static final int MATHEMATICAL_NOTATION         = 128;/* Zmth */
749 
750     /**
751      * ISO 15924 script code
752      * @stable ICU 4.0
753      */
754     public static final int SYMBOLS                       = 129;/* Zsym */
755 
756     /**
757      * ISO 15924 script code
758      * @stable ICU 4.4
759      */
760     public static final int BAMUM                         = 130;/* Bamu */
761     /**
762      * ISO 15924 script code
763      * @stable ICU 4.4
764      */
765     public static final int LISU                          = 131;/* Lisu */
766     /**
767      * ISO 15924 script code
768      * @stable ICU 4.4
769      */
770     public static final int NAKHI_GEBA                    = 132;/* Nkgb */
771     /**
772      * ISO 15924 script code
773      * @stable ICU 4.4
774      */
775     public static final int OLD_SOUTH_ARABIAN             = 133;/* Sarb */
776 
777     /**
778      * ISO 15924 script code
779      * @stable ICU 4.6
780      */
781     public static final int BASSA_VAH                     = 134;/* Bass */
782     /**
783      * ISO 15924 script code
784      * @stable ICU 54
785      */
786     public static final int DUPLOYAN                      = 135;/* Dupl */
787     /**
788      * Typo, use DUPLOYAN
789      * @deprecated ICU 54
790      */
791     @Deprecated
792     public static final int DUPLOYAN_SHORTAND             = DUPLOYAN;
793     /**
794      * ISO 15924 script code
795      * @stable ICU 4.6
796      */
797     public static final int ELBASAN                       = 136;/* Elba */
798     /**
799      * ISO 15924 script code
800      * @stable ICU 4.6
801      */
802     public static final int GRANTHA                       = 137;/* Gran */
803     /**
804      * ISO 15924 script code
805      * @stable ICU 4.6
806      */
807     public static final int KPELLE                        = 138;/* Kpel */
808     /**
809      * ISO 15924 script code
810      * @stable ICU 4.6
811      */
812     public static final int LOMA                          = 139;/* Loma */
813     /**
814      * Mende Kikakui
815      * ISO 15924 script code
816      * @stable ICU 4.6
817      */
818     public static final int MENDE                         = 140;/* Mend */
819     /**
820      * ISO 15924 script code
821      * @stable ICU 4.6
822      */
823     public static final int MEROITIC_CURSIVE              = 141;/* Merc */
824     /**
825      * ISO 15924 script code
826      * @stable ICU 4.6
827      */
828     public static final int OLD_NORTH_ARABIAN             = 142;/* Narb */
829     /**
830      * ISO 15924 script code
831      * @stable ICU 4.6
832      */
833     public static final int NABATAEAN                     = 143;/* Nbat */
834     /**
835      * ISO 15924 script code
836      * @stable ICU 4.6
837      */
838     public static final int PALMYRENE                     = 144;/* Palm */
839     /**
840      * ISO 15924 script code
841      * @stable ICU 54
842      */
843     public static final int KHUDAWADI                     = 145;/* Sind */
844     /**
845      * ISO 15924 script code
846      * @stable ICU 4.6
847      */
848     public static final int SINDHI = KHUDAWADI;
849     /**
850      * ISO 15924 script code
851      * @stable ICU 4.6
852      */
853     public static final int WARANG_CITI                   = 146;/* Wara */
854 
855     /**
856      * ISO 15924 script code
857      * @stable ICU 4.8
858      */
859     public static final int AFAKA = 147;/* Afak */
860     /**
861      * ISO 15924 script code
862      * @stable ICU 4.8
863      */
864     public static final int JURCHEN = 148;/* Jurc */
865     /**
866      * ISO 15924 script code
867      * @stable ICU 4.8
868      */
869     public static final int MRO = 149;/* Mroo */
870     /**
871      * ISO 15924 script code
872      * @stable ICU 4.8
873      */
874     public static final int NUSHU = 150;/* Nshu */
875     /**
876      * ISO 15924 script code
877      * @stable ICU 4.8
878      */
879     public static final int SHARADA = 151;/* Shrd */
880     /**
881      * ISO 15924 script code
882      * @stable ICU 4.8
883      */
884     public static final int SORA_SOMPENG = 152;/* Sora */
885     /**
886      * ISO 15924 script code
887      * @stable ICU 4.8
888      */
889     public static final int TAKRI = 153;/* Takr */
890     /**
891      * ISO 15924 script code
892      * @stable ICU 4.8
893      */
894     public static final int TANGUT = 154;/* Tang */
895     /**
896      * ISO 15924 script code
897      * @stable ICU 4.8
898      */
899     public static final int WOLEAI = 155;/* Wole */
900 
901     /**
902      * ISO 15924 script code
903      * @stable ICU 49
904      */
905     public static final int ANATOLIAN_HIEROGLYPHS = 156;/* Hluw */
906     /**
907      * ISO 15924 script code
908      * @stable ICU 49
909      */
910     public static final int KHOJKI = 157;/* Khoj */
911     /**
912      * ISO 15924 script code
913      * @stable ICU 49
914      */
915     public static final int TIRHUTA = 158;/* Tirh */
916     /**
917      * ISO 15924 script code
918      * @stable ICU 52
919      */
920     public static final int CAUCASIAN_ALBANIAN = 159; /* Aghb */
921     /**
922      * ISO 15924 script code
923      * @stable ICU 52
924      */
925     public static final int MAHAJANI = 160; /* Mahj */
926 
927     /**
928      * ISO 15924 script code
929      * @stable ICU 54
930      */
931     public static final int AHOM = 161; /* Ahom */
932     /**
933      * ISO 15924 script code
934      * @stable ICU 54
935      */
936     public static final int HATRAN = 162; /* Hatr */
937     /**
938      * ISO 15924 script code
939      * @stable ICU 54
940      */
941     public static final int MODI = 163; /* Modi */
942     /**
943      * ISO 15924 script code
944      * @stable ICU 54
945      */
946     public static final int MULTANI = 164; /* Mult */
947     /**
948      * ISO 15924 script code
949      * @stable ICU 54
950      */
951     public static final int PAU_CIN_HAU = 165; /* Pauc */
952     /**
953      * ISO 15924 script code
954      * @stable ICU 54
955      */
956     public static final int SIDDHAM = 166; /* Sidd */
957 
958     /**
959      * ISO 15924 script code
960      * @stable ICU 58
961      */
962     public static final int ADLAM = 167; /* Adlm */
963     /**
964      * ISO 15924 script code
965      * @stable ICU 58
966      */
967     public static final int BHAIKSUKI = 168; /* Bhks */
968     /**
969      * ISO 15924 script code
970      * @stable ICU 58
971      */
972     public static final int MARCHEN = 169; /* Marc */
973     /**
974      * ISO 15924 script code
975      * @stable ICU 58
976      */
977     public static final int NEWA = 170; /* Newa */
978     /**
979      * ISO 15924 script code
980      * @stable ICU 58
981      */
982     public static final int OSAGE = 171; /* Osge */
983 
984     /**
985      * ISO 15924 script code
986      * @stable ICU 58
987      */
988     public static final int HAN_WITH_BOPOMOFO = 172; /* Hanb */
989     /**
990      * ISO 15924 script code
991      * @stable ICU 58
992      */
993     public static final int JAMO = 173; /* Jamo */
994     /**
995      * ISO 15924 script code
996      * @stable ICU 58
997      */
998     public static final int SYMBOLS_EMOJI = 174; /* Zsye */
999 
1000     /**
1001      * ISO 15924 script code
1002      * @stable ICU 60
1003      */
1004     public static final int MASARAM_GONDI = 175; /* Gonm */
1005     /**
1006      * ISO 15924 script code
1007      * @stable ICU 60
1008      */
1009     public static final int SOYOMBO = 176; /* Soyo */
1010     /**
1011      * ISO 15924 script code
1012      * @stable ICU 60
1013      */
1014     public static final int ZANABAZAR_SQUARE = 177; /* Zanb */
1015 
1016     /**
1017      * ISO 15924 script code
1018      * @stable ICU 62
1019      */
1020     public static final int DOGRA = 178; /* Dogr */
1021     /** @stable ICU 62 */
1022     public static final int GUNJALA_GONDI = 179; /* Gong */
1023     /** @stable ICU 62 */
1024     public static final int MAKASAR = 180; /* Maka */
1025     /** @stable ICU 62 */
1026     public static final int MEDEFAIDRIN = 181; /* Medf */
1027     /** @stable ICU 62 */
1028     public static final int HANIFI_ROHINGYA = 182; /* Rohg */
1029     /** @stable ICU 62 */
1030     public static final int SOGDIAN = 183; /* Sogd */
1031     /** @stable ICU 62 */
1032     public static final int OLD_SOGDIAN = 184; /* Sogo */
1033 
1034     /**
1035      * One more than the highest normal UScript code.
1036      * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.SCRIPT).
1037      *
1038      * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
1039      */
1040     @Deprecated
1041     public static final int CODE_LIMIT   = 185;
1042 
getCodesFromLocale(ULocale locale)1043     private static int[] getCodesFromLocale(ULocale locale) {
1044         // Multi-script languages, equivalent to the LocaleScript data
1045         // that we used to load from locale resource bundles.
1046         String lang = locale.getLanguage();
1047         if(lang.equals("ja")) {
1048             return new int[] { UScript.KATAKANA, UScript.HIRAGANA, UScript.HAN };
1049         }
1050         if(lang.equals("ko")) {
1051             return new int[] { UScript.HANGUL, UScript.HAN };
1052         }
1053         String script = locale.getScript();
1054         if(lang.equals("zh") && script.equals("Hant")) {
1055             return new int[] { UScript.HAN, UScript.BOPOMOFO };
1056         }
1057         // Explicit script code.
1058         if(script.length() != 0) {
1059             int scriptCode = UScript.getCodeFromName(script);
1060             if(scriptCode != UScript.INVALID_CODE) {
1061                 if(scriptCode == UScript.SIMPLIFIED_HAN || scriptCode == UScript.TRADITIONAL_HAN) {
1062                     scriptCode = UScript.HAN;
1063                 }
1064                 return new int[] { scriptCode };
1065             }
1066         }
1067         return null;
1068     }
1069 
1070     /**
1071      * Helper function to find the code from locale.
1072      * @param locale The locale.
1073      */
findCodeFromLocale(ULocale locale)1074     private static int[] findCodeFromLocale(ULocale locale) {
1075         int[] result = getCodesFromLocale(locale);
1076         if(result != null) {
1077             return result;
1078         }
1079         ULocale likely = ULocale.addLikelySubtags(locale);
1080         return getCodesFromLocale(likely);
1081     }
1082 
1083     /**
1084      * Gets a script codes associated with the given locale or ISO 15924 abbreviation or name.
1085      * Returns MALAYAM given "Malayam" OR "Mlym".
1086      * Returns LATIN given "en" OR "en_US"
1087      * @param locale Locale
1088      * @return The script codes array. null if the the code cannot be found.
1089      * @stable ICU 2.4
1090      */
getCode(Locale locale)1091     public static final int[] getCode(Locale locale){
1092         return findCodeFromLocale(ULocale.forLocale(locale));
1093     }
1094     /**
1095      * Gets a script codes associated with the given locale or ISO 15924 abbreviation or name.
1096      * Returns MALAYAM given "Malayam" OR "Mlym".
1097      * Returns LATIN given "en" OR "en_US"
1098      * @param locale ULocale
1099      * @return The script codes array. null if the the code cannot be found.
1100      * @stable ICU 3.0
1101      */
getCode(ULocale locale)1102     public static final int[] getCode(ULocale locale){
1103         return findCodeFromLocale(locale);
1104     }
1105     /**
1106      * Gets the script codes associated with the given locale or ISO 15924 abbreviation or name.
1107      * Returns MALAYAM given "Malayam" OR "Mlym".
1108      * Returns LATIN given "en" OR "en_US"
1109      *
1110      * <p>Note: To search by short or long script alias only, use
1111      * {@link #getCodeFromName(String)} instead.
1112      * That does a fast lookup with no access of the locale data.
1113      *
1114      * @param nameOrAbbrOrLocale name of the script or ISO 15924 code or locale
1115      * @return The script codes array. null if the the code cannot be found.
1116      * @stable ICU 2.4
1117      */
getCode(String nameOrAbbrOrLocale)1118     public static final int[] getCode(String nameOrAbbrOrLocale) {
1119         boolean triedCode = false;
1120         if (nameOrAbbrOrLocale.indexOf('_') < 0 && nameOrAbbrOrLocale.indexOf('-') < 0) {
1121             int propNum = UCharacter.getPropertyValueEnumNoThrow(UProperty.SCRIPT, nameOrAbbrOrLocale);
1122             if (propNum != UProperty.UNDEFINED) {
1123                 return new int[] {propNum};
1124             }
1125             triedCode = true;
1126         }
1127         int[] scripts = findCodeFromLocale(new ULocale(nameOrAbbrOrLocale));
1128         if (scripts != null) {
1129             return scripts;
1130         }
1131         if (!triedCode) {
1132             int propNum = UCharacter.getPropertyValueEnumNoThrow(UProperty.SCRIPT, nameOrAbbrOrLocale);
1133             if (propNum != UProperty.UNDEFINED) {
1134                 return new int[] {propNum};
1135             }
1136         }
1137         return null;
1138     }
1139 
1140     /**
1141      * Returns the script code associated with the given Unicode script property alias
1142      * (name or abbreviation).
1143      * Short aliases are ISO 15924 script codes.
1144      * Returns MALAYAM given "Malayam" OR "Mlym".
1145      *
1146      * @param nameOrAbbr name of the script or ISO 15924 code
1147      * @return The script code value, or INVALID_CODE if the code cannot be found.
1148      * @stable ICU 54
1149      */
getCodeFromName(String nameOrAbbr)1150     public static final int getCodeFromName(String nameOrAbbr) {
1151         int propNum = UCharacter.getPropertyValueEnumNoThrow(UProperty.SCRIPT, nameOrAbbr);
1152         return propNum == UProperty.UNDEFINED ? INVALID_CODE : propNum;
1153     }
1154 
1155     /**
1156      * Gets the script code associated with the given codepoint.
1157      * Returns UScript.MALAYAM given 0x0D02
1158      * @param codepoint UChar32 codepoint
1159      * @return The script code
1160      * @stable ICU 2.4
1161      */
getScript(int codepoint)1162     public static final int getScript(int codepoint){
1163         if (codepoint >= UCharacter.MIN_VALUE & codepoint <= UCharacter.MAX_VALUE) {
1164             int scriptX=UCharacterProperty.INSTANCE.getAdditional(codepoint, 0)&UCharacterProperty.SCRIPT_X_MASK;
1165             if(scriptX<UCharacterProperty.SCRIPT_X_WITH_COMMON) {
1166                 return scriptX;
1167             } else if(scriptX<UCharacterProperty.SCRIPT_X_WITH_INHERITED) {
1168                 return UScript.COMMON;
1169             } else if(scriptX<UCharacterProperty.SCRIPT_X_WITH_OTHER) {
1170                 return UScript.INHERITED;
1171             } else {
1172                 return UCharacterProperty.INSTANCE.m_scriptExtensions_[scriptX&UCharacterProperty.SCRIPT_MASK_];
1173             }
1174         }else{
1175             throw new IllegalArgumentException(Integer.toString(codepoint));
1176         }
1177     }
1178 
1179     /**
1180      * Do the Script_Extensions of code point c contain script sc?
1181      * If c does not have explicit Script_Extensions, then this tests whether
1182      * c has the Script property value sc.
1183      *
1184      * <p>Some characters are commonly used in multiple scripts.
1185      * For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
1186      *
1187      * @param c code point
1188      * @param sc script code
1189      * @return true if sc is in Script_Extensions(c)
1190      * @stable ICU 49
1191      */
hasScript(int c, int sc)1192     public static final boolean hasScript(int c, int sc) {
1193         int scriptX=UCharacterProperty.INSTANCE.getAdditional(c, 0)&UCharacterProperty.SCRIPT_X_MASK;
1194         if(scriptX<UCharacterProperty.SCRIPT_X_WITH_COMMON) {
1195             return sc==scriptX;
1196         }
1197 
1198         char[] scriptExtensions=UCharacterProperty.INSTANCE.m_scriptExtensions_;
1199         int scx=scriptX&UCharacterProperty.SCRIPT_MASK_;  // index into scriptExtensions
1200         if(scriptX>=UCharacterProperty.SCRIPT_X_WITH_OTHER) {
1201             scx=scriptExtensions[scx+1];
1202         }
1203         if(sc>0x7fff) {
1204             // Guard against bogus input that would
1205             // make us go past the Script_Extensions terminator.
1206             return false;
1207         }
1208         while(sc>scriptExtensions[scx]) {
1209             ++scx;
1210         }
1211         return sc==(scriptExtensions[scx]&0x7fff);
1212     }
1213 
1214     /**
1215      * Sets code point c's Script_Extensions as script code integers into the output BitSet.
1216      * <ul>
1217      * <li>If c does have Script_Extensions, then the return value is
1218      * the negative number of Script_Extensions codes (= -set.cardinality());
1219      * in this case, the Script property value
1220      * (normally Common or Inherited) is not included in the set.
1221      * <li>If c does not have Script_Extensions, then the one Script code is put into the set
1222      * and also returned.
1223      * <li>If c is not a valid code point, then the one {@link #UNKNOWN} code is put into the set
1224      * and also returned.
1225      * </ul>
1226      * In other words, if the return value is non-negative, it is c's single Script code
1227      * and the set contains exactly this Script code.
1228      * If the return value is -n, then the set contains c's n&gt;=2 Script_Extensions script codes.
1229      *
1230      * <p>Some characters are commonly used in multiple scripts.
1231      * For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
1232      *
1233      * @param c code point
1234      * @param set set of script code integers; will be cleared, then bits are set
1235      *            corresponding to c's Script_Extensions
1236      * @return negative number of script codes in c's Script_Extensions,
1237      *         or the non-negative single Script value
1238      * @stable ICU 49
1239      */
getScriptExtensions(int c, BitSet set)1240     public static final int getScriptExtensions(int c, BitSet set) {
1241         set.clear();
1242         int scriptX=UCharacterProperty.INSTANCE.getAdditional(c, 0)&UCharacterProperty.SCRIPT_X_MASK;
1243         if(scriptX<UCharacterProperty.SCRIPT_X_WITH_COMMON) {
1244             set.set(scriptX);
1245             return scriptX;
1246         }
1247 
1248         char[] scriptExtensions=UCharacterProperty.INSTANCE.m_scriptExtensions_;
1249         int scx=scriptX&UCharacterProperty.SCRIPT_MASK_;  // index into scriptExtensions
1250         if(scriptX>=UCharacterProperty.SCRIPT_X_WITH_OTHER) {
1251             scx=scriptExtensions[scx+1];
1252         }
1253         int length=0;
1254         int sx;
1255         do {
1256             sx=scriptExtensions[scx++];
1257             set.set(sx&0x7fff);
1258             ++length;
1259         } while(sx<0x8000);
1260         // length==set.cardinality()
1261         return -length;
1262     }
1263 
1264     /**
1265      * Returns the long Unicode script name, if there is one.
1266      * Otherwise returns the 4-letter ISO 15924 script code.
1267      * Returns "Malayam" given MALAYALAM.
1268      *
1269      * @param scriptCode int script code
1270      * @return long script name as given in PropertyValueAliases.txt, or the 4-letter code
1271      * @throws IllegalArgumentException if the script code is not valid
1272      * @stable ICU 2.4
1273      */
getName(int scriptCode)1274     public static final String getName(int scriptCode){
1275         return UCharacter.getPropertyValueName(UProperty.SCRIPT,
1276                 scriptCode,
1277                 UProperty.NameChoice.LONG);
1278     }
1279 
1280     /**
1281      * Returns the 4-letter ISO 15924 script code,
1282      * which is the same as the short Unicode script name if Unicode has names for the script.
1283      * Returns "Mlym" given MALAYALAM.
1284      *
1285      * @param scriptCode int script code
1286      * @return short script name (4-letter code)
1287      * @throws IllegalArgumentException if the script code is not valid
1288      * @stable ICU 2.4
1289      */
getShortName(int scriptCode)1290     public static final String getShortName(int scriptCode){
1291         return UCharacter.getPropertyValueName(UProperty.SCRIPT,
1292                 scriptCode,
1293                 UProperty.NameChoice.SHORT);
1294     }
1295 
    /**
     * Script metadata (script properties).
     * See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt
     *
     * <p>Each script's properties are packed into one int in {@link #SCRIPT_PROPS},
     * indexed by script code: a sample character in the low bits, a usage value
     * in bits 23..21, and single-bit flags from bit 24 upwards.
     */
    private static final class ScriptMetadata {
        // 0 = NOT_ENCODED, no sample character, default false script properties.
        // Bits 20.. 0: sample character

        // Bits 23..21: usage
        // The numeric values (1, 2, 3, 5) are used as indexes into the ScriptUsage values
        // by UScript.getUsage(), so they must match the order of that enum.
        private static final int UNKNOWN = 1 << 21;
        private static final int EXCLUSION = 2 << 21;
        private static final int LIMITED_USE = 3 << 21;
        // private static final int ASPIRATIONAL = 4 << 21; -- not used any more since Unicode 10
        private static final int RECOMMENDED = 5 << 21;

        // Bits 31..24: Single-bit flags
        private static final int RTL = 1 << 24;         // tested by UScript.isRightToLeft()
        private static final int LB_LETTERS = 1 << 25;  // tested by UScript.breaksBetweenLetters()
        private static final int CASED = 1 << 26;       // tested by UScript.isCased()

        // One packed entry per script code, in script code order.
        // A plain 0 entry has no sample character, no usage and no flags (NOT_ENCODED).
        private static final int SCRIPT_PROPS[] = {
            // Begin copy-paste output from
            // tools/trunk/unicode/py/parsescriptmetadata.py
            // or from icu/trunk/source/common/uscript_props.cpp
            0x0040 | RECOMMENDED,  // Zyyy
            0x0308 | RECOMMENDED,  // Zinh
            0x0628 | RECOMMENDED | RTL,  // Arab
            0x0531 | RECOMMENDED | CASED,  // Armn
            0x0995 | RECOMMENDED,  // Beng
            0x3105 | RECOMMENDED | LB_LETTERS,  // Bopo
            0x13C4 | LIMITED_USE | CASED,  // Cher
            0x03E2 | EXCLUSION | CASED,  // Copt
            0x042F | RECOMMENDED | CASED,  // Cyrl
            0x10414 | EXCLUSION | CASED,  // Dsrt
            0x0905 | RECOMMENDED,  // Deva
            0x12A0 | RECOMMENDED,  // Ethi
            0x10D3 | RECOMMENDED,  // Geor
            0x10330 | EXCLUSION,  // Goth
            0x03A9 | RECOMMENDED | CASED,  // Grek
            0x0A95 | RECOMMENDED,  // Gujr
            0x0A15 | RECOMMENDED,  // Guru
            0x5B57 | RECOMMENDED | LB_LETTERS,  // Hani
            0xAC00 | RECOMMENDED,  // Hang
            0x05D0 | RECOMMENDED | RTL,  // Hebr
            0x304B | RECOMMENDED | LB_LETTERS,  // Hira
            0x0C95 | RECOMMENDED,  // Knda
            0x30AB | RECOMMENDED | LB_LETTERS,  // Kana
            0x1780 | RECOMMENDED | LB_LETTERS,  // Khmr
            0x0EA5 | RECOMMENDED | LB_LETTERS,  // Laoo
            0x004C | RECOMMENDED | CASED,  // Latn
            0x0D15 | RECOMMENDED,  // Mlym
            0x1826 | EXCLUSION,  // Mong
            0x1000 | RECOMMENDED | LB_LETTERS,  // Mymr
            0x168F | EXCLUSION,  // Ogam
            0x10300 | EXCLUSION,  // Ital
            0x0B15 | RECOMMENDED,  // Orya
            0x16A0 | EXCLUSION,  // Runr
            0x0D85 | RECOMMENDED,  // Sinh
            0x0710 | LIMITED_USE | RTL,  // Syrc
            0x0B95 | RECOMMENDED,  // Taml
            0x0C15 | RECOMMENDED,  // Telu
            0x078C | RECOMMENDED | RTL,  // Thaa
            0x0E17 | RECOMMENDED | LB_LETTERS,  // Thai
            0x0F40 | RECOMMENDED,  // Tibt
            0x14C0 | LIMITED_USE,  // Cans
            0xA288 | LIMITED_USE | LB_LETTERS,  // Yiii
            0x1703 | EXCLUSION,  // Tglg
            0x1723 | EXCLUSION,  // Hano
            0x1743 | EXCLUSION,  // Buhd
            0x1763 | EXCLUSION,  // Tagb
            0x280E | UNKNOWN,  // Brai
            0x10800 | EXCLUSION | RTL,  // Cprt
            0x1900 | LIMITED_USE,  // Limb
            0x10000 | EXCLUSION,  // Linb
            0x10480 | EXCLUSION,  // Osma
            0x10450 | EXCLUSION,  // Shaw
            0x1950 | LIMITED_USE | LB_LETTERS,  // Tale
            0x10380 | EXCLUSION,  // Ugar
            0,
            0x1A00 | EXCLUSION,  // Bugi
            0x2C00 | EXCLUSION | CASED,  // Glag
            0x10A00 | EXCLUSION | RTL,  // Khar
            0xA800 | LIMITED_USE,  // Sylo
            0x1980 | LIMITED_USE | LB_LETTERS,  // Talu
            0x2D30 | LIMITED_USE,  // Tfng
            0x103A0 | EXCLUSION,  // Xpeo
            0x1B05 | LIMITED_USE,  // Bali
            0x1BC0 | LIMITED_USE,  // Batk
            0,
            0x11005 | EXCLUSION,  // Brah
            0xAA00 | LIMITED_USE,  // Cham
            0,
            0,
            0,
            0,
            0x13153 | EXCLUSION,  // Egyp
            0,
            0x5B57 | RECOMMENDED | LB_LETTERS,  // Hans
            0x5B57 | RECOMMENDED | LB_LETTERS,  // Hant
            0x16B1C | EXCLUSION,  // Hmng
            0x10CA1 | EXCLUSION | RTL | CASED,  // Hung
            0,
            0xA984 | LIMITED_USE,  // Java
            0xA90A | LIMITED_USE,  // Kali
            0,
            0,
            0x1C00 | LIMITED_USE,  // Lepc
            0x10647 | EXCLUSION,  // Lina
            0x0840 | LIMITED_USE | RTL,  // Mand
            0,
            0x10980 | EXCLUSION | RTL,  // Mero
            0x07CA | LIMITED_USE | RTL,  // Nkoo
            0x10C00 | EXCLUSION | RTL,  // Orkh
            0x1036B | EXCLUSION,  // Perm
            0xA840 | EXCLUSION,  // Phag
            0x10900 | EXCLUSION | RTL,  // Phnx
            0x16F00 | LIMITED_USE,  // Plrd
            0,
            0,
            0,
            0,
            0,
            0,
            0xA549 | LIMITED_USE,  // Vaii
            0,
            0x12000 | EXCLUSION,  // Xsux
            0,
            0xFDD0 | UNKNOWN,  // Zzzz
            0x102A0 | EXCLUSION,  // Cari
            0x304B | RECOMMENDED | LB_LETTERS,  // Jpan
            0x1A20 | LIMITED_USE | LB_LETTERS,  // Lana
            0x10280 | EXCLUSION,  // Lyci
            0x10920 | EXCLUSION | RTL,  // Lydi
            0x1C5A | LIMITED_USE,  // Olck
            0xA930 | EXCLUSION,  // Rjng
            0xA882 | LIMITED_USE,  // Saur
            0x1D850 | EXCLUSION,  // Sgnw
            0x1B83 | LIMITED_USE,  // Sund
            0,
            0xABC0 | LIMITED_USE,  // Mtei
            0x10840 | EXCLUSION | RTL,  // Armi
            0x10B00 | EXCLUSION | RTL,  // Avst
            0x11103 | LIMITED_USE,  // Cakm
            0xAC00 | RECOMMENDED,  // Kore
            0x11083 | EXCLUSION,  // Kthi
            0x10AD8 | EXCLUSION | RTL,  // Mani
            0x10B60 | EXCLUSION | RTL,  // Phli
            0x10B8F | EXCLUSION | RTL,  // Phlp
            0,
            0x10B40 | EXCLUSION | RTL,  // Prti
            0x0800 | EXCLUSION | RTL,  // Samr
            0xAA80 | LIMITED_USE | LB_LETTERS,  // Tavt
            0,
            0,
            0xA6A0 | LIMITED_USE,  // Bamu
            0xA4D0 | LIMITED_USE,  // Lisu
            0,
            0x10A60 | EXCLUSION | RTL,  // Sarb
            0x16AE6 | EXCLUSION,  // Bass
            0x1BC20 | EXCLUSION,  // Dupl
            0x10500 | EXCLUSION,  // Elba
            0x11315 | EXCLUSION,  // Gran
            0,
            0,
            0x1E802 | EXCLUSION | RTL,  // Mend
            0x109A0 | EXCLUSION | RTL,  // Merc
            0x10A95 | EXCLUSION | RTL,  // Narb
            0x10896 | EXCLUSION | RTL,  // Nbat
            0x10873 | EXCLUSION | RTL,  // Palm
            0x112BE | EXCLUSION,  // Sind
            0x118B4 | EXCLUSION | CASED,  // Wara
            0,
            0,
            0x16A4F | EXCLUSION,  // Mroo
            0x1B1C4 | EXCLUSION | LB_LETTERS,  // Nshu
            0x11183 | EXCLUSION,  // Shrd
            0x110D0 | EXCLUSION,  // Sora
            0x11680 | EXCLUSION,  // Takr
            0x18229 | EXCLUSION | LB_LETTERS,  // Tang
            0,
            0x14400 | EXCLUSION,  // Hluw
            0x11208 | EXCLUSION,  // Khoj
            0x11484 | EXCLUSION,  // Tirh
            0x10537 | EXCLUSION,  // Aghb
            0x11152 | EXCLUSION,  // Mahj
            0x11717 | EXCLUSION | LB_LETTERS,  // Ahom
            0x108F4 | EXCLUSION | RTL,  // Hatr
            0x1160E | EXCLUSION,  // Modi
            0x1128F | EXCLUSION,  // Mult
            0x11AC0 | EXCLUSION,  // Pauc
            0x1158E | EXCLUSION,  // Sidd
            0x1E909 | LIMITED_USE | RTL | CASED,  // Adlm
            0x11C0E | EXCLUSION,  // Bhks
            0x11C72 | EXCLUSION,  // Marc
            0x11412 | LIMITED_USE,  // Newa
            0x104B5 | LIMITED_USE | CASED,  // Osge
            0x5B57 | RECOMMENDED | LB_LETTERS,  // Hanb
            0x1112 | RECOMMENDED,  // Jamo
            0,
            0x11D10 | EXCLUSION,  // Gonm
            0x11A5C | EXCLUSION,  // Soyo
            0x11A0B | EXCLUSION,  // Zanb
            0x1180B | EXCLUSION,  // Dogr
            0x11D71 | LIMITED_USE,  // Gong
            0x11EE5 | EXCLUSION,  // Maka
            0x16E40 | EXCLUSION | CASED,  // Medf
            0x10D12 | LIMITED_USE | RTL,  // Rohg
            0x10F42 | EXCLUSION | RTL,  // Sogd
            0x10F19 | EXCLUSION | RTL,  // Sogo
            // End copy-paste from parsescriptmetadata.py
        };

        /**
         * Returns the packed properties for a script code.
         *
         * @param script script code, used as an index into SCRIPT_PROPS
         * @return the packed properties int, or 0 (the NOT_ENCODED value)
         *         if script is negative or beyond the end of the table
         */
        private static final int getScriptProps(int script) {
            if (0 <= script && script < SCRIPT_PROPS.length) {
                return SCRIPT_PROPS[script];
            } else {
                // Out-of-range codes are treated like scripts without metadata.
                return 0;
            }
        }
    }
1516 
    /**
     * Script usage constants.
     * See UAX #31 Unicode Identifier and Pattern Syntax.
     * http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Exclusion_from_Identifiers
     *
     * <p>Implementation note: the declaration order matters. {@link #getUsage(int)} uses the
     * usage bits stored in the script metadata as an index into {@code values()},
     * so constants must not be reordered or inserted in the middle.
     *
     * @stable ICU 51
     */
    public enum ScriptUsage {
        /**
         * Not encoded in Unicode.
         * This is also the result for script codes without script metadata.
         * @stable ICU 51
         */
        NOT_ENCODED,
        /**
         * Unknown script usage.
         * @stable ICU 51
         */
        UNKNOWN,
        /**
         * Candidate for Exclusion from Identifiers.
         * @stable ICU 51
         */
        EXCLUDED,
        /**
         * Limited Use script.
         * @stable ICU 51
         */
        LIMITED_USE,
        /**
         * Aspirational Use script.
         * Note: the script metadata table in this class does not assign this value
         * to any script (its comment says it is not used any more since Unicode 10).
         * @stable ICU 51
         */
        ASPIRATIONAL,
        /**
         * Recommended script.
         * @stable ICU 51
         */
        RECOMMENDED
    }
    // Cached ScriptUsage.values(), indexed by the usage bits (bits 23..21) of the script metadata.
    private static final ScriptUsage[] usageValues = ScriptUsage.values();
1557 
1558     /**
1559      * Returns the script sample character string.
1560      * This string normally consists of one code point but might be longer.
1561      * The string is empty if the script is not encoded.
1562      *
1563      * @param script script code
1564      * @return the sample character string
1565      * @stable ICU 51
1566      */
getSampleString(int script)1567     public static final String getSampleString(int script) {
1568         int sampleChar = ScriptMetadata.getScriptProps(script) & 0x1fffff;
1569         if(sampleChar != 0) {
1570             return new StringBuilder().appendCodePoint(sampleChar).toString();
1571         }
1572         return "";
1573     }
1574 
1575     /**
1576      * Returns the script usage according to UAX #31 Unicode Identifier and Pattern Syntax.
1577      * Returns {@link ScriptUsage#NOT_ENCODED} if the script is not encoded in Unicode.
1578      *
1579      * @param script script code
1580      * @return script usage
1581      * @see ScriptUsage
1582      * @stable ICU 51
1583      */
getUsage(int script)1584     public static final ScriptUsage getUsage(int script) {
1585         return usageValues[(ScriptMetadata.getScriptProps(script) >> 21) & 7];
1586     }
1587 
1588     /**
1589      * Returns true if the script is written right-to-left.
1590      * For example, Arab and Hebr.
1591      *
1592      * @param script script code
1593      * @return true if the script is right-to-left
1594      * @stable ICU 51
1595      */
isRightToLeft(int script)1596     public static final boolean isRightToLeft(int script) {
1597         return (ScriptMetadata.getScriptProps(script) & ScriptMetadata.RTL) != 0;
1598     }
1599 
1600     /**
1601      * Returns true if the script allows line breaks between letters (excluding hyphenation).
1602      * Such a script typically requires dictionary-based line breaking.
1603      * For example, Hani and Thai.
1604      *
1605      * @param script script code
1606      * @return true if the script allows line breaks between letters
1607      * @stable ICU 51
1608      */
breaksBetweenLetters(int script)1609     public static final boolean breaksBetweenLetters(int script) {
1610         return (ScriptMetadata.getScriptProps(script) & ScriptMetadata.LB_LETTERS) != 0;
1611     }
1612 
1613     /**
1614      * Returns true if in modern (or most recent) usage of the script case distinctions are customary.
1615      * For example, Latn and Cyrl.
1616      *
1617      * @param script script code
1618      * @return true if the script is cased
1619      * @stable ICU 51
1620      */
isCased(int script)1621     public static final boolean isCased(int script) {
1622         return (ScriptMetadata.getScriptProps(script) & ScriptMetadata.CASED) != 0;
1623     }
1624 
    ///CLOVER:OFF
    /**
     * Private constructor; prevents instantiation from outside this class.
     * The body is intentionally empty.
     */
    private UScript(){}
    ///CLOVER:ON
1631 }
1632