1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 package android.icu.impl;
5 
6 import java.io.IOException;
7 import java.text.CharacterIterator;
8 import java.util.Locale;
9 
10 import android.icu.lang.UCharacter;
11 import android.icu.lang.UCharacterCategory;
12 import android.icu.text.BreakIterator;
13 import android.icu.text.Edits;
14 import android.icu.util.ICUUncheckedIOException;
15 import android.icu.util.ULocale;
16 
17 /**
18  * @hide Only a subset of ICU is exposed in Android
19  */
20 public final class CaseMapImpl {
21     /**
22      * Implementation of UCaseProps.ContextIterator, iterates over a String.
23      * See ustrcase.c/utf16_caseContextIterator().
24      */
25     public static final class StringContextIterator implements UCaseProps.ContextIterator {
26         /**
27          * Constructor.
28          * @param src String to iterate over.
29          */
StringContextIterator(CharSequence src)30         public StringContextIterator(CharSequence src) {
31             this.s=src;
32             limit=src.length();
33             cpStart=cpLimit=index=0;
34             dir=0;
35         }
36 
37         /**
38          * Set the iteration limit for nextCaseMapCP() to an index within the string.
39          * If the limit parameter is negative or past the string, then the
40          * string length is restored as the iteration limit.
41          *
42          * <p>This limit does not affect the next() function which always
43          * iterates to the very end of the string.
44          *
45          * @param lim The iteration limit.
46          */
setLimit(int lim)47         public void setLimit(int lim) {
48             if(0<=lim && lim<=s.length()) {
49                 limit=lim;
50             } else {
51                 limit=s.length();
52             }
53         }
54 
55         /**
56          * Move to the iteration limit without fetching code points up to there.
57          */
moveToLimit()58         public void moveToLimit() {
59             cpStart=cpLimit=limit;
60         }
61 
62         /**
63          * Iterate forward through the string to fetch the next code point
64          * to be case-mapped, and set the context indexes for it.
65          *
66          * <p>When the iteration limit is reached (and -1 is returned),
67          * getCPStart() will be at the iteration limit.
68          *
69          * <p>Iteration with next() does not affect the position for nextCaseMapCP().
70          *
71          * @return The next code point to be case-mapped, or <0 when the iteration is done.
72          */
nextCaseMapCP()73         public int nextCaseMapCP() {
74             cpStart=cpLimit;
75             if(cpLimit<limit) {
76                 int c=Character.codePointAt(s, cpLimit);
77                 cpLimit+=Character.charCount(c);
78                 return c;
79             } else {
80                 return -1;
81             }
82         }
83 
84         /**
85          * Returns the start of the code point that was last returned
86          * by nextCaseMapCP().
87          */
getCPStart()88         public int getCPStart() {
89             return cpStart;
90         }
91 
92         /**
93          * Returns the limit of the code point that was last returned
94          * by nextCaseMapCP().
95          */
getCPLimit()96         public int getCPLimit() {
97             return cpLimit;
98         }
99 
getCPLength()100         public int getCPLength() {
101             return cpLimit-cpStart;
102         }
103 
104         // implement UCaseProps.ContextIterator
105         // The following code is not used anywhere in this private class
106         @Override
reset(int direction)107         public void reset(int direction) {
108             if(direction>0) {
109                 /* reset for forward iteration */
110                 dir=1;
111                 index=cpLimit;
112             } else if(direction<0) {
113                 /* reset for backward iteration */
114                 dir=-1;
115                 index=cpStart;
116             } else {
117                 // not a valid direction
118                 dir=0;
119                 index=0;
120             }
121         }
122 
123         @Override
next()124         public int next() {
125             int c;
126 
127             if(dir>0 && index<s.length()) {
128                 c=Character.codePointAt(s, index);
129                 index+=Character.charCount(c);
130                 return c;
131             } else if(dir<0 && index>0) {
132                 c=Character.codePointBefore(s, index);
133                 index-=Character.charCount(c);
134                 return c;
135             }
136             return -1;
137         }
138 
139         // variables
140         protected CharSequence s;
141         protected int index, limit, cpStart, cpLimit;
142         protected int dir; // 0=initial state  >0=forward  <0=backward
143     }
144 
    /**
     * Titlecasing iterator option: treat the whole string as a single segment.
     * getTitleBreakIterator() answers this option with a WholeStringBreakIterator.
     */
    public static final int TITLECASE_WHOLE_STRING = 0x20;
    /**
     * Titlecasing iterator option: segment the string into sentences.
     * getTitleBreakIterator() answers this option with a sentence BreakIterator.
     */
    public static final int TITLECASE_SENTENCES = 0x40;

    /**
     * Bit mask for the titlecasing iterator options bit field.
     * Currently only 3 out of 8 values are used:
     * 0 (words), TITLECASE_WHOLE_STRING, TITLECASE_SENTENCES.
     * See stringoptions.h.
     * @hide draft / provisional / internal are hidden on Android
     */
    private static final int TITLECASE_ITERATOR_MASK = 0xe0;

    /**
     * Titlecasing index adjustment option: when set, toTitle() moves the titlecasing
     * index to the next cased character rather than to the next
     * letter/number/symbol/private use character.
     */
    public static final int TITLECASE_ADJUST_TO_CASED = 0x400;

    /**
     * Bit mask for the titlecasing index adjustment options bit set.
     * Currently two bits are defined:
     * TITLECASE_NO_BREAK_ADJUSTMENT, TITLECASE_ADJUST_TO_CASED.
     * See stringoptions.h.
     * @hide draft / provisional / internal are hidden on Android
     */
    private static final int TITLECASE_ADJUSTMENT_MASK = 0x600;
167 
addTitleAdjustmentOption(int options, int newOption)168     public static int addTitleAdjustmentOption(int options, int newOption) {
169         int adjOptions = options & TITLECASE_ADJUSTMENT_MASK;
170         if (adjOptions !=0 && adjOptions != newOption) {
171             throw new IllegalArgumentException("multiple titlecasing index adjustment options");
172         }
173         return options | newOption;
174     }
175 
    /**
     * Bit set of general categories, one bit per UCharacterCategory value
     * (bit position == category value), that isLNS() accepts unconditionally:
     * letters (except modifier letters), numbers, symbols and private use.
     */
    private static final int LNS =
            (1 << UCharacterCategory.UPPERCASE_LETTER) |
            (1 << UCharacterCategory.LOWERCASE_LETTER) |
            (1 << UCharacterCategory.TITLECASE_LETTER) |
            // Not MODIFIER_LETTER: We count only cased modifier letters.
            (1 << UCharacterCategory.OTHER_LETTER) |

            (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER) |
            (1 << UCharacterCategory.LETTER_NUMBER) |
            (1 << UCharacterCategory.OTHER_NUMBER) |

            (1 << UCharacterCategory.MATH_SYMBOL) |
            (1 << UCharacterCategory.CURRENCY_SYMBOL) |
            (1 << UCharacterCategory.MODIFIER_SYMBOL) |
            (1 << UCharacterCategory.OTHER_SYMBOL) |

            (1 << UCharacterCategory.PRIVATE_USE);
193 
isLNS(int c)194     private static boolean isLNS(int c) {
195         // Letter, number, symbol,
196         // or a private use code point because those are typically used as letters or numbers.
197         // Consider modifier letters only if they are cased.
198         int gc = UCharacterProperty.INSTANCE.getType(c);
199         return ((1 << gc) & LNS) != 0 ||
200                 (gc == UCharacterCategory.MODIFIER_LETTER &&
201                     UCaseProps.INSTANCE.getType(c) != UCaseProps.NONE);
202     }
203 
addTitleIteratorOption(int options, int newOption)204     public static int addTitleIteratorOption(int options, int newOption) {
205         int iterOptions = options & TITLECASE_ITERATOR_MASK;
206         if (iterOptions !=0 && iterOptions != newOption) {
207             throw new IllegalArgumentException("multiple titlecasing iterator options");
208         }
209         return options | newOption;
210     }
211 
getTitleBreakIterator( Locale locale, int options, BreakIterator iter)212     public static BreakIterator getTitleBreakIterator(
213             Locale locale, int options, BreakIterator iter) {
214         options &= TITLECASE_ITERATOR_MASK;
215         if (options != 0 && iter != null) {
216             throw new IllegalArgumentException(
217                     "titlecasing iterator option together with an explicit iterator");
218         }
219         if (iter == null) {
220             switch (options) {
221             case 0:
222                 iter = BreakIterator.getWordInstance(locale);
223                 break;
224             case TITLECASE_WHOLE_STRING:
225                 iter = new WholeStringBreakIterator();
226                 break;
227             case TITLECASE_SENTENCES:
228                 iter = BreakIterator.getSentenceInstance(locale);
229                 break;
230             default:
231                 throw new IllegalArgumentException("unknown titlecasing iterator option");
232             }
233         }
234         return iter;
235     }
236 
getTitleBreakIterator( ULocale locale, int options, BreakIterator iter)237     public static BreakIterator getTitleBreakIterator(
238             ULocale locale, int options, BreakIterator iter) {
239         options &= TITLECASE_ITERATOR_MASK;
240         if (options != 0 && iter != null) {
241             throw new IllegalArgumentException(
242                     "titlecasing iterator option together with an explicit iterator");
243         }
244         if (iter == null) {
245             switch (options) {
246             case 0:
247                 iter = BreakIterator.getWordInstance(locale);
248                 break;
249             case TITLECASE_WHOLE_STRING:
250                 iter = new WholeStringBreakIterator();
251                 break;
252             case TITLECASE_SENTENCES:
253                 iter = BreakIterator.getSentenceInstance(locale);
254                 break;
255             default:
256                 throw new IllegalArgumentException("unknown titlecasing iterator option");
257             }
258         }
259         return iter;
260     }
261 
    /**
     * Omit unchanged text when case-mapping with Edits.
     *
     * <p>When this bit is set, appendResult() and appendUnchanged() still record
     * unchanged spans in the Edits (if any) but do not append them to the destination,
     * so the destination receives only the replacement text.
     */
    public static final int OMIT_UNCHANGED_TEXT = 0x4000;
266 
267     private static final class WholeStringBreakIterator extends BreakIterator {
268         private int length;
269 
notImplemented()270         private static void notImplemented() {
271             throw new UnsupportedOperationException("should not occur");
272         }
273 
274         @Override
first()275         public int first() {
276             return 0;
277         }
278 
279         @Override
last()280         public int last() {
281             notImplemented();
282             return 0;
283         }
284 
285         @Override
next(int n)286         public int next(int n) {
287             notImplemented();
288             return 0;
289         }
290 
291         @Override
next()292         public int next() {
293             return length;
294         }
295 
296         @Override
previous()297         public int previous() {
298             notImplemented();
299             return 0;
300         }
301 
302         @Override
following(int offset)303         public int following(int offset) {
304             notImplemented();
305             return 0;
306         }
307 
308         @Override
current()309         public int current() {
310             notImplemented();
311             return 0;
312         }
313 
314         @Override
getText()315         public CharacterIterator getText() {
316             notImplemented();
317             return null;
318         }
319 
320         @Override
setText(CharacterIterator newText)321         public void setText(CharacterIterator newText) {
322             length = newText.getEndIndex();
323         }
324 
325         @Override
setText(CharSequence newText)326         public void setText(CharSequence newText) {
327             length = newText.length();
328         }
329 
330         @Override
setText(String newText)331         public void setText(String newText) {
332             length = newText.length();
333         }
334     }
335 
appendCodePoint(Appendable a, int c)336     private static int appendCodePoint(Appendable a, int c) throws IOException {
337         if (c <= Character.MAX_VALUE) {
338             a.append((char)c);
339             return 1;
340         } else {
341             a.append((char)(0xd7c0 + (c >> 10)));
342             a.append((char)(Character.MIN_LOW_SURROGATE + (c & 0x3ff)));
343             return 2;
344         }
345     }
346 
347     /**
348      * Appends a full case mapping result, see {@link UCaseProps#MAX_STRING_LENGTH}.
349      * @throws IOException
350      */
appendResult(int result, Appendable dest, int cpLength, int options, Edits edits)351     private static void appendResult(int result, Appendable dest,
352             int cpLength, int options, Edits edits) throws IOException {
353         // Decode the result.
354         if (result < 0) {
355             // (not) original code point
356             if (edits != null) {
357                 edits.addUnchanged(cpLength);
358             }
359             if ((options & OMIT_UNCHANGED_TEXT) != 0) {
360                 return;
361             }
362             appendCodePoint(dest, ~result);
363         } else if (result <= UCaseProps.MAX_STRING_LENGTH) {
364             // The mapping has already been appended to result.
365             if (edits != null) {
366                 edits.addReplace(cpLength, result);
367             }
368         } else {
369             // Append the single-code point mapping.
370             int length = appendCodePoint(dest, result);
371             if (edits != null) {
372                 edits.addReplace(cpLength, length);
373             }
374         }
375     }
376 
appendUnchanged(CharSequence src, int start, int length, Appendable dest, int options, Edits edits)377     private static final void appendUnchanged(CharSequence src, int start, int length,
378             Appendable dest, int options, Edits edits) throws IOException {
379         if (length > 0) {
380             if (edits != null) {
381                 edits.addUnchanged(length);
382             }
383             if ((options & OMIT_UNCHANGED_TEXT) != 0) {
384                 return;
385             }
386             dest.append(src, start, start + length);
387         }
388     }
389 
applyEdits(CharSequence src, StringBuilder replacementChars, Edits edits)390     private static String applyEdits(CharSequence src, StringBuilder replacementChars, Edits edits) {
391         if (!edits.hasChanges()) {
392             return src.toString();
393         }
394         StringBuilder result = new StringBuilder(src.length() + edits.lengthDelta());
395         for (Edits.Iterator ei = edits.getCoarseIterator(); ei.next();) {
396             if (ei.hasChange()) {
397                 int i = ei.replacementIndex();
398                 result.append(replacementChars, i, i + ei.newLength());
399             } else {
400                 int i = ei.sourceIndex();
401                 result.append(src, i, i + ei.oldLength());
402             }
403         }
404         return result.toString();
405     }
406 
internalToLower(int caseLocale, int options, StringContextIterator iter, Appendable dest, Edits edits)407     private static void internalToLower(int caseLocale, int options, StringContextIterator iter,
408             Appendable dest, Edits edits) throws IOException {
409         int c;
410         while ((c = iter.nextCaseMapCP()) >= 0) {
411             c = UCaseProps.INSTANCE.toFullLower(c, iter, dest, caseLocale);
412             appendResult(c, dest, iter.getCPLength(), options, edits);
413         }
414     }
415 
toLower(int caseLocale, int options, CharSequence src)416     public static String toLower(int caseLocale, int options, CharSequence src) {
417         if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) {
418             if (src.length() == 0) {
419                 return src.toString();
420             }
421             // Collect and apply only changes.
422             // Good if no or few changes. Bad (slow) if many changes.
423             Edits edits = new Edits();
424             StringBuilder replacementChars = toLower(
425                     caseLocale, options | OMIT_UNCHANGED_TEXT, src, new StringBuilder(), edits);
426             return applyEdits(src, replacementChars, edits);
427         } else {
428             return toLower(caseLocale, options, src,
429                     new StringBuilder(src.length()), null).toString();
430         }
431     }
432 
toLower(int caseLocale, int options, CharSequence src, A dest, Edits edits)433     public static <A extends Appendable> A toLower(int caseLocale, int options,
434             CharSequence src, A dest, Edits edits) {
435         try {
436             if (edits != null) {
437                 edits.reset();
438             }
439             StringContextIterator iter = new StringContextIterator(src);
440             internalToLower(caseLocale, options, iter, dest, edits);
441             return dest;
442         } catch (IOException e) {
443             throw new ICUUncheckedIOException(e);
444         }
445     }
446 
toUpper(int caseLocale, int options, CharSequence src)447     public static String toUpper(int caseLocale, int options, CharSequence src) {
448         if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) {
449             if (src.length() == 0) {
450                 return src.toString();
451             }
452             // Collect and apply only changes.
453             // Good if no or few changes. Bad (slow) if many changes.
454             Edits edits = new Edits();
455             StringBuilder replacementChars = toUpper(
456                     caseLocale, options | OMIT_UNCHANGED_TEXT, src, new StringBuilder(), edits);
457             return applyEdits(src, replacementChars, edits);
458         } else {
459             return toUpper(caseLocale, options, src,
460                     new StringBuilder(src.length()), null).toString();
461         }
462     }
463 
toUpper(int caseLocale, int options, CharSequence src, A dest, Edits edits)464     public static <A extends Appendable> A toUpper(int caseLocale, int options,
465             CharSequence src, A dest, Edits edits) {
466         try {
467             if (edits != null) {
468                 edits.reset();
469             }
470             if (caseLocale == UCaseProps.LOC_GREEK) {
471                 return GreekUpper.toUpper(options, src, dest, edits);
472             }
473             StringContextIterator iter = new StringContextIterator(src);
474             int c;
475             while ((c = iter.nextCaseMapCP()) >= 0) {
476                 c = UCaseProps.INSTANCE.toFullUpper(c, iter, dest, caseLocale);
477                 appendResult(c, dest, iter.getCPLength(), options, edits);
478             }
479             return dest;
480         } catch (IOException e) {
481             throw new ICUUncheckedIOException(e);
482         }
483     }
484 
toTitle(int caseLocale, int options, BreakIterator iter, CharSequence src)485     public static String toTitle(int caseLocale, int options, BreakIterator iter, CharSequence src) {
486         if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) {
487             if (src.length() == 0) {
488                 return src.toString();
489             }
490             // Collect and apply only changes.
491             // Good if no or few changes. Bad (slow) if many changes.
492             Edits edits = new Edits();
493             StringBuilder replacementChars = toTitle(
494                     caseLocale, options | OMIT_UNCHANGED_TEXT, iter, src,
495                     new StringBuilder(), edits);
496             return applyEdits(src, replacementChars, edits);
497         } else {
498             return toTitle(caseLocale, options, iter, src,
499                     new StringBuilder(src.length()), null).toString();
500         }
501     }
502 
    /**
     * Titlecases a string into an Appendable, optionally recording the edits.
     *
     * <p>The text is processed segment by segment, with segment boundaries taken from
     * {@code titleIter.first()} and then {@code titleIter.next()}. Within each segment,
     * leading characters may be skipped (copied unchanged), then one code point is
     * titlecased, and the rest is lowercased or, with TITLECASE_NO_LOWERCASE, copied unchanged.
     *
     * <p>This method only calls first() and next() on {@code titleIter}; it does not
     * set the iterator's text.
     *
     * @param caseLocale the case locale constant passed to UCaseProps
     * @param options options bit set
     * @param titleIter the break iterator that supplies the segment boundaries
     * @param src the source text
     * @param dest the destination
     * @param edits reset and then filled with the edits; may be null
     * @return {@code dest}
     * @throws ICUUncheckedIOException if the Appendable throws an IOException
     */
    public static <A extends Appendable> A toTitle(
            int caseLocale, int options, BreakIterator titleIter,
            CharSequence src, A dest, Edits edits) {
        try {
            if (edits != null) {
                edits.reset();
            }

            /* set up local variables */
            StringContextIterator iter = new StringContextIterator(src);
            int srcLength = src.length();
            int prev=0;
            boolean isFirstIndex=true;

            /* titlecasing loop */
            while(prev<srcLength) {
                /* find next index where to titlecase */
                int index;
                if(isFirstIndex) {
                    isFirstIndex=false;
                    index=titleIter.first();
                } else {
                    index=titleIter.next();
                }
                // Treat the end of the break iteration, or an out-of-range boundary,
                // as the end of the source text.
                if(index==BreakIterator.DONE || index>srcLength) {
                    index=srcLength;
                }

                /*
                 * Segment [prev..index[ into 3 parts:
                 * a) skipped characters (copy as-is) [prev..titleStart[
                 * b) first letter (titlecase)              [titleStart..titleLimit[
                 * c) subsequent characters (lowercase)                 [titleLimit..index[
                 */
                if(prev<index) {
                    // Find and copy skipped characters [prev..titleStart[
                    int titleStart=prev;
                    // Restrict nextCaseMapCP() to the current segment.
                    iter.setLimit(index);
                    int c=iter.nextCaseMapCP();
                    if ((options&UCharacter.TITLECASE_NO_BREAK_ADJUSTMENT)==0) {
                        // Adjust the titlecasing index to the next cased character,
                        // or to the next letter/number/symbol/private use.
                        // Stop with titleStart<titleLimit<=index
                        // if there is a character to be titlecased,
                        // or else stop with titleStart==titleLimit==index.
                        boolean toCased = (options&CaseMapImpl.TITLECASE_ADJUST_TO_CASED) != 0;
                        while ((toCased ?
                                    UCaseProps.NONE==UCaseProps.INSTANCE.getType(c) :
                                        !CaseMapImpl.isLNS(c)) &&
                                (c=iter.nextCaseMapCP())>=0) {}
                        // If c<0 then we have only uncased characters in [prev..index[
                        // and stopped with titleStart==titleLimit==index.
                        titleStart=iter.getCPStart();
                        if (prev < titleStart) {
                            appendUnchanged(src, prev, titleStart-prev, dest, options, edits);
                        }
                    }

                    if(titleStart<index) {
                        int titleLimit=iter.getCPLimit();
                        // titlecase c which is from [titleStart..titleLimit[
                        c = UCaseProps.INSTANCE.toFullTitle(c, iter, dest, caseLocale);
                        appendResult(c, dest, iter.getCPLength(), options, edits);

                        // Special case Dutch IJ titlecasing
                        // An initial i/I followed by j/J is treated as one letter:
                        // the second character is written as a capital J as well.
                        if (titleStart+1 < index && caseLocale == UCaseProps.LOC_DUTCH) {
                            char c1 = src.charAt(titleStart);
                            if ((c1 == 'i' || c1 == 'I')) {
                                char c2 = src.charAt(titleStart+1);
                                if (c2 == 'j') {
                                    dest.append('J');
                                    if (edits != null) {
                                        edits.addReplace(1, 1);
                                    }
                                    // Consume the j so that it is not lowercased below.
                                    c = iter.nextCaseMapCP();
                                    titleLimit++;
                                    assert c == c2;
                                    assert titleLimit == iter.getCPLimit();
                                } else if (c2 == 'J') {
                                    // Keep the capital J from getting lowercased.
                                    appendUnchanged(src, titleStart + 1, 1, dest, options, edits);
                                    c = iter.nextCaseMapCP();
                                    titleLimit++;
                                    assert c == c2;
                                    assert titleLimit == iter.getCPLimit();
                                }
                            }
                        }

                        // lowercase [titleLimit..index[
                        if(titleLimit<index) {
                            if((options&UCharacter.TITLECASE_NO_LOWERCASE)==0) {
                                // Normal operation: Lowercase the rest of the word.
                                internalToLower(caseLocale, options, iter, dest, edits);
                            } else {
                                // Optionally just copy the rest of the word unchanged.
                                appendUnchanged(src, titleLimit, index-titleLimit, dest, options, edits);
                                // Skip the iterator ahead so the next segment starts at index.
                                iter.moveToLimit();
                            }
                        }
                    }
                }

                prev=index;
            }
            return dest;
        } catch (IOException e) {
            throw new ICUUncheckedIOException(e);
        }
    }
613 
fold(int options, CharSequence src)614     public static String fold(int options, CharSequence src) {
615         if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) {
616             if (src.length() == 0) {
617                 return src.toString();
618             }
619             // Collect and apply only changes.
620             // Good if no or few changes. Bad (slow) if many changes.
621             Edits edits = new Edits();
622             StringBuilder replacementChars = fold(
623                     options | OMIT_UNCHANGED_TEXT, src, new StringBuilder(), edits);
624             return applyEdits(src, replacementChars, edits);
625         } else {
626             return fold(options, src, new StringBuilder(src.length()), null).toString();
627         }
628     }
629 
fold(int options, CharSequence src, A dest, Edits edits)630     public static <A extends Appendable> A fold(int options,
631             CharSequence src, A dest, Edits edits) {
632         try {
633             if (edits != null) {
634                 edits.reset();
635             }
636             int length = src.length();
637             for (int i = 0; i < length;) {
638                 int c = Character.codePointAt(src, i);
639                 int cpLength = Character.charCount(c);
640                 i += cpLength;
641                 c = UCaseProps.INSTANCE.toFullFolding(c, dest, options);
642                 appendResult(c, dest, cpLength, options, edits);
643             }
644             return dest;
645         } catch (IOException e) {
646             throw new ICUUncheckedIOException(e);
647         }
648     }
649 
650     private static final class GreekUpper {
651         // Data bits.
652         private static final int UPPER_MASK = 0x3ff;
653         private static final int HAS_VOWEL = 0x1000;
654         private static final int HAS_YPOGEGRAMMENI = 0x2000;
655         private static final int HAS_ACCENT = 0x4000;
656         private static final int HAS_DIALYTIKA = 0x8000;
657         // Further bits during data building and processing, not stored in the data map.
658         private static final int HAS_COMBINING_DIALYTIKA = 0x10000;
659         private static final int HAS_OTHER_GREEK_DIACRITIC = 0x20000;
660 
661         private static final int HAS_VOWEL_AND_ACCENT = HAS_VOWEL | HAS_ACCENT;
662         private static final int HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA =
663                 HAS_VOWEL_AND_ACCENT | HAS_DIALYTIKA;
664         private static final int HAS_EITHER_DIALYTIKA = HAS_DIALYTIKA | HAS_COMBINING_DIALYTIKA;
665 
666         // State bits.
667         private static final int AFTER_CASED = 1;
668         private static final int AFTER_VOWEL_WITH_ACCENT = 2;
669 
670         // Data generated by prototype code, see
671         // http://site.icu-project.org/design/case/greek-upper
672         // TODO: Move this data into ucase.icu.
673         private static final char[] data0370 = {
674             // U+0370..03FF
675             0x0370,  // Ͱ
676             0x0370,  // ͱ
677             0x0372,  // Ͳ
678             0x0372,  // ͳ
679             0,
680             0,
681             0x0376,  // Ͷ
682             0x0376,  // ͷ
683             0,
684             0,
685             0x037A,  // ͺ
686             0x03FD,  // ͻ
687             0x03FE,  // ͼ
688             0x03FF,  // ͽ
689             0,
690             0x037F,  // Ϳ
691             0,
692             0,
693             0,
694             0,
695             0,
696             0,
697             0x0391 | HAS_VOWEL | HAS_ACCENT,  // Ά
698             0,
699             0x0395 | HAS_VOWEL | HAS_ACCENT,  // Έ
700             0x0397 | HAS_VOWEL | HAS_ACCENT,  // Ή
701             0x0399 | HAS_VOWEL | HAS_ACCENT,  // Ί
702             0,
703             0x039F | HAS_VOWEL | HAS_ACCENT,  // Ό
704             0,
705             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // Ύ
706             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // Ώ
707             0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,  // ΐ
708             0x0391 | HAS_VOWEL,  // Α
709             0x0392,  // Β
710             0x0393,  // Γ
711             0x0394,  // Δ
712             0x0395 | HAS_VOWEL,  // Ε
713             0x0396,  // Ζ
714             0x0397 | HAS_VOWEL,  // Η
715             0x0398,  // Θ
716             0x0399 | HAS_VOWEL,  // Ι
717             0x039A,  // Κ
718             0x039B,  // Λ
719             0x039C,  // Μ
720             0x039D,  // Ν
721             0x039E,  // Ξ
722             0x039F | HAS_VOWEL,  // Ο
723             0x03A0,  // Π
724             0x03A1,  // Ρ
725             0,
726             0x03A3,  // Σ
727             0x03A4,  // Τ
728             0x03A5 | HAS_VOWEL,  // Υ
729             0x03A6,  // Φ
730             0x03A7,  // Χ
731             0x03A8,  // Ψ
732             0x03A9 | HAS_VOWEL,  // Ω
733             0x0399 | HAS_VOWEL | HAS_DIALYTIKA,  // Ϊ
734             0x03A5 | HAS_VOWEL | HAS_DIALYTIKA,  // Ϋ
735             0x0391 | HAS_VOWEL | HAS_ACCENT,  // ά
736             0x0395 | HAS_VOWEL | HAS_ACCENT,  // έ
737             0x0397 | HAS_VOWEL | HAS_ACCENT,  // ή
738             0x0399 | HAS_VOWEL | HAS_ACCENT,  // ί
739             0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,  // ΰ
740             0x0391 | HAS_VOWEL,  // α
741             0x0392,  // β
742             0x0393,  // γ
743             0x0394,  // δ
744             0x0395 | HAS_VOWEL,  // ε
745             0x0396,  // ζ
746             0x0397 | HAS_VOWEL,  // η
747             0x0398,  // θ
748             0x0399 | HAS_VOWEL,  // ι
749             0x039A,  // κ
750             0x039B,  // λ
751             0x039C,  // μ
752             0x039D,  // ν
753             0x039E,  // ξ
754             0x039F | HAS_VOWEL,  // ο
755             0x03A0,  // π
756             0x03A1,  // ρ
757             0x03A3,  // ς
758             0x03A3,  // σ
759             0x03A4,  // τ
760             0x03A5 | HAS_VOWEL,  // υ
761             0x03A6,  // φ
762             0x03A7,  // χ
763             0x03A8,  // ψ
764             0x03A9 | HAS_VOWEL,  // ω
765             0x0399 | HAS_VOWEL | HAS_DIALYTIKA,  // ϊ
766             0x03A5 | HAS_VOWEL | HAS_DIALYTIKA,  // ϋ
767             0x039F | HAS_VOWEL | HAS_ACCENT,  // ό
768             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // ύ
769             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // ώ
770             0x03CF,  // Ϗ
771             0x0392,  // ϐ
772             0x0398,  // ϑ
773             0x03D2,  // ϒ
774             0x03D2 | HAS_ACCENT,  // ϓ
775             0x03D2 | HAS_DIALYTIKA,  // ϔ
776             0x03A6,  // ϕ
777             0x03A0,  // ϖ
778             0x03CF,  // ϗ
779             0x03D8,  // Ϙ
780             0x03D8,  // ϙ
781             0x03DA,  // Ϛ
782             0x03DA,  // ϛ
783             0x03DC,  // Ϝ
784             0x03DC,  // ϝ
785             0x03DE,  // Ϟ
786             0x03DE,  // ϟ
787             0x03E0,  // Ϡ
788             0x03E0,  // ϡ
789             0,
790             0,
791             0,
792             0,
793             0,
794             0,
795             0,
796             0,
797             0,
798             0,
799             0,
800             0,
801             0,
802             0,
803             0x039A,  // ϰ
804             0x03A1,  // ϱ
805             0x03F9,  // ϲ
806             0x037F,  // ϳ
807             0x03F4,  // ϴ
808             0x0395 | HAS_VOWEL,  // ϵ
809             0,
810             0x03F7,  // Ϸ
811             0x03F7,  // ϸ
812             0x03F9,  // Ϲ
813             0x03FA,  // Ϻ
814             0x03FA,  // ϻ
815             0x03FC,  // ϼ
816             0x03FD,  // Ͻ
817             0x03FE,  // Ͼ
818             0x03FF,  // Ͽ
819         };
820 
821         private static final char[] data1F00 = {
822             // U+1F00..1FFF
                // Letter data table indexed by (code point - 0x1F00); see getLetterData().
                // A non-zero entry is the code point of the uppercase base letter
                // (extracted in toUpper() with UPPER_MASK) OR'ed with HAS_* flag bits
                // describing the letter. A zero entry means there is no Greek letter data
                // for that code point; toUpper() then falls back to the generic
                // full uppercase mapping.
823             0x0391 | HAS_VOWEL,  // ἀ
824             0x0391 | HAS_VOWEL,  // ἁ
825             0x0391 | HAS_VOWEL | HAS_ACCENT,  // ἂ
826             0x0391 | HAS_VOWEL | HAS_ACCENT,  // ἃ
827             0x0391 | HAS_VOWEL | HAS_ACCENT,  // ἄ
828             0x0391 | HAS_VOWEL | HAS_ACCENT,  // ἅ
829             0x0391 | HAS_VOWEL | HAS_ACCENT,  // ἆ
830             0x0391 | HAS_VOWEL | HAS_ACCENT,  // ἇ
831             0x0391 | HAS_VOWEL,  // Ἀ
832             0x0391 | HAS_VOWEL,  // Ἁ
833             0x0391 | HAS_VOWEL | HAS_ACCENT,  // Ἂ
834             0x0391 | HAS_VOWEL | HAS_ACCENT,  // Ἃ
835             0x0391 | HAS_VOWEL | HAS_ACCENT,  // Ἄ
836             0x0391 | HAS_VOWEL | HAS_ACCENT,  // Ἅ
837             0x0391 | HAS_VOWEL | HAS_ACCENT,  // Ἆ
838             0x0391 | HAS_VOWEL | HAS_ACCENT,  // Ἇ
839             0x0395 | HAS_VOWEL,  // ἐ
840             0x0395 | HAS_VOWEL,  // ἑ
841             0x0395 | HAS_VOWEL | HAS_ACCENT,  // ἒ
842             0x0395 | HAS_VOWEL | HAS_ACCENT,  // ἓ
843             0x0395 | HAS_VOWEL | HAS_ACCENT,  // ἔ
844             0x0395 | HAS_VOWEL | HAS_ACCENT,  // ἕ
845             0,
846             0,
847             0x0395 | HAS_VOWEL,  // Ἐ
848             0x0395 | HAS_VOWEL,  // Ἑ
849             0x0395 | HAS_VOWEL | HAS_ACCENT,  // Ἒ
850             0x0395 | HAS_VOWEL | HAS_ACCENT,  // Ἓ
851             0x0395 | HAS_VOWEL | HAS_ACCENT,  // Ἔ
852             0x0395 | HAS_VOWEL | HAS_ACCENT,  // Ἕ
853             0,
854             0,
855             0x0397 | HAS_VOWEL,  // ἠ
856             0x0397 | HAS_VOWEL,  // ἡ
857             0x0397 | HAS_VOWEL | HAS_ACCENT,  // ἢ
858             0x0397 | HAS_VOWEL | HAS_ACCENT,  // ἣ
859             0x0397 | HAS_VOWEL | HAS_ACCENT,  // ἤ
860             0x0397 | HAS_VOWEL | HAS_ACCENT,  // ἥ
861             0x0397 | HAS_VOWEL | HAS_ACCENT,  // ἦ
862             0x0397 | HAS_VOWEL | HAS_ACCENT,  // ἧ
863             0x0397 | HAS_VOWEL,  // Ἠ
864             0x0397 | HAS_VOWEL,  // Ἡ
865             0x0397 | HAS_VOWEL | HAS_ACCENT,  // Ἢ
866             0x0397 | HAS_VOWEL | HAS_ACCENT,  // Ἣ
867             0x0397 | HAS_VOWEL | HAS_ACCENT,  // Ἤ
868             0x0397 | HAS_VOWEL | HAS_ACCENT,  // Ἥ
869             0x0397 | HAS_VOWEL | HAS_ACCENT,  // Ἦ
870             0x0397 | HAS_VOWEL | HAS_ACCENT,  // Ἧ
871             0x0399 | HAS_VOWEL,  // ἰ
872             0x0399 | HAS_VOWEL,  // ἱ
873             0x0399 | HAS_VOWEL | HAS_ACCENT,  // ἲ
874             0x0399 | HAS_VOWEL | HAS_ACCENT,  // ἳ
875             0x0399 | HAS_VOWEL | HAS_ACCENT,  // ἴ
876             0x0399 | HAS_VOWEL | HAS_ACCENT,  // ἵ
877             0x0399 | HAS_VOWEL | HAS_ACCENT,  // ἶ
878             0x0399 | HAS_VOWEL | HAS_ACCENT,  // ἷ
879             0x0399 | HAS_VOWEL,  // Ἰ
880             0x0399 | HAS_VOWEL,  // Ἱ
881             0x0399 | HAS_VOWEL | HAS_ACCENT,  // Ἲ
882             0x0399 | HAS_VOWEL | HAS_ACCENT,  // Ἳ
883             0x0399 | HAS_VOWEL | HAS_ACCENT,  // Ἴ
884             0x0399 | HAS_VOWEL | HAS_ACCENT,  // Ἵ
885             0x0399 | HAS_VOWEL | HAS_ACCENT,  // Ἶ
886             0x0399 | HAS_VOWEL | HAS_ACCENT,  // Ἷ
887             0x039F | HAS_VOWEL,  // ὀ
888             0x039F | HAS_VOWEL,  // ὁ
889             0x039F | HAS_VOWEL | HAS_ACCENT,  // ὂ
890             0x039F | HAS_VOWEL | HAS_ACCENT,  // ὃ
891             0x039F | HAS_VOWEL | HAS_ACCENT,  // ὄ
892             0x039F | HAS_VOWEL | HAS_ACCENT,  // ὅ
893             0,
894             0,
895             0x039F | HAS_VOWEL,  // Ὀ
896             0x039F | HAS_VOWEL,  // Ὁ
897             0x039F | HAS_VOWEL | HAS_ACCENT,  // Ὂ
898             0x039F | HAS_VOWEL | HAS_ACCENT,  // Ὃ
899             0x039F | HAS_VOWEL | HAS_ACCENT,  // Ὄ
900             0x039F | HAS_VOWEL | HAS_ACCENT,  // Ὅ
901             0,
902             0,
903             0x03A5 | HAS_VOWEL,  // ὐ
904             0x03A5 | HAS_VOWEL,  // ὑ
905             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // ὒ
906             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // ὓ
907             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // ὔ
908             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // ὕ
909             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // ὖ
910             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // ὗ
911             0,
912             0x03A5 | HAS_VOWEL,  // Ὑ
913             0,
914             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // Ὓ
915             0,
916             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // Ὕ
917             0,
918             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // Ὗ
919             0x03A9 | HAS_VOWEL,  // ὠ
920             0x03A9 | HAS_VOWEL,  // ὡ
921             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // ὢ
922             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // ὣ
923             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // ὤ
924             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // ὥ
925             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // ὦ
926             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // ὧ
927             0x03A9 | HAS_VOWEL,  // Ὠ
928             0x03A9 | HAS_VOWEL,  // Ὡ
929             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // Ὢ
930             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // Ὣ
931             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // Ὤ
932             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // Ὥ
933             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // Ὦ
934             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // Ὧ
935             0x0391 | HAS_VOWEL | HAS_ACCENT,  // ὰ
936             0x0391 | HAS_VOWEL | HAS_ACCENT,  // ά
937             0x0395 | HAS_VOWEL | HAS_ACCENT,  // ὲ
938             0x0395 | HAS_VOWEL | HAS_ACCENT,  // έ
939             0x0397 | HAS_VOWEL | HAS_ACCENT,  // ὴ
940             0x0397 | HAS_VOWEL | HAS_ACCENT,  // ή
941             0x0399 | HAS_VOWEL | HAS_ACCENT,  // ὶ
942             0x0399 | HAS_VOWEL | HAS_ACCENT,  // ί
943             0x039F | HAS_VOWEL | HAS_ACCENT,  // ὸ
944             0x039F | HAS_VOWEL | HAS_ACCENT,  // ό
945             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // ὺ
946             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // ύ
947             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // ὼ
948             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // ώ
949             0,
950             0,
951             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // ᾀ
952             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // ᾁ
953             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾂ
954             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾃ
955             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾄ
956             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾅ
957             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾆ
958             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾇ
959             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // ᾈ
960             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // ᾉ
961             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾊ
962             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾋ
963             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾌ
964             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾍ
965             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾎ
966             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾏ
967             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // ᾐ
968             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // ᾑ
969             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾒ
970             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾓ
971             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾔ
972             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾕ
973             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾖ
974             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾗ
975             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // ᾘ
976             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // ᾙ
977             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾚ
978             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾛ
979             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾜ
980             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾝ
981             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾞ
982             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾟ
983             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // ᾠ
984             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // ᾡ
985             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾢ
986             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾣ
987             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾤ
988             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾥ
989             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾦ
990             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾧ
991             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // ᾨ
992             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // ᾩ
993             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾪ
994             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾫ
995             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾬ
996             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾭ
997             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾮ
998             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾯ
999             0x0391 | HAS_VOWEL,  // ᾰ
1000             0x0391 | HAS_VOWEL,  // ᾱ
1001             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾲ
1002             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // ᾳ
1003             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾴ
1004             0,
1005             0x0391 | HAS_VOWEL | HAS_ACCENT,  // ᾶ
1006             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ᾷ
1007             0x0391 | HAS_VOWEL,  // Ᾰ
1008             0x0391 | HAS_VOWEL,  // Ᾱ
1009             0x0391 | HAS_VOWEL | HAS_ACCENT,  // Ὰ
1010             0x0391 | HAS_VOWEL | HAS_ACCENT,  // Ά
1011             0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // ᾼ
1012             0,
1013             0x0399 | HAS_VOWEL,  // ι
1014             0,
1015             0,
1016             0,
1017             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ῂ
1018             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // ῃ
1019             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ῄ
1020             0,
1021             0x0397 | HAS_VOWEL | HAS_ACCENT,  // ῆ
1022             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ῇ
1023             0x0395 | HAS_VOWEL | HAS_ACCENT,  // Ὲ
1024             0x0395 | HAS_VOWEL | HAS_ACCENT,  // Έ
1025             0x0397 | HAS_VOWEL | HAS_ACCENT,  // Ὴ
1026             0x0397 | HAS_VOWEL | HAS_ACCENT,  // Ή
1027             0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // ῌ
1028             0,
1029             0,
1030             0,
1031             0x0399 | HAS_VOWEL,  // ῐ
1032             0x0399 | HAS_VOWEL,  // ῑ
1033             0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,  // ῒ
1034             0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,  // ΐ
1035             0,
1036             0,
1037             0x0399 | HAS_VOWEL | HAS_ACCENT,  // ῖ
1038             0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,  // ῗ
1039             0x0399 | HAS_VOWEL,  // Ῐ
1040             0x0399 | HAS_VOWEL,  // Ῑ
1041             0x0399 | HAS_VOWEL | HAS_ACCENT,  // Ὶ
1042             0x0399 | HAS_VOWEL | HAS_ACCENT,  // Ί
1043             0,
1044             0,
1045             0,
1046             0,
1047             0x03A5 | HAS_VOWEL,  // ῠ
1048             0x03A5 | HAS_VOWEL,  // ῡ
1049             0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,  // ῢ
1050             0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,  // ΰ
1051             0x03A1,  // ῤ
1052             0x03A1,  // ῥ
1053             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // ῦ
1054             0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,  // ῧ
1055             0x03A5 | HAS_VOWEL,  // Ῠ
1056             0x03A5 | HAS_VOWEL,  // Ῡ
1057             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // Ὺ
1058             0x03A5 | HAS_VOWEL | HAS_ACCENT,  // Ύ
1059             0x03A1,  // Ῥ
1060             0,
1061             0,
1062             0,
1063             0,
1064             0,
1065             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ῲ
1066             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // ῳ
1067             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ῴ
1068             0,
1069             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // ῶ
1070             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  // ῷ
1071             0x039F | HAS_VOWEL | HAS_ACCENT,  // Ὸ
1072             0x039F | HAS_VOWEL | HAS_ACCENT,  // Ό
1073             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // Ὼ
1074             0x03A9 | HAS_VOWEL | HAS_ACCENT,  // Ώ
1075             0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  // ῼ
1076             0,
1077             0,
1078             0,
1079         };
1080 
1081         // U+2126 Ohm sign
1082         private static final char data2126 = 0x03A9 | HAS_VOWEL;  // Ω
1083 
getLetterData(int c)1084         private static final int getLetterData(int c) {
1085             if (c < 0x370 || 0x2126 < c || (0x3ff < c && c < 0x1f00)) {
1086                 return 0;
1087             } else if (c <= 0x3ff) {
1088                 return data0370[c - 0x370];
1089             } else if (c <= 0x1fff) {
1090                 return data1F00[c - 0x1f00];
1091             } else if (c == 0x2126) {
1092                 return data2126;
1093             } else {
1094                 return 0;
1095             }
1096         }
1097 
1098         /**
1099          * Returns a non-zero value for each of the Greek combining diacritics
1100          * listed in The Unicode Standard, version 8, chapter 7.2 Greek,
1101          * plus some perispomeni look-alikes.
1102          */
getDiacriticData(int c)1103         private static final int getDiacriticData(int c) {
1104             switch (c) {
1105             case '\u0300':  // varia
1106             case '\u0301':  // tonos = oxia
1107             case '\u0342':  // perispomeni
1108             case '\u0302':  // circumflex can look like perispomeni
1109             case '\u0303':  // tilde can look like perispomeni
1110             case '\u0311':  // inverted breve can look like perispomeni
1111                 return HAS_ACCENT;
1112             case '\u0308':  // dialytika = diaeresis
1113                 return HAS_COMBINING_DIALYTIKA;
1114             case '\u0344':  // dialytika tonos
1115                 return HAS_COMBINING_DIALYTIKA | HAS_ACCENT;
1116             case '\u0345':  // ypogegrammeni = iota subscript
1117                 return HAS_YPOGEGRAMMENI;
1118             case '\u0304':  // macron
1119             case '\u0306':  // breve
1120             case '\u0313':  // comma above
1121             case '\u0314':  // reversed comma above
1122             case '\u0343':  // koronis
1123                 return HAS_OTHER_GREEK_DIACRITIC;
1124             default:
1125                 return 0;
1126             }
1127         }
1128 
isFollowedByCasedLetter(CharSequence s, int i)1129         private static boolean isFollowedByCasedLetter(CharSequence s, int i) {
1130             while (i < s.length()) {
1131                 int c = Character.codePointAt(s, i);
1132                 int type = UCaseProps.INSTANCE.getTypeOrIgnorable(c);
1133                 if ((type & UCaseProps.IGNORABLE) != 0) {
1134                     // Case-ignorable, continue with the loop.
1135                     i += Character.charCount(c);
1136                 } else if (type != UCaseProps.NONE) {
1137                     return true;  // Followed by cased letter.
1138                 } else {
1139                     return false;  // Uncased and not case-ignorable.
1140                 }
1141             }
1142             return false;  // Not followed by cased letter.
1143         }
1144 
1145         /**
1146          * Greek string uppercasing with a state machine.
1147          * Probably simpler than a stateless function that has to figure out complex context-before
1148          * for each character.
1149          * TODO: Try to re-consolidate one way or another with the non-Greek function.
1150          *
1151          * <p>Keep this consistent with the C++ versions in ustrcase.cpp (UTF-16) and ucasemap.cpp (UTF-8).
1152          * @throws IOException
1153          */
toUpper(int options, CharSequence src, A dest, Edits edits)1154         private static <A extends Appendable> A toUpper(int options,
1155                 CharSequence src, A dest, Edits edits) throws IOException {
1156             int state = 0;
1157             for (int i = 0; i < src.length();) {
1158                 int c = Character.codePointAt(src, i);
1159                 int nextIndex = i + Character.charCount(c);
1160                 int nextState = 0;
1161                 int type = UCaseProps.INSTANCE.getTypeOrIgnorable(c);
1162                 if ((type & UCaseProps.IGNORABLE) != 0) {
1163                     // c is case-ignorable
1164                     nextState |= (state & AFTER_CASED);
1165                 } else if (type != UCaseProps.NONE) {
1166                     // c is cased
1167                     nextState |= AFTER_CASED;
1168                 }
1169                 int data = getLetterData(c);
1170                 if (data > 0) {
1171                     int upper = data & UPPER_MASK;
1172                     // Add a dialytika to this iota or ypsilon vowel
1173                     // if we removed a tonos from the previous vowel,
1174                     // and that previous vowel did not also have (or gain) a dialytika.
1175                     // Adding one only to the final vowel in a longer sequence
1176                     // (which does not occur in normal writing) would require lookahead.
1177                     // Set the same flag as for preserving an existing dialytika.
1178                     if ((data & HAS_VOWEL) != 0 && (state & AFTER_VOWEL_WITH_ACCENT) != 0 &&
1179                             (upper == 'Ι' || upper == 'Υ')) {
1180                         data |= HAS_DIALYTIKA;
1181                     }
1182                     int numYpogegrammeni = 0;  // Map each one to a trailing, spacing, capital iota.
1183                     if ((data & HAS_YPOGEGRAMMENI) != 0) {
1184                         numYpogegrammeni = 1;
1185                     }
1186                     // Skip combining diacritics after this Greek letter.
1187                     while (nextIndex < src.length()) {
1188                         int diacriticData = getDiacriticData(src.charAt(nextIndex));
1189                         if (diacriticData != 0) {
1190                             data |= diacriticData;
1191                             if ((diacriticData & HAS_YPOGEGRAMMENI) != 0) {
1192                                 ++numYpogegrammeni;
1193                             }
1194                             ++nextIndex;
1195                         } else {
1196                             break;  // not a Greek diacritic
1197                         }
1198                     }
1199                     if ((data & HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA) == HAS_VOWEL_AND_ACCENT) {
1200                         nextState |= AFTER_VOWEL_WITH_ACCENT;
1201                     }
1202                     // Map according to Greek rules.
1203                     boolean addTonos = false;
1204                     if (upper == 'Η' &&
1205                             (data & HAS_ACCENT) != 0 &&
1206                             numYpogegrammeni == 0 &&
1207                             (state & AFTER_CASED) == 0 &&
1208                             !isFollowedByCasedLetter(src, nextIndex)) {
1209                         // Keep disjunctive "or" with (only) a tonos.
1210                         // We use the same "word boundary" conditions as for the Final_Sigma test.
1211                         if (i == nextIndex) {
1212                             upper = 'Ή';  // Preserve the precomposed form.
1213                         } else {
1214                             addTonos = true;
1215                         }
1216                     } else if ((data & HAS_DIALYTIKA) != 0) {
1217                         // Preserve a vowel with dialytika in precomposed form if it exists.
1218                         if (upper == 'Ι') {
1219                             upper = 'Ϊ';
1220                             data &= ~HAS_EITHER_DIALYTIKA;
1221                         } else if (upper == 'Υ') {
1222                             upper = 'Ϋ';
1223                             data &= ~HAS_EITHER_DIALYTIKA;
1224                         }
1225                     }
1226 
1227                     boolean change;
1228                     if (edits == null && (options & OMIT_UNCHANGED_TEXT) == 0) {
1229                         change = true;  // common, simple usage
1230                     } else {
1231                         // Find out first whether we are changing the text.
1232                         change = src.charAt(i) != upper || numYpogegrammeni > 0;
1233                         int i2 = i + 1;
1234                         if ((data & HAS_EITHER_DIALYTIKA) != 0) {
1235                             change |= i2 >= nextIndex || src.charAt(i2) != 0x308;
1236                             ++i2;
1237                         }
1238                         if (addTonos) {
1239                             change |= i2 >= nextIndex || src.charAt(i2) != 0x301;
1240                             ++i2;
1241                         }
1242                         int oldLength = nextIndex - i;
1243                         int newLength = (i2 - i) + numYpogegrammeni;
1244                         change |= oldLength != newLength;
1245                         if (change) {
1246                             if (edits != null) {
1247                                 edits.addReplace(oldLength, newLength);
1248                             }
1249                         } else {
1250                             if (edits != null) {
1251                                 edits.addUnchanged(oldLength);
1252                             }
1253                             // Write unchanged text?
1254                             change = (options & OMIT_UNCHANGED_TEXT) == 0;
1255                         }
1256                     }
1257 
1258                     if (change) {
1259                         dest.append((char)upper);
1260                         if ((data & HAS_EITHER_DIALYTIKA) != 0) {
1261                             dest.append('\u0308');  // restore or add a dialytika
1262                         }
1263                         if (addTonos) {
1264                             dest.append('\u0301');
1265                         }
1266                         while (numYpogegrammeni > 0) {
1267                             dest.append('Ι');
1268                             --numYpogegrammeni;
1269                         }
1270                     }
1271                 } else {
1272                     c = UCaseProps.INSTANCE.toFullUpper(c, null, dest, UCaseProps.LOC_GREEK);
1273                     appendResult(c, dest, nextIndex - i, options, edits);
1274                 }
1275                 i = nextIndex;
1276                 state = nextState;
1277             }
1278             return dest;
1279         }
1280     }
1281 }
1282