1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // © 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 package android.icu.impl; 5 6 import java.io.IOException; 7 import java.text.CharacterIterator; 8 import java.util.Locale; 9 10 import android.icu.lang.UCharacter; 11 import android.icu.lang.UCharacterCategory; 12 import android.icu.text.BreakIterator; 13 import android.icu.text.Edits; 14 import android.icu.util.ICUUncheckedIOException; 15 import android.icu.util.ULocale; 16 17 /** 18 * @hide Only a subset of ICU is exposed in Android 19 */ 20 public final class CaseMapImpl { 21 /** 22 * Implementation of UCaseProps.ContextIterator, iterates over a String. 23 * See ustrcase.c/utf16_caseContextIterator(). 24 */ 25 public static final class StringContextIterator implements UCaseProps.ContextIterator { 26 /** 27 * Constructor. 28 * @param src String to iterate over. 29 */ StringContextIterator(CharSequence src)30 public StringContextIterator(CharSequence src) { 31 this.s=src; 32 limit=src.length(); 33 cpStart=cpLimit=index=0; 34 dir=0; 35 } 36 37 /** 38 * Set the iteration limit for nextCaseMapCP() to an index within the string. 39 * If the limit parameter is negative or past the string, then the 40 * string length is restored as the iteration limit. 41 * 42 * <p>This limit does not affect the next() function which always 43 * iterates to the very end of the string. 44 * 45 * @param lim The iteration limit. 46 */ setLimit(int lim)47 public void setLimit(int lim) { 48 if(0<=lim && lim<=s.length()) { 49 limit=lim; 50 } else { 51 limit=s.length(); 52 } 53 } 54 55 /** 56 * Move to the iteration limit without fetching code points up to there. 57 */ moveToLimit()58 public void moveToLimit() { 59 cpStart=cpLimit=limit; 60 } 61 62 /** 63 * Iterate forward through the string to fetch the next code point 64 * to be case-mapped, and set the context indexes for it. 65 * 66 * <p>When the iteration limit is reached (and -1 is returned), 67 * getCPStart() will be at the iteration limit. 68 * 69 * <p>Iteration with next() does not affect the position for nextCaseMapCP(). 70 * 71 * @return The next code point to be case-mapped, or <0 when the iteration is done. 72 */ nextCaseMapCP()73 public int nextCaseMapCP() { 74 cpStart=cpLimit; 75 if(cpLimit<limit) { 76 int c=Character.codePointAt(s, cpLimit); 77 cpLimit+=Character.charCount(c); 78 return c; 79 } else { 80 return -1; 81 } 82 } 83 84 /** 85 * Returns the start of the code point that was last returned 86 * by nextCaseMapCP(). 87 */ getCPStart()88 public int getCPStart() { 89 return cpStart; 90 } 91 92 /** 93 * Returns the limit of the code point that was last returned 94 * by nextCaseMapCP(). 95 */ getCPLimit()96 public int getCPLimit() { 97 return cpLimit; 98 } 99 getCPLength()100 public int getCPLength() { 101 return cpLimit-cpStart; 102 } 103 104 // implement UCaseProps.ContextIterator 105 // The following code is not used anywhere in this private class 106 @Override reset(int direction)107 public void reset(int direction) { 108 if(direction>0) { 109 /* reset for forward iteration */ 110 dir=1; 111 index=cpLimit; 112 } else if(direction<0) { 113 /* reset for backward iteration */ 114 dir=-1; 115 index=cpStart; 116 } else { 117 // not a valid direction 118 dir=0; 119 index=0; 120 } 121 } 122 123 @Override next()124 public int next() { 125 int c; 126 127 if(dir>0 && index<s.length()) { 128 c=Character.codePointAt(s, index); 129 index+=Character.charCount(c); 130 return c; 131 } else if(dir<0 && index>0) { 132 c=Character.codePointBefore(s, index); 133 index-=Character.charCount(c); 134 return c; 135 } 136 return -1; 137 } 138 139 // variables 140 protected CharSequence s; 141 protected int index, limit, cpStart, cpLimit; 142 protected int dir; // 0=initial state >0=forward <0=backward 143 } 144 145 public static final int TITLECASE_WHOLE_STRING = 0x20; 146 public static final int TITLECASE_SENTENCES = 0x40; 147 148 /** 149 * Bit mask for the titlecasing iterator options bit field. 150 * Currently only 3 out of 8 values are used: 151 * 0 (words), TITLECASE_WHOLE_STRING, TITLECASE_SENTENCES. 152 * See stringoptions.h. 153 * @hide draft / provisional / internal are hidden on Android 154 */ 155 private static final int TITLECASE_ITERATOR_MASK = 0xe0; 156 157 public static final int TITLECASE_ADJUST_TO_CASED = 0x400; 158 159 /** 160 * Bit mask for the titlecasing index adjustment options bit set. 161 * Currently two bits are defined: 162 * TITLECASE_NO_BREAK_ADJUSTMENT, TITLECASE_ADJUST_TO_CASED. 163 * See stringoptions.h. 164 * @hide draft / provisional / internal are hidden on Android 165 */ 166 private static final int TITLECASE_ADJUSTMENT_MASK = 0x600; 167 addTitleAdjustmentOption(int options, int newOption)168 public static int addTitleAdjustmentOption(int options, int newOption) { 169 int adjOptions = options & TITLECASE_ADJUSTMENT_MASK; 170 if (adjOptions !=0 && adjOptions != newOption) { 171 throw new IllegalArgumentException("multiple titlecasing index adjustment options"); 172 } 173 return options | newOption; 174 } 175 176 private static final int LNS = 177 (1 << UCharacterCategory.UPPERCASE_LETTER) | 178 (1 << UCharacterCategory.LOWERCASE_LETTER) | 179 (1 << UCharacterCategory.TITLECASE_LETTER) | 180 // Not MODIFIER_LETTER: We count only cased modifier letters. 181 (1 << UCharacterCategory.OTHER_LETTER) | 182 183 (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER) | 184 (1 << UCharacterCategory.LETTER_NUMBER) | 185 (1 << UCharacterCategory.OTHER_NUMBER) | 186 187 (1 << UCharacterCategory.MATH_SYMBOL) | 188 (1 << UCharacterCategory.CURRENCY_SYMBOL) | 189 (1 << UCharacterCategory.MODIFIER_SYMBOL) | 190 (1 << UCharacterCategory.OTHER_SYMBOL) | 191 192 (1 << UCharacterCategory.PRIVATE_USE); 193 isLNS(int c)194 private static boolean isLNS(int c) { 195 // Letter, number, symbol, 196 // or a private use code point because those are typically used as letters or numbers. 197 // Consider modifier letters only if they are cased. 198 int gc = UCharacterProperty.INSTANCE.getType(c); 199 return ((1 << gc) & LNS) != 0 || 200 (gc == UCharacterCategory.MODIFIER_LETTER && 201 UCaseProps.INSTANCE.getType(c) != UCaseProps.NONE); 202 } 203 addTitleIteratorOption(int options, int newOption)204 public static int addTitleIteratorOption(int options, int newOption) { 205 int iterOptions = options & TITLECASE_ITERATOR_MASK; 206 if (iterOptions !=0 && iterOptions != newOption) { 207 throw new IllegalArgumentException("multiple titlecasing iterator options"); 208 } 209 return options | newOption; 210 } 211 getTitleBreakIterator( Locale locale, int options, BreakIterator iter)212 public static BreakIterator getTitleBreakIterator( 213 Locale locale, int options, BreakIterator iter) { 214 options &= TITLECASE_ITERATOR_MASK; 215 if (options != 0 && iter != null) { 216 throw new IllegalArgumentException( 217 "titlecasing iterator option together with an explicit iterator"); 218 } 219 if (iter == null) { 220 switch (options) { 221 case 0: 222 iter = BreakIterator.getWordInstance(locale); 223 break; 224 case TITLECASE_WHOLE_STRING: 225 iter = new WholeStringBreakIterator(); 226 break; 227 case TITLECASE_SENTENCES: 228 iter = BreakIterator.getSentenceInstance(locale); 229 break; 230 default: 231 throw new IllegalArgumentException("unknown titlecasing iterator option"); 232 } 233 } 234 return iter; 235 } 236 getTitleBreakIterator( ULocale locale, int options, BreakIterator iter)237 public static BreakIterator getTitleBreakIterator( 238 ULocale locale, int options, BreakIterator iter) { 239 options &= TITLECASE_ITERATOR_MASK; 240 if (options != 0 && iter != null) { 241 throw new IllegalArgumentException( 242 "titlecasing iterator option together with an explicit iterator"); 243 } 244 if (iter == null) { 245 switch (options) { 246 case 0: 247 iter = BreakIterator.getWordInstance(locale); 248 break; 249 case TITLECASE_WHOLE_STRING: 250 iter = new WholeStringBreakIterator(); 251 break; 252 case TITLECASE_SENTENCES: 253 iter = BreakIterator.getSentenceInstance(locale); 254 break; 255 default: 256 throw new IllegalArgumentException("unknown titlecasing iterator option"); 257 } 258 } 259 return iter; 260 } 261 262 /** 263 * Omit unchanged text when case-mapping with Edits. 264 */ 265 public static final int OMIT_UNCHANGED_TEXT = 0x4000; 266 267 private static final class WholeStringBreakIterator extends BreakIterator { 268 private int length; 269 notImplemented()270 private static void notImplemented() { 271 throw new UnsupportedOperationException("should not occur"); 272 } 273 274 @Override first()275 public int first() { 276 return 0; 277 } 278 279 @Override last()280 public int last() { 281 notImplemented(); 282 return 0; 283 } 284 285 @Override next(int n)286 public int next(int n) { 287 notImplemented(); 288 return 0; 289 } 290 291 @Override next()292 public int next() { 293 return length; 294 } 295 296 @Override previous()297 public int previous() { 298 notImplemented(); 299 return 0; 300 } 301 302 @Override following(int offset)303 public int following(int offset) { 304 notImplemented(); 305 return 0; 306 } 307 308 @Override current()309 public int current() { 310 notImplemented(); 311 return 0; 312 } 313 314 @Override getText()315 public CharacterIterator getText() { 316 notImplemented(); 317 return null; 318 } 319 320 @Override setText(CharacterIterator newText)321 public void setText(CharacterIterator newText) { 322 length = newText.getEndIndex(); 323 } 324 325 @Override setText(CharSequence newText)326 public void setText(CharSequence newText) { 327 length = newText.length(); 328 } 329 330 @Override setText(String newText)331 public void setText(String newText) { 332 length = newText.length(); 333 } 334 } 335 appendCodePoint(Appendable a, int c)336 private static int appendCodePoint(Appendable a, int c) throws IOException { 337 if (c <= Character.MAX_VALUE) { 338 a.append((char)c); 339 return 1; 340 } else { 341 a.append((char)(0xd7c0 + (c >> 10))); 342 a.append((char)(Character.MIN_LOW_SURROGATE + (c & 0x3ff))); 343 return 2; 344 } 345 } 346 347 /** 348 * Appends a full case mapping result, see {@link UCaseProps#MAX_STRING_LENGTH}. 349 * @throws IOException 350 */ appendResult(int result, Appendable dest, int cpLength, int options, Edits edits)351 private static void appendResult(int result, Appendable dest, 352 int cpLength, int options, Edits edits) throws IOException { 353 // Decode the result. 354 if (result < 0) { 355 // (not) original code point 356 if (edits != null) { 357 edits.addUnchanged(cpLength); 358 } 359 if ((options & OMIT_UNCHANGED_TEXT) != 0) { 360 return; 361 } 362 appendCodePoint(dest, ~result); 363 } else if (result <= UCaseProps.MAX_STRING_LENGTH) { 364 // The mapping has already been appended to result. 365 if (edits != null) { 366 edits.addReplace(cpLength, result); 367 } 368 } else { 369 // Append the single-code point mapping. 370 int length = appendCodePoint(dest, result); 371 if (edits != null) { 372 edits.addReplace(cpLength, length); 373 } 374 } 375 } 376 appendUnchanged(CharSequence src, int start, int length, Appendable dest, int options, Edits edits)377 private static final void appendUnchanged(CharSequence src, int start, int length, 378 Appendable dest, int options, Edits edits) throws IOException { 379 if (length > 0) { 380 if (edits != null) { 381 edits.addUnchanged(length); 382 } 383 if ((options & OMIT_UNCHANGED_TEXT) != 0) { 384 return; 385 } 386 dest.append(src, start, start + length); 387 } 388 } 389 applyEdits(CharSequence src, StringBuilder replacementChars, Edits edits)390 private static String applyEdits(CharSequence src, StringBuilder replacementChars, Edits edits) { 391 if (!edits.hasChanges()) { 392 return src.toString(); 393 } 394 StringBuilder result = new StringBuilder(src.length() + edits.lengthDelta()); 395 for (Edits.Iterator ei = edits.getCoarseIterator(); ei.next();) { 396 if (ei.hasChange()) { 397 int i = ei.replacementIndex(); 398 result.append(replacementChars, i, i + ei.newLength()); 399 } else { 400 int i = ei.sourceIndex(); 401 result.append(src, i, i + ei.oldLength()); 402 } 403 } 404 return result.toString(); 405 } 406 internalToLower(int caseLocale, int options, StringContextIterator iter, Appendable dest, Edits edits)407 private static void internalToLower(int caseLocale, int options, StringContextIterator iter, 408 Appendable dest, Edits edits) throws IOException { 409 int c; 410 while ((c = iter.nextCaseMapCP()) >= 0) { 411 c = UCaseProps.INSTANCE.toFullLower(c, iter, dest, caseLocale); 412 appendResult(c, dest, iter.getCPLength(), options, edits); 413 } 414 } 415 toLower(int caseLocale, int options, CharSequence src)416 public static String toLower(int caseLocale, int options, CharSequence src) { 417 if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) { 418 if (src.length() == 0) { 419 return src.toString(); 420 } 421 // Collect and apply only changes. 422 // Good if no or few changes. Bad (slow) if many changes. 423 Edits edits = new Edits(); 424 StringBuilder replacementChars = toLower( 425 caseLocale, options | OMIT_UNCHANGED_TEXT, src, new StringBuilder(), edits); 426 return applyEdits(src, replacementChars, edits); 427 } else { 428 return toLower(caseLocale, options, src, 429 new StringBuilder(src.length()), null).toString(); 430 } 431 } 432 toLower(int caseLocale, int options, CharSequence src, A dest, Edits edits)433 public static <A extends Appendable> A toLower(int caseLocale, int options, 434 CharSequence src, A dest, Edits edits) { 435 try { 436 if (edits != null) { 437 edits.reset(); 438 } 439 StringContextIterator iter = new StringContextIterator(src); 440 internalToLower(caseLocale, options, iter, dest, edits); 441 return dest; 442 } catch (IOException e) { 443 throw new ICUUncheckedIOException(e); 444 } 445 } 446 toUpper(int caseLocale, int options, CharSequence src)447 public static String toUpper(int caseLocale, int options, CharSequence src) { 448 if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) { 449 if (src.length() == 0) { 450 return src.toString(); 451 } 452 // Collect and apply only changes. 453 // Good if no or few changes. Bad (slow) if many changes. 454 Edits edits = new Edits(); 455 StringBuilder replacementChars = toUpper( 456 caseLocale, options | OMIT_UNCHANGED_TEXT, src, new StringBuilder(), edits); 457 return applyEdits(src, replacementChars, edits); 458 } else { 459 return toUpper(caseLocale, options, src, 460 new StringBuilder(src.length()), null).toString(); 461 } 462 } 463 toUpper(int caseLocale, int options, CharSequence src, A dest, Edits edits)464 public static <A extends Appendable> A toUpper(int caseLocale, int options, 465 CharSequence src, A dest, Edits edits) { 466 try { 467 if (edits != null) { 468 edits.reset(); 469 } 470 if (caseLocale == UCaseProps.LOC_GREEK) { 471 return GreekUpper.toUpper(options, src, dest, edits); 472 } 473 StringContextIterator iter = new StringContextIterator(src); 474 int c; 475 while ((c = iter.nextCaseMapCP()) >= 0) { 476 c = UCaseProps.INSTANCE.toFullUpper(c, iter, dest, caseLocale); 477 appendResult(c, dest, iter.getCPLength(), options, edits); 478 } 479 return dest; 480 } catch (IOException e) { 481 throw new ICUUncheckedIOException(e); 482 } 483 } 484 toTitle(int caseLocale, int options, BreakIterator iter, CharSequence src)485 public static String toTitle(int caseLocale, int options, BreakIterator iter, CharSequence src) { 486 if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) { 487 if (src.length() == 0) { 488 return src.toString(); 489 } 490 // Collect and apply only changes. 491 // Good if no or few changes. Bad (slow) if many changes. 492 Edits edits = new Edits(); 493 StringBuilder replacementChars = toTitle( 494 caseLocale, options | OMIT_UNCHANGED_TEXT, iter, src, 495 new StringBuilder(), edits); 496 return applyEdits(src, replacementChars, edits); 497 } else { 498 return toTitle(caseLocale, options, iter, src, 499 new StringBuilder(src.length()), null).toString(); 500 } 501 } 502 toTitle( int caseLocale, int options, BreakIterator titleIter, CharSequence src, A dest, Edits edits)503 public static <A extends Appendable> A toTitle( 504 int caseLocale, int options, BreakIterator titleIter, 505 CharSequence src, A dest, Edits edits) { 506 try { 507 if (edits != null) { 508 edits.reset(); 509 } 510 511 /* set up local variables */ 512 StringContextIterator iter = new StringContextIterator(src); 513 int srcLength = src.length(); 514 int prev=0; 515 boolean isFirstIndex=true; 516 517 /* titlecasing loop */ 518 while(prev<srcLength) { 519 /* find next index where to titlecase */ 520 int index; 521 if(isFirstIndex) { 522 isFirstIndex=false; 523 index=titleIter.first(); 524 } else { 525 index=titleIter.next(); 526 } 527 if(index==BreakIterator.DONE || index>srcLength) { 528 index=srcLength; 529 } 530 531 /* 532 * Segment [prev..index[ into 3 parts: 533 * a) skipped characters (copy as-is) [prev..titleStart[ 534 * b) first letter (titlecase) [titleStart..titleLimit[ 535 * c) subsequent characters (lowercase) [titleLimit..index[ 536 */ 537 if(prev<index) { 538 // Find and copy skipped characters [prev..titleStart[ 539 int titleStart=prev; 540 iter.setLimit(index); 541 int c=iter.nextCaseMapCP(); 542 if ((options&UCharacter.TITLECASE_NO_BREAK_ADJUSTMENT)==0) { 543 // Adjust the titlecasing index to the next cased character, 544 // or to the next letter/number/symbol/private use. 545 // Stop with titleStart<titleLimit<=index 546 // if there is a character to be titlecased, 547 // or else stop with titleStart==titleLimit==index. 548 boolean toCased = (options&CaseMapImpl.TITLECASE_ADJUST_TO_CASED) != 0; 549 while ((toCased ? 550 UCaseProps.NONE==UCaseProps.INSTANCE.getType(c) : 551 !CaseMapImpl.isLNS(c)) && 552 (c=iter.nextCaseMapCP())>=0) {} 553 // If c<0 then we have only uncased characters in [prev..index[ 554 // and stopped with titleStart==titleLimit==index. 555 titleStart=iter.getCPStart(); 556 if (prev < titleStart) { 557 appendUnchanged(src, prev, titleStart-prev, dest, options, edits); 558 } 559 } 560 561 if(titleStart<index) { 562 int titleLimit=iter.getCPLimit(); 563 // titlecase c which is from [titleStart..titleLimit[ 564 c = UCaseProps.INSTANCE.toFullTitle(c, iter, dest, caseLocale); 565 appendResult(c, dest, iter.getCPLength(), options, edits); 566 567 // Special case Dutch IJ titlecasing 568 if (titleStart+1 < index && caseLocale == UCaseProps.LOC_DUTCH) { 569 char c1 = src.charAt(titleStart); 570 if ((c1 == 'i' || c1 == 'I')) { 571 char c2 = src.charAt(titleStart+1); 572 if (c2 == 'j') { 573 dest.append('J'); 574 if (edits != null) { 575 edits.addReplace(1, 1); 576 } 577 c = iter.nextCaseMapCP(); 578 titleLimit++; 579 assert c == c2; 580 assert titleLimit == iter.getCPLimit(); 581 } else if (c2 == 'J') { 582 // Keep the capital J from getting lowercased. 583 appendUnchanged(src, titleStart + 1, 1, dest, options, edits); 584 c = iter.nextCaseMapCP(); 585 titleLimit++; 586 assert c == c2; 587 assert titleLimit == iter.getCPLimit(); 588 } 589 } 590 } 591 592 // lowercase [titleLimit..index[ 593 if(titleLimit<index) { 594 if((options&UCharacter.TITLECASE_NO_LOWERCASE)==0) { 595 // Normal operation: Lowercase the rest of the word. 596 internalToLower(caseLocale, options, iter, dest, edits); 597 } else { 598 // Optionally just copy the rest of the word unchanged. 599 appendUnchanged(src, titleLimit, index-titleLimit, dest, options, edits); 600 iter.moveToLimit(); 601 } 602 } 603 } 604 } 605 606 prev=index; 607 } 608 return dest; 609 } catch (IOException e) { 610 throw new ICUUncheckedIOException(e); 611 } 612 } 613 fold(int options, CharSequence src)614 public static String fold(int options, CharSequence src) { 615 if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) { 616 if (src.length() == 0) { 617 return src.toString(); 618 } 619 // Collect and apply only changes. 620 // Good if no or few changes. Bad (slow) if many changes. 621 Edits edits = new Edits(); 622 StringBuilder replacementChars = fold( 623 options | OMIT_UNCHANGED_TEXT, src, new StringBuilder(), edits); 624 return applyEdits(src, replacementChars, edits); 625 } else { 626 return fold(options, src, new StringBuilder(src.length()), null).toString(); 627 } 628 } 629 fold(int options, CharSequence src, A dest, Edits edits)630 public static <A extends Appendable> A fold(int options, 631 CharSequence src, A dest, Edits edits) { 632 try { 633 if (edits != null) { 634 edits.reset(); 635 } 636 int length = src.length(); 637 for (int i = 0; i < length;) { 638 int c = Character.codePointAt(src, i); 639 int cpLength = Character.charCount(c); 640 i += cpLength; 641 c = UCaseProps.INSTANCE.toFullFolding(c, dest, options); 642 appendResult(c, dest, cpLength, options, edits); 643 } 644 return dest; 645 } catch (IOException e) { 646 throw new ICUUncheckedIOException(e); 647 } 648 } 649 650 private static final class GreekUpper { 651 // Data bits. 652 private static final int UPPER_MASK = 0x3ff; 653 private static final int HAS_VOWEL = 0x1000; 654 private static final int HAS_YPOGEGRAMMENI = 0x2000; 655 private static final int HAS_ACCENT = 0x4000; 656 private static final int HAS_DIALYTIKA = 0x8000; 657 // Further bits during data building and processing, not stored in the data map. 658 private static final int HAS_COMBINING_DIALYTIKA = 0x10000; 659 private static final int HAS_OTHER_GREEK_DIACRITIC = 0x20000; 660 661 private static final int HAS_VOWEL_AND_ACCENT = HAS_VOWEL | HAS_ACCENT; 662 private static final int HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA = 663 HAS_VOWEL_AND_ACCENT | HAS_DIALYTIKA; 664 private static final int HAS_EITHER_DIALYTIKA = HAS_DIALYTIKA | HAS_COMBINING_DIALYTIKA; 665 666 // State bits. 667 private static final int AFTER_CASED = 1; 668 private static final int AFTER_VOWEL_WITH_ACCENT = 2; 669 670 // Data generated by prototype code, see 671 // http://site.icu-project.org/design/case/greek-upper 672 // TODO: Move this data into ucase.icu. 673 private static final char[] data0370 = { 674 // U+0370..03FF 675 0x0370, // Ͱ 676 0x0370, // ͱ 677 0x0372, // Ͳ 678 0x0372, // ͳ 679 0, 680 0, 681 0x0376, // Ͷ 682 0x0376, // ͷ 683 0, 684 0, 685 0x037A, // ͺ 686 0x03FD, // ͻ 687 0x03FE, // ͼ 688 0x03FF, // ͽ 689 0, 690 0x037F, // Ϳ 691 0, 692 0, 693 0, 694 0, 695 0, 696 0, 697 0x0391 | HAS_VOWEL | HAS_ACCENT, // Ά 698 0, 699 0x0395 | HAS_VOWEL | HAS_ACCENT, // Έ 700 0x0397 | HAS_VOWEL | HAS_ACCENT, // Ή 701 0x0399 | HAS_VOWEL | HAS_ACCENT, // Ί 702 0, 703 0x039F | HAS_VOWEL | HAS_ACCENT, // Ό 704 0, 705 0x03A5 | HAS_VOWEL | HAS_ACCENT, // Ύ 706 0x03A9 | HAS_VOWEL | HAS_ACCENT, // Ώ 707 0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // ΐ 708 0x0391 | HAS_VOWEL, // Α 709 0x0392, // Β 710 0x0393, // Γ 711 0x0394, // Δ 712 0x0395 | HAS_VOWEL, // Ε 713 0x0396, // Ζ 714 0x0397 | HAS_VOWEL, // Η 715 0x0398, // Θ 716 0x0399 | HAS_VOWEL, // Ι 717 0x039A, // Κ 718 0x039B, // Λ 719 0x039C, // Μ 720 0x039D, // Ν 721 0x039E, // Ξ 722 0x039F | HAS_VOWEL, // Ο 723 0x03A0, // Π 724 0x03A1, // Ρ 725 0, 726 0x03A3, // Σ 727 0x03A4, // Τ 728 0x03A5 | HAS_VOWEL, // Υ 729 0x03A6, // Φ 730 0x03A7, // Χ 731 0x03A8, // Ψ 732 0x03A9 | HAS_VOWEL, // Ω 733 0x0399 | HAS_VOWEL | HAS_DIALYTIKA, // Ϊ 734 0x03A5 | HAS_VOWEL | HAS_DIALYTIKA, // Ϋ 735 0x0391 | HAS_VOWEL | HAS_ACCENT, // ά 736 0x0395 | HAS_VOWEL | HAS_ACCENT, // έ 737 0x0397 | HAS_VOWEL | HAS_ACCENT, // ή 738 0x0399 | HAS_VOWEL | HAS_ACCENT, // ί 739 0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // ΰ 740 0x0391 | HAS_VOWEL, // α 741 0x0392, // β 742 0x0393, // γ 743 0x0394, // δ 744 0x0395 | HAS_VOWEL, // ε 745 0x0396, // ζ 746 0x0397 | HAS_VOWEL, // η 747 0x0398, // θ 748 0x0399 | HAS_VOWEL, // ι 749 0x039A, // κ 750 0x039B, // λ 751 0x039C, // μ 752 0x039D, // ν 753 0x039E, // ξ 754 0x039F | HAS_VOWEL, // ο 755 0x03A0, // π 756 0x03A1, // ρ 757 0x03A3, // ς 758 0x03A3, // σ 759 0x03A4, // τ 760 0x03A5 | HAS_VOWEL, // υ 761 0x03A6, // φ 762 0x03A7, // χ 763 0x03A8, // ψ 764 0x03A9 | HAS_VOWEL, // ω 765 0x0399 | HAS_VOWEL | HAS_DIALYTIKA, // ϊ 766 0x03A5 | HAS_VOWEL | HAS_DIALYTIKA, // ϋ 767 0x039F | HAS_VOWEL | HAS_ACCENT, // ό 768 0x03A5 | HAS_VOWEL | HAS_ACCENT, // ύ 769 0x03A9 | HAS_VOWEL | HAS_ACCENT, // ώ 770 0x03CF, // Ϗ 771 0x0392, // ϐ 772 0x0398, // ϑ 773 0x03D2, // ϒ 774 0x03D2 | HAS_ACCENT, // ϓ 775 0x03D2 | HAS_DIALYTIKA, // ϔ 776 0x03A6, // ϕ 777 0x03A0, // ϖ 778 0x03CF, // ϗ 779 0x03D8, // Ϙ 780 0x03D8, // ϙ 781 0x03DA, // Ϛ 782 0x03DA, // ϛ 783 0x03DC, // Ϝ 784 0x03DC, // ϝ 785 0x03DE, // Ϟ 786 0x03DE, // ϟ 787 0x03E0, // Ϡ 788 0x03E0, // ϡ 789 0, 790 0, 791 0, 792 0, 793 0, 794 0, 795 0, 796 0, 797 0, 798 0, 799 0, 800 0, 801 0, 802 0, 803 0x039A, // ϰ 804 0x03A1, // ϱ 805 0x03F9, // ϲ 806 0x037F, // ϳ 807 0x03F4, // ϴ 808 0x0395 | HAS_VOWEL, // ϵ 809 0, 810 0x03F7, // Ϸ 811 0x03F7, // ϸ 812 0x03F9, // Ϲ 813 0x03FA, // Ϻ 814 0x03FA, // ϻ 815 0x03FC, // ϼ 816 0x03FD, // Ͻ 817 0x03FE, // Ͼ 818 0x03FF, // Ͽ 819 }; 820 821 private static final char[] data1F00 = { 822 // U+1F00..1FFF 823 0x0391 | HAS_VOWEL, // ἀ 824 0x0391 | HAS_VOWEL, // ἁ 825 0x0391 | HAS_VOWEL | HAS_ACCENT, // ἂ 826 0x0391 | HAS_VOWEL | HAS_ACCENT, // ἃ 827 0x0391 | HAS_VOWEL | HAS_ACCENT, // ἄ 828 0x0391 | HAS_VOWEL | HAS_ACCENT, // ἅ 829 0x0391 | HAS_VOWEL | HAS_ACCENT, // ἆ 830 0x0391 | HAS_VOWEL | HAS_ACCENT, // ἇ 831 0x0391 | HAS_VOWEL, // Ἀ 832 0x0391 | HAS_VOWEL, // Ἁ 833 0x0391 | HAS_VOWEL | HAS_ACCENT, // Ἂ 834 0x0391 | HAS_VOWEL | HAS_ACCENT, // Ἃ 835 0x0391 | HAS_VOWEL | HAS_ACCENT, // Ἄ 836 0x0391 | HAS_VOWEL | HAS_ACCENT, // Ἅ 837 0x0391 | HAS_VOWEL | HAS_ACCENT, // Ἆ 838 0x0391 | HAS_VOWEL | HAS_ACCENT, // Ἇ 839 0x0395 | HAS_VOWEL, // ἐ 840 0x0395 | HAS_VOWEL, // ἑ 841 0x0395 | HAS_VOWEL | HAS_ACCENT, // ἒ 842 0x0395 | HAS_VOWEL | HAS_ACCENT, // ἓ 843 0x0395 | HAS_VOWEL | HAS_ACCENT, // ἔ 844 0x0395 | HAS_VOWEL | HAS_ACCENT, // ἕ 845 0, 846 0, 847 0x0395 | HAS_VOWEL, // Ἐ 848 0x0395 | HAS_VOWEL, // Ἑ 849 0x0395 | HAS_VOWEL | HAS_ACCENT, // Ἒ 850 0x0395 | HAS_VOWEL | HAS_ACCENT, // Ἓ 851 0x0395 | HAS_VOWEL | HAS_ACCENT, // Ἔ 852 0x0395 | HAS_VOWEL | HAS_ACCENT, // Ἕ 853 0, 854 0, 855 0x0397 | HAS_VOWEL, // ἠ 856 0x0397 | HAS_VOWEL, // ἡ 857 0x0397 | HAS_VOWEL | HAS_ACCENT, // ἢ 858 0x0397 | HAS_VOWEL | HAS_ACCENT, // ἣ 859 0x0397 | HAS_VOWEL | HAS_ACCENT, // ἤ 860 0x0397 | HAS_VOWEL | HAS_ACCENT, // ἥ 861 0x0397 | HAS_VOWEL | HAS_ACCENT, // ἦ 862 0x0397 | HAS_VOWEL | HAS_ACCENT, // ἧ 863 0x0397 | HAS_VOWEL, // Ἠ 864 0x0397 | HAS_VOWEL, // Ἡ 865 0x0397 | HAS_VOWEL | HAS_ACCENT, // Ἢ 866 0x0397 | HAS_VOWEL | HAS_ACCENT, // Ἣ 867 0x0397 | HAS_VOWEL | HAS_ACCENT, // Ἤ 868 0x0397 | HAS_VOWEL | HAS_ACCENT, // Ἥ 869 0x0397 | HAS_VOWEL | HAS_ACCENT, // Ἦ 870 0x0397 | HAS_VOWEL | HAS_ACCENT, // Ἧ 871 0x0399 | HAS_VOWEL, // ἰ 872 0x0399 | HAS_VOWEL, // ἱ 873 0x0399 | HAS_VOWEL | HAS_ACCENT, // ἲ 874 0x0399 | HAS_VOWEL | HAS_ACCENT, // ἳ 875 0x0399 | HAS_VOWEL | HAS_ACCENT, // ἴ 876 0x0399 | HAS_VOWEL | HAS_ACCENT, // ἵ 877 0x0399 | HAS_VOWEL | HAS_ACCENT, // ἶ 878 0x0399 | HAS_VOWEL | HAS_ACCENT, // ἷ 879 0x0399 | HAS_VOWEL, // Ἰ 880 0x0399 | HAS_VOWEL, // Ἱ 881 0x0399 | HAS_VOWEL | HAS_ACCENT, // Ἲ 882 0x0399 | HAS_VOWEL | HAS_ACCENT, // Ἳ 883 0x0399 | HAS_VOWEL | HAS_ACCENT, // Ἴ 884 0x0399 | HAS_VOWEL | HAS_ACCENT, // Ἵ 885 0x0399 | HAS_VOWEL | HAS_ACCENT, // Ἶ 886 0x0399 | HAS_VOWEL | HAS_ACCENT, // Ἷ 887 0x039F | HAS_VOWEL, // ὀ 888 0x039F | HAS_VOWEL, // ὁ 889 0x039F | HAS_VOWEL | HAS_ACCENT, // ὂ 890 0x039F | HAS_VOWEL | HAS_ACCENT, // ὃ 891 0x039F | HAS_VOWEL | HAS_ACCENT, // ὄ 892 0x039F | HAS_VOWEL | HAS_ACCENT, // ὅ 893 0, 894 0, 895 0x039F | HAS_VOWEL, // Ὀ 896 0x039F | HAS_VOWEL, // Ὁ 897 0x039F | HAS_VOWEL | HAS_ACCENT, // Ὂ 898 0x039F | HAS_VOWEL | HAS_ACCENT, // Ὃ 899 0x039F | HAS_VOWEL | HAS_ACCENT, // Ὄ 900 0x039F | HAS_VOWEL | HAS_ACCENT, // Ὅ 901 0, 902 0, 903 0x03A5 | HAS_VOWEL, // ὐ 904 0x03A5 | HAS_VOWEL, // ὑ 905 0x03A5 | HAS_VOWEL | HAS_ACCENT, // ὒ 906 0x03A5 | HAS_VOWEL | HAS_ACCENT, // ὓ 907 0x03A5 | HAS_VOWEL | HAS_ACCENT, // ὔ 908 0x03A5 | HAS_VOWEL | HAS_ACCENT, // ὕ 909 0x03A5 | HAS_VOWEL | HAS_ACCENT, // ὖ 910 0x03A5 | HAS_VOWEL | HAS_ACCENT, // ὗ 911 0, 912 0x03A5 | HAS_VOWEL, // Ὑ 913 0, 914 0x03A5 | HAS_VOWEL | HAS_ACCENT, // Ὓ 915 0, 916 0x03A5 | HAS_VOWEL | HAS_ACCENT, // Ὕ 917 0, 918 0x03A5 | HAS_VOWEL | HAS_ACCENT, // Ὗ 919 0x03A9 | HAS_VOWEL, // ὠ 920 0x03A9 | HAS_VOWEL, // ὡ 921 0x03A9 | HAS_VOWEL | HAS_ACCENT, // ὢ 922 0x03A9 | HAS_VOWEL | HAS_ACCENT, // ὣ 923 0x03A9 | HAS_VOWEL | HAS_ACCENT, // ὤ 924 0x03A9 | HAS_VOWEL | HAS_ACCENT, // ὥ 925 0x03A9 | HAS_VOWEL | HAS_ACCENT, // ὦ 926 0x03A9 | HAS_VOWEL | HAS_ACCENT, // ὧ 927 0x03A9 | HAS_VOWEL, // Ὠ 928 0x03A9 | HAS_VOWEL, // Ὡ 929 0x03A9 | HAS_VOWEL | HAS_ACCENT, // Ὢ 930 0x03A9 | HAS_VOWEL | HAS_ACCENT, // Ὣ 931 0x03A9 | HAS_VOWEL | HAS_ACCENT, // Ὤ 932 0x03A9 | HAS_VOWEL | HAS_ACCENT, // Ὥ 933 0x03A9 | HAS_VOWEL | HAS_ACCENT, // Ὦ 934 0x03A9 | HAS_VOWEL | HAS_ACCENT, // Ὧ 935 0x0391 | HAS_VOWEL | HAS_ACCENT, // ὰ 936 0x0391 | HAS_VOWEL | HAS_ACCENT, // ά 937 0x0395 | HAS_VOWEL | HAS_ACCENT, // ὲ 938 0x0395 | HAS_VOWEL | HAS_ACCENT, // έ 939 0x0397 | HAS_VOWEL | HAS_ACCENT, // ὴ 940 0x0397 | HAS_VOWEL | HAS_ACCENT, // ή 941 0x0399 | HAS_VOWEL | HAS_ACCENT, // ὶ 942 0x0399 | HAS_VOWEL | HAS_ACCENT, // ί 943 0x039F | HAS_VOWEL | HAS_ACCENT, // ὸ 944 0x039F | HAS_VOWEL | HAS_ACCENT, // ό 945 0x03A5 | HAS_VOWEL | HAS_ACCENT, // ὺ 946 0x03A5 | HAS_VOWEL | HAS_ACCENT, // ύ 947 0x03A9 | HAS_VOWEL | HAS_ACCENT, // ὼ 948 0x03A9 | HAS_VOWEL | HAS_ACCENT, // ώ 949 0, 950 0, 951 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾀ 952 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾁ 953 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾂ 954 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾃ 955 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾄ 956 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾅ 957 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾆ 958 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾇ 959 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾈ 960 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾉ 961 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾊ 962 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾋ 963 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾌ 964 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾍ 965 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾎ 966 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾏ 967 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾐ 968 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾑ 969 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾒ 970 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾓ 971 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾔ 972 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾕ 973 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾖ 974 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾗ 975 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾘ 976 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾙ 977 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾚ 978 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾛ 979 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾜ 980 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾝ 981 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾞ 982 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾟ 983 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾠ 984 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾡ 985 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾢ 986 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾣ 987 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾤ 988 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾥ 989 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾦ 990 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾧ 991 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾨ 992 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾩ 993 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾪ 994 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾫ 995 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾬ 996 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾭ 997 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾮ 998 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾯ 999 0x0391 | HAS_VOWEL, // ᾰ 1000 0x0391 | HAS_VOWEL, // ᾱ 1001 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾲ 1002 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾳ 1003 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾴ 1004 0, 1005 0x0391 | HAS_VOWEL | HAS_ACCENT, // ᾶ 1006 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾷ 1007 0x0391 | HAS_VOWEL, // Ᾰ 1008 0x0391 | HAS_VOWEL, // Ᾱ 1009 0x0391 | HAS_VOWEL | HAS_ACCENT, // Ὰ 1010 0x0391 | HAS_VOWEL | HAS_ACCENT, // Ά 1011 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾼ 1012 0, 1013 0x0399 | HAS_VOWEL, // ι 1014 0, 1015 0, 1016 0, 1017 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ῂ 1018 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ῃ 1019 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ῄ 1020 0, 1021 0x0397 | HAS_VOWEL | HAS_ACCENT, // ῆ 1022 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ῇ 1023 0x0395 | HAS_VOWEL | HAS_ACCENT, // Ὲ 1024 0x0395 | HAS_VOWEL | HAS_ACCENT, // Έ 1025 0x0397 | HAS_VOWEL | HAS_ACCENT, // Ὴ 1026 0x0397 | HAS_VOWEL | HAS_ACCENT, // Ή 1027 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ῌ 1028 0, 1029 0, 1030 0, 1031 0x0399 | HAS_VOWEL, // ῐ 1032 0x0399 | HAS_VOWEL, // ῑ 1033 0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // ῒ 1034 0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // ΐ 1035 0, 1036 0, 1037 0x0399 | HAS_VOWEL | HAS_ACCENT, // ῖ 1038 0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // ῗ 1039 0x0399 | HAS_VOWEL, // Ῐ 1040 0x0399 | HAS_VOWEL, // Ῑ 1041 0x0399 | HAS_VOWEL | HAS_ACCENT, // Ὶ 1042 0x0399 | HAS_VOWEL | HAS_ACCENT, // Ί 1043 0, 1044 0, 1045 0, 1046 0, 1047 0x03A5 | HAS_VOWEL, // ῠ 1048 0x03A5 | HAS_VOWEL, // ῡ 1049 0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // ῢ 1050 0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // ΰ 1051 0x03A1, // ῤ 1052 0x03A1, // ῥ 1053 0x03A5 | HAS_VOWEL | HAS_ACCENT, // ῦ 1054 0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // ῧ 1055 0x03A5 | HAS_VOWEL, // Ῠ 1056 0x03A5 | HAS_VOWEL, // Ῡ 1057 0x03A5 | HAS_VOWEL | HAS_ACCENT, // Ὺ 1058 0x03A5 | HAS_VOWEL | HAS_ACCENT, // Ύ 1059 0x03A1, // Ῥ 1060 0, 1061 0, 1062 0, 1063 0, 1064 0, 1065 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ῲ 1066 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ῳ 1067 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ῴ 1068 0, 1069 0x03A9 | HAS_VOWEL | HAS_ACCENT, // ῶ 1070 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ῷ 1071 0x039F | HAS_VOWEL | HAS_ACCENT, // Ὸ 1072 0x039F | HAS_VOWEL | HAS_ACCENT, // Ό 1073 0x03A9 | HAS_VOWEL | HAS_ACCENT, // Ὼ 1074 0x03A9 | HAS_VOWEL | HAS_ACCENT, // Ώ 1075 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ῼ 1076 0, 1077 0, 1078 0, 1079 }; 1080 1081 // U+2126 Ohm sign 1082 private static final char data2126 = 0x03A9 | HAS_VOWEL; // Ω 1083 getLetterData(int c)1084 private static final int getLetterData(int c) { 1085 if (c < 0x370 || 0x2126 < c || (0x3ff < c && c < 0x1f00)) { 1086 return 0; 1087 } else if (c <= 0x3ff) { 1088 return data0370[c - 0x370]; 1089 } else if (c <= 0x1fff) { 1090 return data1F00[c - 0x1f00]; 1091 } else if (c == 0x2126) { 1092 return data2126; 1093 } else { 1094 return 0; 1095 } 1096 } 1097 1098 /** 1099 * Returns a non-zero value for each of the Greek combining diacritics 1100 * listed in The Unicode Standard, version 8, chapter 7.2 Greek, 1101 * plus some perispomeni look-alikes. 1102 */ getDiacriticData(int c)1103 private static final int getDiacriticData(int c) { 1104 switch (c) { 1105 case '\u0300': // varia 1106 case '\u0301': // tonos = oxia 1107 case '\u0342': // perispomeni 1108 case '\u0302': // circumflex can look like perispomeni 1109 case '\u0303': // tilde can look like perispomeni 1110 case '\u0311': // inverted breve can look like perispomeni 1111 return HAS_ACCENT; 1112 case '\u0308': // dialytika = diaeresis 1113 return HAS_COMBINING_DIALYTIKA; 1114 case '\u0344': // dialytika tonos 1115 return HAS_COMBINING_DIALYTIKA | HAS_ACCENT; 1116 case '\u0345': // ypogegrammeni = iota subscript 1117 return HAS_YPOGEGRAMMENI; 1118 case '\u0304': // macron 1119 case '\u0306': // breve 1120 case '\u0313': // comma above 1121 case '\u0314': // reversed comma above 1122 case '\u0343': // koronis 1123 return HAS_OTHER_GREEK_DIACRITIC; 1124 default: 1125 return 0; 1126 } 1127 } 1128 isFollowedByCasedLetter(CharSequence s, int i)1129 private static boolean isFollowedByCasedLetter(CharSequence s, int i) { 1130 while (i < s.length()) { 1131 int c = Character.codePointAt(s, i); 1132 int type = UCaseProps.INSTANCE.getTypeOrIgnorable(c); 1133 if ((type & UCaseProps.IGNORABLE) != 0) { 1134 // Case-ignorable, continue with the loop. 1135 i += Character.charCount(c); 1136 } else if (type != UCaseProps.NONE) { 1137 return true; // Followed by cased letter. 1138 } else { 1139 return false; // Uncased and not case-ignorable. 1140 } 1141 } 1142 return false; // Not followed by cased letter. 1143 } 1144 1145 /** 1146 * Greek string uppercasing with a state machine. 1147 * Probably simpler than a stateless function that has to figure out complex context-before 1148 * for each character. 1149 * TODO: Try to re-consolidate one way or another with the non-Greek function. 1150 * 1151 * <p>Keep this consistent with the C++ versions in ustrcase.cpp (UTF-16) and ucasemap.cpp (UTF-8). 1152 * @throws IOException 1153 */ toUpper(int options, CharSequence src, A dest, Edits edits)1154 private static <A extends Appendable> A toUpper(int options, 1155 CharSequence src, A dest, Edits edits) throws IOException { 1156 int state = 0; 1157 for (int i = 0; i < src.length();) { 1158 int c = Character.codePointAt(src, i); 1159 int nextIndex = i + Character.charCount(c); 1160 int nextState = 0; 1161 int type = UCaseProps.INSTANCE.getTypeOrIgnorable(c); 1162 if ((type & UCaseProps.IGNORABLE) != 0) { 1163 // c is case-ignorable 1164 nextState |= (state & AFTER_CASED); 1165 } else if (type != UCaseProps.NONE) { 1166 // c is cased 1167 nextState |= AFTER_CASED; 1168 } 1169 int data = getLetterData(c); 1170 if (data > 0) { 1171 int upper = data & UPPER_MASK; 1172 // Add a dialytika to this iota or ypsilon vowel 1173 // if we removed a tonos from the previous vowel, 1174 // and that previous vowel did not also have (or gain) a dialytika. 1175 // Adding one only to the final vowel in a longer sequence 1176 // (which does not occur in normal writing) would require lookahead. 1177 // Set the same flag as for preserving an existing dialytika. 1178 if ((data & HAS_VOWEL) != 0 && (state & AFTER_VOWEL_WITH_ACCENT) != 0 && 1179 (upper == 'Ι' || upper == 'Υ')) { 1180 data |= HAS_DIALYTIKA; 1181 } 1182 int numYpogegrammeni = 0; // Map each one to a trailing, spacing, capital iota. 1183 if ((data & HAS_YPOGEGRAMMENI) != 0) { 1184 numYpogegrammeni = 1; 1185 } 1186 // Skip combining diacritics after this Greek letter. 1187 while (nextIndex < src.length()) { 1188 int diacriticData = getDiacriticData(src.charAt(nextIndex)); 1189 if (diacriticData != 0) { 1190 data |= diacriticData; 1191 if ((diacriticData & HAS_YPOGEGRAMMENI) != 0) { 1192 ++numYpogegrammeni; 1193 } 1194 ++nextIndex; 1195 } else { 1196 break; // not a Greek diacritic 1197 } 1198 } 1199 if ((data & HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA) == HAS_VOWEL_AND_ACCENT) { 1200 nextState |= AFTER_VOWEL_WITH_ACCENT; 1201 } 1202 // Map according to Greek rules. 1203 boolean addTonos = false; 1204 if (upper == 'Η' && 1205 (data & HAS_ACCENT) != 0 && 1206 numYpogegrammeni == 0 && 1207 (state & AFTER_CASED) == 0 && 1208 !isFollowedByCasedLetter(src, nextIndex)) { 1209 // Keep disjunctive "or" with (only) a tonos. 1210 // We use the same "word boundary" conditions as for the Final_Sigma test. 1211 if (i == nextIndex) { 1212 upper = 'Ή'; // Preserve the precomposed form. 1213 } else { 1214 addTonos = true; 1215 } 1216 } else if ((data & HAS_DIALYTIKA) != 0) { 1217 // Preserve a vowel with dialytika in precomposed form if it exists. 1218 if (upper == 'Ι') { 1219 upper = 'Ϊ'; 1220 data &= ~HAS_EITHER_DIALYTIKA; 1221 } else if (upper == 'Υ') { 1222 upper = 'Ϋ'; 1223 data &= ~HAS_EITHER_DIALYTIKA; 1224 } 1225 } 1226 1227 boolean change; 1228 if (edits == null && (options & OMIT_UNCHANGED_TEXT) == 0) { 1229 change = true; // common, simple usage 1230 } else { 1231 // Find out first whether we are changing the text. 1232 change = src.charAt(i) != upper || numYpogegrammeni > 0; 1233 int i2 = i + 1; 1234 if ((data & HAS_EITHER_DIALYTIKA) != 0) { 1235 change |= i2 >= nextIndex || src.charAt(i2) != 0x308; 1236 ++i2; 1237 } 1238 if (addTonos) { 1239 change |= i2 >= nextIndex || src.charAt(i2) != 0x301; 1240 ++i2; 1241 } 1242 int oldLength = nextIndex - i; 1243 int newLength = (i2 - i) + numYpogegrammeni; 1244 change |= oldLength != newLength; 1245 if (change) { 1246 if (edits != null) { 1247 edits.addReplace(oldLength, newLength); 1248 } 1249 } else { 1250 if (edits != null) { 1251 edits.addUnchanged(oldLength); 1252 } 1253 // Write unchanged text? 1254 change = (options & OMIT_UNCHANGED_TEXT) == 0; 1255 } 1256 } 1257 1258 if (change) { 1259 dest.append((char)upper); 1260 if ((data & HAS_EITHER_DIALYTIKA) != 0) { 1261 dest.append('\u0308'); // restore or add a dialytika 1262 } 1263 if (addTonos) { 1264 dest.append('\u0301'); 1265 } 1266 while (numYpogegrammeni > 0) { 1267 dest.append('Ι'); 1268 --numYpogegrammeni; 1269 } 1270 } 1271 } else { 1272 c = UCaseProps.INSTANCE.toFullUpper(c, null, dest, UCaseProps.LOC_GREEK); 1273 appendResult(c, dest, nextIndex - i, options, edits); 1274 } 1275 i = nextIndex; 1276 state = nextState; 1277 } 1278 return dest; 1279 } 1280 } 1281 } 1282