1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2008-2015, International Business Machines Corporation and * 6 * others. All Rights Reserved. * 7 ******************************************************************************* 8 */ 9 package com.ibm.icu.dev.test.localespi; 10 11 import java.text.BreakIterator; 12 import java.util.Locale; 13 14 import org.junit.Test; 15 16 import com.ibm.icu.dev.test.TestFmwk; 17 18 public class BreakIteratorTest extends TestFmwk { 19 private static final int CHARACTER_BRK = 0; 20 private static final int WORD_BRK = 1; 21 private static final int LINE_BRK = 2; 22 private static final int SENTENCE_BRK = 3; 23 24 /* 25 * Check if getInstance returns the ICU implementation. 26 */ 27 @Test TestGetInstance()28 public void TestGetInstance() { 29 for (Locale loc : BreakIterator.getAvailableLocales()) { 30 if (TestUtil.isExcluded(loc)) { 31 logln("Skipped " + loc); 32 continue; 33 } 34 checkGetInstance(CHARACTER_BRK, loc); 35 checkGetInstance(WORD_BRK, loc); 36 checkGetInstance(LINE_BRK, loc); 37 checkGetInstance(SENTENCE_BRK, loc); 38 } 39 } 40 checkGetInstance(int type, Locale loc)41 private void checkGetInstance(int type, Locale loc) { 42 BreakIterator brkitr = null; 43 String method = null; 44 switch (type) { 45 case CHARACTER_BRK: 46 brkitr = BreakIterator.getCharacterInstance(loc); 47 method = "getCharacterInstance"; 48 break; 49 case WORD_BRK: 50 brkitr = BreakIterator.getWordInstance(loc); 51 method = "getWordInstance"; 52 break; 53 case LINE_BRK: 54 brkitr = BreakIterator.getLineInstance(loc); 55 method = "getLineInstance"; 56 break; 57 case SENTENCE_BRK: 58 brkitr = BreakIterator.getSentenceInstance(loc); 59 method = "getSentenceInstance"; 60 break; 61 default: 62 errln("FAIL: Unknown break iterator type"); 63 return; 64 } 65 66 boolean isIcuImpl = (brkitr instanceof com.ibm.icu.impl.jdkadapter.BreakIteratorICU); 67 68 if (TestUtil.isICUExtendedLocale(loc)) { 69 if (!isIcuImpl) { 70 errln("FAIL: " + method + " returned JDK BreakIterator for locale " + loc); 71 } 72 } else { 73 if (isIcuImpl) { 74 logln("INFO: " + method + " returned ICU BreakIterator for locale " + loc); 75 } 76 BreakIterator brkitrIcu = null; 77 Locale iculoc = TestUtil.toICUExtendedLocale(loc); 78 switch (type) { 79 case CHARACTER_BRK: 80 brkitrIcu = BreakIterator.getCharacterInstance(iculoc); 81 break; 82 case WORD_BRK: 83 brkitrIcu = BreakIterator.getWordInstance(iculoc); 84 break; 85 case LINE_BRK: 86 brkitrIcu = BreakIterator.getLineInstance(iculoc); 87 break; 88 case SENTENCE_BRK: 89 brkitrIcu = BreakIterator.getSentenceInstance(iculoc); 90 break; 91 } 92 if (isIcuImpl) { 93 if (!brkitr.equals(brkitrIcu)) { 94 // BreakIterator.getXXXInstance returns a cached BreakIterator instance. 95 // BreakIterator does not override Object#equals, so the result may not be 96 // consistent. 97 // logln("INFO: " + method + " returned ICU BreakIterator for locale " + loc 98 // + ", but different from the one for locale " + iculoc); 99 } 100 } else { 101 if (!(brkitrIcu instanceof com.ibm.icu.impl.jdkadapter.BreakIteratorICU)) { 102 errln("FAIL: " + method + " returned JDK BreakIterator for locale " + iculoc); 103 } 104 } 105 } 106 } 107 108 /* 109 * Testing the behavior of text break between ICU instance and its 110 * equivalent created via the Locale SPI framework. 111 */ 112 @Test TestICUEquivalent()113 public void TestICUEquivalent() { 114 Locale[] TEST_LOCALES = { 115 new Locale("en", "US"), 116 new Locale("fr", "FR"), 117 new Locale("th", "TH"), 118 new Locale("zh", "CN"), 119 }; 120 121 String[] TEST_DATA = { 122 "International Components for Unicode (ICU) is an open source project of mature " 123 + "C/C++ and Java libraries for Unicode support, software internationalization and " 124 + "software globalization. ICU is widely portable to many operating systems and " 125 + "environments. It gives applications the same results on all platforms and between " 126 + "C/C++ and Java software. The ICU project is an open source development project " 127 + "that is sponsored, supported and used by IBM and many other companies.", 128 129 "L'International Components for Unicode (ICU) est un projet open source qui fourni " 130 + "des biblioth\u00e8ques pour les langages informatique C/C++ et Java pour supporter " 131 + "Unicode, l'internationalisation et la mondialisation des logiciels. ICU est largement " 132 + "portable vers beaucoup de syst\u00e8mes d'exploitations et d'environnements. Il " 133 + "donne aux applications les m\u00eames comportements et r\u00e9sultats sur toutes " 134 + "les plateformes et entre les logiciels C/C++ et Java. Le projet ICU est un projet " 135 + "dont les code sources sont disponibles qui est sponsoris\u00e9, support\u00e9 et " 136 + "utilis\u00e9 par IBM et beaucoup d'autres entreprises.", 137 138 "\u5728IBM\u7b49\u4f01\u696d\u4e2d\uff0c\u56fd\u9645\u5316\u7ecf\u5e38\u7b80\u5199" 139 + "\u4e3aI18N (\u6216i18n\u6216I18n)\uff0c\u5176\u4e2d18\u4ee3\u8868\u4e86\u4e2d\u95f4" 140 + "\u7701\u7565\u768418\u4e2a\u5b57\u6bcd\uff1b\u800c\u201c\u672c\u5730\u5316\u201d" 141 + "\u540c\u53ef\u7b80\u5199\u4e3al10n\u3002\u9019\u4e24\u4e2a\u6982\u5ff5\u6709\u65f6" 142 + "\u5408\u79f0\u5168\u7403\u5316\uff08g11n\uff09\uff0c\u4f46\u662f\u5168\u7403\u5316" 143 + "\u7684\u6db5\u4e49\u66f4\u4e3a\u4e00\u822c\u5316\u3002\u53e6\u5916\u5076\u5c14\u4f1a" 144 + "\u51fa\u73b0\u201cp13n\u201d\uff0c\u4ee3\u8868\u4e2a\u4eba\u5316\uff08personalization" 145 + "\uff09\u3002", 146 147 "\u0e01\u0e23\u0e38\u0e07\u0e40\u0e17\u0e1e\u0e21\u0e2b\u0e32\u0e19\u0e04\u0e23" 148 + "\u0e43\u0e19\u0e1b\u0e31\u0e08\u0e08\u0e38\u0e1a\u0e31\u0e19\u0e40\u0e1b\u0e47" 149 + "\u0e19\u0e28\u0e39\u0e19\u0e22\u0e4c\u0e01\u0e25\u0e32\u0e07\u0e01\u0e32\u0e23" 150 + "\u0e1b\u0e01\u0e04\u0e23\u0e2d\u0e07 \u0e01\u0e32\u0e23\u0e28\u0e36\u0e01\u0e29" 151 + "\u0e32 \u0e01\u0e32\u0e23\u0e04\u0e21\u0e19\u0e32\u0e04\u0e21\u0e02\u0e19\u0e2a" 152 + "\u0e48\u0e07 \u0e01\u0e32\u0e23\u0e40\u0e07\u0e34\u0e19\u0e01\u0e32\u0e23\u0e18" 153 + "\u0e19\u0e32\u0e04\u0e32\u0e23 \u0e01\u0e32\u0e23\u0e1e\u0e32\u0e13\u0e34\u0e0a" 154 + "\u0e22\u0e4c \u0e01\u0e32\u0e23\u0e2a\u0e37\u0e48\u0e2d\u0e2a\u0e32\u0e23 \u0e2f" 155 + "\u0e25\u0e2f \u0e42\u0e14\u0e22\u0e21\u0e35\u0e1e\u0e37\u0e49\u0e19\u0e17\u0e35" 156 + "\u0e48\u0e17\u0e31\u0e49\u0e07\u0e2b\u0e21\u0e14 1,562.2 \u0e15\u0e32\u0e23\u0e32" 157 + "\u0e07\u0e01\u0e34\u0e42\u0e25\u0e40\u0e21\u0e15\u0e23 \u0e1e\u0e34\u0e01\u0e31" 158 + "\u0e14\u0e17\u0e32\u0e07\u0e20\u0e39\u0e21\u0e34\u0e28\u0e32\u0e2a\u0e15\u0e23" 159 + "\u0e4c\u0e04\u0e37\u0e2d \u0e25\u0e30\u0e15\u0e34\u0e08\u0e39\u0e14 13\u00b0 45" 160 + "\u2019 \u0e40\u0e2b\u0e19\u0e37\u0e2d \u0e25\u0e2d\u0e07\u0e08\u0e34\u0e08\u0e39" 161 + "\u0e14 100\u00b0 31\u2019 \u0e15\u0e30\u0e27\u0e31\u0e19\u0e2d\u0e2d\u0e01" 162 }; 163 164 BreakIterator[] jdkBrkItrs = new BreakIterator[4]; 165 com.ibm.icu.text.BreakIterator[] icuBrkItrs = new com.ibm.icu.text.BreakIterator[4]; 166 167 for (Locale loc : TEST_LOCALES) { 168 Locale iculoc = TestUtil.toICUExtendedLocale(loc); 169 170 jdkBrkItrs[0] = BreakIterator.getCharacterInstance(iculoc); 171 jdkBrkItrs[1] = BreakIterator.getWordInstance(iculoc); 172 jdkBrkItrs[2] = BreakIterator.getLineInstance(iculoc); 173 jdkBrkItrs[3] = BreakIterator.getSentenceInstance(iculoc); 174 175 icuBrkItrs[0] = com.ibm.icu.text.BreakIterator.getCharacterInstance(iculoc); 176 icuBrkItrs[1] = com.ibm.icu.text.BreakIterator.getWordInstance(iculoc); 177 icuBrkItrs[2] = com.ibm.icu.text.BreakIterator.getLineInstance(iculoc); 178 icuBrkItrs[3] = com.ibm.icu.text.BreakIterator.getSentenceInstance(iculoc); 179 180 for (String text : TEST_DATA) { 181 for (int i = 0; i < 4; i++) { 182 compareBreaks(text, jdkBrkItrs[i], icuBrkItrs[i]); 183 } 184 } 185 } 186 } 187 compareBreaks(String text, BreakIterator jdkBrk, com.ibm.icu.text.BreakIterator icuBrk)188 private void compareBreaks(String text, BreakIterator jdkBrk, com.ibm.icu.text.BreakIterator icuBrk) { 189 jdkBrk.setText(text); 190 icuBrk.setText(text); 191 192 // Forward 193 int jidx = jdkBrk.first(); 194 int iidx = icuBrk.first(); 195 if (jidx != iidx) { 196 errln("FAIL: Different first boundaries (jdk=" + jidx + ",icu=" + iidx + ") for text:\n" + text); 197 } 198 while (true) { 199 jidx = jdkBrk.next(); 200 iidx = icuBrk.next(); 201 if (jidx != iidx) { 202 errln("FAIL: Different boundaries (jdk=" + jidx + ",icu=" + iidx + "direction=forward) for text:\n" + text); 203 } 204 if (jidx == BreakIterator.DONE) { 205 break; 206 } 207 } 208 209 // Backward 210 jidx = jdkBrk.last(); 211 iidx = jdkBrk.last(); 212 if (jidx != iidx) { 213 errln("FAIL: Different last boundaries (jdk=" + jidx + ",icu=" + iidx + ") for text:\n" + text); 214 } 215 while (true) { 216 jidx = jdkBrk.previous(); 217 iidx = icuBrk.previous(); 218 if (jidx != iidx) { 219 errln("FAIL: Different boundaries (jdk=" + jidx + ",icu=" + iidx + "direction=backward) for text:\n" + text); 220 } 221 if (jidx == BreakIterator.DONE) { 222 break; 223 } 224 } 225 } 226 } 227