1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2008-2015, International Business Machines Corporation and * 6 * others. All Rights Reserved. * 7 ******************************************************************************* 8 */ 9 package com.ibm.icu.dev.test.localespi; 10 11 import java.text.BreakIterator; 12 import java.util.Locale; 13 14 import org.junit.Test; 15 import org.junit.runner.RunWith; 16 import org.junit.runners.JUnit4; 17 18 import com.ibm.icu.dev.test.TestFmwk; 19 20 @RunWith(JUnit4.class) 21 public class BreakIteratorTest extends TestFmwk { 22 private static final int CHARACTER_BRK = 0; 23 private static final int WORD_BRK = 1; 24 private static final int LINE_BRK = 2; 25 private static final int SENTENCE_BRK = 3; 26 27 /* 28 * Check if getInstance returns the ICU implementation. 29 */ 30 @Test TestGetInstance()31 public void TestGetInstance() { 32 for (Locale loc : BreakIterator.getAvailableLocales()) { 33 if (TestUtil.isExcluded(loc)) { 34 logln("Skipped " + loc); 35 continue; 36 } 37 checkGetInstance(CHARACTER_BRK, loc); 38 checkGetInstance(WORD_BRK, loc); 39 checkGetInstance(LINE_BRK, loc); 40 checkGetInstance(SENTENCE_BRK, loc); 41 } 42 } 43 checkGetInstance(int type, Locale loc)44 private void checkGetInstance(int type, Locale loc) { 45 BreakIterator brkitr = null; 46 String method = null; 47 switch (type) { 48 case CHARACTER_BRK: 49 brkitr = BreakIterator.getCharacterInstance(loc); 50 method = "getCharacterInstance"; 51 break; 52 case WORD_BRK: 53 brkitr = BreakIterator.getWordInstance(loc); 54 method = "getWordInstance"; 55 break; 56 case LINE_BRK: 57 brkitr = BreakIterator.getLineInstance(loc); 58 method = "getLineInstance"; 59 break; 60 case SENTENCE_BRK: 61 brkitr = BreakIterator.getSentenceInstance(loc); 62 method = "getSentenceInstance"; 63 break; 64 default: 65 errln("FAIL: Unknown break iterator type"); 66 return; 67 } 68 69 boolean isIcuImpl = (brkitr instanceof com.ibm.icu.impl.jdkadapter.BreakIteratorICU); 70 71 if (TestUtil.isICUExtendedLocale(loc)) { 72 if (!isIcuImpl) { 73 errln("FAIL: " + method + " returned JDK BreakIterator for locale " + loc); 74 } 75 } else { 76 if (isIcuImpl) { 77 logln("INFO: " + method + " returned ICU BreakIterator for locale " + loc); 78 } 79 BreakIterator brkitrIcu = null; 80 Locale iculoc = TestUtil.toICUExtendedLocale(loc); 81 switch (type) { 82 case CHARACTER_BRK: 83 brkitrIcu = BreakIterator.getCharacterInstance(iculoc); 84 break; 85 case WORD_BRK: 86 brkitrIcu = BreakIterator.getWordInstance(iculoc); 87 break; 88 case LINE_BRK: 89 brkitrIcu = BreakIterator.getLineInstance(iculoc); 90 break; 91 case SENTENCE_BRK: 92 brkitrIcu = BreakIterator.getSentenceInstance(iculoc); 93 break; 94 } 95 if (isIcuImpl) { 96 if (!brkitr.equals(brkitrIcu)) { 97 // BreakIterator.getXXXInstance returns a cached BreakIterator instance. 98 // BreakIterator does not override Object#equals, so the result may not be 99 // consistent. 100 // logln("INFO: " + method + " returned ICU BreakIterator for locale " + loc 101 // + ", but different from the one for locale " + iculoc); 102 } 103 } else { 104 if (!(brkitrIcu instanceof com.ibm.icu.impl.jdkadapter.BreakIteratorICU)) { 105 errln("FAIL: " + method + " returned JDK BreakIterator for locale " + iculoc); 106 } 107 } 108 } 109 } 110 111 /* 112 * Testing the behavior of text break between ICU instance and its 113 * equivalent created via the Locale SPI framework. 114 */ 115 @Test TestICUEquivalent()116 public void TestICUEquivalent() { 117 Locale[] TEST_LOCALES = { 118 new Locale("en", "US"), 119 new Locale("fr", "FR"), 120 new Locale("th", "TH"), 121 new Locale("zh", "CN"), 122 }; 123 124 String[] TEST_DATA = { 125 "International Components for Unicode (ICU) is an open source project of mature " 126 + "C/C++ and Java libraries for Unicode support, software internationalization and " 127 + "software globalization. ICU is widely portable to many operating systems and " 128 + "environments. It gives applications the same results on all platforms and between " 129 + "C/C++ and Java software. The ICU project is an open source development project " 130 + "that is sponsored, supported and used by IBM and many other companies.", 131 132 "L'International Components for Unicode (ICU) est un projet open source qui fourni " 133 + "des biblioth\u00e8ques pour les langages informatique C/C++ et Java pour supporter " 134 + "Unicode, l'internationalisation et la mondialisation des logiciels. ICU est largement " 135 + "portable vers beaucoup de syst\u00e8mes d'exploitations et d'environnements. Il " 136 + "donne aux applications les m\u00eames comportements et r\u00e9sultats sur toutes " 137 + "les plateformes et entre les logiciels C/C++ et Java. Le projet ICU est un projet " 138 + "dont les code sources sont disponibles qui est sponsoris\u00e9, support\u00e9 et " 139 + "utilis\u00e9 par IBM et beaucoup d'autres entreprises.", 140 141 "\u5728IBM\u7b49\u4f01\u696d\u4e2d\uff0c\u56fd\u9645\u5316\u7ecf\u5e38\u7b80\u5199" 142 + "\u4e3aI18N (\u6216i18n\u6216I18n)\uff0c\u5176\u4e2d18\u4ee3\u8868\u4e86\u4e2d\u95f4" 143 + "\u7701\u7565\u768418\u4e2a\u5b57\u6bcd\uff1b\u800c\u201c\u672c\u5730\u5316\u201d" 144 + "\u540c\u53ef\u7b80\u5199\u4e3al10n\u3002\u9019\u4e24\u4e2a\u6982\u5ff5\u6709\u65f6" 145 + "\u5408\u79f0\u5168\u7403\u5316\uff08g11n\uff09\uff0c\u4f46\u662f\u5168\u7403\u5316" 146 + "\u7684\u6db5\u4e49\u66f4\u4e3a\u4e00\u822c\u5316\u3002\u53e6\u5916\u5076\u5c14\u4f1a" 147 + "\u51fa\u73b0\u201cp13n\u201d\uff0c\u4ee3\u8868\u4e2a\u4eba\u5316\uff08personalization" 148 + "\uff09\u3002", 149 150 "\u0e01\u0e23\u0e38\u0e07\u0e40\u0e17\u0e1e\u0e21\u0e2b\u0e32\u0e19\u0e04\u0e23" 151 + "\u0e43\u0e19\u0e1b\u0e31\u0e08\u0e08\u0e38\u0e1a\u0e31\u0e19\u0e40\u0e1b\u0e47" 152 + "\u0e19\u0e28\u0e39\u0e19\u0e22\u0e4c\u0e01\u0e25\u0e32\u0e07\u0e01\u0e32\u0e23" 153 + "\u0e1b\u0e01\u0e04\u0e23\u0e2d\u0e07 \u0e01\u0e32\u0e23\u0e28\u0e36\u0e01\u0e29" 154 + "\u0e32 \u0e01\u0e32\u0e23\u0e04\u0e21\u0e19\u0e32\u0e04\u0e21\u0e02\u0e19\u0e2a" 155 + "\u0e48\u0e07 \u0e01\u0e32\u0e23\u0e40\u0e07\u0e34\u0e19\u0e01\u0e32\u0e23\u0e18" 156 + "\u0e19\u0e32\u0e04\u0e32\u0e23 \u0e01\u0e32\u0e23\u0e1e\u0e32\u0e13\u0e34\u0e0a" 157 + "\u0e22\u0e4c \u0e01\u0e32\u0e23\u0e2a\u0e37\u0e48\u0e2d\u0e2a\u0e32\u0e23 \u0e2f" 158 + "\u0e25\u0e2f \u0e42\u0e14\u0e22\u0e21\u0e35\u0e1e\u0e37\u0e49\u0e19\u0e17\u0e35" 159 + "\u0e48\u0e17\u0e31\u0e49\u0e07\u0e2b\u0e21\u0e14 1,562.2 \u0e15\u0e32\u0e23\u0e32" 160 + "\u0e07\u0e01\u0e34\u0e42\u0e25\u0e40\u0e21\u0e15\u0e23 \u0e1e\u0e34\u0e01\u0e31" 161 + "\u0e14\u0e17\u0e32\u0e07\u0e20\u0e39\u0e21\u0e34\u0e28\u0e32\u0e2a\u0e15\u0e23" 162 + "\u0e4c\u0e04\u0e37\u0e2d \u0e25\u0e30\u0e15\u0e34\u0e08\u0e39\u0e14 13\u00b0 45" 163 + "\u2019 \u0e40\u0e2b\u0e19\u0e37\u0e2d \u0e25\u0e2d\u0e07\u0e08\u0e34\u0e08\u0e39" 164 + "\u0e14 100\u00b0 31\u2019 \u0e15\u0e30\u0e27\u0e31\u0e19\u0e2d\u0e2d\u0e01" 165 }; 166 167 BreakIterator[] jdkBrkItrs = new BreakIterator[4]; 168 com.ibm.icu.text.BreakIterator[] icuBrkItrs = new com.ibm.icu.text.BreakIterator[4]; 169 170 for (Locale loc : TEST_LOCALES) { 171 Locale iculoc = TestUtil.toICUExtendedLocale(loc); 172 173 jdkBrkItrs[0] = BreakIterator.getCharacterInstance(iculoc); 174 jdkBrkItrs[1] = BreakIterator.getWordInstance(iculoc); 175 jdkBrkItrs[2] = BreakIterator.getLineInstance(iculoc); 176 jdkBrkItrs[3] = BreakIterator.getSentenceInstance(iculoc); 177 178 icuBrkItrs[0] = com.ibm.icu.text.BreakIterator.getCharacterInstance(iculoc); 179 icuBrkItrs[1] = com.ibm.icu.text.BreakIterator.getWordInstance(iculoc); 180 icuBrkItrs[2] = com.ibm.icu.text.BreakIterator.getLineInstance(iculoc); 181 icuBrkItrs[3] = com.ibm.icu.text.BreakIterator.getSentenceInstance(iculoc); 182 183 for (String text : TEST_DATA) { 184 for (int i = 0; i < 4; i++) { 185 compareBreaks(text, jdkBrkItrs[i], icuBrkItrs[i]); 186 } 187 } 188 } 189 } 190 compareBreaks(String text, BreakIterator jdkBrk, com.ibm.icu.text.BreakIterator icuBrk)191 private void compareBreaks(String text, BreakIterator jdkBrk, com.ibm.icu.text.BreakIterator icuBrk) { 192 jdkBrk.setText(text); 193 icuBrk.setText(text); 194 195 // Forward 196 int jidx = jdkBrk.first(); 197 int iidx = icuBrk.first(); 198 if (jidx != iidx) { 199 errln("FAIL: Different first boundaries (jdk=" + jidx + ",icu=" + iidx + ") for text:\n" + text); 200 } 201 while (true) { 202 jidx = jdkBrk.next(); 203 iidx = icuBrk.next(); 204 if (jidx != iidx) { 205 errln("FAIL: Different boundaries (jdk=" + jidx + ",icu=" + iidx + "direction=forward) for text:\n" + text); 206 } 207 if (jidx == BreakIterator.DONE) { 208 break; 209 } 210 } 211 212 // Backward 213 jidx = jdkBrk.last(); 214 iidx = jdkBrk.last(); 215 if (jidx != iidx) { 216 errln("FAIL: Different last boundaries (jdk=" + jidx + ",icu=" + iidx + ") for text:\n" + text); 217 } 218 while (true) { 219 jidx = jdkBrk.previous(); 220 iidx = icuBrk.previous(); 221 if (jidx != iidx) { 222 errln("FAIL: Different boundaries (jdk=" + jidx + ",icu=" + iidx + "direction=backward) for text:\n" + text); 223 } 224 if (jidx == BreakIterator.DONE) { 225 break; 226 } 227 } 228 } 229 } 230