1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html#License
3 /*
4  *******************************************************************************
5  * Copyright (C) 2008-2015, International Business Machines Corporation and    *
6  * others. All Rights Reserved.                                                *
7  *******************************************************************************
8  */
9 package com.ibm.icu.dev.test.localespi;
10 
11 import java.text.BreakIterator;
12 import java.util.Locale;
13 
14 import org.junit.Test;
15 import org.junit.runner.RunWith;
16 import org.junit.runners.JUnit4;
17 
18 import com.ibm.icu.dev.test.TestFmwk;
19 
20 @RunWith(JUnit4.class)
21 public class BreakIteratorTest extends TestFmwk {
22     private static final int CHARACTER_BRK = 0;
23     private static final int WORD_BRK = 1;
24     private static final int LINE_BRK = 2;
25     private static final int SENTENCE_BRK = 3;
26 
27     /*
28      * Check if getInstance returns the ICU implementation.
29      */
30     @Test
TestGetInstance()31     public void TestGetInstance() {
32         for (Locale loc : BreakIterator.getAvailableLocales()) {
33             if (TestUtil.isExcluded(loc)) {
34                 logln("Skipped " + loc);
35                 continue;
36             }
37             checkGetInstance(CHARACTER_BRK, loc);
38             checkGetInstance(WORD_BRK, loc);
39             checkGetInstance(LINE_BRK, loc);
40             checkGetInstance(SENTENCE_BRK, loc);
41         }
42     }
43 
checkGetInstance(int type, Locale loc)44     private void checkGetInstance(int type, Locale loc) {
45         BreakIterator brkitr = null;
46         String method = null;
47         switch (type) {
48         case CHARACTER_BRK:
49             brkitr = BreakIterator.getCharacterInstance(loc);
50             method = "getCharacterInstance";
51             break;
52         case WORD_BRK:
53             brkitr = BreakIterator.getWordInstance(loc);
54             method = "getWordInstance";
55             break;
56         case LINE_BRK:
57             brkitr = BreakIterator.getLineInstance(loc);
58             method = "getLineInstance";
59             break;
60         case SENTENCE_BRK:
61             brkitr = BreakIterator.getSentenceInstance(loc);
62             method = "getSentenceInstance";
63             break;
64         default:
65             errln("FAIL: Unknown break iterator type");
66             return;
67         }
68 
69         boolean isIcuImpl = (brkitr instanceof com.ibm.icu.impl.jdkadapter.BreakIteratorICU);
70 
71         if (TestUtil.isICUExtendedLocale(loc)) {
72             if (!isIcuImpl) {
73                 errln("FAIL: " + method + " returned JDK BreakIterator for locale " + loc);
74             }
75         } else {
76             if (isIcuImpl) {
77                 logln("INFO: " + method + " returned ICU BreakIterator for locale " + loc);
78             }
79             BreakIterator brkitrIcu = null;
80             Locale iculoc = TestUtil.toICUExtendedLocale(loc);
81             switch (type) {
82             case CHARACTER_BRK:
83                 brkitrIcu = BreakIterator.getCharacterInstance(iculoc);
84                 break;
85             case WORD_BRK:
86                 brkitrIcu = BreakIterator.getWordInstance(iculoc);
87                 break;
88             case LINE_BRK:
89                 brkitrIcu = BreakIterator.getLineInstance(iculoc);
90                 break;
91             case SENTENCE_BRK:
92                 brkitrIcu = BreakIterator.getSentenceInstance(iculoc);
93                 break;
94             }
95             if (isIcuImpl) {
96                 if (!brkitr.equals(brkitrIcu)) {
97                     // BreakIterator.getXXXInstance returns a cached BreakIterator instance.
98                     // BreakIterator does not override Object#equals, so the result may not be
99                     // consistent.
100 //                        logln("INFO: " + method + " returned ICU BreakIterator for locale " + loc
101 //                                + ", but different from the one for locale " + iculoc);
102                 }
103             } else {
104                 if (!(brkitrIcu instanceof com.ibm.icu.impl.jdkadapter.BreakIteratorICU)) {
105                     errln("FAIL: " + method + " returned JDK BreakIterator for locale " + iculoc);
106                 }
107             }
108         }
109     }
110 
111     /*
112      * Testing the behavior of text break between ICU instance and its
113      * equivalent created via the Locale SPI framework.
114      */
115     @Test
TestICUEquivalent()116     public void TestICUEquivalent() {
117         Locale[] TEST_LOCALES = {
118                 new Locale("en", "US"),
119                 new Locale("fr", "FR"),
120                 new Locale("th", "TH"),
121                 new Locale("zh", "CN"),
122         };
123 
124         String[] TEST_DATA = {
125                 "International Components for Unicode (ICU) is an open source project of mature "
126                 + "C/C++ and Java libraries for Unicode support, software internationalization and "
127                 + "software globalization. ICU is widely portable to many operating systems and "
128                 + "environments. It gives applications the same results on all platforms and between "
129                 + "C/C++ and Java software. The ICU project is an open source development project "
130                 + "that is sponsored, supported and used by IBM and many other companies.",
131 
132                 "L'International Components for Unicode (ICU) est un projet open source qui fourni "
133                 + "des biblioth\u00e8ques pour les langages informatique C/C++ et Java pour supporter "
134                 + "Unicode, l'internationalisation et la mondialisation des logiciels. ICU est largement "
135                 + "portable vers beaucoup de syst\u00e8mes d'exploitations et d'environnements. Il "
136                 + "donne aux applications les m\u00eames comportements et r\u00e9sultats sur toutes "
137                 + "les plateformes et entre les logiciels C/C++ et Java. Le projet ICU est un projet "
138                 + "dont les code sources sont disponibles qui est sponsoris\u00e9, support\u00e9 et "
139                 + "utilis\u00e9 par IBM et beaucoup d'autres entreprises.",
140 
141                 "\u5728IBM\u7b49\u4f01\u696d\u4e2d\uff0c\u56fd\u9645\u5316\u7ecf\u5e38\u7b80\u5199"
142                 + "\u4e3aI18N (\u6216i18n\u6216I18n)\uff0c\u5176\u4e2d18\u4ee3\u8868\u4e86\u4e2d\u95f4"
143                 + "\u7701\u7565\u768418\u4e2a\u5b57\u6bcd\uff1b\u800c\u201c\u672c\u5730\u5316\u201d"
144                 + "\u540c\u53ef\u7b80\u5199\u4e3al10n\u3002\u9019\u4e24\u4e2a\u6982\u5ff5\u6709\u65f6"
145                 + "\u5408\u79f0\u5168\u7403\u5316\uff08g11n\uff09\uff0c\u4f46\u662f\u5168\u7403\u5316"
146                 + "\u7684\u6db5\u4e49\u66f4\u4e3a\u4e00\u822c\u5316\u3002\u53e6\u5916\u5076\u5c14\u4f1a"
147                 + "\u51fa\u73b0\u201cp13n\u201d\uff0c\u4ee3\u8868\u4e2a\u4eba\u5316\uff08personalization"
148                 + "\uff09\u3002",
149 
150                 "\u0e01\u0e23\u0e38\u0e07\u0e40\u0e17\u0e1e\u0e21\u0e2b\u0e32\u0e19\u0e04\u0e23"
151                 + "\u0e43\u0e19\u0e1b\u0e31\u0e08\u0e08\u0e38\u0e1a\u0e31\u0e19\u0e40\u0e1b\u0e47"
152                 + "\u0e19\u0e28\u0e39\u0e19\u0e22\u0e4c\u0e01\u0e25\u0e32\u0e07\u0e01\u0e32\u0e23"
153                 + "\u0e1b\u0e01\u0e04\u0e23\u0e2d\u0e07 \u0e01\u0e32\u0e23\u0e28\u0e36\u0e01\u0e29"
154                 + "\u0e32 \u0e01\u0e32\u0e23\u0e04\u0e21\u0e19\u0e32\u0e04\u0e21\u0e02\u0e19\u0e2a"
155                 + "\u0e48\u0e07 \u0e01\u0e32\u0e23\u0e40\u0e07\u0e34\u0e19\u0e01\u0e32\u0e23\u0e18"
156                 + "\u0e19\u0e32\u0e04\u0e32\u0e23 \u0e01\u0e32\u0e23\u0e1e\u0e32\u0e13\u0e34\u0e0a"
157                 + "\u0e22\u0e4c \u0e01\u0e32\u0e23\u0e2a\u0e37\u0e48\u0e2d\u0e2a\u0e32\u0e23 \u0e2f"
158                 + "\u0e25\u0e2f \u0e42\u0e14\u0e22\u0e21\u0e35\u0e1e\u0e37\u0e49\u0e19\u0e17\u0e35"
159                 + "\u0e48\u0e17\u0e31\u0e49\u0e07\u0e2b\u0e21\u0e14 1,562.2 \u0e15\u0e32\u0e23\u0e32"
160                 + "\u0e07\u0e01\u0e34\u0e42\u0e25\u0e40\u0e21\u0e15\u0e23 \u0e1e\u0e34\u0e01\u0e31"
161                 + "\u0e14\u0e17\u0e32\u0e07\u0e20\u0e39\u0e21\u0e34\u0e28\u0e32\u0e2a\u0e15\u0e23"
162                 + "\u0e4c\u0e04\u0e37\u0e2d \u0e25\u0e30\u0e15\u0e34\u0e08\u0e39\u0e14 13\u00b0 45"
163                 + "\u2019 \u0e40\u0e2b\u0e19\u0e37\u0e2d \u0e25\u0e2d\u0e07\u0e08\u0e34\u0e08\u0e39"
164                 + "\u0e14 100\u00b0 31\u2019 \u0e15\u0e30\u0e27\u0e31\u0e19\u0e2d\u0e2d\u0e01"
165         };
166 
167         BreakIterator[] jdkBrkItrs = new BreakIterator[4];
168         com.ibm.icu.text.BreakIterator[] icuBrkItrs = new com.ibm.icu.text.BreakIterator[4];
169 
170         for (Locale loc : TEST_LOCALES) {
171             Locale iculoc = TestUtil.toICUExtendedLocale(loc);
172 
173             jdkBrkItrs[0] = BreakIterator.getCharacterInstance(iculoc);
174             jdkBrkItrs[1] = BreakIterator.getWordInstance(iculoc);
175             jdkBrkItrs[2] = BreakIterator.getLineInstance(iculoc);
176             jdkBrkItrs[3] = BreakIterator.getSentenceInstance(iculoc);
177 
178             icuBrkItrs[0] = com.ibm.icu.text.BreakIterator.getCharacterInstance(iculoc);
179             icuBrkItrs[1] = com.ibm.icu.text.BreakIterator.getWordInstance(iculoc);
180             icuBrkItrs[2] = com.ibm.icu.text.BreakIterator.getLineInstance(iculoc);
181             icuBrkItrs[3] = com.ibm.icu.text.BreakIterator.getSentenceInstance(iculoc);
182 
183             for (String text : TEST_DATA) {
184                 for (int i = 0; i < 4; i++) {
185                     compareBreaks(text, jdkBrkItrs[i], icuBrkItrs[i]);
186                 }
187             }
188         }
189     }
190 
compareBreaks(String text, BreakIterator jdkBrk, com.ibm.icu.text.BreakIterator icuBrk)191     private void compareBreaks(String text, BreakIterator jdkBrk, com.ibm.icu.text.BreakIterator icuBrk) {
192         jdkBrk.setText(text);
193         icuBrk.setText(text);
194 
195         // Forward
196         int jidx = jdkBrk.first();
197         int iidx = icuBrk.first();
198         if (jidx != iidx) {
199             errln("FAIL: Different first boundaries (jdk=" + jidx + ",icu=" + iidx + ") for text:\n" + text);
200         }
201         while (true) {
202             jidx = jdkBrk.next();
203             iidx = icuBrk.next();
204             if (jidx != iidx) {
205                 errln("FAIL: Different boundaries (jdk=" + jidx + ",icu=" + iidx + "direction=forward) for text:\n" + text);
206             }
207             if (jidx == BreakIterator.DONE) {
208                 break;
209             }
210         }
211 
212         // Backward
213         jidx = jdkBrk.last();
214         iidx = jdkBrk.last();
215         if (jidx != iidx) {
216             errln("FAIL: Different last boundaries (jdk=" + jidx + ",icu=" + iidx + ") for text:\n" + text);
217         }
218         while (true) {
219             jidx = jdkBrk.previous();
220             iidx = icuBrk.previous();
221             if (jidx != iidx) {
222                 errln("FAIL: Different boundaries (jdk=" + jidx + ",icu=" + iidx + "direction=backward) for text:\n" + text);
223             }
224             if (jidx == BreakIterator.DONE) {
225                 break;
226             }
227         }
228     }
229 }
230