1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /* 4 ******************************************************************************* 5 * Copyright (C) 1996-2014, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ******************************************************************************* 8 */ 9 package com.ibm.icu.text; 10 11 import java.text.CharacterIterator; 12 13 import com.ibm.icu.lang.UCharacter; 14 import com.ibm.icu.util.ICUCloneNotSupportedException; 15 import com.ibm.icu.util.ULocale; 16 17 18 /** 19 * Inserts the specified characters at word breaks. To restrict it to particular characters, use a filter. 20 * TODO: this is an internal class, and only temporary. Remove it once we have \b notation in Transliterator. 21 */ 22 final class BreakTransliterator extends Transliterator { 23 private BreakIterator bi; 24 private String insertion; 25 private int[] boundaries = new int[50]; 26 private int boundaryCount = 0; 27 BreakTransliterator(String ID, UnicodeFilter filter, BreakIterator bi, String insertion)28 public BreakTransliterator(String ID, UnicodeFilter filter, BreakIterator bi, String insertion) { 29 super(ID, filter); 30 this.bi = bi; 31 this.insertion = insertion; 32 } 33 BreakTransliterator(String ID, UnicodeFilter filter)34 public BreakTransliterator(String ID, UnicodeFilter filter) { 35 this(ID, filter, null, " "); 36 } 37 38 ///CLOVER:OFF 39 // The following method is not called by anything and can't be reached getInsertion()40 public String getInsertion() { 41 return insertion; 42 } 43 ///CLOVER:ON 44 45 ///CLOVER:OFF 46 // The following method is not called by anything and can't be reached setInsertion(String insertion)47 public void setInsertion(String insertion) { 48 this.insertion = insertion; 49 } 50 ///CLOVER:ON 51 getBreakIterator()52 public BreakIterator getBreakIterator() { 53 // Defer initialization of BreakIterator because it is slow, 54 // typically over 2000 ms. 55 if (bi == null) bi = BreakIterator.getWordInstance(new ULocale("th_TH")); 56 return bi; 57 } 58 59 ///CLOVER:OFF 60 // The following method is not called by anything and can't be reached setBreakIterator(BreakIterator bi)61 public void setBreakIterator(BreakIterator bi) { 62 this.bi = bi; 63 } 64 ///CLOVER:ON 65 66 static final int LETTER_OR_MARK_MASK = 67 (1<<Character.UPPERCASE_LETTER) 68 | (1<<Character.LOWERCASE_LETTER) 69 | (1<<Character.TITLECASE_LETTER) 70 | (1<<Character.MODIFIER_LETTER) 71 | (1<<Character.OTHER_LETTER) 72 | (1<<Character.COMBINING_SPACING_MARK) 73 | (1<<Character.NON_SPACING_MARK) 74 | (1<<Character.ENCLOSING_MARK) 75 ; 76 @Override handleTransliterate(Replaceable text, Position pos, boolean incremental)77 protected synchronized void handleTransliterate(Replaceable text, Position pos, boolean incremental) { 78 boundaryCount = 0; 79 int boundary = 0; 80 getBreakIterator(); // Lazy-create it if necessary 81 bi.setText(new ReplaceableCharacterIterator(text, pos.start, pos.limit, pos.start)); 82 // TODO: fix clumsy workaround used below. 83 /* 84 char[] tempBuffer = new char[text.length()]; 85 text.getChars(0, text.length(), tempBuffer, 0); 86 bi.setText(new StringCharacterIterator(new String(tempBuffer), pos.start, pos.limit, pos.start)); 87 */ 88 // end debugging 89 90 // To make things much easier, we will stack the boundaries, and then insert at the end. 91 // generally, we won't need too many, since we will be filtered. 92 93 for(boundary = bi.first(); boundary != BreakIterator.DONE && boundary < pos.limit; boundary = bi.next()) { 94 if (boundary == 0) continue; 95 // HACK: Check to see that preceeding item was a letter 96 97 int cp = UTF16.charAt(text, boundary-1); 98 int type = UCharacter.getType(cp); 99 //System.out.println(Integer.toString(cp,16) + " (before): " + type); 100 if (((1<<type) & LETTER_OR_MARK_MASK) == 0) continue; 101 102 cp = UTF16.charAt(text, boundary); 103 type = UCharacter.getType(cp); 104 //System.out.println(Integer.toString(cp,16) + " (after): " + type); 105 if (((1<<type) & LETTER_OR_MARK_MASK) == 0) continue; 106 107 if (boundaryCount >= boundaries.length) { // realloc if necessary 108 int[] temp = new int[boundaries.length * 2]; 109 System.arraycopy(boundaries, 0, temp, 0, boundaries.length); 110 boundaries = temp; 111 } 112 113 boundaries[boundaryCount++] = boundary; 114 //System.out.println(boundary); 115 } 116 117 int delta = 0; 118 int lastBoundary = 0; 119 120 if (boundaryCount != 0) { // if we found something, adjust 121 delta = boundaryCount * insertion.length(); 122 lastBoundary = boundaries[boundaryCount-1]; 123 124 // we do this from the end backwards, so that we don't have to keep updating. 125 126 while (boundaryCount > 0) { 127 boundary = boundaries[--boundaryCount]; 128 text.replace(boundary, boundary, insertion); 129 } 130 } 131 132 // Now fix up the return values 133 pos.contextLimit += delta; 134 pos.limit += delta; 135 pos.start = incremental ? lastBoundary + delta : pos.limit; 136 } 137 138 139 /** 140 * Registers standard variants with the system. Called by 141 * Transliterator during initialization. 142 */ register()143 static void register() { 144 // false means that it is invisible 145 Transliterator trans = new BreakTransliterator("Any-BreakInternal", null); 146 Transliterator.registerInstance(trans, false); 147 /* 148 Transliterator.registerFactory("Any-Break", new Transliterator.Factory() { 149 public Transliterator getInstance(String ID) { 150 return new BreakTransliterator("Any-Break", null); 151 } 152 }); 153 */ 154 } 155 156 // Hack, just to get a real character iterator. 157 static final class ReplaceableCharacterIterator implements CharacterIterator 158 { 159 private Replaceable text; 160 private int begin; 161 private int end; 162 // invariant: begin <= pos <= end 163 private int pos; 164 165 /** 166 * Constructs an iterator with an initial index of 0. 167 */ 168 /*public ReplaceableCharacterIterator(Replaceable text) 169 { 170 this(text, 0); 171 }*/ 172 173 /** 174 * Constructs an iterator with the specified initial index. 175 * 176 * @param text The String to be iterated over 177 * @param pos Initial iterator position 178 */ 179 /*public ReplaceableCharacterIterator(Replaceable text, int pos) 180 { 181 this(text, 0, text.length(), pos); 182 }*/ 183 184 /** 185 * Constructs an iterator over the given range of the given string, with the 186 * index set at the specified position. 187 * 188 * @param text The String to be iterated over 189 * @param begin Index of the first character 190 * @param end Index of the character following the last character 191 * @param pos Initial iterator position 192 */ ReplaceableCharacterIterator(Replaceable text, int begin, int end, int pos)193 public ReplaceableCharacterIterator(Replaceable text, int begin, int end, int pos) { 194 if (text == null) { 195 throw new NullPointerException(); 196 } 197 this.text = text; 198 199 if (begin < 0 || begin > end || end > text.length()) { 200 throw new IllegalArgumentException("Invalid substring range"); 201 } 202 203 if (pos < begin || pos > end) { 204 throw new IllegalArgumentException("Invalid position"); 205 } 206 207 this.begin = begin; 208 this.end = end; 209 this.pos = pos; 210 } 211 212 /** 213 * Reset this iterator to point to a new string. This package-visible 214 * method is used by other java.text classes that want to avoid allocating 215 * new ReplaceableCharacterIterator objects every time their setText method 216 * is called. 217 * 218 * @param text The String to be iterated over 219 */ setText(Replaceable text)220 public void setText(Replaceable text) { 221 if (text == null) { 222 throw new NullPointerException(); 223 } 224 this.text = text; 225 this.begin = 0; 226 this.end = text.length(); 227 this.pos = 0; 228 } 229 230 /** 231 * Implements CharacterIterator.first() for String. 232 * @see CharacterIterator#first 233 */ 234 @Override first()235 public char first() 236 { 237 pos = begin; 238 return current(); 239 } 240 241 /** 242 * Implements CharacterIterator.last() for String. 243 * @see CharacterIterator#last 244 */ 245 @Override last()246 public char last() 247 { 248 if (end != begin) { 249 pos = end - 1; 250 } else { 251 pos = end; 252 } 253 return current(); 254 } 255 256 /** 257 * Implements CharacterIterator.setIndex() for String. 258 * @see CharacterIterator#setIndex 259 */ 260 @Override setIndex(int p)261 public char setIndex(int p) 262 { 263 if (p < begin || p > end) { 264 throw new IllegalArgumentException("Invalid index"); 265 } 266 pos = p; 267 return current(); 268 } 269 270 /** 271 * Implements CharacterIterator.current() for String. 272 * @see CharacterIterator#current 273 */ 274 @Override current()275 public char current() 276 { 277 if (pos >= begin && pos < end) { 278 return text.charAt(pos); 279 } 280 else { 281 return DONE; 282 } 283 } 284 285 /** 286 * Implements CharacterIterator.next() for String. 287 * @see CharacterIterator#next 288 */ 289 @Override next()290 public char next() 291 { 292 if (pos < end - 1) { 293 pos++; 294 return text.charAt(pos); 295 } 296 else { 297 pos = end; 298 return DONE; 299 } 300 } 301 302 /** 303 * Implements CharacterIterator.previous() for String. 304 * @see CharacterIterator#previous 305 */ 306 @Override previous()307 public char previous() 308 { 309 if (pos > begin) { 310 pos--; 311 return text.charAt(pos); 312 } 313 else { 314 return DONE; 315 } 316 } 317 318 /** 319 * Implements CharacterIterator.getBeginIndex() for String. 320 * @see CharacterIterator#getBeginIndex 321 */ 322 @Override getBeginIndex()323 public int getBeginIndex() 324 { 325 return begin; 326 } 327 328 /** 329 * Implements CharacterIterator.getEndIndex() for String. 330 * @see CharacterIterator#getEndIndex 331 */ 332 @Override getEndIndex()333 public int getEndIndex() 334 { 335 return end; 336 } 337 338 /** 339 * Implements CharacterIterator.getIndex() for String. 340 * @see CharacterIterator#getIndex 341 */ 342 @Override getIndex()343 public int getIndex() 344 { 345 return pos; 346 } 347 348 /** 349 * Compares the equality of two ReplaceableCharacterIterator objects. 350 * @param obj the ReplaceableCharacterIterator object to be compared with. 351 * @return true if the given obj is the same as this 352 * ReplaceableCharacterIterator object; false otherwise. 353 */ 354 @Override equals(Object obj)355 public boolean equals(Object obj) 356 { 357 if (this == obj) { 358 return true; 359 } 360 if (!(obj instanceof ReplaceableCharacterIterator)) { 361 return false; 362 } 363 364 ReplaceableCharacterIterator that = (ReplaceableCharacterIterator) obj; 365 366 if (hashCode() != that.hashCode()) { 367 return false; 368 } 369 if (!text.equals(that.text)) { 370 return false; 371 } 372 if (pos != that.pos || begin != that.begin || end != that.end) { 373 return false; 374 } 375 return true; 376 } 377 378 /** 379 * Computes a hashcode for this iterator. 380 * @return A hash code 381 */ 382 @Override hashCode()383 public int hashCode() 384 { 385 return text.hashCode() ^ pos ^ begin ^ end; 386 } 387 388 /** 389 * Creates a copy of this iterator. 390 * @return A copy of this 391 */ 392 @Override clone()393 public Object clone() 394 { 395 try { 396 ReplaceableCharacterIterator other 397 = (ReplaceableCharacterIterator) super.clone(); 398 return other; 399 } 400 catch (CloneNotSupportedException e) { 401 throw new ICUCloneNotSupportedException(); 402 } 403 } 404 405 } 406 /* (non-Javadoc) 407 * @see com.ibm.icu.text.Transliterator#addSourceTargetSet(com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet, com.ibm.icu.text.UnicodeSet) 408 */ 409 @Override addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet)410 public void addSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet) { 411 UnicodeSet myFilter = getFilterAsUnicodeSet(inputFilter); 412 // Doesn't actually modify the source characters, so leave them alone. 413 // add the characters inserted 414 if (myFilter.size() != 0) { 415 targetSet.addAll(insertion); 416 } 417 } 418 419 } 420