1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /* 4 ******************************************************************************* 5 * Copyright (C) 1996-2014, International Business Machines Corporation and * 6 * others. All Rights Reserved. * 7 ******************************************************************************* 8 */ 9 package com.ibm.icu.text; 10 import com.ibm.icu.impl.Norm2AllModes; 11 import com.ibm.icu.impl.Normalizer2Impl; 12 13 /** 14 * This class has been deprecated since ICU 2.2. 15 * One problem is that this class is not designed to return supplementary characters. 16 * Use the Normalizer2 and UCharacter classes instead. 17 * <p> 18 * <tt>ComposedCharIter</tt> is an iterator class that returns all 19 * of the precomposed characters defined in the Unicode standard, along 20 * with their decomposed forms. This is often useful when building 21 * data tables (<i>e.g.</i> collation tables) which need to treat composed 22 * and decomposed characters equivalently. 23 * <p> 24 * For example, imagine that you have built a collation table with ordering 25 * rules for the {@link Normalizer#DECOMP canonically decomposed} forms of all 26 * characters used in a particular language. When you process input text using 27 * this table, the text must first be decomposed so that it matches the form 28 * used in the table. This can impose a performance penalty that may be 29 * unacceptable in some situations. 30 * <p> 31 * You can avoid this problem by ensuring that the collation table contains 32 * rules for both the decomposed <i>and</i> composed versions of each character. 33 * To do so, use a <tt>ComposedCharIter</tt> to iterate through all of the 34 * composed characters in Unicode. If the decomposition for that character 35 * consists solely of characters that are listed in your ruleset, you can 36 * add a new rule for the composed character that makes it equivalent to 37 * its decomposition sequence. 38 * <p> 39 * Note that <tt>ComposedCharIter</tt> iterates over a <em>static</em> table 40 * of the composed characters in Unicode. If you want to iterate over the 41 * composed characters in a particular string, use {@link Normalizer} instead. 42 * <p> 43 * When constructing a <tt>ComposedCharIter</tt> there is one 44 * optional feature that you can enable or disable: 45 * <ul> 46 * <li>{@link Normalizer#IGNORE_HANGUL} - Do not iterate over the Hangul 47 * characters and their corresponding Jamo decompositions. 48 * This option is off by default (<i>i.e.</i> Hangul processing is enabled) 49 * since the Unicode standard specifies that Hangul to Jamo 50 * is a canonical decomposition. 51 * </ul> 52 * <p> 53 * <tt>ComposedCharIter</tt> is currently based on version 2.1.8 of the 54 * <a href="http://www.unicode.org" target="unicode">Unicode Standard</a>. 55 * It will be updated as later versions of Unicode are released. 56 * @deprecated ICU 2.2 57 */ 58 @Deprecated 59 ///CLOVER:OFF 60 public final class ComposedCharIter { 61 /** 62 * Constant that indicates the iteration has completed. 63 * {@link #next} returns this value when there are no more composed characters 64 * over which to iterate. 65 * @deprecated ICU 2.2 66 */ 67 @Deprecated 68 public static final char DONE = (char) Normalizer.DONE; 69 70 /** 71 * Construct a new <tt>ComposedCharIter</tt>. The iterator will return 72 * all Unicode characters with canonical decompositions, including Korean 73 * Hangul characters. 74 * @deprecated ICU 2.2 75 */ 76 @Deprecated ComposedCharIter()77 public ComposedCharIter() { 78 this(false, 0); 79 } 80 81 /** 82 * Constructs a non-default <tt>ComposedCharIter</tt> with optional behavior. 83 * <p> 84 * @param compat <tt>false</tt> for canonical decompositions only; 85 * <tt>true</tt> for both canonical and compatibility 86 * decompositions. 87 * 88 * @param options Optional decomposition features. None are supported, so this is ignored. 89 * @deprecated ICU 2.2 90 */ 91 @Deprecated ComposedCharIter(boolean compat, int options)92 public ComposedCharIter(boolean compat, int options) { 93 if(compat) { 94 n2impl = Norm2AllModes.getNFKCInstance().impl; 95 } else { 96 n2impl = Norm2AllModes.getNFCInstance().impl; 97 } 98 } 99 100 /** 101 * Determines whether there any precomposed Unicode characters not yet returned 102 * by {@link #next}. 103 * @deprecated ICU 2.2 104 */ 105 @Deprecated hasNext()106 public boolean hasNext() { 107 if (nextChar == Normalizer.DONE) { 108 findNextChar(); 109 } 110 return nextChar != Normalizer.DONE; 111 } 112 113 /** 114 * Returns the next precomposed Unicode character. 115 * Repeated calls to <tt>next</tt> return all of the precomposed characters defined 116 * by Unicode, in ascending order. After all precomposed characters have 117 * been returned, {@link #hasNext} will return <tt>false</tt> and further calls 118 * to <tt>next</tt> will return {@link #DONE}. 119 * @deprecated ICU 2.2 120 */ 121 @Deprecated next()122 public char next() { 123 if (nextChar == Normalizer.DONE) { 124 findNextChar(); 125 } 126 curChar = nextChar; 127 nextChar = Normalizer.DONE; 128 return (char) curChar; 129 } 130 131 /** 132 * Returns the Unicode decomposition of the current character. 133 * This method returns the decomposition of the precomposed character most 134 * recently returned by {@link #next}. The resulting decomposition is 135 * affected by the settings of the options passed to the constructor. 136 * @deprecated ICU 2.2 137 */ 138 @Deprecated decomposition()139 public String decomposition() { 140 // the decomposition buffer contains the decomposition of 141 // current char so just return it 142 if(decompBuf != null) { 143 return decompBuf; 144 } else { 145 return ""; 146 } 147 } 148 findNextChar()149 private void findNextChar() { 150 int c=curChar+1; 151 decompBuf = null; 152 for(;;) { 153 if(c < 0xFFFF) { 154 decompBuf = n2impl.getDecomposition(c); 155 if(decompBuf != null) { 156 // the curChar can be decomposed... so it is a composed char 157 // cache the result 158 break; 159 } 160 c++; 161 } else { 162 c=Normalizer.DONE; 163 break; 164 } 165 } 166 nextChar=c; 167 } 168 169 private final Normalizer2Impl n2impl; 170 private String decompBuf; 171 private int curChar = 0; 172 private int nextChar = Normalizer.DONE; 173 } 174