1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html#License
3 /*
4  *******************************************************************************
5  * Copyright (C) 1996-2014, International Business Machines Corporation and    *
6  * others. All Rights Reserved.                                                *
7  *******************************************************************************
8  */
9 package com.ibm.icu.text;
10 import com.ibm.icu.impl.Norm2AllModes;
11 import com.ibm.icu.impl.Normalizer2Impl;
12 
13 /**
14  * This class has been deprecated since ICU 2.2.
15  * One problem is that this class is not designed to return supplementary characters.
16  * Use the Normalizer2 and UCharacter classes instead.
17  * <p>
18  * <tt>ComposedCharIter</tt> is an iterator class that returns all
19  * of the precomposed characters defined in the Unicode standard, along
20  * with their decomposed forms.  This is often useful when building
21  * data tables (<i>e.g.</i> collation tables) which need to treat composed
22  * and decomposed characters equivalently.
23  * <p>
24  * For example, imagine that you have built a collation table with ordering
25  * rules for the {@link Normalizer#DECOMP canonically decomposed} forms of all
26  * characters used in a particular language.  When you process input text using
27  * this table, the text must first be decomposed so that it matches the form
28  * used in the table.  This can impose a performance penalty that may be
29  * unacceptable in some situations.
30  * <p>
31  * You can avoid this problem by ensuring that the collation table contains
32  * rules for both the decomposed <i>and</i> composed versions of each character.
33  * To do so, use a <tt>ComposedCharIter</tt> to iterate through all of the
34  * composed characters in Unicode.  If the decomposition for that character
35  * consists solely of characters that are listed in your ruleset, you can
36  * add a new rule for the composed character that makes it equivalent to
37  * its decomposition sequence.
38  * <p>
39  * Note that <tt>ComposedCharIter</tt> iterates over a <em>static</em> table
40  * of the composed characters in Unicode.  If you want to iterate over the
41  * composed characters in a particular string, use {@link Normalizer} instead.
42  * <p>
43  * When constructing a <tt>ComposedCharIter</tt> there is one
44  * optional feature that you can enable or disable:
45  * <ul>
46  *   <li>{@link Normalizer#IGNORE_HANGUL} - Do not iterate over the Hangul
47  *          characters and their corresponding Jamo decompositions.
48  *          This option is off by default (<i>i.e.</i> Hangul processing is enabled)
49  *          since the Unicode standard specifies that Hangul to Jamo
50  *          is a canonical decomposition.
51  * </ul>
52  * <p>
53  * <tt>ComposedCharIter</tt> is currently based on version 2.1.8 of the
54  * <a href="http://www.unicode.org" target="unicode">Unicode Standard</a>.
55  * It will be updated as later versions of Unicode are released.
56  * @deprecated ICU 2.2
57  */
58 @Deprecated
59 ///CLOVER:OFF
60 public final class ComposedCharIter {
61     /**
62      * Constant that indicates the iteration has completed.
63      * {@link #next} returns this value when there are no more composed characters
64      * over which to iterate.
65      * @deprecated ICU 2.2
66      */
67     @Deprecated
68     public static final  char DONE = (char) Normalizer.DONE;
69 
70     /**
71      * Construct a new <tt>ComposedCharIter</tt>.  The iterator will return
72      * all Unicode characters with canonical decompositions, including Korean
73      * Hangul characters.
74      * @deprecated ICU 2.2
75      */
76     @Deprecated
ComposedCharIter()77     public ComposedCharIter() {
78         this(false, 0);
79     }
80 
81     /**
82      * Constructs a non-default <tt>ComposedCharIter</tt> with optional behavior.
83      * <p>
84      * @param compat    <tt>false</tt> for canonical decompositions only;
85      *                  <tt>true</tt> for both canonical and compatibility
86      *                  decompositions.
87      *
88      * @param options   Optional decomposition features. None are supported, so this is ignored.
89      * @deprecated ICU 2.2
90      */
91     @Deprecated
ComposedCharIter(boolean compat, int options)92     public ComposedCharIter(boolean compat, int options) {
93         if(compat) {
94             n2impl = Norm2AllModes.getNFKCInstance().impl;
95         } else {
96             n2impl = Norm2AllModes.getNFCInstance().impl;
97         }
98     }
99 
100     /**
101      * Determines whether there any precomposed Unicode characters not yet returned
102      * by {@link #next}.
103      * @deprecated ICU 2.2
104      */
105     @Deprecated
hasNext()106     public boolean hasNext() {
107         if (nextChar == Normalizer.DONE)  {
108             findNextChar();
109         }
110         return nextChar != Normalizer.DONE;
111     }
112 
113     /**
114      * Returns the next precomposed Unicode character.
115      * Repeated calls to <tt>next</tt> return all of the precomposed characters defined
116      * by Unicode, in ascending order.  After all precomposed characters have
117      * been returned, {@link #hasNext} will return <tt>false</tt> and further calls
118      * to <tt>next</tt> will return {@link #DONE}.
119      * @deprecated ICU 2.2
120      */
121     @Deprecated
next()122     public char next() {
123         if (nextChar == Normalizer.DONE)  {
124             findNextChar();
125         }
126         curChar = nextChar;
127         nextChar = Normalizer.DONE;
128         return (char) curChar;
129     }
130 
131     /**
132      * Returns the Unicode decomposition of the current character.
133      * This method returns the decomposition of the precomposed character most
134      * recently returned by {@link #next}.  The resulting decomposition is
135      * affected by the settings of the options passed to the constructor.
136      * @deprecated ICU 2.2
137      */
138     @Deprecated
decomposition()139     public String decomposition() {
140         // the decomposition buffer contains the decomposition of
141         // current char so just return it
142         if(decompBuf != null) {
143             return decompBuf;
144         } else {
145             return "";
146         }
147     }
148 
findNextChar()149     private void findNextChar() {
150         int c=curChar+1;
151         decompBuf = null;
152         for(;;) {
153             if(c < 0xFFFF) {
154                 decompBuf = n2impl.getDecomposition(c);
155                 if(decompBuf != null) {
156                     // the curChar can be decomposed... so it is a composed char
157                     // cache the result
158                     break;
159                 }
160                 c++;
161             } else {
162                 c=Normalizer.DONE;
163                 break;
164             }
165         }
166         nextChar=c;
167     }
168 
169     private final Normalizer2Impl n2impl;
170     private String decompBuf;
171     private int curChar = 0;
172     private int nextChar = Normalizer.DONE;
173 }
174