1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4  *****************************************************************************
5  * Copyright (C) 1996-2014, International Business Machines Corporation and others.
6  * All Rights Reserved.
7  *****************************************************************************
8  *
9  * File sortkey.h
10  *
11  * Created by: Helena Shih
12  *
13  * Modification History:
14  *
15  *  Date         Name          Description
16  *
17  *  6/20/97     helena      Java class name change.
18  *  8/18/97     helena      Added internal API documentation.
19  *  6/26/98     erm         Changed to use byte arrays and memcmp.
20  *****************************************************************************
21  */
22 
23 #ifndef SORTKEY_H
24 #define SORTKEY_H
25 
26 #include "unicode/utypes.h"
27 
28 /**
29  * \file
30  * \brief C++ API: Keys for comparing strings multiple times.
31  */
32 
33 #if !UCONFIG_NO_COLLATION
34 
35 #include "unicode/uobject.h"
36 #include "unicode/unistr.h"
37 #include "unicode/coll.h"
38 
39 U_NAMESPACE_BEGIN
40 
41 /* forward declaration */
42 class RuleBasedCollator;
43 class CollationKeyByteSink;
44 
45 /**
46  *
47  * Collation keys are generated by the Collator class.  Use the CollationKey objects
48  * instead of Collator to compare strings multiple times.  A CollationKey
49  * preprocesses the comparison information from the Collator object to
50  * make the comparison faster.  If you are not going to comparing strings
51  * multiple times, then using the Collator object is generally faster,
52  * since it only processes as much of the string as needed to make a
53  * comparison.
54  * <p> For example (with strength == tertiary)
55  * <p>When comparing "Abernathy" to "Baggins-Smythworthy", Collator
56  * only needs to process a couple of characters, while a comparison
57  * with CollationKeys will process all of the characters.  On the other hand,
58  * if you are doing a sort of a number of fields, it is much faster to use
59  * CollationKeys, since you will be comparing strings multiple times.
60  * <p>Typical use of CollationKeys are in databases, where you store a CollationKey
61  * in a hidden field, and use it for sorting or indexing.
62  *
63  * <p>Example of use:
64  * <pre>
65  * \code
66  *     UErrorCode success = U_ZERO_ERROR;
67  *     Collator* myCollator = Collator::createInstance(success);
68  *     CollationKey* keys = new CollationKey [3];
69  *     myCollator->getCollationKey("Tom", keys[0], success );
70  *     myCollator->getCollationKey("Dick", keys[1], success );
71  *     myCollator->getCollationKey("Harry", keys[2], success );
72  *
73  *     // Inside body of sort routine, compare keys this way:
74  *     CollationKey tmp;
75  *     if(keys[0].compareTo( keys[1] ) > 0 ) {
76  *         tmp = keys[0]; keys[0] = keys[1]; keys[1] = tmp;
77  *     }
78  *     //...
79  * \endcode
80  * </pre>
81  * <p>Because Collator::compare()'s algorithm is complex, it is faster to sort
82  * long lists of words by retrieving collation keys with Collator::getCollationKey().
83  * You can then cache the collation keys and compare them using CollationKey::compareTo().
84  * <p>
85  * <strong>Note:</strong> <code>Collator</code>s with different Locale,
86  * CollationStrength and DecompositionMode settings will return different
87  * CollationKeys for the same set of strings. Locales have specific
88  * collation rules, and the way in which secondary and tertiary differences
89  * are taken into account, for example, will result in different CollationKeys
90  * for same strings.
91  * <p>
92 
93  * @see          Collator
94  * @see          RuleBasedCollator
95  * @version      1.3 12/18/96
96  * @author       Helena Shih
97  * @stable ICU 2.0
98  */
99 class U_I18N_API CollationKey : public UObject {
100 public:
101     /**
102     * This creates an empty collation key based on the null string.  An empty
103     * collation key contains no sorting information.  When comparing two empty
104     * collation keys, the result is Collator::EQUAL.  Comparing empty collation key
105     * with non-empty collation key is always Collator::LESS.
106     * @stable ICU 2.0
107     */
108     CollationKey();
109 
110 
111     /**
112     * Creates a collation key based on the collation key values.
113     * @param values the collation key values
114     * @param count number of collation key values, including trailing nulls.
115     * @stable ICU 2.0
116     */
117     CollationKey(const  uint8_t*    values,
118                 int32_t     count);
119 
120     /**
121     * Copy constructor.
122     * @param other    the object to be copied.
123     * @stable ICU 2.0
124     */
125     CollationKey(const CollationKey& other);
126 
127     /**
128     * Sort key destructor.
129     * @stable ICU 2.0
130     */
131     virtual ~CollationKey();
132 
133     /**
134     * Assignment operator
135     * @param other    the object to be copied.
136     * @stable ICU 2.0
137     */
138     const   CollationKey&   operator=(const CollationKey& other);
139 
140     /**
141     * Compare if two collation keys are the same.
142     * @param source the collation key to compare to.
143     * @return Returns true if two collation keys are equal, false otherwise.
144     * @stable ICU 2.0
145     */
146     UBool                   operator==(const CollationKey& source) const;
147 
148     /**
149     * Compare if two collation keys are not the same.
150     * @param source the collation key to compare to.
151     * @return Returns TRUE if two collation keys are different, FALSE otherwise.
152     * @stable ICU 2.0
153     */
154     UBool                   operator!=(const CollationKey& source) const;
155 
156 
157     /**
158     * Test to see if the key is in an invalid state. The key will be in an
159     * invalid state if it couldn't allocate memory for some operation.
160     * @return Returns TRUE if the key is in an invalid, FALSE otherwise.
161     * @stable ICU 2.0
162     */
163     UBool                   isBogus(void) const;
164 
165     /**
166     * Returns a pointer to the collation key values. The storage is owned
167     * by the collation key and the pointer will become invalid if the key
168     * is deleted.
169     * @param count the output parameter of number of collation key values,
170     * including any trailing nulls.
171     * @return a pointer to the collation key values.
172     * @stable ICU 2.0
173     */
174     const    uint8_t*       getByteArray(int32_t& count) const;
175 
176 #ifdef U_USE_COLLATION_KEY_DEPRECATES
177     /**
178     * Extracts the collation key values into a new array. The caller owns
179     * this storage and should free it.
180     * @param count the output parameter of number of collation key values,
181     * including any trailing nulls.
182     * @obsolete ICU 2.6. Use getByteArray instead since this API will be removed in that release.
183     */
184     uint8_t*                toByteArray(int32_t& count) const;
185 #endif
186 
187 #ifndef U_HIDE_DEPRECATED_API
188     /**
189     * Convenience method which does a string(bit-wise) comparison of the
190     * two collation keys.
191     * @param target target collation key to be compared with
192     * @return Returns Collator::LESS if sourceKey &lt; targetKey,
193     * Collator::GREATER if sourceKey > targetKey and Collator::EQUAL
194     * otherwise.
195     * @deprecated ICU 2.6 use the overload with error code
196     */
197     Collator::EComparisonResult compareTo(const CollationKey& target) const;
198 #endif  /* U_HIDE_DEPRECATED_API */
199 
200     /**
201     * Convenience method which does a string(bit-wise) comparison of the
202     * two collation keys.
203     * @param target target collation key to be compared with
204     * @param status error code
205     * @return Returns UCOL_LESS if sourceKey &lt; targetKey,
206     * UCOL_GREATER if sourceKey > targetKey and UCOL_EQUAL
207     * otherwise.
208     * @stable ICU 2.6
209     */
210     UCollationResult compareTo(const CollationKey& target, UErrorCode &status) const;
211 
212     /**
213     * Creates an integer that is unique to the collation key.  NOTE: this
214     * is not the same as String.hashCode.
215     * <p>Example of use:
216     * <pre>
217     * .    UErrorCode status = U_ZERO_ERROR;
218     * .    Collator *myCollation = Collator::createInstance(Locale::US, status);
219     * .    if (U_FAILURE(status)) return;
220     * .    CollationKey key1, key2;
221     * .    UErrorCode status1 = U_ZERO_ERROR, status2 = U_ZERO_ERROR;
222     * .    myCollation->getCollationKey("abc", key1, status1);
223     * .    if (U_FAILURE(status1)) { delete myCollation; return; }
224     * .    myCollation->getCollationKey("ABC", key2, status2);
225     * .    if (U_FAILURE(status2)) { delete myCollation; return; }
226     * .    // key1.hashCode() != key2.hashCode()
227     * </pre>
228     * @return the hash value based on the string's collation order.
229     * @see UnicodeString#hashCode
230     * @stable ICU 2.0
231     */
232     int32_t                 hashCode(void) const;
233 
234     /**
235      * ICU "poor man's RTTI", returns a UClassID for the actual class.
236      * @stable ICU 2.2
237      */
238     virtual UClassID getDynamicClassID() const;
239 
240     /**
241      * ICU "poor man's RTTI", returns a UClassID for this class.
242      * @stable ICU 2.2
243      */
244     static UClassID U_EXPORT2 getStaticClassID();
245 
246 private:
247     /**
248      * Replaces the current bytes buffer with a new one of newCapacity
249      * and copies length bytes from the old buffer to the new one.
250      * @return the new buffer, or NULL if the allocation failed
251      */
252     uint8_t *reallocate(int32_t newCapacity, int32_t length);
253     /**
254      * Set a new length for a new sort key in the existing fBytes.
255      */
256     void setLength(int32_t newLength);
257 
getBytes()258     uint8_t *getBytes() {
259         return (fFlagAndLength >= 0) ? fUnion.fStackBuffer : fUnion.fFields.fBytes;
260     }
getBytes()261     const uint8_t *getBytes() const {
262         return (fFlagAndLength >= 0) ? fUnion.fStackBuffer : fUnion.fFields.fBytes;
263     }
getCapacity()264     int32_t getCapacity() const {
265         return (fFlagAndLength >= 0) ? (int32_t)sizeof(fUnion) : fUnion.fFields.fCapacity;
266     }
getLength()267     int32_t getLength() const { return fFlagAndLength & 0x7fffffff; }
268 
269     /**
270     * Set the CollationKey to a "bogus" or invalid state
271     * @return this CollationKey
272     */
273     CollationKey&           setToBogus(void);
274     /**
275     * Resets this CollationKey to an empty state
276     * @return this CollationKey
277     */
278     CollationKey&           reset(void);
279 
280     /**
281     * Allow private access to RuleBasedCollator
282     */
283     friend  class           RuleBasedCollator;
284     friend  class           CollationKeyByteSink;
285 
286     // Class fields. sizeof(CollationKey) is intended to be 48 bytes
287     // on a machine with 64-bit pointers.
288     // We use a union to maximize the size of the internal buffer,
289     // similar to UnicodeString but not as tight and complex.
290 
291     // (implicit) *vtable;
292     /**
293      * Sort key length and flag.
294      * Bit 31 is set if the buffer is heap-allocated.
295      * Bits 30..0 contain the sort key length.
296      */
297     int32_t fFlagAndLength;
298     /**
299     * Unique hash value of this CollationKey.
300     * Special value 2 if the key is bogus.
301     */
302     mutable int32_t fHashCode;
303     /**
304      * fUnion provides 32 bytes for the internal buffer or for
305      * pointer+capacity.
306      */
307     union StackBufferOrFields {
308         /** fStackBuffer is used iff fFlagAndLength>=0, else fFields is used */
309         uint8_t fStackBuffer[32];
310         struct {
311             uint8_t *fBytes;
312             int32_t fCapacity;
313         } fFields;
314     } fUnion;
315 };
316 
317 inline UBool
318 CollationKey::operator!=(const CollationKey& other) const
319 {
320     return !(*this == other);
321 }
322 
323 inline UBool
isBogus()324 CollationKey::isBogus() const
325 {
326     return fHashCode == 2;  // kBogusHashCode
327 }
328 
329 inline const uint8_t*
getByteArray(int32_t & count)330 CollationKey::getByteArray(int32_t &count) const
331 {
332     count = getLength();
333     return getBytes();
334 }
335 
336 U_NAMESPACE_END
337 
338 #endif /* #if !UCONFIG_NO_COLLATION */
339 
340 #endif
341