1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (c) 1996-2015, International Business Machines Corporation and others.
6 * All Rights Reserved.
7 *******************************************************************************
8 */
9 
10 #ifndef UCOL_H
11 #define UCOL_H
12 
13 #include "unicode/utypes.h"
14 
15 #if !UCONFIG_NO_COLLATION
16 
17 #include "unicode/unorm.h"
18 #include "unicode/localpointer.h"
19 #include "unicode/parseerr.h"
20 #include "unicode/uloc.h"
21 #include "unicode/uset.h"
22 #include "unicode/uscript.h"
23 
24 /**
25  * \file
26  * \brief C API: Collator
27  *
28  * <h2> Collator C API </h2>
29  *
30  * The C API for Collator performs locale-sensitive
31  * string comparison. You use this service to build
32  * searching and sorting routines for natural language text.
33  * <p>
34  * For more information about the collation service see
35  * <a href="http://userguide.icu-project.org/collation">the User Guide</a>.
36  * <p>
 * The collation service provides correct sort orders for most locales supported in ICU.
 * If specific data for a locale is not available, the order eventually falls back
 * to the <a href="http://www.unicode.org/reports/tr35/tr35-collation.html#Root_Collation">CLDR root sort order</a>.
40  * <p>
41  * Sort ordering may be customized by providing your own set of rules. For more on
42  * this subject see the <a href="http://userguide.icu-project.org/collation/customization">
43  * Collation Customization</a> section of the User Guide.
44  * <p>
45  * @see         UCollationResult
46  * @see         UNormalizationMode
47  * @see         UCollationStrength
48  * @see         UCollationElements
49  */
50 
/**
 * A collator, for use from C programs.
 * Only the structure tag is declared here; its members are not defined in this header.
 */
struct UCollator;

/**
 * Structure representing a collator object instance.
 * @stable ICU 2.0
 */
typedef struct UCollator UCollator;
59 
60 
/**
 * Possible values for a comparison result, as returned by ucol_strcoll().
 * <ul>
 * <li>UCOL_LESS: the source string compares as less than the target string.</li>
 * <li>UCOL_EQUAL: the source string compares as equal to the target string.</li>
 * <li>UCOL_GREATER: the source string compares as greater than the target string.</li>
 * </ul>
 * @see ucol_strcoll()
 * @stable ICU 2.0
 */
typedef enum {
    /** string a < string b */
    UCOL_LESS = -1,
    /** string a == string b */
    UCOL_EQUAL = 0,
    /** string a > string b */
    UCOL_GREATER = 1
} UCollationResult;
81 
82 
/**
 * Attribute values for controlling collation behavior.
 * All allowable values are listed here, but not every attribute can take every value.
 * The only universal value is UCOL_DEFAULT, which resets the attribute to the value
 * predefined for that locale.
 * @stable ICU 2.0
 */
typedef enum {
    /** accepted by most attributes */
    UCOL_DEFAULT = -1,

    /** Primary collation strength */
    UCOL_PRIMARY = 0,
    /** Secondary collation strength */
    UCOL_SECONDARY = 1,
    /** Tertiary collation strength */
    UCOL_TERTIARY = 2,
    /** Default collation strength */
    UCOL_DEFAULT_STRENGTH = UCOL_TERTIARY,
    /* One more than UCOL_DEFAULT_STRENGTH; numerically equal to UCOL_QUATERNARY. */
    UCOL_CE_STRENGTH_LIMIT = UCOL_DEFAULT_STRENGTH + 1,
    /** Quaternary collation strength */
    UCOL_QUATERNARY = 3,
    /** Identical collation strength */
    UCOL_IDENTICAL = 15,
    /* One more than UCOL_IDENTICAL; numerically equal to UCOL_OFF. */
    UCOL_STRENGTH_LIMIT = UCOL_IDENTICAL + 1,

    /** Turn the feature off - works for UCOL_FRENCH_COLLATION,
        UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE
        & UCOL_DECOMPOSITION_MODE */
    UCOL_OFF = 16,
    /** Turn the feature on - works for UCOL_FRENCH_COLLATION,
        UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE
        & UCOL_DECOMPOSITION_MODE */
    UCOL_ON = 17,

    /** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be shifted */
    UCOL_SHIFTED = 20,
    /** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be non ignorable */
    UCOL_NON_IGNORABLE = 21,

    /** Valid for UCOL_CASE_FIRST -
        lower case sorts before upper case */
    UCOL_LOWER_FIRST = 24,
    /** Valid for UCOL_CASE_FIRST -
        upper case sorts before lower case */
    UCOL_UPPER_FIRST = 25,

#ifndef U_HIDE_DEPRECATED_API
    /**
     * One more than the highest normal UColAttributeValue value.
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    UCOL_ATTRIBUTE_VALUE_COUNT = UCOL_UPPER_FIRST + 1
#endif  /* U_HIDE_DEPRECATED_API */
} UColAttributeValue;
136 
137 /**
138  * Enum containing the codes for reordering segments of the collation table that are not script
139  * codes. These reordering codes are to be used in conjunction with the script codes.
140  * @see ucol_getReorderCodes
141  * @see ucol_setReorderCodes
142  * @see ucol_getEquivalentReorderCodes
143  * @see UScriptCode
144  * @stable ICU 4.8
145  */
146  typedef enum {
147    /**
148     * A special reordering code that is used to specify the default
149     * reordering codes for a locale.
150     * @stable ICU 4.8
151     */
152     UCOL_REORDER_CODE_DEFAULT       = -1,
153    /**
154     * A special reordering code that is used to specify no reordering codes.
155     * @stable ICU 4.8
156     */
157     UCOL_REORDER_CODE_NONE          = USCRIPT_UNKNOWN,
158    /**
159     * A special reordering code that is used to specify all other codes used for
160     * reordering except for the codes lised as UColReorderCode values and those
161     * listed explicitly in a reordering.
162     * @stable ICU 4.8
163     */
164     UCOL_REORDER_CODE_OTHERS        = USCRIPT_UNKNOWN,
165    /**
166     * Characters with the space property.
167     * This is equivalent to the rule value "space".
168     * @stable ICU 4.8
169     */
170     UCOL_REORDER_CODE_SPACE         = 0x1000,
171    /**
172     * The first entry in the enumeration of reordering groups. This is intended for use in
173     * range checking and enumeration of the reorder codes.
174     * @stable ICU 4.8
175     */
176     UCOL_REORDER_CODE_FIRST         = UCOL_REORDER_CODE_SPACE,
177    /**
178     * Characters with the punctuation property.
179     * This is equivalent to the rule value "punct".
180     * @stable ICU 4.8
181     */
182     UCOL_REORDER_CODE_PUNCTUATION   = 0x1001,
183    /**
184     * Characters with the symbol property.
185     * This is equivalent to the rule value "symbol".
186     * @stable ICU 4.8
187     */
188     UCOL_REORDER_CODE_SYMBOL        = 0x1002,
189    /**
190     * Characters with the currency property.
191     * This is equivalent to the rule value "currency".
192     * @stable ICU 4.8
193     */
194     UCOL_REORDER_CODE_CURRENCY      = 0x1003,
195    /**
196     * Characters with the digit property.
197     * This is equivalent to the rule value "digit".
198     * @stable ICU 4.8
199     */
200     UCOL_REORDER_CODE_DIGIT         = 0x1004,
201 #ifndef U_HIDE_DEPRECATED_API
202     /**
203      * One more than the highest normal UColReorderCode value.
204      * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
205      */
206     UCOL_REORDER_CODE_LIMIT         = 0x1005
207 #endif  // U_HIDE_DEPRECATED_API
208 } UColReorderCode;
209 
210 /**
211  * Base letter represents a primary difference.  Set comparison
212  * level to UCOL_PRIMARY to ignore secondary and tertiary differences.
213  * Use this to set the strength of a Collator object.
214  * Example of primary difference, "abc" &lt; "abd"
215  *
216  * Diacritical differences on the same base letter represent a secondary
217  * difference.  Set comparison level to UCOL_SECONDARY to ignore tertiary
218  * differences. Use this to set the strength of a Collator object.
219  * Example of secondary difference, "&auml;" >> "a".
220  *
221  * Uppercase and lowercase versions of the same character represents a
222  * tertiary difference.  Set comparison level to UCOL_TERTIARY to include
223  * all comparison differences. Use this to set the strength of a Collator
224  * object.
225  * Example of tertiary difference, "abc" &lt;&lt;&lt; "ABC".
226  *
227  * Two characters are considered "identical" when they have the same
228  * unicode spellings.  UCOL_IDENTICAL.
229  * For example, "&auml;" == "&auml;".
230  *
231  * UCollationStrength is also used to determine the strength of sort keys
232  * generated from UCollator objects
233  * These values can be now found in the UColAttributeValue enum.
234  * @stable ICU 2.0
235  **/
236 typedef UColAttributeValue UCollationStrength;
237 
/**
 * Attributes that the collation service understands. Every attribute can take the
 * UCOL_DEFAULT value, as well as the values specific to it.
 * @stable ICU 2.0
 */
typedef enum {
    /**
     * Attribute for the direction of secondary weights - used in Canadian French.
     * Acceptable values are UCOL_ON, which results in secondary weights
     * being considered backwards, and UCOL_OFF, which treats secondary
     * weights in the order they appear.
     * @stable ICU 2.0
     */
    UCOL_FRENCH_COLLATION = 0,
    /**
     * Attribute for handling variable elements.
     * Acceptable values are UCOL_NON_IGNORABLE (default),
     * which treats all the codepoints with non-ignorable
     * primary weights in the same way,
     * and UCOL_SHIFTED, which causes codepoints with primary
     * weights that are equal or below the variable top value
     * to be ignored on primary level and moved to the quaternary
     * level.
     * @stable ICU 2.0
     */
    UCOL_ALTERNATE_HANDLING = 1,
    /**
     * Controls the ordering of upper and lower case letters.
     * Acceptable values are UCOL_OFF (default), which orders
     * upper and lower case letters in accordance to their tertiary
     * weights, UCOL_UPPER_FIRST, which forces upper case letters to
     * sort before lower case letters, and UCOL_LOWER_FIRST, which does
     * the opposite.
     * @stable ICU 2.0
     */
    UCOL_CASE_FIRST = 2,
    /**
     * Controls whether an extra case level (positioned before the third
     * level) is generated or not. Acceptable values are UCOL_OFF (default),
     * when the case level is not generated, and UCOL_ON, which causes the case
     * level to be generated. Contents of the case level are affected by
     * the value of the UCOL_CASE_FIRST attribute. A simple way to ignore
     * accent differences in a string is to set the strength to UCOL_PRIMARY
     * and enable the case level.
     * @stable ICU 2.0
     */
    UCOL_CASE_LEVEL = 3,
    /**
     * Controls whether the normalization check and necessary normalizations
     * are performed. When set to UCOL_OFF (default), no normalization check
     * is performed. The correctness of the result is guaranteed only if the
     * input data is in so-called FCD form (see users manual for more info).
     * When set to UCOL_ON, an incremental check is performed to see whether
     * the input data is in the FCD form. If the data is not in the FCD form,
     * incremental NFD normalization is performed.
     * @stable ICU 2.0
     */
    UCOL_NORMALIZATION_MODE = 4,
    /**
     * An alias for the UCOL_NORMALIZATION_MODE attribute.
     * @stable ICU 2.0
     */
    UCOL_DECOMPOSITION_MODE = UCOL_NORMALIZATION_MODE,
    /**
     * The strength attribute. Can be either UCOL_PRIMARY, UCOL_SECONDARY,
     * UCOL_TERTIARY, UCOL_QUATERNARY or UCOL_IDENTICAL. The usual strength
     * for most locales (except Japanese) is tertiary.
     *
     * Quaternary strength
     * is useful when combined with the shifted setting for the alternate handling
     * attribute and for JIS X 4061 collation, when it is used to distinguish
     * between Katakana and Hiragana.
     * Otherwise, the quaternary level
     * is affected only by the number of non-ignorable code points in
     * the string.
     *
     * Identical strength is rarely useful, as it amounts
     * to codepoints of the NFD form of the string.
     * @stable ICU 2.0
     */
    UCOL_STRENGTH = 5,
#ifndef U_HIDE_DEPRECATED_API
    /**
     * When turned on, this attribute positions Hiragana before all
     * non-ignorables on quaternary level. This is a sneaky way to produce JIS
     * sort order.
     *
     * This attribute was an implementation detail of the CLDR Japanese tailoring.
     * Since ICU 50, this attribute is not settable any more via API functions.
     * Since CLDR 25/ICU 53, explicit quaternary relations are used
     * to achieve the same Japanese sort order.
     *
     * @deprecated ICU 50 Implementation detail, cannot be set via API, was removed from implementation.
     */
    UCOL_HIRAGANA_QUATERNARY_MODE = UCOL_STRENGTH + 1,
#endif  /* U_HIDE_DEPRECATED_API */
    /**
     * When turned on, this attribute makes
     * substrings of digits sort according to their numeric values.
     *
     * This is a way to get '100' to sort AFTER '2'. Note that the longest
     * digit substring that can be treated as a single unit is
     * 254 digits (not counting leading zeros). If a digit substring is
     * longer than that, the digits beyond the limit will be treated as a
     * separate digit substring.
     *
     * A "digit" in this sense is a code point with General_Category=Nd,
     * which does not include circled numbers, roman numerals, etc.
     * Only a contiguous digit substring is considered, that is,
     * non-negative integers without separators.
     * There is no support for plus/minus signs, decimals, exponents, etc.
     *
     * @stable ICU 2.8
     */
    UCOL_NUMERIC_COLLATION = UCOL_STRENGTH + 2,

    /* Do not conditionalize the following with #ifndef U_HIDE_DEPRECATED_API,
     * it is needed for layout of RuleBasedCollator object. */
    /**
     * One more than the highest normal UColAttribute value.
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    UCOL_ATTRIBUTE_COUNT = UCOL_NUMERIC_COLLATION + 1
} UColAttribute;
353 
/**
 * Options for retrieving the rule string.
 * @stable ICU 2.0
 */
typedef enum {
    /**
     * Retrieves the tailoring rules only.
     * Same as calling the version of getRules() without UColRuleOption.
     * @stable ICU 2.0
     */
    UCOL_TAILORING_ONLY = 0,
    /**
     * Retrieves the "UCA rules" concatenated with the tailoring rules.
     * The "UCA rules" are an <i>approximation</i> of the root collator's sort order.
     * They are almost never used or useful at runtime and can be removed from the data.
     * See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales
     * @stable ICU 2.0
     */
    UCOL_FULL_RULES = 1
} UColRuleOption;
373 
374 /**
375  * Open a UCollator for comparing strings.
376  *
377  * For some languages, multiple collation types are available;
378  * for example, "de@collation=phonebook".
379  * Starting with ICU 54, collation attributes can be specified via locale keywords as well,
380  * in the old locale extension syntax ("el@colCaseFirst=upper")
381  * or in language tag syntax ("el-u-kf-upper").
382  * See <a href="http://userguide.icu-project.org/collation/api">User Guide: Collation API</a>.
383  *
384  * The UCollator pointer is used in all the calls to the Collation
385  * service. After finished, collator must be disposed of by calling
386  * {@link #ucol_close }.
387  * @param loc The locale containing the required collation rules.
388  *            Special values for locales can be passed in -
389  *            if NULL is passed for the locale, the default locale
390  *            collation rules will be used. If empty string ("") or
391  *            "root" are passed, the root collator will be returned.
392  * @param status A pointer to a UErrorCode to receive any errors
393  * @return A pointer to a UCollator, or 0 if an error occurred.
394  * @see ucol_openRules
395  * @see ucol_safeClone
396  * @see ucol_close
397  * @stable ICU 2.0
398  */
399 U_STABLE UCollator* U_EXPORT2
400 ucol_open(const char *loc, UErrorCode *status);
401 
402 /**
403  * Produce a UCollator instance according to the rules supplied.
404  * The rules are used to change the default ordering, defined in the
405  * UCA in a process called tailoring. The resulting UCollator pointer
406  * can be used in the same way as the one obtained by {@link #ucol_strcoll }.
407  * @param rules A string describing the collation rules. For the syntax
408  *              of the rules please see users guide.
409  * @param rulesLength The length of rules, or -1 if null-terminated.
410  * @param normalizationMode The normalization mode: One of
411  *             UCOL_OFF     (expect the text to not need normalization),
412  *             UCOL_ON      (normalize), or
413  *             UCOL_DEFAULT (set the mode according to the rules)
414  * @param strength The default collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY,
415  * UCOL_TERTIARY, UCOL_IDENTICAL,UCOL_DEFAULT_STRENGTH - can be also set in the rules.
416  * @param parseError  A pointer to UParseError to recieve information about errors
417  *                    occurred during parsing. This argument can currently be set
418  *                    to NULL, but at users own risk. Please provide a real structure.
419  * @param status A pointer to a UErrorCode to receive any errors
420  * @return A pointer to a UCollator. It is not guaranteed that NULL be returned in case
421  *         of error - please use status argument to check for errors.
422  * @see ucol_open
423  * @see ucol_safeClone
424  * @see ucol_close
425  * @stable ICU 2.0
426  */
427 U_STABLE UCollator* U_EXPORT2
428 ucol_openRules( const UChar        *rules,
429                 int32_t            rulesLength,
430                 UColAttributeValue normalizationMode,
431                 UCollationStrength strength,
432                 UParseError        *parseError,
433                 UErrorCode         *status);
434 
435 #ifndef U_HIDE_DEPRECATED_API
436 /**
437  * Open a collator defined by a short form string.
438  * The structure and the syntax of the string is defined in the "Naming collators"
439  * section of the users guide:
440  * http://userguide.icu-project.org/collation/concepts#TOC-Collator-naming-scheme
441  * Attributes are overriden by the subsequent attributes. So, for "S2_S3", final
442  * strength will be 3. 3066bis locale overrides individual locale parts.
443  * The call to this function is equivalent to a call to ucol_open, followed by a
444  * series of calls to ucol_setAttribute and ucol_setVariableTop.
445  * @param definition A short string containing a locale and a set of attributes.
446  *                   Attributes not explicitly mentioned are left at the default
447  *                   state for a locale.
448  * @param parseError if not NULL, structure that will get filled with error's pre
449  *                   and post context in case of error.
450  * @param forceDefaults if FALSE, the settings that are the same as the collator
451  *                   default settings will not be applied (for example, setting
452  *                   French secondary on a French collator would not be executed).
453  *                   If TRUE, all the settings will be applied regardless of the
454  *                   collator default value. If the definition
455  *                   strings are to be cached, should be set to FALSE.
456  * @param status     Error code. Apart from regular error conditions connected to
457  *                   instantiating collators (like out of memory or similar), this
458  *                   API will return an error if an invalid attribute or attribute/value
459  *                   combination is specified.
460  * @return           A pointer to a UCollator or 0 if an error occured (including an
461  *                   invalid attribute).
462  * @see ucol_open
463  * @see ucol_setAttribute
464  * @see ucol_setVariableTop
465  * @see ucol_getShortDefinitionString
466  * @see ucol_normalizeShortDefinitionString
467  * @deprecated ICU 54 Use ucol_open() with language tag collation keywords instead.
468  */
469 U_DEPRECATED UCollator* U_EXPORT2
470 ucol_openFromShortString( const char *definition,
471                           UBool forceDefaults,
472                           UParseError *parseError,
473                           UErrorCode *status);
474 #endif  /* U_HIDE_DEPRECATED_API */
475 
476 #ifndef U_HIDE_DEPRECATED_API
477 /**
478  * Get a set containing the contractions defined by the collator. The set includes
479  * both the root collator's contractions and the contractions defined by the collator. This set
480  * will contain only strings. If a tailoring explicitly suppresses contractions from
481  * the root collator (like Russian), removed contractions will not be in the resulting set.
482  * @param coll collator
483  * @param conts the set to hold the result. It gets emptied before
484  *              contractions are added.
485  * @param status to hold the error code
486  * @return the size of the contraction set
487  *
488  * @deprecated ICU 3.4, use ucol_getContractionsAndExpansions instead
489  */
490 U_DEPRECATED int32_t U_EXPORT2
491 ucol_getContractions( const UCollator *coll,
492                   USet *conts,
493                   UErrorCode *status);
494 #endif  /* U_HIDE_DEPRECATED_API */
495 
496 /**
497  * Get a set containing the expansions defined by the collator. The set includes
498  * both the root collator's expansions and the expansions defined by the tailoring
499  * @param coll collator
500  * @param contractions if not NULL, the set to hold the contractions
501  * @param expansions if not NULL, the set to hold the expansions
502  * @param addPrefixes add the prefix contextual elements to contractions
503  * @param status to hold the error code
504  *
505  * @stable ICU 3.4
506  */
507 U_STABLE void U_EXPORT2
508 ucol_getContractionsAndExpansions( const UCollator *coll,
509                   USet *contractions, USet *expansions,
510                   UBool addPrefixes, UErrorCode *status);
511 
512 /**
513  * Close a UCollator.
514  * Once closed, a UCollator should not be used. Every open collator should
515  * be closed. Otherwise, a memory leak will result.
516  * @param coll The UCollator to close.
517  * @see ucol_open
518  * @see ucol_openRules
519  * @see ucol_safeClone
520  * @stable ICU 2.0
521  */
522 U_STABLE void U_EXPORT2
523 ucol_close(UCollator *coll);
524 
#if U_SHOW_CPLUSPLUS_API

U_NAMESPACE_BEGIN

/**
 * \class LocalUCollatorPointer
 * "Smart pointer" class that closes a UCollator via ucol_close().
 * Most of its methods are documented on the LocalPointerBase base class.
 *
 * @see LocalPointerBase
 * @see LocalPointer
 * @stable ICU 4.4
 */
U_DEFINE_LOCAL_OPEN_POINTER(LocalUCollatorPointer, UCollator, ucol_close);

U_NAMESPACE_END

#endif  /* U_SHOW_CPLUSPLUS_API */
543 
544 /**
545  * Compare two strings.
546  * The strings will be compared using the options already specified.
547  * @param coll The UCollator containing the comparison rules.
548  * @param source The source string.
549  * @param sourceLength The length of source, or -1 if null-terminated.
550  * @param target The target string.
551  * @param targetLength The length of target, or -1 if null-terminated.
552  * @return The result of comparing the strings; one of UCOL_EQUAL,
553  * UCOL_GREATER, UCOL_LESS
554  * @see ucol_greater
555  * @see ucol_greaterOrEqual
556  * @see ucol_equal
557  * @stable ICU 2.0
558  */
559 U_STABLE UCollationResult U_EXPORT2
560 ucol_strcoll(    const    UCollator    *coll,
561         const    UChar        *source,
562         int32_t            sourceLength,
563         const    UChar        *target,
564         int32_t            targetLength);
565 
566 /**
567 * Compare two strings in UTF-8.
568 * The strings will be compared using the options already specified.
569 * Note: When input string contains malformed a UTF-8 byte sequence,
570 * this function treats these bytes as REPLACEMENT CHARACTER (U+FFFD).
571 * @param coll The UCollator containing the comparison rules.
572 * @param source The source UTF-8 string.
573 * @param sourceLength The length of source, or -1 if null-terminated.
574 * @param target The target UTF-8 string.
575 * @param targetLength The length of target, or -1 if null-terminated.
576 * @param status A pointer to a UErrorCode to receive any errors
577 * @return The result of comparing the strings; one of UCOL_EQUAL,
578 * UCOL_GREATER, UCOL_LESS
579 * @see ucol_greater
580 * @see ucol_greaterOrEqual
581 * @see ucol_equal
582 * @stable ICU 50
583 */
584 U_STABLE UCollationResult U_EXPORT2
585 ucol_strcollUTF8(
586         const UCollator *coll,
587         const char      *source,
588         int32_t         sourceLength,
589         const char      *target,
590         int32_t         targetLength,
591         UErrorCode      *status);
592 
593 /**
594  * Determine if one string is greater than another.
595  * This function is equivalent to {@link #ucol_strcoll } == UCOL_GREATER
596  * @param coll The UCollator containing the comparison rules.
597  * @param source The source string.
598  * @param sourceLength The length of source, or -1 if null-terminated.
599  * @param target The target string.
600  * @param targetLength The length of target, or -1 if null-terminated.
601  * @return TRUE if source is greater than target, FALSE otherwise.
602  * @see ucol_strcoll
603  * @see ucol_greaterOrEqual
604  * @see ucol_equal
605  * @stable ICU 2.0
606  */
607 U_STABLE UBool U_EXPORT2
608 ucol_greater(const UCollator *coll,
609              const UChar     *source, int32_t sourceLength,
610              const UChar     *target, int32_t targetLength);
611 
612 /**
613  * Determine if one string is greater than or equal to another.
614  * This function is equivalent to {@link #ucol_strcoll } != UCOL_LESS
615  * @param coll The UCollator containing the comparison rules.
616  * @param source The source string.
617  * @param sourceLength The length of source, or -1 if null-terminated.
618  * @param target The target string.
619  * @param targetLength The length of target, or -1 if null-terminated.
620  * @return TRUE if source is greater than or equal to target, FALSE otherwise.
621  * @see ucol_strcoll
622  * @see ucol_greater
623  * @see ucol_equal
624  * @stable ICU 2.0
625  */
626 U_STABLE UBool U_EXPORT2
627 ucol_greaterOrEqual(const UCollator *coll,
628                     const UChar     *source, int32_t sourceLength,
629                     const UChar     *target, int32_t targetLength);
630 
631 /**
632  * Compare two strings for equality.
633  * This function is equivalent to {@link #ucol_strcoll } == UCOL_EQUAL
634  * @param coll The UCollator containing the comparison rules.
635  * @param source The source string.
636  * @param sourceLength The length of source, or -1 if null-terminated.
637  * @param target The target string.
638  * @param targetLength The length of target, or -1 if null-terminated.
639  * @return TRUE if source is equal to target, FALSE otherwise
640  * @see ucol_strcoll
641  * @see ucol_greater
642  * @see ucol_greaterOrEqual
643  * @stable ICU 2.0
644  */
645 U_STABLE UBool U_EXPORT2
646 ucol_equal(const UCollator *coll,
647            const UChar     *source, int32_t sourceLength,
648            const UChar     *target, int32_t targetLength);
649 
650 /**
651  * Compare two UTF-8 encoded trings.
652  * The strings will be compared using the options already specified.
653  * @param coll The UCollator containing the comparison rules.
654  * @param sIter The source string iterator.
655  * @param tIter The target string iterator.
656  * @return The result of comparing the strings; one of UCOL_EQUAL,
657  * UCOL_GREATER, UCOL_LESS
658  * @param status A pointer to a UErrorCode to receive any errors
659  * @see ucol_strcoll
660  * @stable ICU 2.6
661  */
662 U_STABLE UCollationResult U_EXPORT2
663 ucol_strcollIter(  const    UCollator    *coll,
664                   UCharIterator *sIter,
665                   UCharIterator *tIter,
666                   UErrorCode *status);
667 
668 /**
669  * Get the collation strength used in a UCollator.
670  * The strength influences how strings are compared.
671  * @param coll The UCollator to query.
672  * @return The collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY,
673  * UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL
674  * @see ucol_setStrength
675  * @stable ICU 2.0
676  */
677 U_STABLE UCollationStrength U_EXPORT2
678 ucol_getStrength(const UCollator *coll);
679 
680 /**
681  * Set the collation strength used in a UCollator.
682  * The strength influences how strings are compared.
683  * @param coll The UCollator to set.
684  * @param strength The desired collation strength; one of UCOL_PRIMARY,
685  * UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL, UCOL_DEFAULT
686  * @see ucol_getStrength
687  * @stable ICU 2.0
688  */
689 U_STABLE void U_EXPORT2
690 ucol_setStrength(UCollator *coll,
691                  UCollationStrength strength);
692 
693 /**
694  * Retrieves the reordering codes for this collator.
695  * These reordering codes are a combination of UScript codes and UColReorderCode entries.
696  * @param coll The UCollator to query.
697  * @param dest The array to fill with the script ordering.
698  * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function
699  * will only return the length of the result without writing any codes (pre-flighting).
700  * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a
701  * failure before the function call.
702  * @return The number of reordering codes written to the dest array.
703  * @see ucol_setReorderCodes
704  * @see ucol_getEquivalentReorderCodes
705  * @see UScriptCode
706  * @see UColReorderCode
707  * @stable ICU 4.8
708  */
709 U_STABLE int32_t U_EXPORT2
710 ucol_getReorderCodes(const UCollator* coll,
711                     int32_t* dest,
712                     int32_t destCapacity,
713                     UErrorCode *pErrorCode);
714 /**
715  * Sets the reordering codes for this collator.
716  * Collation reordering allows scripts and some other groups of characters
717  * to be moved relative to each other. This reordering is done on top of
718  * the DUCET/CLDR standard collation order. Reordering can specify groups to be placed
719  * at the start and/or the end of the collation order. These groups are specified using
720  * UScript codes and UColReorderCode entries.
721  *
722  * <p>By default, reordering codes specified for the start of the order are placed in the
723  * order given after several special non-script blocks. These special groups of characters
724  * are space, punctuation, symbol, currency, and digit. These special groups are represented with
725  * UColReorderCode entries. Script groups can be intermingled with
726  * these special non-script groups if those special groups are explicitly specified in the reordering.
727  *
728  * <p>The special code OTHERS stands for any script that is not explicitly
729  * mentioned in the list of reordering codes given. Anything that is after OTHERS
730  * will go at the very end of the reordering in the order given.
731  *
732  * <p>The special reorder code DEFAULT will reset the reordering for this collator
733  * to the default for this collator. The default reordering may be the DUCET/CLDR order or may be a reordering that
734  * was specified when this collator was created from resource data or from rules. The
735  * DEFAULT code <b>must</b> be the sole code supplied when it is used.
736  * If not, then U_ILLEGAL_ARGUMENT_ERROR will be set.
737  *
738  * <p>The special reorder code NONE will remove any reordering for this collator.
739  * The result of setting no reordering will be to have the DUCET/CLDR ordering used. The
740  * NONE code <b>must</b> be the sole code supplied when it is used.
741  *
742  * @param coll The UCollator to set.
743  * @param reorderCodes An array of script codes in the new order. This can be NULL if the
744  * length is also set to 0. An empty array will clear any reordering codes on the collator.
745  * @param reorderCodesLength The length of reorderCodes.
746  * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a
747  * failure before the function call.
748  * @see ucol_getReorderCodes
749  * @see ucol_getEquivalentReorderCodes
750  * @see UScriptCode
751  * @see UColReorderCode
752  * @stable ICU 4.8
753  */
754 U_STABLE void U_EXPORT2
755 ucol_setReorderCodes(UCollator* coll,
756                     const int32_t* reorderCodes,
757                     int32_t reorderCodesLength,
758                     UErrorCode *pErrorCode);
759 
760 /**
761  * Retrieves the reorder codes that are grouped with the given reorder code. Some reorder
762  * codes will be grouped and must reorder together.
763  * Beginning with ICU 55, scripts only reorder together if they are primary-equal,
764  * for example Hiragana and Katakana.
765  *
766  * @param reorderCode The reorder code to determine equivalence for.
767  * @param dest The array to fill with the script ordering.
768  * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function
769  * will only return the length of the result without writing any codes (pre-flighting).
770  * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate
771  * a failure before the function call.
772  * @return The number of reordering codes written to the dest array.
773  * @see ucol_setReorderCodes
774  * @see ucol_getReorderCodes
775  * @see UScriptCode
776  * @see UColReorderCode
777  * @stable ICU 4.8
778  */
779 U_STABLE int32_t U_EXPORT2
780 ucol_getEquivalentReorderCodes(int32_t reorderCode,
781                     int32_t* dest,
782                     int32_t destCapacity,
783                     UErrorCode *pErrorCode);
784 
785 /**
786  * Get the display name for a UCollator.
787  * The display name is suitable for presentation to a user.
788  * @param objLoc The locale of the collator in question.
789  * @param dispLoc The locale for display.
790  * @param result A pointer to a buffer to receive the attribute.
791  * @param resultLength The maximum size of result.
792  * @param status A pointer to a UErrorCode to receive any errors
793  * @return The total buffer size needed; if greater than resultLength,
794  * the output was truncated.
795  * @stable ICU 2.0
796  */
797 U_STABLE int32_t U_EXPORT2
798 ucol_getDisplayName(    const    char        *objLoc,
799             const    char        *dispLoc,
800             UChar             *result,
801             int32_t         resultLength,
802             UErrorCode        *status);
803 
804 /**
805  * Get a locale for which collation rules are available.
806  * A UCollator in a locale returned by this function will perform the correct
807  * collation for the locale.
808  * @param localeIndex The index of the desired locale.
809  * @return A locale for which collation rules are available, or 0 if none.
810  * @see ucol_countAvailable
811  * @stable ICU 2.0
812  */
813 U_STABLE const char* U_EXPORT2
814 ucol_getAvailable(int32_t localeIndex);
815 
816 /**
817  * Determine how many locales have collation rules available.
818  * This function is most useful as determining the loop ending condition for
819  * calls to {@link #ucol_getAvailable }.
820  * @return The number of locales for which collation rules are available.
821  * @see ucol_getAvailable
822  * @stable ICU 2.0
823  */
824 U_STABLE int32_t U_EXPORT2
825 ucol_countAvailable(void);
826 
#if !UCONFIG_NO_SERVICE
/**
 * Create a string enumerator of all locales for which a valid
 * collator may be opened.
 * This function is only declared when UCONFIG_NO_SERVICE is not set.
 * @param status input-output error code
 * @return a string enumeration over locale strings. The caller is
 * responsible for closing the result.
 * @see ucol_countAvailable
 * @see ucol_getAvailable
 * @stable ICU 3.0
 */
U_STABLE UEnumeration* U_EXPORT2
ucol_openAvailableLocales(UErrorCode *status);
#endif  /* !UCONFIG_NO_SERVICE */
839 
840 /**
841  * Create a string enumerator of all possible keywords that are relevant to
842  * collation. At this point, the only recognized keyword for this
843  * service is "collation".
844  * @param status input-output error code
845  * @return a string enumeration over locale strings. The caller is
846  * responsible for closing the result.
847  * @stable ICU 3.0
848  */
849 U_STABLE UEnumeration* U_EXPORT2
850 ucol_getKeywords(UErrorCode *status);
851 
852 /**
853  * Given a keyword, create a string enumeration of all values
854  * for that keyword that are currently in use.
855  * @param keyword a particular keyword as enumerated by
856  * ucol_getKeywords. If any other keyword is passed in, *status is set
857  * to U_ILLEGAL_ARGUMENT_ERROR.
858  * @param status input-output error code
859  * @return a string enumeration over collation keyword values, or NULL
860  * upon error. The caller is responsible for closing the result.
861  * @stable ICU 3.0
862  */
863 U_STABLE UEnumeration* U_EXPORT2
864 ucol_getKeywordValues(const char *keyword, UErrorCode *status);
865 
866 /**
867  * Given a key and a locale, returns an array of string values in a preferred
868  * order that would make a difference. These are all and only those values where
869  * the open (creation) of the service with the locale formed from the input locale
870  * plus input keyword and that value has different behavior than creation with the
871  * input locale alone.
872  * @param key           one of the keys supported by this service.  For now, only
873  *                      "collation" is supported.
874  * @param locale        the locale
875  * @param commonlyUsed  if set to true it will return only commonly used values
876  *                      with the given locale in preferred order.  Otherwise,
877  *                      it will return all the available values for the locale.
878  * @param status error status
879  * @return a string enumeration over keyword values for the given key and the locale.
880  * @stable ICU 4.2
881  */
882 U_STABLE UEnumeration* U_EXPORT2
883 ucol_getKeywordValuesForLocale(const char* key,
884                                const char* locale,
885                                UBool commonlyUsed,
886                                UErrorCode* status);
887 
888 /**
889  * Return the functionally equivalent locale for the specified
890  * input locale, with respect to given keyword, for the
891  * collation service. If two different input locale + keyword
892  * combinations produce the same result locale, then collators
893  * instantiated for these two different input locales will behave
894  * equivalently. The converse is not always true; two collators
895  * may in fact be equivalent, but return different results, due to
896  * internal details. The return result has no other meaning than
897  * that stated above, and implies nothing as to the relationship
898  * between the two locales. This is intended for use by
899  * applications who wish to cache collators, or otherwise reuse
900  * collators when possible. The functional equivalent may change
901  * over time. For more information, please see the <a
902  * href="http://userguide.icu-project.org/locale#TOC-Locales-and-Services">
903  * Locales and Services</a> section of the ICU User Guide.
904  * @param result fillin for the functionally equivalent result locale
905  * @param resultCapacity capacity of the fillin buffer
906  * @param keyword a particular keyword as enumerated by
907  * ucol_getKeywords.
908  * @param locale the specified input locale
909  * @param isAvailable if non-NULL, pointer to a fillin parameter that
910  * on return indicates whether the specified input locale was 'available'
911  * to the collation service. A locale is defined as 'available' if it
912  * physically exists within the collation locale data.
913  * @param status pointer to input-output error code
914  * @return the actual buffer size needed for the locale. If greater
915  * than resultCapacity, the returned full name will be truncated and
916  * an error code will be returned.
917  * @stable ICU 3.0
918  */
919 U_STABLE int32_t U_EXPORT2
920 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity,
921                              const char* keyword, const char* locale,
922                              UBool* isAvailable, UErrorCode* status);
923 
924 /**
925  * Get the collation tailoring rules from a UCollator.
926  * The rules will follow the rule syntax.
927  * @param coll The UCollator to query.
928  * @param length
929  * @return The collation tailoring rules.
930  * @stable ICU 2.0
931  */
932 U_STABLE const UChar* U_EXPORT2
933 ucol_getRules(    const    UCollator    *coll,
934         int32_t            *length);
935 
936 #ifndef U_HIDE_DEPRECATED_API
/** Get the short definition string for a collator. This API harvests the collator's
 *  locale and the attribute set and produces a string that can be used for opening
 *  a collator with the same attributes using the ucol_openFromShortString API.
 *  This string will be normalized.
 *  The structure and the syntax of the string are defined in the "Naming collators"
 *  section of the User Guide:
 *  http://userguide.icu-project.org/collation/concepts#TOC-Collator-naming-scheme
 *  This API supports preflighting.
 *  @param coll a collator
 *  @param locale a locale that will appear as the collator's locale in the resulting
 *                short string definition. If NULL, the locale will be harvested
 *                from the collator.
 *  @param buffer space to hold the resulting string
 *  @param capacity capacity of the buffer
 *  @param status for returning errors. All the preflighting errors are featured
 *  @return length of the resulting string
 *  @see ucol_openFromShortString
 *  @see ucol_normalizeShortDefinitionString
 *  @deprecated ICU 54
 */
U_DEPRECATED int32_t U_EXPORT2
ucol_getShortDefinitionString(const UCollator *coll,
                              const char *locale,
                              char *buffer,
                              int32_t capacity,
                              UErrorCode *status);
963 
/** Verifies and normalizes a short definition string.
 *  A normalized short definition string has all the options sorted by the argument name,
 *  so that equivalent definition strings are the same.
 *  This API supports preflighting.
 *  @param source definition string
 *  @param destination space to hold the resulting string
 *  @param capacity capacity of the buffer
 *  @param parseError if not NULL, structure that will get filled with error's pre
 *                   and post context in case of error.
 *  @param status     Error code. This API will return an error if an invalid attribute
 *                    or attribute/value combination is specified. All the preflighting
 *                    errors are also featured
 *  @return length of the resulting normalized string.
 *
 *  @see ucol_openFromShortString
 *  @see ucol_getShortDefinitionString
 *
 *  @deprecated ICU 54
 */

U_DEPRECATED int32_t U_EXPORT2
ucol_normalizeShortDefinitionString(const char *source,
                                    char *destination,
                                    int32_t capacity,
                                    UParseError *parseError,
                                    UErrorCode *status);
990 #endif  /* U_HIDE_DEPRECATED_API */
991 
992 
993 /**
994  * Get a sort key for a string from a UCollator.
995  * Sort keys may be compared using <TT>strcmp</TT>.
996  *
997  * Note that sort keys are often less efficient than simply doing comparison.
998  * For more details, see the ICU User Guide.
999  *
1000  * Like ICU functions that write to an output buffer, the buffer contents
1001  * is undefined if the buffer capacity (resultLength parameter) is too small.
1002  * Unlike ICU functions that write a string to an output buffer,
1003  * the terminating zero byte is counted in the sort key length.
1004  * @param coll The UCollator containing the collation rules.
1005  * @param source The string to transform.
1006  * @param sourceLength The length of source, or -1 if null-terminated.
1007  * @param result A pointer to a buffer to receive the attribute.
1008  * @param resultLength The maximum size of result.
1009  * @return The size needed to fully store the sort key.
1010  *      If there was an internal error generating the sort key,
1011  *      a zero value is returned.
1012  * @see ucol_keyHashCode
1013  * @stable ICU 2.0
1014  */
1015 U_STABLE int32_t U_EXPORT2
1016 ucol_getSortKey(const    UCollator    *coll,
1017         const    UChar        *source,
1018         int32_t        sourceLength,
1019         uint8_t        *result,
1020         int32_t        resultLength);
1021 
1022 
/** Gets the next count bytes of a sort key. The caller needs
 *  to preserve the state array between calls and to provide
 *  the same type of UCharIterator set with the same string.
 *  The destination buffer provided must be big enough to store
 *  the number of requested bytes.
 *
 *  The generated sort key may or may not be compatible with
 *  sort keys generated using ucol_getSortKey().
 *  @param coll The UCollator containing the collation rules.
 *  @param iter UCharIterator containing the string we need
 *              the sort key to be calculated for.
 *  @param state Opaque two-element state of the sort key iteration;
 *               must be preserved by the caller between calls.
 *  @param dest Buffer to hold the resulting sort key part
 *  @param count number of sort key bytes required.
 *  @param status error code indicator.
 *  @return the actual number of bytes of a sort key. It can be
 *          smaller than count if we have reached the end of
 *          the sort key.
 *  @see ucol_getSortKey
 *  @stable ICU 2.6
 */
U_STABLE int32_t U_EXPORT2
ucol_nextSortKeyPart(const UCollator *coll,
                     UCharIterator *iter,
                     uint32_t state[2],
                     uint8_t *dest, int32_t count,
                     UErrorCode *status);
1049 
/**
 * Bound types taken by the ucol_getBound API.
 * See ucol_getBound for an explanation of each bound.
 * Do not change the values assigned to the members of this enum;
 * the underlying code depends on them having these numbers.
 * @stable ICU 2.0
 */
typedef enum {
  /** lower bound */
  UCOL_BOUND_LOWER = 0,
  /** upper bound that will match strings of exact size */
  UCOL_BOUND_UPPER = 1,
  /** upper bound that will match all the strings that have the same initial substring as the given string */
  UCOL_BOUND_UPPER_LONG = 2,
#ifndef U_HIDE_DEPRECATED_API
  /**
   * One more than the highest normal UColBoundMode value.
   * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
   */
  UCOL_BOUND_VALUE_COUNT
#endif  /* U_HIDE_DEPRECATED_API */
} UColBoundMode;
1072 
1073 /**
1074  * Produce a bound for a given sortkey and a number of levels.
1075  * Return value is always the number of bytes needed, regardless of
1076  * whether the result buffer was big enough or even valid.<br>
1077  * Resulting bounds can be used to produce a range of strings that are
1078  * between upper and lower bounds. For example, if bounds are produced
1079  * for a sortkey of string "smith", strings between upper and lower
1080  * bounds with one level would include "Smith", "SMITH", "sMiTh".<br>
1081  * There are two upper bounds that can be produced. If UCOL_BOUND_UPPER
1082  * is produced, strings matched would be as above. However, if bound
1083  * produced using UCOL_BOUND_UPPER_LONG is used, the above example will
1084  * also match "Smithsonian" and similar.<br>
1085  * For more on usage, see example in cintltst/capitst.c in procedure
1086  * TestBounds.
1087  * Sort keys may be compared using <TT>strcmp</TT>.
1088  * @param source The source sortkey.
1089  * @param sourceLength The length of source, or -1 if null-terminated.
1090  *                     (If an unmodified sortkey is passed, it is always null
1091  *                      terminated).
1092  * @param boundType Type of bound required. It can be UCOL_BOUND_LOWER, which
1093  *                  produces a lower inclusive bound, UCOL_BOUND_UPPER, that
1094  *                  produces upper bound that matches strings of the same length
1095  *                  or UCOL_BOUND_UPPER_LONG that matches strings that have the
1096  *                  same starting substring as the source string.
1097  * @param noOfLevels  Number of levels required in the resulting bound (for most
1098  *                    uses, the recommended value is 1). See users guide for
1099  *                    explanation on number of levels a sortkey can have.
1100  * @param result A pointer to a buffer to receive the resulting sortkey.
1101  * @param resultLength The maximum size of result.
1102  * @param status Used for returning error code if something went wrong. If the
1103  *               number of levels requested is higher than the number of levels
1104  *               in the source key, a warning (U_SORT_KEY_TOO_SHORT_WARNING) is
1105  *               issued.
1106  * @return The size needed to fully store the bound.
1107  * @see ucol_keyHashCode
1108  * @stable ICU 2.1
1109  */
1110 U_STABLE int32_t U_EXPORT2
1111 ucol_getBound(const uint8_t       *source,
1112         int32_t             sourceLength,
1113         UColBoundMode       boundType,
1114         uint32_t            noOfLevels,
1115         uint8_t             *result,
1116         int32_t             resultLength,
1117         UErrorCode          *status);
1118 
1119 /**
1120  * Gets the version information for a Collator. Version is currently
1121  * an opaque 32-bit number which depends, among other things, on major
1122  * versions of the collator tailoring and UCA.
1123  * @param coll The UCollator to query.
1124  * @param info the version # information, the result will be filled in
1125  * @stable ICU 2.0
1126  */
1127 U_STABLE void U_EXPORT2
1128 ucol_getVersion(const UCollator* coll, UVersionInfo info);
1129 
1130 /**
1131  * Gets the UCA version information for a Collator. Version is the
1132  * UCA version number (3.1.1, 4.0).
1133  * @param coll The UCollator to query.
1134  * @param info the version # information, the result will be filled in
1135  * @stable ICU 2.8
1136  */
1137 U_STABLE void U_EXPORT2
1138 ucol_getUCAVersion(const UCollator* coll, UVersionInfo info);
1139 
1140 /**
1141  * Merges two sort keys. The levels are merged with their corresponding counterparts
1142  * (primaries with primaries, secondaries with secondaries etc.). Between the values
1143  * from the same level a separator is inserted.
1144  *
1145  * This is useful, for example, for combining sort keys from first and last names
1146  * to sort such pairs.
1147  * See http://www.unicode.org/reports/tr10/#Merging_Sort_Keys
1148  *
1149  * The recommended way to achieve "merged" sorting is by
1150  * concatenating strings with U+FFFE between them.
1151  * The concatenation has the same sort order as the merged sort keys,
1152  * but merge(getSortKey(str1), getSortKey(str2)) may differ from getSortKey(str1 + '\uFFFE' + str2).
1153  * Using strings with U+FFFE may yield shorter sort keys.
1154  *
1155  * For details about Sort Key Features see
1156  * http://userguide.icu-project.org/collation/api#TOC-Sort-Key-Features
1157  *
1158  * It is possible to merge multiple sort keys by consecutively merging
1159  * another one with the intermediate result.
1160  *
1161  * The length of the merge result is the sum of the lengths of the input sort keys.
1162  *
1163  * Example (uncompressed):
1164  * <pre>191B1D 01 050505 01 910505 00
1165  * 1F2123 01 050505 01 910505 00</pre>
1166  * will be merged as
1167  * <pre>191B1D 02 1F2123 01 050505 02 050505 01 910505 02 910505 00</pre>
1168  *
1169  * If the destination buffer is not big enough, then its contents are undefined.
1170  * If any of source lengths are zero or any of the source pointers are NULL/undefined,
1171  * the result is of size zero.
1172  *
1173  * @param src1 the first sort key
1174  * @param src1Length the length of the first sort key, including the zero byte at the end;
1175  *        can be -1 if the function is to find the length
1176  * @param src2 the second sort key
1177  * @param src2Length the length of the second sort key, including the zero byte at the end;
1178  *        can be -1 if the function is to find the length
1179  * @param dest the buffer where the merged sort key is written,
1180  *        can be NULL if destCapacity==0
1181  * @param destCapacity the number of bytes in the dest buffer
1182  * @return the length of the merged sort key, src1Length+src2Length;
1183  *         can be larger than destCapacity, or 0 if an error occurs (only for illegal arguments),
1184  *         in which cases the contents of dest is undefined
1185  * @stable ICU 2.0
1186  */
1187 U_STABLE int32_t U_EXPORT2
1188 ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length,
1189                    const uint8_t *src2, int32_t src2Length,
1190                    uint8_t *dest, int32_t destCapacity);
1191 
1192 /**
1193  * Universal attribute setter
1194  * @param coll collator which attributes are to be changed
1195  * @param attr attribute type
1196  * @param value attribute value
1197  * @param status to indicate whether the operation went on smoothly or there were errors
1198  * @see UColAttribute
1199  * @see UColAttributeValue
1200  * @see ucol_getAttribute
1201  * @stable ICU 2.0
1202  */
1203 U_STABLE void U_EXPORT2
1204 ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status);
1205 
1206 /**
1207  * Universal attribute getter
1208  * @param coll collator which attributes are to be changed
1209  * @param attr attribute type
1210  * @return attribute value
1211  * @param status to indicate whether the operation went on smoothly or there were errors
1212  * @see UColAttribute
1213  * @see UColAttributeValue
1214  * @see ucol_setAttribute
1215  * @stable ICU 2.0
1216  */
1217 U_STABLE UColAttributeValue  U_EXPORT2
1218 ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status);
1219 
1220 /**
1221  * Sets the variable top to the top of the specified reordering group.
1222  * The variable top determines the highest-sorting character
1223  * which is affected by UCOL_ALTERNATE_HANDLING.
1224  * If that attribute is set to UCOL_NON_IGNORABLE, then the variable top has no effect.
1225  * @param coll the collator
1226  * @param group one of UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION,
1227  *              UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_CURRENCY;
1228  *              or UCOL_REORDER_CODE_DEFAULT to restore the default max variable group
1229  * @param pErrorCode Standard ICU error code. Its input value must
1230  *                   pass the U_SUCCESS() test, or else the function returns
1231  *                   immediately. Check for U_FAILURE() on output or use with
1232  *                   function chaining. (See User Guide for details.)
1233  * @see ucol_getMaxVariable
1234  * @stable ICU 53
1235  */
1236 U_STABLE void U_EXPORT2
1237 ucol_setMaxVariable(UCollator *coll, UColReorderCode group, UErrorCode *pErrorCode);
1238 
1239 /**
1240  * Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING.
1241  * @param coll the collator
1242  * @return the maximum variable reordering group.
1243  * @see ucol_setMaxVariable
1244  * @stable ICU 53
1245  */
1246 U_STABLE UColReorderCode U_EXPORT2
1247 ucol_getMaxVariable(const UCollator *coll);
1248 
1249 #ifndef U_HIDE_DEPRECATED_API
1250 /**
1251  * Sets the variable top to the primary weight of the specified string.
1252  *
1253  * Beginning with ICU 53, the variable top is pinned to
1254  * the top of one of the supported reordering groups,
1255  * and it must not be beyond the last of those groups.
1256  * See ucol_setMaxVariable().
1257  * @param coll the collator
1258  * @param varTop one or more (if contraction) UChars to which the variable top should be set
1259  * @param len length of variable top string. If -1 it is considered to be zero terminated.
1260  * @param status error code. If error code is set, the return value is undefined.
1261  *               Errors set by this function are:<br>
1262  *    U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
1263  *    U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond
1264  *    the last reordering group supported by ucol_setMaxVariable()
1265  * @return variable top primary weight
1266  * @see ucol_getVariableTop
1267  * @see ucol_restoreVariableTop
1268  * @deprecated ICU 53 Call ucol_setMaxVariable() instead.
1269  */
1270 U_DEPRECATED uint32_t U_EXPORT2
1271 ucol_setVariableTop(UCollator *coll,
1272                     const UChar *varTop, int32_t len,
1273                     UErrorCode *status);
1274 #endif  /* U_HIDE_DEPRECATED_API */
1275 
1276 /**
1277  * Gets the variable top value of a Collator.
1278  * @param coll collator which variable top needs to be retrieved
1279  * @param status error code (not changed by function). If error code is set,
1280  *               the return value is undefined.
1281  * @return the variable top primary weight
1282  * @see ucol_getMaxVariable
1283  * @see ucol_setVariableTop
1284  * @see ucol_restoreVariableTop
1285  * @stable ICU 2.0
1286  */
1287 U_STABLE uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator *coll, UErrorCode *status);
1288 
1289 #ifndef U_HIDE_DEPRECATED_API
1290 /**
1291  * Sets the variable top to the specified primary weight.
1292  *
1293  * Beginning with ICU 53, the variable top is pinned to
1294  * the top of one of the supported reordering groups,
1295  * and it must not be beyond the last of those groups.
1296  * See ucol_setMaxVariable().
1297  * @param varTop primary weight, as returned by ucol_setVariableTop or ucol_getVariableTop
1298  * @param status error code
1299  * @see ucol_getVariableTop
1300  * @see ucol_setVariableTop
1301  * @deprecated ICU 53 Call ucol_setMaxVariable() instead.
1302  */
1303 U_DEPRECATED void U_EXPORT2
1304 ucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *status);
1305 #endif  /* U_HIDE_DEPRECATED_API */
1306 
1307 /**
1308  * Thread safe cloning operation. The result is a clone of a given collator.
1309  * @param coll collator to be cloned
1310  * @param stackBuffer <em>Deprecated functionality as of ICU 52, use NULL.</em><br>
1311  * user allocated space for the new clone.
1312  * If NULL new memory will be allocated.
1313  *  If buffer is not large enough, new memory will be allocated.
1314  *  Clients can use the U_COL_SAFECLONE_BUFFERSIZE.
1315  * @param pBufferSize <em>Deprecated functionality as of ICU 52, use NULL or 1.</em><br>
1316  *  pointer to size of allocated space.
1317  *  If *pBufferSize == 0, a sufficient size for use in cloning will
1318  *  be returned ('pre-flighting')
1319  *  If *pBufferSize is not enough for a stack-based safe clone,
1320  *  new memory will be allocated.
1321  * @param status to indicate whether the operation went on smoothly or there were errors
1322  *    An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any
1323  * allocations were necessary.
1324  * @return pointer to the new clone
1325  * @see ucol_open
1326  * @see ucol_openRules
1327  * @see ucol_close
1328  * @stable ICU 2.0
1329  */
1330 U_STABLE UCollator* U_EXPORT2
1331 ucol_safeClone(const UCollator *coll,
1332                void            *stackBuffer,
1333                int32_t         *pBufferSize,
1334                UErrorCode      *status);
1335 
1336 #ifndef U_HIDE_DEPRECATED_API
1337 
/** Default memory size for the new clone created by ucol_safeClone().
 * @deprecated ICU 52. Do not rely on ucol_safeClone() cloning into any provided buffer.
 */
#define U_COL_SAFECLONE_BUFFERSIZE 1
1342 
1343 #endif /* U_HIDE_DEPRECATED_API */
1344 
1345 /**
1346  * Returns current rules. Delta defines whether full rules are returned or just the tailoring.
1347  * Returns number of UChars needed to store rules. If buffer is NULL or bufferLen is not enough
1348  * to store rules, will store up to available space.
1349  *
1350  * ucol_getRules() should normally be used instead.
1351  * See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales
1352  * @param coll collator to get the rules from
1353  * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES.
1354  * @param buffer buffer to store the result in. If NULL, you'll get no rules.
1355  * @param bufferLen length of buffer to store rules in. If less than needed you'll get only the part that fits in.
1356  * @return current rules
1357  * @stable ICU 2.0
1358  * @see UCOL_FULL_RULES
1359  */
1360 U_STABLE int32_t U_EXPORT2
1361 ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen);
1362 
1363 #ifndef U_HIDE_DEPRECATED_API
1364 /**
1365  * gets the locale name of the collator. If the collator
1366  * is instantiated from the rules, then this function returns
1367  * NULL.
1368  * @param coll The UCollator for which the locale is needed
1369  * @param type You can choose between requested, valid and actual
1370  *             locale. For description see the definition of
1371  *             ULocDataLocaleType in uloc.h
1372  * @param status error code of the operation
1373  * @return real locale name from which the collation data comes.
1374  *         If the collator was instantiated from rules, returns
1375  *         NULL.
1376  * @deprecated ICU 2.8 Use ucol_getLocaleByType instead
1377  */
1378 U_DEPRECATED const char * U_EXPORT2
1379 ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status);
1380 #endif  /* U_HIDE_DEPRECATED_API */
1381 
1382 /**
1383  * gets the locale name of the collator. If the collator
1384  * is instantiated from the rules, then this function returns
1385  * NULL.
1386  * @param coll The UCollator for which the locale is needed
1387  * @param type You can choose between requested, valid and actual
1388  *             locale. For description see the definition of
1389  *             ULocDataLocaleType in uloc.h
1390  * @param status error code of the operation
1391  * @return real locale name from which the collation data comes.
1392  *         If the collator was instantiated from rules, returns
1393  *         NULL.
1394  * @stable ICU 2.8
1395  */
1396 U_STABLE const char * U_EXPORT2
1397 ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status);
1398 
1399 /**
1400  * Get a Unicode set that contains all the characters and sequences tailored in
1401  * this collator. The result must be disposed of by using uset_close.
1402  * @param coll        The UCollator for which we want to get tailored chars
1403  * @param status      error code of the operation
1404  * @return a pointer to newly created USet. Must be be disposed by using uset_close
1405  * @see ucol_openRules
1406  * @see uset_close
1407  * @stable ICU 2.4
1408  */
1409 U_STABLE USet * U_EXPORT2
1410 ucol_getTailoredSet(const UCollator *coll, UErrorCode *status);
1411 
1412 #ifndef U_HIDE_INTERNAL_API
1413 /** Calculates the set of unsafe code points, given a collator.
1414  *   A character is unsafe if you could append a character to it and cause the ordering to alter significantly.
1415  *   Collation sorts in normalized order, so anything that is rearranged by normalization can cause this.
1416  *   For example, if you have a character like a_umlaut and you add a lower_dot to it,
1417  *   then it normalizes to a_lower_dot + umlaut, and sorts differently.
1418  *  @param coll Collator for which the unsafe set is calculated
1419  *  @param unsafe a fill-in set, supplied by the caller, to receive the unsafe points
1420  *  @param status for catching errors
1421  *  @return number of elements in the set
1422  *  @internal ICU 3.0
1423  */
1424 U_INTERNAL int32_t U_EXPORT2
1425 ucol_getUnsafeSet( const UCollator *coll,
1426                   USet *unsafe,
1427                   UErrorCode *status);
1428 
1429 /** Touches all resources needed for instantiating a collator from a short string definition,
1430  *  thus filling up the cache.
1431  * @param definition A short string containing a locale and a set of attributes.
1432  *                   Attributes not explicitly mentioned are left at the default
1433  *                   state for a locale.
1434  * @param forceDefaults if FALSE, the settings that are the same as the collator
1435  *                   default settings will not be applied (for example, setting
1436  *                   French secondary on a French collator would not be executed).
1437  *                   If TRUE, all the settings will be applied regardless of the
1438  *                   collator default value. If the definition
1439  *                   strings are to be cached, this should be set to FALSE.
1440  * @param parseError if not NULL, structure that will get filled with the error's pre
1441  *                   and post context in case of error.
1442  * @param status     Error code. Apart from regular error conditions connected to
1443  *                   instantiating collators (like out of memory or similar), this
1444  *                   API will return an error if an invalid attribute or attribute/value
1445  *                   combination is specified.
1446  * @see ucol_openFromShortString
1447  * @internal ICU 3.2.1
1448  */
1449 U_INTERNAL void U_EXPORT2
1450 ucol_prepareShortStringOpen( const char *definition,
1451                           UBool forceDefaults,
1452                           UParseError *parseError,
1453                           UErrorCode *status);
1454 #endif  /* U_HIDE_INTERNAL_API */
1455 
1456 /** Creates a binary image of a collator. This binary image can be stored and
1457  *  later used to instantiate a collator using ucol_openBinary.
1458  *  This API supports preflighting.
1459  *  @param coll The collator to create the binary image of
1460  *  @param buffer a fill-in buffer, supplied by the caller, to receive the binary image
1461  *  @param capacity capacity of the destination buffer
1462  *  @param status for catching errors
1463  *  @return size of the image
1464  *  @see ucol_openBinary
1465  *  @stable ICU 3.2
1466  */
1467 U_STABLE int32_t U_EXPORT2
1468 ucol_cloneBinary(const UCollator *coll,
1469                  uint8_t *buffer, int32_t capacity,
1470                  UErrorCode *status);
1471 
1472 /** Opens a collator from a collator binary image created using
1473  *  ucol_cloneBinary. The binary image used to instantiate the
1474  *  collator remains owned by the user and must stay around for
1475  *  the lifetime of the collator. The API also takes a base collator,
1476  *  which must be the root collator.
1477  *  @param bin binary image owned by the user and required through the
1478  *             lifetime of the collator
1479  *  @param length size of the image. If negative, the API will try to
1480  *                figure out the length of the image
1481  *  @param base Base collator, for lookup of untailored characters.
1482  *              Must be the root collator, must not be NULL.
1483  *              The base is required to be present through the lifetime of the collator.
1484  *  @param status for catching errors
1485  *  @return newly created collator
1486  *  @see ucol_cloneBinary
1487  *  @stable ICU 3.2
1488  */
1489 U_STABLE UCollator* U_EXPORT2
1490 ucol_openBinary(const uint8_t *bin, int32_t length,
1491                 const UCollator *base,
1492                 UErrorCode *status);
1493 
1494 
1495 #endif /* #if !UCONFIG_NO_COLLATION */
1496 
1497 #endif
1498