1 /*
2 **********************************************************************
3 *   Copyright (C) 1998-2015, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 **********************************************************************
6 *
7 * File unistr.h
8 *
9 * Modification History:
10 *
11 *   Date        Name        Description
12 *   09/25/98    stephen     Creation.
13 *   11/11/98    stephen     Changed per 11/9 code review.
14 *   04/20/99    stephen     Overhauled per 4/16 code review.
15 *   11/18/99    aliu        Made to inherit from Replaceable.  Added method
16 *                           handleReplaceBetween(); other methods unchanged.
17 *   06/25/01    grhoten     Remove dependency on iostream.
18 ******************************************************************************
19 */
20 
21 #ifndef UNISTR_H
22 #define UNISTR_H
23 
24 /**
25  * \file
26  * \brief C++ API: Unicode String
27  */
28 
29 #include "unicode/utypes.h"
30 #include "unicode/rep.h"
31 #include "unicode/std_string.h"
32 #include "unicode/stringpiece.h"
33 #include "unicode/bytestream.h"
34 #include "unicode/ucasemap.h"
35 
36 struct UConverter;          // forward declaration; see unicode/ucnv.h
37 
38 #ifndef U_COMPARE_CODE_POINT_ORDER
39 /* Defined here only if not already defined elsewhere; see also ustring.h and unorm.h */
40 /**
41  * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
42  * Compare strings in code point order instead of code unit order.
43  * @stable ICU 2.2
44  */
45 #define U_COMPARE_CODE_POINT_ORDER  0x8000
46 #endif
47 
48 #ifndef USTRING_H  /* declare u_strlen() here only if ustring.h (presumably guarded by USTRING_H) was not included */
49 /**
50  * \ingroup ustring_ustrlen
51  */
52 U_STABLE int32_t U_EXPORT2
53 u_strlen(const UChar *s);
54 #endif
55 
56 /**
57  * \def U_STRING_CASE_MAPPER_DEFINED
58  * @internal
59  */
60 #ifndef U_STRING_CASE_MAPPER_DEFINED
61 #define U_STRING_CASE_MAPPER_DEFINED
62 
63 /**
64  * Internal string case mapping function type (declared only once, guarded by U_STRING_CASE_MAPPER_DEFINED).
65  * @internal
66  */
67 typedef int32_t U_CALLCONV
68 UStringCaseMapper(const UCaseMap *csm,
69                   UChar *dest, int32_t destCapacity,
70                   const UChar *src, int32_t srcLength,
71                   UErrorCode *pErrorCode);
72 
73 #endif
74 
75 U_NAMESPACE_BEGIN
76 
77 class BreakIterator;        // unicode/brkiter.h
78 class Locale;               // unicode/locid.h
79 class StringCharacterIterator;  // presumably unicode/schriter.h -- TODO confirm
80 class UnicodeStringAppendable;  // unicode/appendable.h
81 
82 /* The <iostream> include has been moved to unicode/ustream.h */
83 
84 /**
85  * Shorthand for icu::UnicodeString::kInvariant, the constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
86  * which constructs a Unicode string from an invariant-character char * string.
87  * For a description of invariant characters see utypes.h.
88  * This constructor has no runtime dependency on conversion code and is
89  * therefore recommended over ones taking a charset name string
90  * (where the empty string "" indicates invariant-character conversion).
91  *
92  * @stable ICU 3.2
93  */
94 #define US_INV icu::UnicodeString::kInvariant
95 
96 /**
97  * Unicode String literals in C++.
98  * Depending on the platform properties, different UnicodeString
99  * constructors should be used to create a UnicodeString object from
100  * a string literal.
101  * The macros are defined for maximum performance.
102  * They work only for strings that contain "invariant characters", i.e.,
103  * only latin letters, digits, and some punctuation.
104  * See utypes.h for details.
105  *
106  * The string parameter must be a C string literal.
107  * The length of the string, not including the terminating
108  * <code>NUL</code>, must be specified as a constant.
109  * The U_STRING_DECL macro should be invoked exactly once for one
110  * such string variable before it is used.
111  * @stable ICU 2.0
112  */
113 #if defined(U_DECLARE_UTF16)  /* the literal is wrapped in U_DECLARE_UTF16() and cast to const UChar * */
114 #   define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)
115 #elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))  /* wchar_t has the size of UChar: the wide literal (L ## cs) is cast to const UChar * */
116 #   define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)L ## cs, _length)
117 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY  /* 1-byte UChar on an ASCII platform: the char literal itself is cast to const UChar * */
118 #   define UNICODE_STRING(cs, _length) icu::UnicodeString(cs, _length, US_INV)
119 #else  /* fallback: the invariant-character constructor selected by US_INV */
120 #   define UNICODE_STRING(cs, _length) icu::UnicodeString(cs, _length, US_INV)
121 #endif
122 
123 /**
124  * Unicode String literals in C++.
125  * Depending on the platform properties, different UnicodeString
126  * constructors should be used to create a UnicodeString object from
127  * a string literal.
128  * The macros are defined for improved performance.
129  * They work only for strings that contain "invariant characters", i.e.,
130  * only latin letters, digits, and some punctuation.
131  * See utypes.h for details.
132  *
133  * The string parameter must be a C string literal. Expands to UNICODE_STRING(cs, -1), so no length constant is passed.
134  * @stable ICU 2.0
135  */
136 #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
137 
138 /**
139  * \def UNISTR_FROM_CHAR_EXPLICIT
140  * This can be defined to be empty or "explicit". If explicit, then the UnicodeString(UChar) and
141  * UnicodeString(UChar32) constructors are marked as explicit, preventing their inadvertent use.
142  * If not predefined: "explicit" inside ICU library code (a U_*_IMPLEMENTATION macro is defined), otherwise empty.
143  * @stable ICU 49
144  */
145 #ifndef UNISTR_FROM_CHAR_EXPLICIT
146 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
147     // Auto-"explicit" in ICU library code.
148 #   define UNISTR_FROM_CHAR_EXPLICIT explicit
149 # else
150     // Empty by default for source code compatibility.
151 #   define UNISTR_FROM_CHAR_EXPLICIT
152 # endif
153 #endif
154 
155 /**
156  * \def UNISTR_FROM_STRING_EXPLICIT
157  * This can be defined to be empty or "explicit". If explicit, then the UnicodeString(const char *)
158  * and UnicodeString(const UChar *) constructors are marked as explicit, preventing their inadvertent use.
159  * If not predefined: "explicit" inside ICU library code (a U_*_IMPLEMENTATION macro is defined), otherwise empty.
160  *
161  * In particular, this helps prevent accidentally depending on ICU conversion code
162  * by passing a string literal into an API with a const UnicodeString & parameter.
163  * @stable ICU 49
164  */
165 #ifndef UNISTR_FROM_STRING_EXPLICIT
166 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
167     // Auto-"explicit" in ICU library code.
168 #   define UNISTR_FROM_STRING_EXPLICIT explicit
169 # else
170     // Empty by default for source code compatibility.
171 #   define UNISTR_FROM_STRING_EXPLICIT
172 # endif
173 #endif
174 
175 /* Cannot make the following #ifndef U_HIDE_INTERNAL_API,
176    it is used to construct other non-internal constants */
177 /**
178  * \def UNISTR_OBJECT_SIZE
179  * Desired sizeof(UnicodeString) in bytes; defaults to 64 unless it is defined before this point.
180  * It should be a multiple of sizeof(pointer) to avoid unusable space for padding.
181  * Ideally the object size is also a multiple of 16 bytes,
182  * which is a common granularity for heap allocation.
183  *
184  * Any space inside the object beyond sizeof(vtable pointer) + 2
185  * is available for storing short strings inside the object.
186  * The bigger the object, the longer a string that can be stored inside the object,
187  * without additional heap allocation.
188  *
189  * Depending on a platform's pointer size, pointer alignment requirements,
190  * and struct padding, the compiler will usually round up sizeof(UnicodeString)
191  * to 4 * sizeof(pointer) (or 3 * sizeof(pointer) for P128 data models),
192  * to hold the fields for heap-allocated strings.
193  * Such a minimum size also ensures that the object is easily large enough
194  * to hold at least 2 UChars, for one supplementary code point (U16_MAX_LENGTH).
195  *
196  * sizeof(UnicodeString) >= 48 should work for all known platforms.
197  *
198  * For example, on a 64-bit machine where sizeof(vtable pointer) is 8,
199  * sizeof(UnicodeString) = 64 would leave space for
200  * (64 - sizeof(vtable pointer) - 2) / U_SIZEOF_UCHAR = (64 - 8 - 2) / 2 = 27
201  * UChars stored inside the object.
202  *
203  * The minimum object size on a 64-bit machine would be
204  * 4 * sizeof(pointer) = 4 * 8 = 32 bytes,
205  * and the internal buffer would hold up to 11 UChars in that case.
206  *
207  * @see U16_MAX_LENGTH
208  * @draft ICU 56
209  */
210 #ifndef UNISTR_OBJECT_SIZE
211 # define UNISTR_OBJECT_SIZE 64
212 #endif
213 
214 /**
215  * UnicodeString is a string class that stores Unicode characters directly and provides
216  * functionality similar to the Java String and StringBuffer/StringBuilder classes.
217  * It is a concrete implementation of the abstract class Replaceable (for transliteration).
218  *
219  * A UnicodeString may also "alias" an external array of characters
220  * (that is, point to it, rather than own the array)
221  * whose lifetime must then at least match the lifetime of the aliasing object.
222  * This aliasing may be preserved when returning a UnicodeString by value,
223  * depending on the compiler and the function implementation,
224  * via Return Value Optimization (RVO) or the move assignment operator.
225  * (However, the copy assignment operator does not preserve aliasing.)
226  * For details see the description of storage models at the end of the class API docs
227  * and in the User Guide chapter linked from there.
228  *
229  * The UnicodeString class is not suitable for subclassing.
230  *
231  * <p>For an overview of Unicode strings in C and C++ see the
232  * <a href="http://userguide.icu-project.org/strings#TOC-Strings-in-C-C-">User Guide Strings chapter</a>.</p>
233  *
234  * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.
235  * A Unicode character may be stored with either one code unit
236  * (the most common case) or with a matched pair of special code units
237  * ("surrogates"). The data type for code units is UChar.
238  * For single-character handling, a Unicode character code <em>point</em> is a value
239  * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p>
240  *
241  * <p>Indexes and offsets into and lengths of strings always count code units, not code points.
242  * This is the same as with multi-byte char* strings in traditional string handling.
243  * Operations on partial strings typically do not test for code point boundaries.
244  * If necessary, the user needs to take care of such boundaries by testing for the code unit
245  * values or by using functions like
246  * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
247  * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p>
248  *
249  * UnicodeString methods are more lenient with regard to input parameter values
250  * than other ICU APIs. In particular:
251  * - If indexes are out of bounds for a UnicodeString object
252  *   (<0 or >length()) then they are "pinned" to the nearest boundary.
253  * - If primitive string pointer values (e.g., const UChar * or char *)
254  *   for input strings are NULL, then those input string parameters are treated
255  *   as if they pointed to an empty string.
256  *   However, this is <em>not</em> the case for char * parameters for charset names
257  *   or other IDs.
258  * - Most UnicodeString methods do not take a UErrorCode parameter because
259  *   there are usually very few opportunities for failure other than a shortage
260  *   of memory, error codes in low-level C++ string methods would be inconvenient,
261  *   and the error code as the last parameter (ICU convention) would prevent
262  *   the use of default parameter values.
263  *   Instead, such methods set the UnicodeString into a "bogus" state
264  *   (see isBogus()) if an error occurs.
265  *
266  * In string comparisons, two UnicodeString objects that are both "bogus"
267  * compare equal (to be transitive and prevent endless loops in sorting),
268  * and a "bogus" string compares less than any non-"bogus" one.
269  *
270  * Const UnicodeString methods are thread-safe. Multiple threads can use
271  * const methods on the same UnicodeString object simultaneously,
272  * but non-const methods must not be called concurrently (in multiple threads)
273  * with any other (const or non-const) methods.
274  *
275  * Similarly, const UnicodeString & parameters are thread-safe.
276  * One object may be passed in as such a parameter concurrently in multiple threads.
277  * This includes the const UnicodeString & parameters for
278  * copy construction, assignment, and cloning.
279  *
280  * <p>UnicodeString uses several storage methods.
281  * String contents can be stored inside the UnicodeString object itself,
282  * in an allocated and shared buffer, or in an outside buffer that is "aliased".
283  * Most of this is done transparently, but careful aliasing in particular provides
284  * significant performance improvements.
285  * Also, the internal buffer is accessible via special functions.
286  * For details see the
287  * <a href="http://userguide.icu-project.org/strings#TOC-Maximizing-Performance-with-the-UnicodeString-Storage-Model">User Guide Strings chapter</a>.</p>
288  *
289  * @see utf.h
290  * @see CharacterIterator
291  * @stable ICU 2.0
292  */
293 class U_COMMON_API UnicodeString : public Replaceable
294 {
295 public:
296 
297   /**
298    * Enum type whose single constant, kInvariant, is used in the UnicodeString(char *, int32_t, EInvariant) constructor
299    * which constructs a Unicode string from an invariant-character char * string.
300    * Use the macro US_INV instead of the full qualification for this value.
301    *
302    * @see US_INV
303    * @stable ICU 3.2
304    */
305   enum EInvariant {
306     /**
307      * @see EInvariant
308      * @stable ICU 3.2
309      */
310     kInvariant
311   };
312 
313   //========================================
314   // Read-only operations
315   //========================================
316 
317   /* Comparison - bitwise only - for international comparison use collation */
318 
319   /**
320    * Equality operator. Performs only bitwise comparison (for language-sensitive comparison use collation).
321    * @param text The UnicodeString to compare to this one.
322    * @return TRUE if <TT>text</TT> contains the same characters as this one,
323    * FALSE otherwise.
324    * @stable ICU 2.0
325    */
326   inline UBool operator== (const UnicodeString& text) const;
327 
328   /**
329    * Inequality operator. Performs only bitwise comparison (for language-sensitive comparison use collation).
330    * @param text The UnicodeString to compare to this one.
331    * @return FALSE if <TT>text</TT> contains the same characters as this one,
332    * TRUE otherwise.
333    * @stable ICU 2.0
334    */
335   inline UBool operator!= (const UnicodeString& text) const;
336 
337   /**
338    * Greater than operator. Performs only bitwise comparison; the result may differ from code point order (see compareCodePointOrder()).
339    * @param text The UnicodeString to compare to this one.
340    * @return TRUE if the characters in this are bitwise
341    * greater than the characters in <code>text</code>, FALSE otherwise.
342    * @stable ICU 2.0
343    */
344   inline UBool operator> (const UnicodeString& text) const;
345 
346   /**
347    * Less than operator. Performs only bitwise comparison; the result may differ from code point order (see compareCodePointOrder()).
348    * @param text The UnicodeString to compare to this one.
349    * @return TRUE if the characters in this are bitwise
350    * less than the characters in <code>text</code>, FALSE otherwise.
351    * @stable ICU 2.0
352    */
353   inline UBool operator< (const UnicodeString& text) const;
354 
355   /**
356    * Greater than or equal operator. Performs only bitwise comparison; the result may differ from code point order (see compareCodePointOrder()).
357    * @param text The UnicodeString to compare to this one.
358    * @return TRUE if the characters in this are bitwise
359    * greater than or equal to the characters in <code>text</code>, FALSE otherwise.
360    * @stable ICU 2.0
361    */
362   inline UBool operator>= (const UnicodeString& text) const;
363 
364   /**
365    * Less than or equal operator. Performs only bitwise comparison; the result may differ from code point order (see compareCodePointOrder()).
366    * @param text The UnicodeString to compare to this one.
367    * @return TRUE if the characters in this are bitwise
368    * less than or equal to the characters in <code>text</code>, FALSE otherwise.
369    * @stable ICU 2.0
370    */
371   inline UBool operator<= (const UnicodeString& text) const;
372 
373   /**
374    * Compare the characters bitwise in this UnicodeString to
375    * the characters in <code>text</code>.
376    * @param text The UnicodeString to compare to this one.
377    * @return The result of bitwise character comparison: 0 if this
378    * contains the same characters as <code>text</code>, -1 if the characters in
379    * this are bitwise less than the characters in <code>text</code>, +1 if the
380    * characters in this are bitwise greater than the characters
381    * in <code>text</code>. For code point order see compareCodePointOrder().
382    * @stable ICU 2.0
383    */
384   inline int8_t compare(const UnicodeString& text) const;
385 
386   /**
387    * Compare the characters bitwise in the range
388    * [<TT>start</TT>, <TT>start + length</TT>) with the characters
389    * in the <b>entire string</b> <TT>text</TT>.
390    * (The parameters "start" and "length" are not applied to the other text "text".)
391    * @param start the offset in this string at which the compare operation begins
392    * @param length the number of characters in this string to compare.
393    * @param text the other text to be compared against this string.
394    * @return The result of bitwise character comparison: 0 if this
395    * contains the same characters as <code>text</code>, -1 if the characters in
396    * this are bitwise less than the characters in <code>text</code>, +1 if the
397    * characters in this are bitwise greater than the characters
398    * in <code>text</code>.
399    * @stable ICU 2.0
400    */
401   inline int8_t compare(int32_t start,
402          int32_t length,
403          const UnicodeString& text) const;
404 
405   /**
406    * Compare the characters bitwise in the range
407    * [<TT>start</TT>, <TT>start + length</TT>) with the characters
408    * in <TT>srcText</TT> in the range
409    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
410    * @param start the offset in this string at which the compare operation begins
411    * @param length the number of characters in this to compare.
412    * @param srcText the text to be compared
413    * @param srcStart the offset into <TT>srcText</TT> to start comparison
414    * @param srcLength the number of characters in <TT>srcText</TT> to compare
415    * @return The result of bitwise character comparison: 0 if this
416    * contains the same characters as <code>srcText</code>, -1 if the characters in
417    * this are bitwise less than the characters in <code>srcText</code>, +1 if the
418    * characters in this are bitwise greater than the characters
419    * in <code>srcText</code>.
420    * @stable ICU 2.0
421    */
422    inline int8_t compare(int32_t start,
423          int32_t length,
424          const UnicodeString& srcText,
425          int32_t srcStart,
426          int32_t srcLength) const;
427 
428   /**
429    * Compare the characters bitwise in this UnicodeString with the first
430    * <TT>srcLength</TT> characters in <TT>srcChars</TT>.
431    * @param srcChars The characters to compare to this UnicodeString. Per the class description, a NULL pointer is treated like an empty string.
432    * @param srcLength the number of characters in <TT>srcChars</TT> to compare
433    * @return The result of bitwise character comparison: 0 if this
434    * contains the same characters as <code>srcChars</code>, -1 if the characters in
435    * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
436    * characters in this are bitwise greater than the characters
437    * in <code>srcChars</code>.
438    * @stable ICU 2.0
439    */
440   inline int8_t compare(const UChar *srcChars,
441          int32_t srcLength) const;
442 
443   /**
444    * Compare the characters bitwise in the range
445    * [<TT>start</TT>, <TT>start + length</TT>) with the first
446    * <TT>length</TT> characters in <TT>srcChars</TT>.
447    * @param start the offset in this string at which the compare operation begins
448    * @param length the number of characters to compare.
449    * @param srcChars the characters to be compared
450    * @return The result of bitwise character comparison: 0 if this
451    * contains the same characters as <code>srcChars</code>, -1 if the characters in
452    * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
453    * characters in this are bitwise greater than the characters
454    * in <code>srcChars</code>.
455    * @stable ICU 2.0
456    */
457   inline int8_t compare(int32_t start,
458          int32_t length,
459          const UChar *srcChars) const;
460 
461   /**
462    * Compare the characters bitwise in the range
463    * [<TT>start</TT>, <TT>start + length</TT>) with the characters
464    * in <TT>srcChars</TT> in the range
465    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
466    * @param start the offset in this string at which the compare operation begins
467    * @param length the number of characters in this to compare
468    * @param srcChars the characters to be compared
469    * @param srcStart the offset into <TT>srcChars</TT> to start comparison
470    * @param srcLength the number of characters in <TT>srcChars</TT> to compare
471    * @return The result of bitwise character comparison: 0 if this
472    * contains the same characters as <code>srcChars</code>, -1 if the characters in
473    * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
474    * characters in this are bitwise greater than the characters
475    * in <code>srcChars</code>.
476    * @stable ICU 2.0
477    */
478   inline int8_t compare(int32_t start,
479          int32_t length,
480          const UChar *srcChars,
481          int32_t srcStart,
482          int32_t srcLength) const;
483 
484   /**
485    * Compare the characters bitwise in the range
486    * [<TT>start</TT>, <TT>limit</TT>) with the characters
487    * in <TT>srcText</TT> in the range
488    * [<TT>srcStart</TT>, <TT>srcLimit</TT>).
489    * @param start the offset in this string at which the compare operation begins
490    * @param limit the offset in this string immediately following the compare operation
491    * @param srcText the text to be compared
492    * @param srcStart the offset into <TT>srcText</TT> to start comparison
493    * @param srcLimit the offset into <TT>srcText</TT> to limit comparison
494    * @return The result of bitwise character comparison: 0 if this
495    * contains the same characters as <code>srcText</code>, -1 if the characters in
496    * this are bitwise less than the characters in <code>srcText</code>, +1 if the
497    * characters in this are bitwise greater than the characters
498    * in <code>srcText</code>.
499    * @stable ICU 2.0
500    */
501   inline int8_t compareBetween(int32_t start,
502             int32_t limit,
503             const UnicodeString& srcText,
504             int32_t srcStart,
505             int32_t srcLimit) const;
506 
507   /**
508    * Compare this string with <code>text</code> in code point order.
509    * The result may be different from the results of compare(), operator<, etc.
510    * if supplementary characters are present:
511    *
512    * In UTF-16, supplementary characters (with code points U+10000 and above) are
513    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
514    * which means that they compare as less than some other BMP characters like U+feff.
515    * This function compares Unicode strings in code point order.
516    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
517    *
518    * @param text Another string to compare this one to.
519    * @return a negative/zero/positive integer corresponding to whether
520    * this string is less than/equal to/greater than the second one
521    * in code point order
522    * @stable ICU 2.0
523    */
524   inline int8_t compareCodePointOrder(const UnicodeString& text) const;
525 
526   /**
527    * Compare the range [start, start + length) of this string with <code>srcText</code> in code point order.
528    * The result may be different from the results of compare(), operator<, etc.
529    * if supplementary characters are present:
530    *
531    * In UTF-16, supplementary characters (with code points U+10000 and above) are
532    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
533    * which means that they compare as less than some other BMP characters like U+feff.
534    * This function compares Unicode strings in code point order.
535    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
536    *
537    * @param start The start offset in this string at which the compare operation begins.
538    * @param length The number of code units from this string to compare.
539    * @param srcText Another string to compare this one to.
540    * @return a negative/zero/positive integer corresponding to whether
541    * this string is less than/equal to/greater than the second one
542    * in code point order
543    * @stable ICU 2.0
544    */
545   inline int8_t compareCodePointOrder(int32_t start,
546                                       int32_t length,
547                                       const UnicodeString& srcText) const;
548 
549   /**
550    * Compare the range [start, start + length) of this string with the range [srcStart, srcStart + srcLength) of <code>srcText</code> in code point order.
551    * The result may be different from the results of compare(), operator<, etc.
552    * if supplementary characters are present:
553    *
554    * In UTF-16, supplementary characters (with code points U+10000 and above) are
555    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
556    * which means that they compare as less than some other BMP characters like U+feff.
557    * This function compares Unicode strings in code point order.
558    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
559    *
560    * @param start The start offset in this string at which the compare operation begins.
561    * @param length The number of code units from this string to compare.
562    * @param srcText Another string to compare this one to.
563    * @param srcStart The start offset in that string at which the compare operation begins.
564    * @param srcLength The number of code units from that string to compare.
565    * @return a negative/zero/positive integer corresponding to whether
566    * this string is less than/equal to/greater than the second one
567    * in code point order
568    * @stable ICU 2.0
569    */
570    inline int8_t compareCodePointOrder(int32_t start,
571                                        int32_t length,
572                                        const UnicodeString& srcText,
573                                        int32_t srcStart,
574                                        int32_t srcLength) const;
575 
576   /**
577    * Compare this string with <code>srcLength</code> code units from <code>srcChars</code> in code point order.
578    * The result may be different from the results of compare(), operator<, etc.
579    * if supplementary characters are present:
580    *
581    * In UTF-16, supplementary characters (with code points U+10000 and above) are
582    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
583    * which means that they compare as less than some other BMP characters like U+feff.
584    * This function compares Unicode strings in code point order.
585    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
586    *
587    * @param srcChars A pointer to another string to compare this one to.
588    * @param srcLength The number of code units from that string to compare.
589    * @return a negative/zero/positive integer corresponding to whether
590    * this string is less than/equal to/greater than the second one
591    * in code point order
592    * @stable ICU 2.0
593    */
594   inline int8_t compareCodePointOrder(const UChar *srcChars,
595                                       int32_t srcLength) const;
596 
597   /**
598    * Compare the range [start, start + length) of this string with the string at <code>srcChars</code> in code point order.
599    * The result may be different from the results of compare(), operator<, etc.
600    * if supplementary characters are present:
601    *
602    * In UTF-16, supplementary characters (with code points U+10000 and above) are
603    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
604    * which means that they compare as less than some other BMP characters like U+feff.
605    * This function compares Unicode strings in code point order.
606    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
607    *
608    * @param start The start offset in this string at which the compare operation begins.
609    * @param length The number of code units from this string to compare.
610    * @param srcChars A pointer to another string to compare this one to.
611    * @return a negative/zero/positive integer corresponding to whether
612    * this string is less than/equal to/greater than the second one
613    * in code point order
614    * @stable ICU 2.0
615    */
616   inline int8_t compareCodePointOrder(int32_t start,
617                                       int32_t length,
618                                       const UChar *srcChars) const;
619 
620   /**
621    * Compare the range [start, start + length) of this string with the range [srcStart, srcStart + srcLength) of <code>srcChars</code> in code point order.
622    * The result may be different from the results of compare(), operator<, etc.
623    * if supplementary characters are present:
624    *
625    * In UTF-16, supplementary characters (with code points U+10000 and above) are
626    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
627    * which means that they compare as less than some other BMP characters like U+feff.
628    * This function compares Unicode strings in code point order.
629    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
630    *
631    * @param start The start offset in this string at which the compare operation begins.
632    * @param length The number of code units from this string to compare.
633    * @param srcChars A pointer to another string to compare this one to.
634    * @param srcStart The start offset in that string at which the compare operation begins.
635    * @param srcLength The number of code units from that string to compare.
636    * @return a negative/zero/positive integer corresponding to whether
637    * this string is less than/equal to/greater than the second one
638    * in code point order
639    * @stable ICU 2.0
640    */
641   inline int8_t compareCodePointOrder(int32_t start,
642                                       int32_t length,
643                                       const UChar *srcChars,
644                                       int32_t srcStart,
645                                       int32_t srcLength) const;
646 
647   /**
648    * Compare the range [start, limit) of this string with the range [srcStart, srcLimit) of <code>srcText</code> in code point order.
649    * The result may be different from the results of compare(), operator<, etc.
650    * if supplementary characters are present:
651    *
652    * In UTF-16, supplementary characters (with code points U+10000 and above) are
653    * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
654    * which means that they compare as less than some other BMP characters like U+feff.
655    * This function compares Unicode strings in code point order.
656    * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
657    *
658    * @param start The start offset in this string at which the compare operation begins.
659    * @param limit The offset after the last code unit from this string to compare.
660    * @param srcText Another string to compare this one to.
661    * @param srcStart The start offset in that string at which the compare operation begins.
662    * @param srcLimit The offset after the last code unit from that string to compare.
663    * @return a negative/zero/positive integer corresponding to whether
664    * this string is less than/equal to/greater than the second one
665    * in code point order
666    * @stable ICU 2.0
667    */
668   inline int8_t compareCodePointOrderBetween(int32_t start,
669                                              int32_t limit,
670                                              const UnicodeString& srcText,
671                                              int32_t srcStart,
672                                              int32_t srcLimit) const;
673 
674   /**
675    * Compare two strings case-insensitively using full case folding.
676    * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
677    *
678    * @param text Another string to compare this one to.
679    * @param options A bit set of options:
680    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
681    *     Comparison in code unit order with default case folding.
682    *
683    *   - U_COMPARE_CODE_POINT_ORDER
684    *     Set to choose code point order instead of code unit order
685    *     (see u_strCompare for details).
686    *
687    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
688    *
689    * @return A negative, zero, or positive integer indicating the comparison result.
690    * @stable ICU 2.0
691    */
692   inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
693 
694   /**
695    * Compare two strings case-insensitively using full case folding.
696    * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
697    *
698    * @param start The start offset in this string at which the compare operation begins.
699    * @param length The number of code units from this string to compare.
700    * @param srcText Another string to compare this one to.
701    * @param options A bit set of options:
702    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
703    *     Comparison in code unit order with default case folding.
704    *
705    *   - U_COMPARE_CODE_POINT_ORDER
706    *     Set to choose code point order instead of code unit order
707    *     (see u_strCompare for details).
708    *
709    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
710    *
711    * @return A negative, zero, or positive integer indicating the comparison result.
712    * @stable ICU 2.0
713    */
714   inline int8_t caseCompare(int32_t start,
715          int32_t length,
716          const UnicodeString& srcText,
717          uint32_t options) const;
718 
719   /**
720    * Compare two strings case-insensitively using full case folding.
721    * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
722    *
723    * @param start The start offset in this string at which the compare operation begins.
724    * @param length The number of code units from this string to compare.
725    * @param srcText Another string to compare this one to.
726    * @param srcStart The start offset in that string at which the compare operation begins.
727    * @param srcLength The number of code units from that string to compare.
728    * @param options A bit set of options:
729    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
730    *     Comparison in code unit order with default case folding.
731    *
732    *   - U_COMPARE_CODE_POINT_ORDER
733    *     Set to choose code point order instead of code unit order
734    *     (see u_strCompare for details).
735    *
736    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
737    *
738    * @return A negative, zero, or positive integer indicating the comparison result.
739    * @stable ICU 2.0
740    */
741   inline int8_t caseCompare(int32_t start,
742          int32_t length,
743          const UnicodeString& srcText,
744          int32_t srcStart,
745          int32_t srcLength,
746          uint32_t options) const;
747 
748   /**
749    * Compare two strings case-insensitively using full case folding.
750    * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
751    *
752    * @param srcChars A pointer to another string to compare this one to.
753    * @param srcLength The number of code units from that string to compare.
754    * @param options A bit set of options:
755    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
756    *     Comparison in code unit order with default case folding.
757    *
758    *   - U_COMPARE_CODE_POINT_ORDER
759    *     Set to choose code point order instead of code unit order
760    *     (see u_strCompare for details).
761    *
762    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
763    *
764    * @return A negative, zero, or positive integer indicating the comparison result.
765    * @stable ICU 2.0
766    */
767   inline int8_t caseCompare(const UChar *srcChars,
768          int32_t srcLength,
769          uint32_t options) const;
770 
771   /**
772    * Compare two strings case-insensitively using full case folding.
773    * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
774    *
775    * @param start The start offset in this string at which the compare operation begins.
776    * @param length The number of code units from this string to compare.
777    * @param srcChars A pointer to another string to compare this one to.
778    * @param options A bit set of options:
779    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
780    *     Comparison in code unit order with default case folding.
781    *
782    *   - U_COMPARE_CODE_POINT_ORDER
783    *     Set to choose code point order instead of code unit order
784    *     (see u_strCompare for details).
785    *
786    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
787    *
788    * @return A negative, zero, or positive integer indicating the comparison result.
789    * @stable ICU 2.0
790    */
791   inline int8_t caseCompare(int32_t start,
792          int32_t length,
793          const UChar *srcChars,
794          uint32_t options) const;
795 
796   /**
797    * Compare two strings case-insensitively using full case folding.
798    * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
799    *
800    * @param start The start offset in this string at which the compare operation begins.
801    * @param length The number of code units from this string to compare.
802    * @param srcChars A pointer to another string to compare this one to.
803    * @param srcStart The start offset in that string at which the compare operation begins.
804    * @param srcLength The number of code units from that string to compare.
805    * @param options A bit set of options:
806    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
807    *     Comparison in code unit order with default case folding.
808    *
809    *   - U_COMPARE_CODE_POINT_ORDER
810    *     Set to choose code point order instead of code unit order
811    *     (see u_strCompare for details).
812    *
813    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
814    *
815    * @return A negative, zero, or positive integer indicating the comparison result.
816    * @stable ICU 2.0
817    */
818   inline int8_t caseCompare(int32_t start,
819          int32_t length,
820          const UChar *srcChars,
821          int32_t srcStart,
822          int32_t srcLength,
823          uint32_t options) const;
824 
825   /**
826    * Compare two strings case-insensitively using full case folding.
827    * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).
828    *
829    * @param start The start offset in this string at which the compare operation begins.
830    * @param limit The offset after the last code unit from this string to compare.
831    * @param srcText Another string to compare this one to.
832    * @param srcStart The start offset in that string at which the compare operation begins.
833    * @param srcLimit The offset after the last code unit from that string to compare.
834    * @param options A bit set of options:
835    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
836    *     Comparison in code unit order with default case folding.
837    *
838    *   - U_COMPARE_CODE_POINT_ORDER
839    *     Set to choose code point order instead of code unit order
840    *     (see u_strCompare for details).
841    *
842    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
843    *
844    * @return A negative, zero, or positive integer indicating the comparison result.
845    * @stable ICU 2.0
846    */
847   inline int8_t caseCompareBetween(int32_t start,
848             int32_t limit,
849             const UnicodeString& srcText,
850             int32_t srcStart,
851             int32_t srcLimit,
852             uint32_t options) const;
853 
854   /**
855    * Determine if this starts with the characters in <TT>text</TT>
856    * @param text The text to match.
857    * @return TRUE if this starts with the characters in <TT>text</TT>,
858    * FALSE otherwise
859    * @stable ICU 2.0
860    */
861   inline UBool startsWith(const UnicodeString& text) const;
862 
863   /**
864    * Determine if this starts with the characters in <TT>srcText</TT>
865    * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
866    * @param srcText The text to match.
867    * @param srcStart the offset into <TT>srcText</TT> to start matching
868    * @param srcLength the number of characters in <TT>srcText</TT> to match
869    * @return TRUE if this starts with the characters in <TT>srcText</TT>,
870    * FALSE otherwise
871    * @stable ICU 2.0
872    */
873   inline UBool startsWith(const UnicodeString& srcText,
874             int32_t srcStart,
875             int32_t srcLength) const;
876 
877   /**
878    * Determine if this starts with the characters in <TT>srcChars</TT>
879    * @param srcChars The characters to match.
880    * @param srcLength the number of characters in <TT>srcChars</TT>
881    * @return TRUE if this starts with the characters in <TT>srcChars</TT>,
882    * FALSE otherwise
883    * @stable ICU 2.0
884    */
885   inline UBool startsWith(const UChar *srcChars,
886             int32_t srcLength) const;
887 
888   /**
889    * Determine if this starts with the characters in <TT>srcChars</TT>
890    * in the range  [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
891    * @param srcChars The characters to match.
892    * @param srcStart the offset into <TT>srcChars</TT> to start matching
893    * @param srcLength the number of characters in <TT>srcChars</TT> to match
894    * @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise
895    * @stable ICU 2.0
896    */
897   inline UBool startsWith(const UChar *srcChars,
898             int32_t srcStart,
899             int32_t srcLength) const;
900 
901   /**
902    * Determine if this ends with the characters in <TT>text</TT>
903    * @param text The text to match.
904    * @return TRUE if this ends with the characters in <TT>text</TT>,
905    * FALSE otherwise
906    * @stable ICU 2.0
907    */
908   inline UBool endsWith(const UnicodeString& text) const;
909 
910   /**
911    * Determine if this ends with the characters in <TT>srcText</TT>
912    * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
913    * @param srcText The text to match.
914    * @param srcStart the offset into <TT>srcText</TT> to start matching
915    * @param srcLength the number of characters in <TT>srcText</TT> to match
916    * @return TRUE if this ends with the characters in <TT>srcText</TT>,
917    * FALSE otherwise
918    * @stable ICU 2.0
919    */
920   inline UBool endsWith(const UnicodeString& srcText,
921           int32_t srcStart,
922           int32_t srcLength) const;
923 
924   /**
925    * Determine if this ends with the characters in <TT>srcChars</TT>
926    * @param srcChars The characters to match.
927    * @param srcLength the number of characters in <TT>srcChars</TT>
928    * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
929    * FALSE otherwise
930    * @stable ICU 2.0
931    */
932   inline UBool endsWith(const UChar *srcChars,
933           int32_t srcLength) const;
934 
935   /**
936    * Determine if this ends with the characters in <TT>srcChars</TT>
937    * in the range  [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
938    * @param srcChars The characters to match.
939    * @param srcStart the offset into <TT>srcChars</TT> to start matching
940    * @param srcLength the number of characters in <TT>srcChars</TT> to match
941    * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
942    * FALSE otherwise
943    * @stable ICU 2.0
944    */
945   inline UBool endsWith(const UChar *srcChars,
946           int32_t srcStart,
947           int32_t srcLength) const;
948 
949 
950   /* Searching - bitwise only */
951 
952   /**
953    * Locate in this the first occurrence of the characters in <TT>text</TT>,
954    * using bitwise comparison.
955    * @param text The text to search for.
956    * @return The offset into this of the start of <TT>text</TT>,
957    * or -1 if not found.
958    * @stable ICU 2.0
959    */
960   inline int32_t indexOf(const UnicodeString& text) const;
961 
962   /**
963    * Locate in this the first occurrence of the characters in <TT>text</TT>
964    * starting at offset <TT>start</TT>, using bitwise comparison.
965    * @param text The text to search for.
966    * @param start The offset at which searching will start.
967    * @return The offset into this of the start of <TT>text</TT>,
968    * or -1 if not found.
969    * @stable ICU 2.0
970    */
971   inline int32_t indexOf(const UnicodeString& text,
972               int32_t start) const;
973 
974   /**
975    * Locate in this the first occurrence in the range
976    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
977    * in <TT>text</TT>, using bitwise comparison.
978    * @param text The text to search for.
979    * @param start The offset at which searching will start.
980    * @param length The number of characters to search
981    * @return The offset into this of the start of <TT>text</TT>,
982    * or -1 if not found.
983    * @stable ICU 2.0
984    */
985   inline int32_t indexOf(const UnicodeString& text,
986               int32_t start,
987               int32_t length) const;
988 
989   /**
990    * Locate in this the first occurrence in the range
991    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
992    *  in <TT>srcText</TT> in the range
993    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
994    * using bitwise comparison.
995    * @param srcText The text to search for.
996    * @param srcStart the offset into <TT>srcText</TT> at which
997    * to start matching
998    * @param srcLength the number of characters in <TT>srcText</TT> to match
999    * @param start the offset into this at which to start matching
1000    * @param length the number of characters in this to search
1001    * @return The offset into this of the start of <TT>srcText</TT>,
1002    * or -1 if not found.
1003    * @stable ICU 2.0
1004    */
1005   inline int32_t indexOf(const UnicodeString& srcText,
1006               int32_t srcStart,
1007               int32_t srcLength,
1008               int32_t start,
1009               int32_t length) const;
1010 
1011   /**
1012    * Locate in this the first occurrence of the characters in
1013    * <TT>srcChars</TT>
1014    * starting at offset <TT>start</TT>, using bitwise comparison.
1015    * @param srcChars The text to search for.
1016    * @param srcLength the number of characters in <TT>srcChars</TT> to match
1017    * @param start the offset into this at which to start matching
1018    * @return The offset into this of the start of <TT>srcChars</TT>,
1019    * or -1 if not found.
1020    * @stable ICU 2.0
1021    */
1022   inline int32_t indexOf(const UChar *srcChars,
1023               int32_t srcLength,
1024               int32_t start) const;
1025 
1026   /**
1027    * Locate in this the first occurrence in the range
1028    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1029    * in <TT>srcChars</TT>, using bitwise comparison.
1030    * @param srcChars The text to search for.
1031    * @param srcLength the number of characters in <TT>srcChars</TT>
1032    * @param start The offset at which searching will start.
1033    * @param length The number of characters to search
1034    * @return The offset into this of the start of <TT>srcChars</TT>,
1035    * or -1 if not found.
1036    * @stable ICU 2.0
1037    */
1038   inline int32_t indexOf(const UChar *srcChars,
1039               int32_t srcLength,
1040               int32_t start,
1041               int32_t length) const;
1042 
1043   /**
1044    * Locate in this the first occurrence in the range
1045    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1046    * in <TT>srcChars</TT> in the range
1047    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1048    * using bitwise comparison.
1049    * @param srcChars The text to search for.
1050    * @param srcStart the offset into <TT>srcChars</TT> at which
1051    * to start matching
1052    * @param srcLength the number of characters in <TT>srcChars</TT> to match
1053    * @param start the offset into this at which to start matching
1054    * @param length the number of characters in this to search
1055    * @return The offset into this of the start of <TT>srcChars</TT>,
1056    * or -1 if not found.
1057    * @stable ICU 2.0
1058    */
1059   int32_t indexOf(const UChar *srcChars,
1060               int32_t srcStart,
1061               int32_t srcLength,
1062               int32_t start,
1063               int32_t length) const;
1064 
1065   /**
1066    * Locate in this the first occurrence of the BMP code point <code>c</code>,
1067    * using bitwise comparison.
1068    * @param c The code unit to search for.
1069    * @return The offset into this of <TT>c</TT>, or -1 if not found.
1070    * @stable ICU 2.0
1071    */
1072   inline int32_t indexOf(UChar c) const;
1073 
1074   /**
1075    * Locate in this the first occurrence of the code point <TT>c</TT>,
1076    * using bitwise comparison.
1077    *
1078    * @param c The code point to search for.
1079    * @return The offset into this of <TT>c</TT>, or -1 if not found.
1080    * @stable ICU 2.0
1081    */
1082   inline int32_t indexOf(UChar32 c) const;
1083 
1084   /**
1085    * Locate in this the first occurrence of the BMP code point <code>c</code>,
1086    * starting at offset <TT>start</TT>, using bitwise comparison.
1087    * @param c The code unit to search for.
1088    * @param start The offset at which searching will start.
1089    * @return The offset into this of <TT>c</TT>, or -1 if not found.
1090    * @stable ICU 2.0
1091    */
1092   inline int32_t indexOf(UChar c,
1093               int32_t start) const;
1094 
1095   /**
1096    * Locate in this the first occurrence of the code point <TT>c</TT>
1097    * starting at offset <TT>start</TT>, using bitwise comparison.
1098    *
1099    * @param c The code point to search for.
1100    * @param start The offset at which searching will start.
1101    * @return The offset into this of <TT>c</TT>, or -1 if not found.
1102    * @stable ICU 2.0
1103    */
1104   inline int32_t indexOf(UChar32 c,
1105               int32_t start) const;
1106 
1107   /**
1108    * Locate in this the first occurrence of the BMP code point <code>c</code>
1109    * in the range [<TT>start</TT>, <TT>start + length</TT>),
1110    * using bitwise comparison.
1111    * @param c The code unit to search for.
1112    * @param start the offset into this at which to start matching
1113    * @param length the number of characters in this to search
1114    * @return The offset into this of <TT>c</TT>, or -1 if not found.
1115    * @stable ICU 2.0
1116    */
1117   inline int32_t indexOf(UChar c,
1118               int32_t start,
1119               int32_t length) const;
1120 
1121   /**
1122    * Locate in this the first occurrence of the code point <TT>c</TT>
1123    * in the range [<TT>start</TT>, <TT>start + length</TT>),
1124    * using bitwise comparison.
1125    *
1126    * @param c The code point to search for.
1127    * @param start the offset into this at which to start matching
1128    * @param length the number of characters in this to search
1129    * @return The offset into this of <TT>c</TT>, or -1 if not found.
1130    * @stable ICU 2.0
1131    */
1132   inline int32_t indexOf(UChar32 c,
1133               int32_t start,
1134               int32_t length) const;
1135 
1136   /**
1137    * Locate in this the last occurrence of the characters in <TT>text</TT>,
1138    * using bitwise comparison.
1139    * @param text The text to search for.
1140    * @return The offset into this of the start of <TT>text</TT>,
1141    * or -1 if not found.
1142    * @stable ICU 2.0
1143    */
1144   inline int32_t lastIndexOf(const UnicodeString& text) const;
1145 
1146   /**
1147    * Locate in this the last occurrence of the characters in <TT>text</TT>
1148    * starting at offset <TT>start</TT>, using bitwise comparison.
1149    * @param text The text to search for.
1150    * @param start The offset at which searching will start.
1151    * @return The offset into this of the start of <TT>text</TT>,
1152    * or -1 if not found.
1153    * @stable ICU 2.0
1154    */
1155   inline int32_t lastIndexOf(const UnicodeString& text,
1156               int32_t start) const;
1157 
1158   /**
1159    * Locate in this the last occurrence in the range
1160    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1161    * in <TT>text</TT>, using bitwise comparison.
1162    * @param text The text to search for.
1163    * @param start The offset at which searching will start.
1164    * @param length The number of characters to search
1165    * @return The offset into this of the start of <TT>text</TT>,
1166    * or -1 if not found.
1167    * @stable ICU 2.0
1168    */
1169   inline int32_t lastIndexOf(const UnicodeString& text,
1170               int32_t start,
1171               int32_t length) const;
1172 
1173   /**
1174    * Locate in this the last occurrence in the range
1175    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1176    * in <TT>srcText</TT> in the range
1177    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1178    * using bitwise comparison.
1179    * @param srcText The text to search for.
1180    * @param srcStart the offset into <TT>srcText</TT> at which
1181    * to start matching
1182    * @param srcLength the number of characters in <TT>srcText</TT> to match
1183    * @param start the offset into this at which to start matching
1184    * @param length the number of characters in this to search
1185    * @return The offset into this of the start of <TT>srcText</TT>,
1186    * or -1 if not found.
1187    * @stable ICU 2.0
1188    */
1189   inline int32_t lastIndexOf(const UnicodeString& srcText,
1190               int32_t srcStart,
1191               int32_t srcLength,
1192               int32_t start,
1193               int32_t length) const;
1194 
1195   /**
1196    * Locate in this the last occurrence of the characters in <TT>srcChars</TT>
1197    * starting at offset <TT>start</TT>, using bitwise comparison.
1198    * @param srcChars The text to search for.
1199    * @param srcLength the number of characters in <TT>srcChars</TT> to match
1200    * @param start the offset into this at which to start matching
1201    * @return The offset into this of the start of <TT>srcChars</TT>,
1202    * or -1 if not found.
1203    * @stable ICU 2.0
1204    */
1205   inline int32_t lastIndexOf(const UChar *srcChars,
1206               int32_t srcLength,
1207               int32_t start) const;
1208 
1209   /**
1210    * Locate in this the last occurrence in the range
1211    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1212    * in <TT>srcChars</TT>, using bitwise comparison.
1213    * @param srcChars The text to search for.
1214    * @param srcLength the number of characters in <TT>srcChars</TT>
1215    * @param start The offset at which searching will start.
1216    * @param length The number of characters to search
1217    * @return The offset into this of the start of <TT>srcChars</TT>,
1218    * or -1 if not found.
1219    * @stable ICU 2.0
1220    */
1221   inline int32_t lastIndexOf(const UChar *srcChars,
1222               int32_t srcLength,
1223               int32_t start,
1224               int32_t length) const;
1225 
1226   /**
1227    * Locate in this the last occurrence in the range
1228    * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1229    * in <TT>srcChars</TT> in the range
1230    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1231    * using bitwise comparison.
1232    * @param srcChars The text to search for.
1233    * @param srcStart the offset into <TT>srcChars</TT> at which
1234    * to start matching
1235    * @param srcLength the number of characters in <TT>srcChars</TT> to match
1236    * @param start the offset into this at which to start matching
1237    * @param length the number of characters in this to search
1238    * @return The offset into this of the start of <TT>srcChars</TT>,
1239    * or -1 if not found.
1240    * @stable ICU 2.0
1241    */
1242   int32_t lastIndexOf(const UChar *srcChars,
1243               int32_t srcStart,
1244               int32_t srcLength,
1245               int32_t start,
1246               int32_t length) const;
1247 
1248   /**
1249    * Locate in this the last occurrence of the BMP code point <code>c</code>,
1250    * using bitwise comparison.
1251    * @param c The code unit to search for.
1252    * @return The offset into this of <TT>c</TT>, or -1 if not found.
1253    * @stable ICU 2.0
1254    */
1255   inline int32_t lastIndexOf(UChar c) const;
1256 
1257   /**
1258    * Locate in this the last occurrence of the code point <TT>c</TT>,
1259    * using bitwise comparison.
1260    *
1261    * @param c The code point to search for.
1262    * @return The offset into this of <TT>c</TT>, or -1 if not found.
1263    * @stable ICU 2.0
1264    */
1265   inline int32_t lastIndexOf(UChar32 c) const;
1266 
1267   /**
1268    * Locate in this the last occurrence of the BMP code point <code>c</code>
1269    * starting at offset <TT>start</TT>, using bitwise comparison.
1270    * @param c The code unit to search for.
1271    * @param start The offset at which searching will start.
1272    * @return The offset into this of <TT>c</TT>, or -1 if not found.
1273    * @stable ICU 2.0
1274    */
1275   inline int32_t lastIndexOf(UChar c,
1276               int32_t start) const;
1277 
1278   /**
1279    * Locate in this the last occurrence of the code point <TT>c</TT>
1280    * starting at offset <TT>start</TT>, using bitwise comparison.
1281    *
1282    * @param c The code point to search for.
1283    * @param start The offset at which searching will start.
1284    * @return The offset into this of <TT>c</TT>, or -1 if not found.
1285    * @stable ICU 2.0
1286    */
1287   inline int32_t lastIndexOf(UChar32 c,
1288               int32_t start) const;
1289 
1290   /**
1291    * Locate in this the last occurrence of the BMP code point <code>c</code>
1292    * in the range [<TT>start</TT>, <TT>start + length</TT>),
1293    * using bitwise comparison.
1294    * @param c The code unit to search for.
1295    * @param start the offset into this at which to start matching
1296    * @param length the number of characters in this to search
1297    * @return The offset into this of <TT>c</TT>, or -1 if not found.
1298    * @stable ICU 2.0
1299    */
1300   inline int32_t lastIndexOf(UChar c,
1301               int32_t start,
1302               int32_t length) const;
1303 
1304   /**
1305    * Locate in this the last occurrence of the code point <TT>c</TT>
1306    * in the range [<TT>start</TT>, <TT>start + length</TT>),
1307    * using bitwise comparison.
1308    *
1309    * @param c The code point to search for.
1310    * @param start the offset into this at which to start matching
1311    * @param length the number of characters in this to search
1312    * @return The offset into this of <TT>c</TT>, or -1 if not found.
1313    * @stable ICU 2.0
1314    */
1315   inline int32_t lastIndexOf(UChar32 c,
1316               int32_t start,
1317               int32_t length) const;
1318 
1319 
1320   /* Character access */
1321 
1322   /**
1323    * Return the code unit at offset <tt>offset</tt>.
1324    * If the offset is not valid (0..length()-1) then U+ffff is returned.
1325    * @param offset a valid offset into the text
1326    * @return the code unit at offset <tt>offset</tt>
1327    *         or 0xffff if the offset is not valid for this string
1328    * @stable ICU 2.0
1329    */
1330   inline UChar charAt(int32_t offset) const;
1331 
1332   /**
1333    * Return the code unit at offset <tt>offset</tt>.
1334    * If the offset is not valid (0..length()-1) then U+ffff is returned.
1335    * @param offset a valid offset into the text
1336    * @return the code unit at offset <tt>offset</tt>
1337    * @stable ICU 2.0
1338    */
1339   inline UChar operator[] (int32_t offset) const;
1340 
1341   /**
1342    * Return the code point that contains the code unit
1343    * at offset <tt>offset</tt>.
1344    * If the offset is not valid (0..length()-1) then U+ffff is returned.
1345    * @param offset a valid offset into the text
1346    * that indicates the text offset of any of the code units
1347    * that will be assembled into a code point (21-bit value) and returned
1348    * @return the code point of text at <tt>offset</tt>
1349    *         or 0xffff if the offset is not valid for this string
1350    * @stable ICU 2.0
1351    */
1352   UChar32 char32At(int32_t offset) const;
1353 
1354   /**
1355    * Adjust a random-access offset so that
1356    * it points to the beginning of a Unicode character.
1357    * The offset that is passed in points to
1358    * any code unit of a code point,
1359    * while the returned offset will point to the first code unit
1360    * of the same code point.
1361    * In UTF-16, if the input offset points to a second surrogate
1362    * of a surrogate pair, then the returned offset will point
1363    * to the first surrogate.
1364    * @param offset a valid offset into one code point of the text
1365    * @return offset of the first code unit of the same code point
1366    * @see U16_SET_CP_START
1367    * @stable ICU 2.0
1368    */
1369   int32_t getChar32Start(int32_t offset) const;
1370 
1371   /**
1372    * Adjust a random-access offset so that
1373    * it points behind a Unicode character.
1374    * The offset that is passed in points behind
1375    * any code unit of a code point,
1376    * while the returned offset will point behind the last code unit
1377    * of the same code point.
1378    * In UTF-16, if the input offset points behind the first surrogate
1379    * (i.e., to the second surrogate)
1380    * of a surrogate pair, then the returned offset will point
1381    * behind the second surrogate (i.e., past the whole surrogate pair).
1382    * @param offset a valid offset after any code unit of a code point of the text
1383    * @return offset of the first code unit after the same code point
1384    * @see U16_SET_CP_LIMIT
1385    * @stable ICU 2.0
1386    */
1387   int32_t getChar32Limit(int32_t offset) const;
1388 
1389   /**
1390    * Move the code unit index along the string by delta code points.
1391    * Interpret the input index as a code unit-based offset into the string,
1392    * move the index forward or backward by delta code points, and
1393    * return the resulting index.
1394    * The input index should point to the first code unit of a code point,
1395    * if there is more than one.
1396    *
1397    * Both input and output indexes are code unit-based as for all
1398    * string indexes/offsets in ICU (and other libraries, like MBCS char*).
1399    * If delta<0 then the index is moved backward (toward the start of the string).
1400    * If delta>0 then the index is moved forward (toward the end of the string).
1401    *
1402    * This behaves like CharacterIterator::move32(delta, kCurrent).
1403    *
1404    * Behavior for out-of-bounds indexes:
1405    * <code>moveIndex32</code> pins the input index to 0..length(), i.e.,
1406    * if the input index<0 then it is pinned to 0;
1407    * if it is index>length() then it is pinned to length().
1408    * Afterwards, the index is moved by <code>delta</code> code points
1409    * forward or backward,
1410    * but no further backward than to 0 and no further forward than to length().
1411    * The resulting index return value will be in between 0 and length(), inclusively.
1412    *
1413    * Examples:
1414    * <pre>
1415    * // s has code points 'a' U+10000 'b' U+10ffff U+2029
1416    * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 28).unescape();
1417    *
1418    * // initial index: position of U+10000
1419    * int32_t index=1;
1420    *
1421    * // the following examples will all result in index==4, position of U+10ffff
1422    *
1423    * // skip 2 code points from some position in the string
1424    * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
1425    *
1426    * // go to the 3rd code point from the start of s (0-based)
1427    * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
1428    *
1429    * // go to the next-to-last code point of s
1430    * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
1431    * </pre>
1432    *
1433    * @param index input code unit index
1434    * @param delta (signed) code point count to move the index forward or backward
1435    *        in the string
1436    * @return the resulting code unit index
1437    * @stable ICU 2.0
1438    */
1439   int32_t moveIndex32(int32_t index, int32_t delta) const;
1440 
1441   /* Substring extraction */
1442 
1443   /**
1444    * Copy the characters in the range
1445    * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,
1446    * beginning at <tt>dstStart</tt>.
1447    * If the string aliases to <code>dst</code> itself as an external buffer,
1448    * then extract() will not copy the contents.
1449    *
1450    * @param start offset of first character which will be copied into the array
1451    * @param length the number of characters to extract
1452    * @param dst array in which to copy characters.  The length of <tt>dst</tt>
1453    * must be at least (<tt>dstStart + length</tt>).
1454    * @param dstStart the offset in <TT>dst</TT> where the first character
1455    * will be extracted
1456    * @stable ICU 2.0
1457    */
1458   inline void extract(int32_t start,
1459            int32_t length,
1460            UChar *dst,
1461            int32_t dstStart = 0) const;
1462 
1463   /**
1464    * Copy the contents of the string into dest.
1465    * This is a convenience function that
1466    * checks if there is enough space in dest,
1467    * extracts the entire string if possible,
1468    * and NUL-terminates dest if possible.
1469    *
1470    * If the string fits into dest but cannot be NUL-terminated
1471    * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
1472    * If the string itself does not fit into dest
1473    * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
1474    *
1475    * If the string aliases to <code>dest</code> itself as an external buffer,
1476    * then extract() will not copy the contents.
1477    *
1478    * @param dest Destination string buffer.
1479    * @param destCapacity Number of UChars available at dest.
1480    * @param errorCode ICU error code.
1481    * @return length()
1482    * @stable ICU 2.0
1483    */
1484   int32_t
1485   extract(UChar *dest, int32_t destCapacity,
1486           UErrorCode &errorCode) const;
1487 
1488   /**
1489    * Copy the characters in the range
1490    * [<tt>start</tt>, <tt>start + length</tt>) into the  UnicodeString
1491    * <tt>target</tt>.
1492    * @param start offset of first character which will be copied
1493    * @param length the number of characters to extract
1494    * @param target UnicodeString into which to copy characters.
1495    * The function itself returns no value; the result is delivered through <TT>target</TT>.
1496    * @stable ICU 2.0
1497    */
1498   inline void extract(int32_t start,
1499            int32_t length,
1500            UnicodeString& target) const;
1501 
1502   /**
1503    * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
1504    * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>.
1505    * @param start offset of first character which will be copied into the array
1506    * @param limit offset immediately following the last character to be copied
1507    * @param dst array in which to copy characters.  The length of <tt>dst</tt>
1508    * must be at least (<tt>dstStart + (limit - start)</tt>).
1509    * @param dstStart the offset in <TT>dst</TT> where the first character
1510    * will be extracted
1511    * @stable ICU 2.0
1512    */
1513   inline void extractBetween(int32_t start,
1514               int32_t limit,
1515               UChar *dst,
1516               int32_t dstStart = 0) const;
1517 
1518   /**
1519    * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
1520    * into the UnicodeString <tt>target</tt>.  Replaceable API.
1521    * @param start offset of first character which will be copied
1522    * @param limit offset immediately following the last character to be copied
1523    * @param target UnicodeString into which to copy characters.
1524    * The function itself returns no value; the result is delivered through <TT>target</TT>.
1525    * @stable ICU 2.0
1526    */
1527   virtual void extractBetween(int32_t start,
1528               int32_t limit,
1529               UnicodeString& target) const;
1530 
1531   /**
1532    * Copy the characters in the range
1533    * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters.
1534    * All characters must be invariant (see utypes.h).
1535    * Use US_INV as the last, signature-distinguishing parameter.
1536    *
1537    * This function does not write any more than <code>targetCapacity</code>
1538    * characters but returns the length of the entire output string
1539    * so that one can allocate a larger buffer and call the function again
1540    * if necessary.
1541    * The output string is NUL-terminated if possible.
1542    *
1543    * @param start offset of first character which will be copied
1544    * @param startLength the number of characters to extract
1545    * @param target the target buffer for extraction, can be NULL
1546    *               if targetCapacity is 0
1547    * @param targetCapacity the length of the target buffer
1548    * @param inv Signature-distinguishing parameter, use US_INV.
1549    * @return the output string length, not including the terminating NUL
1550    * @stable ICU 3.2
1551    */
1552   int32_t extract(int32_t start,
1553            int32_t startLength,
1554            char *target,
1555            int32_t targetCapacity,
1556            enum EInvariant inv) const;
1557 
1558 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
1559 
1560   /**
1561    * Copy the characters in the range
1562    * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters
1563    * in the platform's default codepage.
1564    * This function does not write any more than <code>targetLength</code>
1565    * characters but returns the length of the entire output string
1566    * so that one can allocate a larger buffer and call the function again
1567    * if necessary.
1568    * The output string is NUL-terminated if possible.
1569    *
1570    * @param start offset of first character which will be copied
1571    * @param startLength the number of characters to extract
1572    * @param target the target buffer for extraction
1573    * @param targetLength the length of the target buffer
1574    * If <TT>target</TT> is NULL, then the number of bytes required for
1575    * <TT>target</TT> is returned.
1576    * @return the output string length, not including the terminating NUL
1577    * @stable ICU 2.0
1578    */
1579   int32_t extract(int32_t start,
1580            int32_t startLength,
1581            char *target,
1582            uint32_t targetLength) const;
1583 
1584 #endif
1585 
1586 #if !UCONFIG_NO_CONVERSION
1587 
1588   /**
1589    * Copy the characters in the range
1590    * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters
1591    * in a specified codepage.
1592    * The output string is NUL-terminated.
1593    *
1594    * Recommendation: For invariant-character strings use
1595    * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
1596    * because it avoids object code dependencies of UnicodeString on
1597    * the conversion code.
1598    *
1599    * @param start offset of first character which will be copied
1600    * @param startLength the number of characters to extract
1601    * @param target the target buffer for extraction
1602    * @param codepage the desired codepage for the characters.  0 has
1603    * the special meaning of the default codepage.
1604    * If <code>codepage</code> is an empty string (<code>""</code>),
1605    * then a simple conversion is performed on the codepage-invariant
1606    * subset ("invariant characters") of the platform encoding. See utypes.h.
1607    * If <TT>target</TT> is NULL, then the number of bytes required for
1608    * <TT>target</TT> is returned. It is assumed that the target is big enough
1609    * to fit all of the characters.
1610    * @return the output string length, not including the terminating NUL
1611    * @stable ICU 2.0
1612    */
1613   inline int32_t extract(int32_t start,
1614                  int32_t startLength,
1615                  char *target,
1616                  const char *codepage = 0) const;
1617 
1618   /**
1619    * Copy the characters in the range
1620    * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters
1621    * in a specified codepage.
1622    * This function does not write any more than <code>targetLength</code>
1623    * characters but returns the length of the entire output string
1624    * so that one can allocate a larger buffer and call the function again
1625    * if necessary.
1626    * The output string is NUL-terminated if possible.
1627    *
1628    * Recommendation: For invariant-character strings use
1629    * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
1630    * because it avoids object code dependencies of UnicodeString on
1631    * the conversion code.
1632    *
1633    * @param start offset of first character which will be copied
1634    * @param startLength the number of characters to extract
1635    * @param target the target buffer for extraction
1636    * @param targetLength the length of the target buffer
1637    * @param codepage the desired codepage for the characters.  0 has
1638    * the special meaning of the default codepage.
1639    * If <code>codepage</code> is an empty string (<code>""</code>),
1640    * then a simple conversion is performed on the codepage-invariant
1641    * subset ("invariant characters") of the platform encoding. See utypes.h.
1642    * If <TT>target</TT> is NULL, then the number of bytes required for
1643    * <TT>target</TT> is returned.
1644    * @return the output string length, not including the terminating NUL
1645    * @stable ICU 2.0
1646    */
1647   int32_t extract(int32_t start,
1648            int32_t startLength,
1649            char *target,
1650            uint32_t targetLength,
1651            const char *codepage) const;
1652 
1653   /**
1654    * Convert the UnicodeString into a codepage string using an existing UConverter.
1655    * The output string is NUL-terminated if possible.
1656    *
1657    * This function avoids the overhead of opening and closing a converter if
1658    * multiple strings are extracted.
1659    *
1660    * @param dest destination string buffer, can be NULL if destCapacity==0
1661    * @param destCapacity the number of chars available at dest
1662    * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
1663    *        or NULL for the default converter
1664    * @param errorCode normal ICU error code
1665    * @return the length of the output string, not counting the terminating NUL;
1666    *         if the length is greater than destCapacity, then the string will not fit
1667    *         and a buffer of the indicated length would need to be passed in
1668    * @stable ICU 2.0
1669    */
1670   int32_t extract(char *dest, int32_t destCapacity,
1671                   UConverter *cnv,
1672                   UErrorCode &errorCode) const;
1673 
1674 #endif
1675 
1676   /**
1677    * Create a temporary substring for the specified range.
1678    * Unlike the substring constructor and setTo() functions,
1679    * the object returned here will be a read-only alias (using getBuffer())
1680    * rather than copying the text.
1681    * As a result, this substring operation is much faster but requires
1682    * that the original string not be modified or deleted during the lifetime
1683    * of the returned substring object.
1684    * @param start offset of the first character visible in the substring
1685    * @param length length of the substring
1686    * @return a read-only alias UnicodeString object for the substring
1687    * @stable ICU 4.4
1688    */
1689   UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
1690 
1691   /**
1692    * Create a temporary substring for the specified range.
1693    * Same as tempSubString(start, length) except that the substring range
1694    * is specified as a (start, limit) pair (with an exclusive limit index)
1695    * rather than a (start, length) pair.
1696    * @param start offset of the first character visible in the substring
1697    * @param limit offset immediately following the last character visible in the substring
1698    * @return a read-only alias UnicodeString object for the substring
1699    * @stable ICU 4.4
1700    */
1701   inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
1702 
1703   /**
1704    * Convert the UnicodeString to UTF-8 and write the result
1705    * to a ByteSink. This is called by toUTF8String().
1706    * Unpaired surrogates are replaced with U+FFFD.
1707    * Calls u_strToUTF8WithSub().
1708    *
1709    * @param sink A ByteSink to which the UTF-8 version of the string is written.
1710    *             sink.Flush() is called at the end.
1711    * @stable ICU 4.2
1712    * @see toUTF8String
1713    */
1714   void toUTF8(ByteSink &sink) const;
1715 
1716 #if U_HAVE_STD_STRING
1717 
1718   /**
1719    * Convert the UnicodeString to UTF-8 and append the result
1720    * to a standard string.
1721    * Unpaired surrogates are replaced with U+FFFD.
1722    * Calls toUTF8().
1723    *
1724    * @param result A standard string (or a compatible object)
1725    *        to which the UTF-8 version of the string is appended.
1726    * @return The string object.
1727    * @stable ICU 4.2
1728    * @see toUTF8
1729    */
1730   template<typename StringClass>
1731   StringClass &toUTF8String(StringClass &result) const {
1732     StringByteSink<StringClass> sbs(&result);  // ByteSink adapter that writes into the caller's string
1733     toUTF8(sbs);                               // performs the UTF-8 conversion into the sink
1734     return result;                             // hand back the same object to allow call chaining
1735   }
1736 
1737 #endif
1738 
1739   /**
1740    * Convert the UnicodeString to UTF-32.
1741    * Unpaired surrogates are replaced with U+FFFD.
1742    * Calls u_strToUTF32WithSub().
1743    *
1744    * @param utf32 destination string buffer, can be NULL if capacity==0
1745    * @param capacity the number of UChar32s available at utf32
1746    * @param errorCode Standard ICU error code. Its input value must
1747    *                  pass the U_SUCCESS() test, or else the function returns
1748    *                  immediately. Check for U_FAILURE() on output or use with
1749    *                  function chaining. (See User Guide for details.)
1750    * @return The length of the UTF-32 string.
1751    * @see fromUTF32
1752    * @stable ICU 4.2
1753    */
1754   int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
1755 
1756   /* Length operations */
1757 
1758   /**
1759    * Return the length of the UnicodeString object.
1760    * The length is the number of UChar code units that are in the UnicodeString.
1761    * If you want the number of code points, please use countChar32().
1762    * @return the length of the UnicodeString object
1763    * @see countChar32
1764    * @stable ICU 2.0
1765    */
1766   inline int32_t length(void) const;
1767 
1768   /**
1769    * Count Unicode code points in the length UChar code units of the string.
1770    * A code point may occupy either one or two UChar code units.
1771    * Counting code points involves reading all code units.
1772    *
1773    * This function is basically the inverse of moveIndex32().
1774    *
1775    * @param start the index of the first code unit to check
1776    * @param length the number of UChar code units to check
1777    * @return the number of code points in the specified code units
1778    * @see length
1779    * @stable ICU 2.0
1780    */
1781   int32_t
1782   countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
1783 
1784   /**
1785    * Check if the length UChar code units of the string
1786    * contain more Unicode code points than a certain number.
1787    * This is more efficient than counting all code points in this part of the string
1788    * and comparing that number with a threshold.
1789    * This function may not need to scan the string at all if the length
1790    * falls within a certain range, and
1791    * never needs to count more than 'number+1' code points.
1792    * Logically equivalent to (countChar32(start, length)>number).
1793    * A Unicode code point may occupy either one or two UChar code units.
1794    *
1795    * @param start the index of the first code unit to check (0 for the entire string)
1796    * @param length the number of UChar code units to check
1797    *               (use INT32_MAX for the entire string; remember that start/length
1798    *                values are pinned)
1799    * @param number The number of code points in the (sub)string is compared against
1800    *               the 'number' parameter.
1801    * @return Boolean value for whether the string contains more Unicode code points
1802    *         than 'number'. Same as (u_countChar32(s, length)>number).
1803    * @see countChar32
1804    * @see u_strHasMoreChar32Than
1805    * @stable ICU 2.4
1806    */
1807   UBool
1808   hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
1809 
1810   /**
1811    * Determine if this string is empty.
1812    * @return TRUE if this string contains 0 characters, FALSE otherwise.
1813    * @stable ICU 2.0
1814    */
1815   inline UBool isEmpty(void) const;
1816 
1817   /**
1818    * Return the capacity of the internal buffer of the UnicodeString object.
1819    * This is useful together with the getBuffer functions.
1820    * See there for details.
1821    *
1822    * @return the number of UChars available in the internal buffer
1823    * @see getBuffer
1824    * @stable ICU 2.0
1825    */
1826   inline int32_t getCapacity(void) const;
1827 
1828   /* Other operations */
1829 
1830   /**
1831    * Generate a hash code for this object.
1832    * @return The hash code of this UnicodeString.
1833    * @stable ICU 2.0
1834    */
1835   inline int32_t hashCode(void) const;
1836 
1837   /**
1838    * Determine if this object contains a valid string.
1839    * A bogus string has no value. It is different from an empty string,
1840    * although in both cases isEmpty() returns TRUE and length() returns 0.
1841    * setToBogus() and isBogus() can be used to indicate that no string value is available.
1842    * For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and
1843    * length() returns 0.
1844    *
1845    * @return TRUE if the string is bogus/invalid, FALSE otherwise
1846    * @see setToBogus()
1847    * @stable ICU 2.0
1848    */
1849   inline UBool isBogus(void) const;
1850 
1851 
1852   //========================================
1853   // Write operations
1854   //========================================
1855 
1856   /* Assignment operations */
1857 
1858   /**
1859    * Assignment operator.  Replace the characters in this UnicodeString
1860    * with the characters from <TT>srcText</TT>.
1861    *
1862    * Starting with ICU 2.4, the assignment operator and the copy constructor
1863    * allocate a new buffer and copy the buffer contents even for readonly aliases.
1864    * By contrast, the fastCopyFrom() function implements the old,
1865    * more efficient but less safe behavior
1866    * of making this string also a readonly alias to the same buffer.
1867    *
1868    * If the source object has an "open" buffer from getBuffer(minCapacity),
1869    * then the copy is an empty string.
1870    *
1871    * @param srcText The text containing the characters to replace
1872    * @return a reference to this
1873    * @stable ICU 2.0
1874    * @see fastCopyFrom
1875    */
1876   UnicodeString &operator=(const UnicodeString &srcText);
1877 
1878   /**
1879    * Almost the same as the assignment operator.
1880    * Replace the characters in this UnicodeString
1881    * with the characters from <code>srcText</code>.
1882    *
1883    * This function works the same as the assignment operator
1884    * for all strings except for ones that are readonly aliases.
1885    *
1886    * Starting with ICU 2.4, the assignment operator and the copy constructor
1887    * allocate a new buffer and copy the buffer contents even for readonly aliases.
1888    * This function implements the old, more efficient but less safe behavior
1889    * of making this string also a readonly alias to the same buffer.
1890    *
1891    * The fastCopyFrom function must be used only if it is known that the lifetime of
1892    * this UnicodeString does not exceed the lifetime of the aliased buffer
1893    * including its contents, for example for strings from resource bundles
1894    * or aliases to string constants.
1895    *
1896    * If the source object has an "open" buffer from getBuffer(minCapacity),
1897    * then the copy is an empty string.
1898    *
1899    * @param src The text containing the characters to replace.
1900    * @return a reference to this
1901    * @stable ICU 2.4
1902    */
1903   UnicodeString &fastCopyFrom(const UnicodeString &src);
1904 
1905 #ifndef U_HIDE_DRAFT_API
1906 #if U_HAVE_RVALUE_REFERENCES
1907   /**
1908    * Move assignment operator, might leave src in bogus state.
1909    * This string will have the same contents and state that the source string had.
1910    * The behavior is undefined if *this and src are the same object.
1911    * @param src source string
1912    * @return *this
1913    * @draft ICU 56
1914    */
1915   UnicodeString &operator=(UnicodeString &&src) U_NOEXCEPT {
1916     return moveFrom(src);  // delegate to moveFrom(), which takes src by non-const reference and returns *this
1917   }
1918 #endif
1919   /**
1920    * Move assignment, might leave src in bogus state.
1921    * This string will have the same contents and state that the source string had.
1922    * The behavior is undefined if *this and src are the same object.
1923    *
1924    * Can be called explicitly, does not need C++11 support.
1925    * @param src source string
1926    * @return *this
1927    * @draft ICU 56
1928    */
1929   UnicodeString &moveFrom(UnicodeString &src) U_NOEXCEPT;
1930 
1931   /**
1932    * Swap strings.
1933    * @param other other string
1934    * @draft ICU 56
1935    */
1936   void swap(UnicodeString &other) U_NOEXCEPT;
1937 
1938   /**
1939    * Non-member UnicodeString swap function.
1940    * @param s1 will get s2's contents and state
1941    * @param s2 will get s1's contents and state
1942    * @draft ICU 56
1943    */
1944   friend U_COMMON_API inline void U_EXPORT2
1945   swap(UnicodeString &s1, UnicodeString &s2) U_NOEXCEPT {
1946     s1.swap(s2);  // forward to the member swap() so both entry points share one implementation
1947   }
1948 #endif  /* U_HIDE_DRAFT_API */
1949 
1950   /**
1951    * Assignment operator.  Replace the characters in this UnicodeString
1952    * with the code unit <TT>ch</TT>.
1953    * @param ch the code unit to replace
1954    * @return a reference to this
1955    * @stable ICU 2.0
1956    */
1957   inline UnicodeString& operator= (UChar ch);
1958 
1959   /**
1960    * Assignment operator.  Replace the characters in this UnicodeString
1961    * with the code point <TT>ch</TT>.
1962    * @param ch the code point to replace
1963    * @return a reference to this
1964    * @stable ICU 2.0
1965    */
1966   inline UnicodeString& operator= (UChar32 ch);
1967 
1968   /**
1969    * Set the text in the UnicodeString object to the characters
1970    * in <TT>srcText</TT> in the range
1971    * [<TT>srcStart</TT>, <TT>srcText.length()</TT>).
1972    * <TT>srcText</TT> is not modified.
1973    * @param srcText the source for the new characters
1974    * @param srcStart the offset into <TT>srcText</TT> where new characters
1975    * will be obtained
1976    * @return a reference to this
1977    * @stable ICU 2.2
1978    */
1979   inline UnicodeString& setTo(const UnicodeString& srcText,
1980                int32_t srcStart);
1981 
1982   /**
1983    * Set the text in the UnicodeString object to the characters
1984    * in <TT>srcText</TT> in the range
1985    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
1986    * <TT>srcText</TT> is not modified.
1987    * @param srcText the source for the new characters
1988    * @param srcStart the offset into <TT>srcText</TT> where new characters
1989    * will be obtained
1990    * @param srcLength the number of characters in <TT>srcText</TT> in the
1991    * replace string.
1992    * @return a reference to this
1993    * @stable ICU 2.0
1994    */
1995   inline UnicodeString& setTo(const UnicodeString& srcText,
1996                int32_t srcStart,
1997                int32_t srcLength);
1998 
1999   /**
2000    * Set the text in the UnicodeString object to the characters in
2001    * <TT>srcText</TT>.
2002    * <TT>srcText</TT> is not modified.
2003    * @param srcText the source for the new characters
2004    * @return a reference to this
2005    * @stable ICU 2.0
2006    */
2007   inline UnicodeString& setTo(const UnicodeString& srcText);
2008 
2009   /**
2010    * Set the characters in the UnicodeString object to the characters
2011    * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
2012    * @param srcChars the source for the new characters
2013    * @param srcLength the number of Unicode characters in srcChars.
2014    * @return a reference to this
2015    * @stable ICU 2.0
2016    */
2017   inline UnicodeString& setTo(const UChar *srcChars,
2018                int32_t srcLength);
2019 
2020   /**
2021    * Set the characters in the UnicodeString object to the code unit
2022    * <TT>srcChar</TT>.
2023    * @param srcChar the code unit which becomes the UnicodeString's character
2024    * content
2025    * @return a reference to this
2026    * @stable ICU 2.0
2027    */
2028   UnicodeString& setTo(UChar srcChar);
2029 
2030   /**
2031    * Set the characters in the UnicodeString object to the code point
2032    * <TT>srcChar</TT>.
2033    * @param srcChar the code point which becomes the UnicodeString's character
2034    * content
2035    * @return a reference to this
2036    * @stable ICU 2.0
2037    */
2038   UnicodeString& setTo(UChar32 srcChar);
2039 
2040   /**
2041    * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor.
2042    * The text will be used for the UnicodeString object, but
2043    * it will not be released when the UnicodeString is destroyed.
2044    * This has copy-on-write semantics:
2045    * When the string is modified, then the buffer is first copied into
2046    * newly allocated memory.
2047    * The aliased buffer is never modified.
2048    *
2049    * In an assignment to another UnicodeString, when using the copy constructor
2050    * or the assignment operator, the text will be copied.
2051    * When using fastCopyFrom(), the text will be aliased again,
2052    * so that both strings then alias the same readonly-text.
2053    *
2054    * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
2055    *                     This must be true if <code>textLength==-1</code>.
2056    * @param text The characters to alias for the UnicodeString.
2057    * @param textLength The number of Unicode characters in <code>text</code> to alias.
2058    *                   If -1, then this function will determine the length
2059    *                   by calling <code>u_strlen()</code>.
2060    * @return a reference to this
2061    * @stable ICU 2.0
2062    */
2063   UnicodeString &setTo(UBool isTerminated,
2064                        const UChar *text,
2065                        int32_t textLength);
2066 
2067   /**
2068    * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor.
2069    * The text will be used for the UnicodeString object, but
2070    * it will not be released when the UnicodeString is destroyed.
2071    * This has write-through semantics:
2072    * For as long as the capacity of the buffer is sufficient, write operations
2073    * will directly affect the buffer. When more capacity is necessary, then
2074    * a new buffer will be allocated and the contents copied as with regularly
2075    * constructed strings.
2076    * In an assignment to another UnicodeString, the buffer will be copied.
2077    * The extract(UChar *dst) function detects whether the dst pointer is the same
2078    * as the string buffer itself and will in this case not copy the contents.
2079    *
2080    * @param buffer The characters to alias for the UnicodeString.
2081    * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
2082    * @param buffCapacity The size of <code>buffer</code> in UChars.
2083    * @return a reference to this
2084    * @stable ICU 2.0
2085    */
2086   UnicodeString &setTo(UChar *buffer,
2087                        int32_t buffLength,
2088                        int32_t buffCapacity);
2089 
2090   /**
2091    * Make this UnicodeString object invalid.
2092    * The string will test TRUE with isBogus().
2093    *
2094    * A bogus string has no value. It is different from an empty string.
2095    * It can be used to indicate that no string value is available.
2096    * getBuffer() and getTerminatedBuffer() return NULL, and
2097    * length() returns 0.
2098    *
2099    * This utility function is used throughout the UnicodeString
2100    * implementation to indicate that a UnicodeString operation failed,
2101    * and may be used in other functions,
2102    * especially but not exclusively when such functions do not
2103    * take a UErrorCode for simplicity.
2104    *
2105    * The following methods, and no others, will clear a string object's bogus flag:
2106    * - remove()
2107    * - remove(0, INT32_MAX)
2108    * - truncate(0)
2109    * - operator=() (assignment operator)
2110    * - setTo(...)
2111    *
2112    * The simplest way to turn a bogus string into an empty one
2113    * is to use the remove() function.
2114    * Examples for other functions that are equivalent to "set to empty string":
2115    * \code
2116    * if(s.isBogus()) {
2117    *   s.remove();           // set to an empty string (remove all), or
2118    *   s.remove(0, INT32_MAX); // set to an empty string (remove all), or
2119    *   s.truncate(0);        // set to an empty string (complete truncation), or
2120    *   s=UnicodeString();    // assign an empty string, or
2121    *   s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
2122    *   static const UChar nul=0;
2123    *   s.setTo(&nul, 0);     // set to an empty C Unicode string
2124    * }
2125    * \endcode
2126    *
2127    * @see isBogus()
2128    * @stable ICU 2.0
2129    */
2130   void setToBogus();
2131 
2132   /**
2133    * Set the character at the specified offset to the specified character.
2134    * @param offset A valid offset into the text of the character to set
2135    * @param ch The new character
2136    * @return A reference to this
2137    * @stable ICU 2.0
2138    */
2139   UnicodeString& setCharAt(int32_t offset,
2140                UChar ch);
2141 
2142 
2143   /* Append operations */
2144 
2145   /**
2146    * Append operator. Append the code unit <TT>ch</TT> to the UnicodeString
2147    * object.
2148    * @param ch the code unit to be appended
2149    * @return a reference to this
2150    * @stable ICU 2.0
2151    */
2152  inline  UnicodeString& operator+= (UChar ch);
2153 
2154   /**
2155    * Append operator. Append the code point <TT>ch</TT> to the UnicodeString
2156    * object.
2157    * @param ch the code point to be appended
2158    * @return a reference to this
2159    * @stable ICU 2.0
2160    */
2161  inline  UnicodeString& operator+= (UChar32 ch);
2162 
2163   /**
2164    * Append operator. Append the characters in <TT>srcText</TT> to the
2165    * UnicodeString object. <TT>srcText</TT> is not modified.
2166    * @param srcText the source for the new characters
2167    * @return a reference to this
2168    * @stable ICU 2.0
2169    */
2170   inline UnicodeString& operator+= (const UnicodeString& srcText);
2171 
2172   /**
2173    * Append the characters
2174    * in <TT>srcText</TT> in the range
2175    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the
2176    * UnicodeString object. <TT>srcText</TT>
2177    * is not modified.
2178    * @param srcText the source for the new characters
2179    * @param srcStart the offset into <TT>srcText</TT> where new characters
2180    * will be obtained
2181    * @param srcLength the number of characters in <TT>srcText</TT> in
2182    * the append string
2183    * @return a reference to this
2184    * @stable ICU 2.0
2185    */
2186   inline UnicodeString& append(const UnicodeString& srcText,
2187             int32_t srcStart,
2188             int32_t srcLength);
2189 
2190   /**
2191    * Append the characters in <TT>srcText</TT> to the UnicodeString object.
2192    * <TT>srcText</TT> is not modified.
2193    * @param srcText the source for the new characters
2194    * @return a reference to this
2195    * @stable ICU 2.0
2196    */
2197   inline UnicodeString& append(const UnicodeString& srcText);
2198 
2199   /**
2200    * Append the characters in <TT>srcChars</TT> in the range
2201    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the UnicodeString
2202    * object.
2203    * <TT>srcChars</TT> is not modified.
2204    * @param srcChars the source for the new characters
2205    * @param srcStart the offset into <TT>srcChars</TT> where new characters
2206    * will be obtained
2207    * @param srcLength the number of characters in <TT>srcChars</TT> in
2208    *                  the append string; can be -1 if <TT>srcChars</TT> is NUL-terminated
2209    * @return a reference to this
2210    * @stable ICU 2.0
2211    */
2212   inline UnicodeString& append(const UChar *srcChars,
2213             int32_t srcStart,
2214             int32_t srcLength);
2215 
2216   /**
2217    * Append the characters in <TT>srcChars</TT> to the UnicodeString object.
2218    * <TT>srcChars</TT> is not modified.
2219    * @param srcChars the source for the new characters
2220    * @param srcLength the number of Unicode characters in <TT>srcChars</TT>;
2221    *                  can be -1 if <TT>srcChars</TT> is NUL-terminated
2222    * @return a reference to this
2223    * @stable ICU 2.0
2224    */
2225   inline UnicodeString& append(const UChar *srcChars,
2226             int32_t srcLength);
2227 
2228   /**
2229    * Append the code unit <TT>srcChar</TT> to the UnicodeString object.
2230    * @param srcChar the code unit to append
2231    * @return a reference to this
2232    * @stable ICU 2.0
2233    */
2234   inline UnicodeString& append(UChar srcChar);
2235 
2236   /**
2237    * Append the code point <TT>srcChar</TT> to the UnicodeString object.
2238    * @param srcChar the code point to append
2239    * @return a reference to this
2240    * @stable ICU 2.0
2241    */
2242   UnicodeString& append(UChar32 srcChar);
2243 
2244 
2245   /* Insert operations */
2246 
2247   /**
2248    * Insert the characters in <TT>srcText</TT> in the range
2249    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
2250    * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
2251    * @param start the offset where the insertion begins
2252    * @param srcText the source for the new characters
2253    * @param srcStart the offset into <TT>srcText</TT> where new characters
2254    * will be obtained
2255    * @param srcLength the number of characters in <TT>srcText</TT> in
2256    * the insert string
2257    * @return a reference to this
2258    * @stable ICU 2.0
2259    */
2260   inline UnicodeString& insert(int32_t start,
2261             const UnicodeString& srcText,
2262             int32_t srcStart,
2263             int32_t srcLength);
2264 
2265   /**
2266    * Insert the characters in <TT>srcText</TT> into the UnicodeString object
2267    * at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
2268    * @param start the offset where the insertion begins
2269    * @param srcText the source for the new characters
2270    * @return a reference to this
2271    * @stable ICU 2.0
2272    */
2273   inline UnicodeString& insert(int32_t start,
2274             const UnicodeString& srcText);
2275 
2276   /**
2277    * Insert the characters in <TT>srcChars</TT> in the range
2278    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
2279    *  object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
2280    * @param start the offset at which the insertion begins
2281    * @param srcChars the source for the new characters
2282    * @param srcStart the offset into <TT>srcChars</TT> where new characters
2283    * will be obtained
2284    * @param srcLength the number of characters in <TT>srcChars</TT>
2285    * in the insert string
2286    * @return a reference to this
2287    * @stable ICU 2.0
2288    */
2289   inline UnicodeString& insert(int32_t start,
2290             const UChar *srcChars,
2291             int32_t srcStart,
2292             int32_t srcLength);
2293 
2294   /**
2295    * Insert the characters in <TT>srcChars</TT> into the UnicodeString object
2296    * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
2297    * @param start the offset where the insertion begins
2298    * @param srcChars the source for the new characters
2299    * @param srcLength the number of Unicode characters in srcChars.
2300    * @return a reference to this
2301    * @stable ICU 2.0
2302    */
2303   inline UnicodeString& insert(int32_t start,
2304             const UChar *srcChars,
2305             int32_t srcLength);
2306 
2307   /**
2308    * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at
2309    * offset <TT>start</TT>.
2310    * @param start the offset at which the insertion occurs
2311    * @param srcChar the code unit to insert
2312    * @return a reference to this
2313    * @stable ICU 2.0
2314    */
2315   inline UnicodeString& insert(int32_t start,
2316             UChar srcChar);
2317 
2318   /**
2319    * Insert the code point <TT>srcChar</TT> into the UnicodeString object at
2320    * offset <TT>start</TT>.
2321    * @param start the offset at which the insertion occurs
2322    * @param srcChar the code point to insert
2323    * @return a reference to this
2324    * @stable ICU 2.0
2325    */
2326   inline UnicodeString& insert(int32_t start,
2327             UChar32 srcChar);
2328 
2329 
2330   /* Replace operations */
2331 
2332   /**
2333    * Replace the characters in the range
2334    * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2335    * <TT>srcText</TT> in the range
2336    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
2337    * <TT>srcText</TT> is not modified.
2338    * @param start the offset at which the replace operation begins
2339    * @param length the number of characters to replace. The character at
2340    * <TT>start + length</TT> is not modified.
2341    * @param srcText the source for the new characters
2342    * @param srcStart the offset into <TT>srcText</TT> where new characters
2343    * will be obtained
2344    * @param srcLength the number of characters in <TT>srcText</TT> in
2345    * the replace string
2346    * @return a reference to this
2347    * @stable ICU 2.0
2348    */
2349   UnicodeString& replace(int32_t start,
2350              int32_t length,
2351              const UnicodeString& srcText,
2352              int32_t srcStart,
2353              int32_t srcLength);
2354 
2355   /**
2356    * Replace the characters in the range
2357    * [<TT>start</TT>, <TT>start + length</TT>)
2358    * with the characters in <TT>srcText</TT>.  <TT>srcText</TT> is
2359    *  not modified.
2360    * @param start the offset at which the replace operation begins
2361    * @param length the number of characters to replace. The character at
2362    * <TT>start + length</TT> is not modified.
2363    * @param srcText the source for the new characters
2364    * @return a reference to this
2365    * @stable ICU 2.0
2366    */
2367   UnicodeString& replace(int32_t start,
2368              int32_t length,
2369              const UnicodeString& srcText);
2370 
2371   /**
2372    * Replace the characters in the range
2373    * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2374    * <TT>srcChars</TT> in the range
2375    * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT>
2376    * is not modified.
2377    * @param start the offset at which the replace operation begins
2378    * @param length the number of characters to replace.  The character at
2379    * <TT>start + length</TT> is not modified.
2380    * @param srcChars the source for the new characters
2381    * @param srcStart the offset into <TT>srcChars</TT> where new characters
2382    * will be obtained
2383    * @param srcLength the number of characters in <TT>srcChars</TT>
2384    * in the replace string
2385    * @return a reference to this
2386    * @stable ICU 2.0
2387    */
2388   UnicodeString& replace(int32_t start,
2389              int32_t length,
2390              const UChar *srcChars,
2391              int32_t srcStart,
2392              int32_t srcLength);
2393 
2394   /**
2395    * Replace the characters in the range
2396    * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2397    * <TT>srcChars</TT>.  <TT>srcChars</TT> is not modified.
2398    * @param start the offset at which the replace operation begins
2399    * @param length number of characters to replace.  The character at
2400    * <TT>start + length</TT> is not modified.
2401    * @param srcChars the source for the new characters
2402    * @param srcLength the number of Unicode characters in srcChars
2403    * @return a reference to this
2404    * @stable ICU 2.0
2405    */
2406   inline UnicodeString& replace(int32_t start,
2407              int32_t length,
2408              const UChar *srcChars,
2409              int32_t srcLength);
2410 
2411   /**
2412    * Replace the characters in the range
2413    * [<TT>start</TT>, <TT>start + length</TT>) with the code unit
2414    * <TT>srcChar</TT>.
2415    * @param start the offset at which the replace operation begins
2416    * @param length the number of characters to replace.  The character at
2417    * <TT>start + length</TT> is not modified.
2418    * @param srcChar the new code unit
2419    * @return a reference to this
2420    * @stable ICU 2.0
2421    */
2422   inline UnicodeString& replace(int32_t start,
2423              int32_t length,
2424              UChar srcChar);
2425 
2426   /**
2427    * Replace the characters in the range
2428    * [<TT>start</TT>, <TT>start + length</TT>) with the code point
2429    * <TT>srcChar</TT>.
2430    * @param start the offset at which the replace operation begins
2431    * @param length the number of characters to replace.  The character at
2432    * <TT>start + length</TT> is not modified.
2433    * @param srcChar the new code point
2434    * @return a reference to this
2435    * @stable ICU 2.0
2436    */
2437   UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
2438 
2439   /**
2440    * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
2441    * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified.
2442    * @param start the offset at which the replace operation begins
2443    * @param limit the offset immediately following the replace range
2444    * @param srcText the source for the new characters
2445    * @return a reference to this
2446    * @stable ICU 2.0
2447    */
2448   inline UnicodeString& replaceBetween(int32_t start,
2449                 int32_t limit,
2450                 const UnicodeString& srcText);
2451 
2452   /**
2453    * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
2454    * with the characters in <TT>srcText</TT> in the range
2455    * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified.
2456    * @param start the offset at which the replace operation begins
2457    * @param limit the offset immediately following the replace range
2458    * @param srcText the source for the new characters
2459    * @param srcStart the offset into <TT>srcText</TT> where new characters
2460    * will be obtained
2461    * @param srcLimit the offset immediately following the range to copy
2462    * in <TT>srcText</TT>
2463    * @return a reference to this
2464    * @stable ICU 2.0
2465    */
2466   inline UnicodeString& replaceBetween(int32_t start,
2467                 int32_t limit,
2468                 const UnicodeString& srcText,
2469                 int32_t srcStart,
2470                 int32_t srcLimit);
2471 
2472   /**
2473    * Replace a substring of this object with the given text.
2474    * @param start the beginning index, inclusive; <code>0 <= start
2475    * <= limit</code>.
2476    * @param limit the ending index, exclusive; <code>start <= limit
2477    * <= length()</code>.
2478    * @param text the text to replace characters <code>start</code>
2479    * to <code>limit - 1</code>
2480    * @stable ICU 2.0
2481    */
2482   virtual void handleReplaceBetween(int32_t start,
2483                                     int32_t limit,
2484                                     const UnicodeString& text);
2485 
2486   /**
2487    * Replaceable API
2488    * @return TRUE if it has MetaData
2489    * @stable ICU 2.4
2490    */
2491   virtual UBool hasMetaData() const;
2492 
2493   /**
2494    * Copy a substring of this object, retaining attribute (out-of-band)
2495    * information.  This method is used to duplicate or reorder substrings.
2496    * The destination index must not overlap the source range.
2497    *
2498    * @param start the beginning index, inclusive; <code>0 <= start <=
2499    * limit</code>.
2500    * @param limit the ending index, exclusive; <code>start <= limit <=
2501    * length()</code>.
2502    * @param dest the destination index.  The characters from
2503    * <code>start..limit-1</code> will be copied to <code>dest</code>.
2504    * Implementations of this method may assume that <code>dest <= start ||
2505    * dest >= limit</code>.
2506    * @stable ICU 2.0
2507    */
2508   virtual void copy(int32_t start, int32_t limit, int32_t dest);
2509 
2510   /* Search and replace operations */
2511 
2512   /**
2513    * Replace all occurrences of characters in oldText with the characters
2514    * in newText
2515    * @param oldText the text containing the search text
2516    * @param newText the text containing the replacement text
2517    * @return a reference to this
2518    * @stable ICU 2.0
2519    */
2520   inline UnicodeString& findAndReplace(const UnicodeString& oldText,
2521                 const UnicodeString& newText);
2522 
2523   /**
2524    * Replace all occurrences of characters in oldText with characters
2525    * in newText
2526    * in the range [<TT>start</TT>, <TT>start + length</TT>).
2527    * @param start the start of the range in which replace will be performed
2528    * @param length the length of the range in which replace will be performed
2529    * @param oldText the text containing the search text
2530    * @param newText the text containing the replacement text
2531    * @return a reference to this
2532    * @stable ICU 2.0
2533    */
2534   inline UnicodeString& findAndReplace(int32_t start,
2535                 int32_t length,
2536                 const UnicodeString& oldText,
2537                 const UnicodeString& newText);
2538 
2539   /**
2540    * Replace all occurrences of characters in oldText in the range
2541    * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters
2542    * in newText in the range
2543    * [<TT>newStart</TT>, <TT>newStart + newLength</TT>)
2544    * in the range [<TT>start</TT>, <TT>start + length</TT>).
2545    * @param start the start of the range in which replace will be performed
2546    * @param length the length of the range in which replace will be performed
2547    * @param oldText the text containing the search text
2548    * @param oldStart the start of the search range in <TT>oldText</TT>
2549    * @param oldLength the length of the search range in <TT>oldText</TT>
2550    * @param newText the text containing the replacement text
2551    * @param newStart the start of the replacement range in <TT>newText</TT>
2552    * @param newLength the length of the replacement range in <TT>newText</TT>
2553    * @return a reference to this
2554    * @stable ICU 2.0
2555    */
2556   UnicodeString& findAndReplace(int32_t start,
2557                 int32_t length,
2558                 const UnicodeString& oldText,
2559                 int32_t oldStart,
2560                 int32_t oldLength,
2561                 const UnicodeString& newText,
2562                 int32_t newStart,
2563                 int32_t newLength);
2564 
2565 
2566   /* Remove operations */
2567 
2568   /**
2569    * Remove all characters from the UnicodeString object.
2570    * @return a reference to this
2571    * @stable ICU 2.0
2572    */
2573   inline UnicodeString& remove(void);
2574 
2575   /**
2576    * Remove the characters in the range
2577    * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object.
2578    * @param start the offset of the first character to remove
2579    * @param length the number of characters to remove
2580    * @return a reference to this
2581    * @stable ICU 2.0
2582    */
2583   inline UnicodeString& remove(int32_t start,
2584                                int32_t length = (int32_t)INT32_MAX);
2585 
2586   /**
2587    * Remove the characters in the range
2588    * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object.
2589    * @param start the offset of the first character to remove
2590    * @param limit the offset immediately following the range to remove
2591    * @return a reference to this
2592    * @stable ICU 2.0
2593    */
2594   inline UnicodeString& removeBetween(int32_t start,
2595                                       int32_t limit = (int32_t)INT32_MAX);
2596 
2597   /**
2598    * Retain only the characters in the range
2599    * [<code>start</code>, <code>limit</code>) from the UnicodeString object.
2600    * Removes characters before <code>start</code> and at and after <code>limit</code>.
2601    * @param start the offset of the first character to retain
2602    * @param limit the offset immediately following the range to retain
2603    * @return a reference to this
2604    * @stable ICU 4.4
2605    */
2606   inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
2607 
2608   /* Length operations */
2609 
2610   /**
2611    * Pad the start of this UnicodeString with the character <TT>padChar</TT>.
2612    * If the length of this UnicodeString is less than targetLength,
2613    * targetLength - length() copies of padChar will be added to the
2614    * beginning of this UnicodeString.
2615    * @param targetLength the desired length of the string
2616    * @param padChar the character to use for padding. Defaults to
2617    * space (U+0020)
2618    * @return TRUE if the text was padded, FALSE otherwise.
2619    * @stable ICU 2.0
2620    */
2621   UBool padLeading(int32_t targetLength,
2622                     UChar padChar = 0x0020);
2623 
2624   /**
2625    * Pad the end of this UnicodeString with the character <TT>padChar</TT>.
2626    * If the length of this UnicodeString is less than targetLength,
2627    * targetLength - length() copies of padChar will be added to the
2628    * end of this UnicodeString.
2629    * @param targetLength the desired length of the string
2630    * @param padChar the character to use for padding. Defaults to
2631    * space (U+0020)
2632    * @return TRUE if the text was padded, FALSE otherwise.
2633    * @stable ICU 2.0
2634    */
2635   UBool padTrailing(int32_t targetLength,
2636                      UChar padChar = 0x0020);
2637 
2638   /**
2639    * Truncate this UnicodeString to the <TT>targetLength</TT>.
2640    * @param targetLength the desired length of this UnicodeString.
2641    * @return TRUE if the text was truncated, FALSE otherwise
2642    * @stable ICU 2.0
2643    */
2644   inline UBool truncate(int32_t targetLength);
2645 
2646   /**
2647    * Trims leading and trailing whitespace from this UnicodeString.
2648    * @return a reference to this
2649    * @stable ICU 2.0
2650    */
2651   UnicodeString& trim(void);
2652 
2653 
2654   /* Miscellaneous operations */
2655 
2656   /**
2657    * Reverse this UnicodeString in place.
2658    * @return a reference to this
2659    * @stable ICU 2.0
2660    */
2661   inline UnicodeString& reverse(void);
2662 
2663   /**
2664    * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in
2665    * this UnicodeString.
2666    * @param start the start of the range to reverse
2667    * @param length the number of characters to reverse
2668    * @return a reference to this
2669    * @stable ICU 2.0
2670    */
2671   inline UnicodeString& reverse(int32_t start,
2672              int32_t length);
2673 
2674   /**
2675    * Convert the characters in this to UPPER CASE following the conventions of
2676    * the default locale.
2677    * @return A reference to this.
2678    * @stable ICU 2.0
2679    */
2680   UnicodeString& toUpper(void);
2681 
2682   /**
2683    * Convert the characters in this to UPPER CASE following the conventions of
2684    * a specific locale.
2685    * @param locale The locale containing the conventions to use.
2686    * @return A reference to this.
2687    * @stable ICU 2.0
2688    */
2689   UnicodeString& toUpper(const Locale& locale);
2690 
2691   /**
2692    * Convert the characters in this to lower case following the conventions of
2693    * the default locale.
2694    * @return A reference to this.
2695    * @stable ICU 2.0
2696    */
2697   UnicodeString& toLower(void);
2698 
2699   /**
2700    * Convert the characters in this to lower case following the conventions of
2701    * a specific locale.
2702    * @param locale The locale containing the conventions to use.
2703    * @return A reference to this.
2704    * @stable ICU 2.0
2705    */
2706   UnicodeString& toLower(const Locale& locale);
2707 
2708 #if !UCONFIG_NO_BREAK_ITERATION
2709 
2710   /**
2711    * Titlecase this string, convenience function using the default locale.
2712    *
2713    * Casing is locale-dependent and context-sensitive.
2714    * Titlecasing uses a break iterator to find the first characters of words
2715    * that are to be titlecased. It titlecases those characters and lowercases
2716    * all others.
2717    *
2718    * The titlecase break iterator can be provided to customize for arbitrary
2719    * styles, using rules and dictionaries beyond the standard iterators.
2720    * It may be more efficient to always provide an iterator to avoid
2721    * opening and closing one for each string.
2722    * The standard titlecase iterator for the root locale implements the
2723    * algorithm of Unicode TR 21.
2724    *
2725    * This function uses only the setText(), first() and next() methods of the
2726    * provided break iterator.
2727    *
2728    * @param titleIter A break iterator to find the first characters of words
2729    *                  that are to be titlecased.
2730    *                  If none is provided (0), then a standard titlecase
2731    *                  break iterator is opened.
2732    *                  Otherwise the provided iterator is set to the string's text.
2733    * @return A reference to this.
2734    * @stable ICU 2.1
2735    */
2736   UnicodeString &toTitle(BreakIterator *titleIter);
2737 
2738   /**
2739    * Titlecase this string.
2740    *
2741    * Casing is locale-dependent and context-sensitive.
2742    * Titlecasing uses a break iterator to find the first characters of words
2743    * that are to be titlecased. It titlecases those characters and lowercases
2744    * all others.
2745    *
2746    * The titlecase break iterator can be provided to customize for arbitrary
2747    * styles, using rules and dictionaries beyond the standard iterators.
2748    * It may be more efficient to always provide an iterator to avoid
2749    * opening and closing one for each string.
2750    * The standard titlecase iterator for the root locale implements the
2751    * algorithm of Unicode TR 21.
2752    *
2753    * This function uses only the setText(), first() and next() methods of the
2754    * provided break iterator.
2755    *
2756    * @param titleIter A break iterator to find the first characters of words
2757    *                  that are to be titlecased.
2758    *                  If none is provided (0), then a standard titlecase
2759    *                  break iterator is opened.
2760    *                  Otherwise the provided iterator is set to the string's text.
2761    * @param locale    The locale to consider.
2762    * @return A reference to this.
2763    * @stable ICU 2.1
2764    */
2765   UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
2766 
2767   /**
2768    * Titlecase this string, with options.
2769    *
2770    * Casing is locale-dependent and context-sensitive.
2771    * Titlecasing uses a break iterator to find the first characters of words
2772    * that are to be titlecased. It titlecases those characters and lowercases
2773    * all others. (This can be modified with options.)
2774    *
2775    * The titlecase break iterator can be provided to customize for arbitrary
2776    * styles, using rules and dictionaries beyond the standard iterators.
2777    * It may be more efficient to always provide an iterator to avoid
2778    * opening and closing one for each string.
2779    * The standard titlecase iterator for the root locale implements the
2780    * algorithm of Unicode TR 21.
2781    *
2782    * This function uses only the setText(), first() and next() methods of the
2783    * provided break iterator.
2784    *
2785    * @param titleIter A break iterator to find the first characters of words
2786    *                  that are to be titlecased.
2787    *                  If none is provided (0), then a standard titlecase
2788    *                  break iterator is opened.
2789    *                  Otherwise the provided iterator is set to the string's text.
2790    * @param locale    The locale to consider.
2791    * @param options Options bit set, see ucasemap_open().
2792    * @return A reference to this.
2793    * @see U_TITLECASE_NO_LOWERCASE
2794    * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
2795    * @see ucasemap_open
2796    * @stable ICU 3.8
2797    */
2798   UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
2799 
2800 #endif
2801 
2802   /**
2803    * Case-folds the characters in this string.
2804    *
2805    * Case-folding is locale-independent and not context-sensitive,
2806    * but there is an option for whether to include or exclude mappings for dotted I
2807    * and dotless i that are marked with 'T' in CaseFolding.txt.
2808    *
2809    * The result may be longer or shorter than the original.
2810    *
2811    * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
2812    * @return A reference to this.
2813    * @stable ICU 2.0
2814    */
2815   UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
2816 
2817   //========================================
2818   // Access to the internal buffer
2819   //========================================
2820 
2821   /**
2822    * Get a read/write pointer to the internal buffer.
2823    * The buffer is guaranteed to be large enough for at least minCapacity UChars,
2824    * writable, and is still owned by the UnicodeString object.
2825    * Calls to getBuffer(minCapacity) must not be nested, and
2826    * must be matched with calls to releaseBuffer(newLength).
2827    * If the string buffer was read-only or shared,
2828    * then it will be reallocated and copied.
2829    *
2830    * An attempted nested call will return 0, and will not further modify the
2831    * state of the UnicodeString object.
2832    * It also returns 0 if the string is bogus.
2833    *
2834    * The actual capacity of the string buffer may be larger than minCapacity.
2835    * getCapacity() returns the actual capacity.
2836    * For many operations, the full capacity should be used to avoid reallocations.
2837    *
2838    * While the buffer is "open" between getBuffer(minCapacity)
2839    * and releaseBuffer(newLength), the following applies:
2840    * - The string length is set to 0.
2841    * - Any read API call on the UnicodeString object will behave like on a 0-length string.
2842    * - Any write API call on the UnicodeString object is disallowed and will have no effect.
2843    * - You can read from and write to the returned buffer.
2844    * - The previous string contents will still be in the buffer;
2845    *   if you want to use it, then you need to call length() before getBuffer(minCapacity).
2846    *   If the length() was greater than minCapacity, then any contents after minCapacity
2847    *   may be lost.
2848    *   The buffer contents are not NUL-terminated by getBuffer().
2849    *   If length()<getCapacity() then you can terminate it by writing a NUL
2850    *   at index length().
2851    * - You must call releaseBuffer(newLength) in order to
2852    *   return to normal UnicodeString operation.
2853    *
2854    * @param minCapacity the minimum number of UChars that are to be available
2855    *        in the buffer, starting at the returned pointer;
2856    *        default to the current string capacity if minCapacity==-1
2857    * @return a writable pointer to the internal string buffer,
2858    *         or 0 if an error occurs (nested calls, out of memory, bogus string)
2859    *
2860    * @see releaseBuffer
2861    * @see getTerminatedBuffer()
2862    * @stable ICU 2.0
2863    */
2864   UChar *getBuffer(int32_t minCapacity);
2865 
2866   /**
2867    * Release a read/write buffer on a UnicodeString object with an
2868    * "open" getBuffer(minCapacity).
2869    * This function must be called in a matched pair with getBuffer(minCapacity).
2870    * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
2871    *
2872    * It will set the string length to newLength, at most to the current capacity.
2873    * If newLength==-1 then it will set the length according to the
2874    * first NUL in the buffer, or to the capacity if there is no NUL.
2875    *
2876    * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
2877    *
2878    * @param newLength the new length of the UnicodeString object;
2879    *        defaults to the current capacity if newLength is greater than that;
2880    *        if newLength==-1, it defaults to u_strlen(buffer) but not more than
2881    *        the current capacity of the string
2882    *
2883    * @see getBuffer(int32_t minCapacity)
2884    * @stable ICU 2.0
2885    */
2886   void releaseBuffer(int32_t newLength=-1);
2887 
2888   /**
2889    * Get a read-only pointer to the internal buffer.
2890    * This can be called at any time on a valid UnicodeString.
2891    *
2892    * It returns 0 if the string is bogus, or
2893    * during an "open" getBuffer(minCapacity).
2894    *
2895    * It can be called as many times as desired.
2896    * The pointer that it returns will remain valid until the UnicodeString object is modified,
2897    * at which time the pointer is semantically invalidated and must not be used any more.
2898    *
2899    * The capacity of the buffer can be determined with getCapacity().
2900    * The part after length() may or may not be initialized and valid,
2901    * depending on the history of the UnicodeString object.
2902    *
2903    * The buffer contents are (probably) not NUL-terminated.
2904    * You can check if they are with
2905    * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>.
2906    * (See getTerminatedBuffer().)
2907    *
2908    * The buffer may reside in read-only memory. Its contents must not
2909    * be modified.
2910    *
2911    * @return a read-only pointer to the internal string buffer,
2912    *         or 0 if the string is bogus or has an "open" getBuffer(minCapacity)
2913    *
2914    * @see getBuffer(int32_t minCapacity)
2915    * @see getTerminatedBuffer()
2916    * @stable ICU 2.0
2917    */
2918   inline const UChar *getBuffer() const;
2919 
2920   /**
2921    * Get a read-only pointer to the internal buffer,
2922    * making sure that it is NUL-terminated.
2923    * This can be called at any time on a valid UnicodeString.
2924    *
2925    * It returns 0 if the string is bogus, or
2926    * during an "open" getBuffer(minCapacity), or if the buffer cannot
2927    * be NUL-terminated (because memory allocation failed).
2928    *
2929    * It can be called as many times as desired.
2930    * The pointer that it returns will remain valid until the UnicodeString object is modified,
2931    * at which time the pointer is semantically invalidated and must not be used any more.
2932    *
2933    * The capacity of the buffer can be determined with getCapacity().
2934    * The part after length()+1 may or may not be initialized and valid,
2935    * depending on the history of the UnicodeString object.
2936    *
2937    * The buffer contents are guaranteed to be NUL-terminated.
2938    * getTerminatedBuffer() may reallocate the buffer if a terminating NUL
2939    * is written.
2940    * For this reason, this function is not const, unlike getBuffer().
2941    * Note that a UnicodeString may also contain NUL characters as part of its contents.
2942    *
2943    * The buffer may reside in read-only memory. Its contents must not
2944    * be modified.
2945    *
2946    * @return a read-only pointer to the internal string buffer,
2947    *         or 0 if the string is bogus, has an "open" getBuffer(minCapacity), or cannot be NUL-terminated
2948    *
2949    * @see getBuffer(int32_t minCapacity)
2950    * @see getBuffer()
2951    * @stable ICU 2.2
2952    */
2953   const UChar *getTerminatedBuffer();
2954 
2955   //========================================
2956   // Constructors
2957   //========================================
2958 
2959   /** Construct an empty UnicodeString.
2960    * @stable ICU 2.0
2961    */
2962   inline UnicodeString();
2963 
2964   /**
2965    * Construct a UnicodeString with capacity to hold <TT>capacity</TT> UChars, initially filled with <TT>count</TT> code points <TT>c</TT>.
2966    * @param capacity the number of UChars this UnicodeString should hold
2967    * before a resize is necessary; if count is greater than 0 and count
2968    * code points c take up more space than capacity, then capacity is adjusted
2969    * accordingly.
2970    * @param c is used to initially fill the string
2971    * @param count specifies how many code points c are to be written in the
2972    *              string
2973    * @stable ICU 2.0
2974    */
2975   UnicodeString(int32_t capacity, UChar32 c, int32_t count);
2976 
2977   /**
2978    * Single UChar (code unit) constructor.
2979    *
2980    * It is recommended to mark this constructor "explicit" by
2981    * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>
2982    * on the compiler command line or similar.
2983    * @param ch the character to place in the UnicodeString
2984    * @stable ICU 2.0
2985    */
2986   UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar ch);
2987 
2988   /**
2989    * Single UChar32 (code point) constructor.
2990    *
2991    * It is recommended to mark this constructor "explicit" by
2992    * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>
2993    * on the compiler command line or similar.
2994    * @param ch the character to place in the UnicodeString
2995    * @stable ICU 2.0
2996    */
2997   UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch);
2998 
2999   /**
3000    * UChar* constructor.
3001    *
3002    * It is recommended to mark this constructor "explicit" by
3003    * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
3004    * on the compiler command line or similar.
3005    * @param text The characters to place in the UnicodeString.  <TT>text</TT>
3006    * must be NUL (U+0000) terminated.
3007    * @stable ICU 2.0
3008    */
3009   UNISTR_FROM_STRING_EXPLICIT UnicodeString(const UChar *text);
3010 
3011   /**
3012    * UChar* constructor.
3013    * @param text The characters to place in the UnicodeString.
3014    * @param textLength The number of Unicode characters in <TT>text</TT>
3015    * to copy.
3016    * @stable ICU 2.0
3017    */
3018   UnicodeString(const UChar *text,
3019         int32_t textLength);
3020 
3021   /**
3022    * Readonly-aliasing UChar* constructor.
3023    * The text will be used for the UnicodeString object, but
3024    * it will not be released when the UnicodeString is destroyed.
3025    * This has copy-on-write semantics:
3026    * When the string is modified, then the buffer is first copied into
3027    * newly allocated memory.
3028    * The aliased buffer is never modified.
3029    *
3030    * In an assignment to another UnicodeString, when using the copy constructor
3031    * or the assignment operator, the text will be copied.
3032    * When using fastCopyFrom(), the text will be aliased again,
3033    * so that both strings then alias the same readonly-text.
3034    *
3035    * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
3036    *                     This must be true if <code>textLength==-1</code>.
3037    * @param text The characters to alias for the UnicodeString.
3038    * @param textLength The number of Unicode characters in <code>text</code> to alias.
3039    *                   If -1, then this constructor will determine the length
3040    *                   by calling <code>u_strlen()</code>.
3041    * @stable ICU 2.0
3042    */
3043   UnicodeString(UBool isTerminated,
3044                 const UChar *text,
3045                 int32_t textLength);
3046 
3047   /**
3048    * Writable-aliasing UChar* constructor.
3049    * The text will be used for the UnicodeString object, but
3050    * it will not be released when the UnicodeString is destroyed.
3051    * This has write-through semantics:
3052    * For as long as the capacity of the buffer is sufficient, write operations
3053    * will directly affect the buffer. When more capacity is necessary, then
3054    * a new buffer will be allocated and the contents copied as with regularly
3055    * constructed strings.
3056    * In an assignment to another UnicodeString, the buffer will be copied.
3057    * The extract(UChar *dst) function detects whether the dst pointer is the same
3058    * as the string buffer itself and will in this case not copy the contents.
3059    *
3060    * @param buffer The characters to alias for the UnicodeString.
3061    * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
3062    * @param buffCapacity The size of <code>buffer</code> in UChars.
3063    * @stable ICU 2.0
3064    */
3065   UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
3066 
3067 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
3068 
3069   /**
3070    * char* constructor.
3071    * Uses the default converter (and thus depends on the ICU conversion code)
3072    * unless U_CHARSET_IS_UTF8 is set to 1.
3073    *
3074    * For ASCII (really "invariant character") strings it is more efficient to use
3075    * the constructor that takes a US_INV (for its enum EInvariant).
3076    * For ASCII (invariant-character) string literals, see UNICODE_STRING and
3077    * UNICODE_STRING_SIMPLE.
3078    *
3079    * It is recommended to mark this constructor "explicit" by
3080    * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
3081    * on the compiler command line or similar.
3082    * @param codepageData an array of bytes, NUL-terminated,
3083    *                     in the platform's default codepage.
3084    * @stable ICU 2.0
3085    * @see UNICODE_STRING
3086    * @see UNICODE_STRING_SIMPLE
3087    */
3088   UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
3089 
3090   /**
3091    * char* constructor.
3092    * Uses the default converter (and thus depends on the ICU conversion code)
3093    * unless U_CHARSET_IS_UTF8 is set to 1.
3094    * @param codepageData an array of bytes in the platform's default codepage.
3095    * @param dataLength The number of bytes in <TT>codepageData</TT>.
3096    * @stable ICU 2.0
3097    */
3098   UnicodeString(const char *codepageData, int32_t dataLength);
3099 
3100 #endif
3101 
3102 #if !UCONFIG_NO_CONVERSION
3103 
3104   /**
3105    * char* constructor.
3106    * @param codepageData an array of bytes, NUL-terminated
3107    * @param codepage the encoding of <TT>codepageData</TT>.  The special
3108    * value 0 for <TT>codepage</TT> indicates that the text is in the
3109    * platform's default codepage.
3110    *
3111    * If <code>codepage</code> is an empty string (<code>""</code>),
3112    * then a simple conversion is performed on the codepage-invariant
3113    * subset ("invariant characters") of the platform encoding. See utypes.h.
3114    * Recommendation: For invariant-character strings use the constructor
3115    * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
3116    * because it avoids object code dependencies of UnicodeString on
3117    * the conversion code.
3118    *
3119    * @stable ICU 2.0
3120    */
3121   UnicodeString(const char *codepageData, const char *codepage);
3122 
3123   /**
3124    * char* constructor.
3125    * @param codepageData an array of bytes.
3126    * @param dataLength The number of bytes in <TT>codepageData</TT>.
3127    * @param codepage the encoding of <TT>codepageData</TT>.  The special
3128    * value 0 for <TT>codepage</TT> indicates that the text is in the
3129    * platform's default codepage.
3130    * If <code>codepage</code> is an empty string (<code>""</code>),
3131    * then a simple conversion is performed on the codepage-invariant
3132    * subset ("invariant characters") of the platform encoding. See utypes.h.
3133    * Recommendation: For invariant-character strings use the constructor
3134    * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
3135    * because it avoids object code dependencies of UnicodeString on
3136    * the conversion code.
3137    *
3138    * @stable ICU 2.0
3139    */
3140   UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
3141 
3142   /**
3143    * char * / UConverter constructor.
3144    * This constructor uses an existing UConverter object to
3145    * convert the codepage string to Unicode and construct a UnicodeString
3146    * from that.
3147    *
3148    * The converter is reset at first.
3149    * If the error code indicates a failure before this constructor is called,
3150    * or if an error occurs during conversion or construction,
3151    * then the string will be bogus.
3152    *
3153    * This function avoids the overhead of opening and closing a converter if
3154    * multiple strings are constructed.
3155    *
3156    * @param src input codepage string
3157    * @param srcLength length of the input string, can be -1 for NUL-terminated strings
3158    * @param cnv converter object (ucnv_resetToUnicode() will be called),
3159    *        can be NULL for the default converter
3160    * @param errorCode normal ICU error code
3161    * @stable ICU 2.0
3162    */
3163   UnicodeString(
3164         const char *src, int32_t srcLength,
3165         UConverter *cnv,
3166         UErrorCode &errorCode);
3167 
3168 #endif
3169 
3170   /**
3171    * Constructs a Unicode string from an invariant-character char * string.
3172    * About invariant characters see utypes.h.
3173    * This constructor has no runtime dependency on conversion code and is
3174    * therefore recommended over ones taking a charset name string
3175    * (where the empty string "" indicates invariant-character conversion).
3176    *
3177    * Use the macro US_INV as the third, signature-distinguishing parameter.
3178    *
3179    * For example:
3180    * \code
3181    * void fn(const char *s) {
3182    *   UnicodeString ustr(s, -1, US_INV);
3183    *   // use ustr ...
3184    * }
3185    * \endcode
3186    *
3187    * @param src String using only invariant characters.
3188    * @param length Length of src, or -1 if NUL-terminated.
3189    * @param inv Signature-distinguishing parameter, use US_INV.
3190    *
3191    * @see US_INV
3192    * @stable ICU 3.2
3193    */
3194   UnicodeString(const char *src, int32_t length, enum EInvariant inv);
3195 
3196 
3197   /**
3198    * Copy constructor.
3199    *
3200    * Starting with ICU 2.4, the assignment operator and the copy constructor
3201    * allocate a new buffer and copy the buffer contents even for readonly aliases.
3202    * By contrast, the fastCopyFrom() function implements the old,
3203    * more efficient but less safe behavior
3204    * of making this string also a readonly alias to the same buffer.
3205    *
3206    * If the source object has an "open" buffer from getBuffer(minCapacity),
3207    * then the copy is an empty string.
3208    *
3209    * @param that The UnicodeString object to copy.
3210    * @stable ICU 2.0
3211    * @see fastCopyFrom
3212    */
3213   UnicodeString(const UnicodeString& that);
3214 
3215 #ifndef U_HIDE_DRAFT_API
3216 #if U_HAVE_RVALUE_REFERENCES
3217   /**
3218    * Move constructor, might leave src in bogus state.
3219    * This string will have the same contents and state that the source string had.
3220    * @param src source string
3221    * @draft ICU 56
3222    */
3223   UnicodeString(UnicodeString &&src) U_NOEXCEPT;
3224 #endif
3225 #endif  /* U_HIDE_DRAFT_API */
3226 
3227   /**
3228    * 'Substring' constructor from tail of source string.
3229    * @param src The UnicodeString object to copy.
3230    * @param srcStart The offset into <tt>src</tt> at which to start copying.
3231    * @stable ICU 2.2
3232    */
3233   UnicodeString(const UnicodeString& src, int32_t srcStart);
3234 
3235   /**
3236    * 'Substring' constructor from subrange of source string.
3237    * @param src The UnicodeString object to copy.
3238    * @param srcStart The offset into <tt>src</tt> at which to start copying.
3239    * @param srcLength The number of characters from <tt>src</tt> to copy.
3240    * @stable ICU 2.2
3241    */
3242   UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3243 
3244   /**
3245    * Clone this object, an instance of a subclass of Replaceable.
3246    * Clones can be used concurrently in multiple threads.
3247    * If a subclass does not implement clone(), or if an error occurs,
3248    * then NULL is returned.
3249    * The clone functions in all subclasses return a pointer to a Replaceable
3250    * because some compilers do not support covariant (same-as-this)
3251    * return types; cast to the appropriate subclass if necessary.
3252    * The caller must delete the clone.
3253    *
3254    * @return a clone of this object
3255    *
3256    * @see Replaceable::clone
3257    * @see getDynamicClassID
3258    * @stable ICU 2.6
3259    */
3260   virtual Replaceable *clone() const;
3261 
3262   /** Destructor.
3263    * @stable ICU 2.0
3264    */
3265   virtual ~UnicodeString();
3266 
3267   /**
3268    * Create a UnicodeString from a UTF-8 string.
3269    * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
3270    * Calls u_strFromUTF8WithSub().
3271    *
3272    * @param utf8 UTF-8 input string.
3273    *             Note that a StringPiece can be implicitly constructed
3274    *             from a std::string or a NUL-terminated const char * string.
3275    * @return A UnicodeString with equivalent UTF-16 contents.
3276    * @see toUTF8
3277    * @see toUTF8String
3278    * @stable ICU 4.2
3279    */
3280   static UnicodeString fromUTF8(const StringPiece &utf8);
3281 
3282   /**
3283    * Create a UnicodeString from a UTF-32 string.
3284    * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
3285    * Calls u_strFromUTF32WithSub().
3286    *
3287    * @param utf32 UTF-32 input string. Must not be NULL.
3288    * @param length Length of the input string, or -1 if NUL-terminated.
3289    * @return A UnicodeString with equivalent UTF-16 contents.
3290    * @see toUTF32
3291    * @stable ICU 4.2
3292    */
3293   static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
3294 
3295   /* Miscellaneous operations */
3296 
3297   /**
3298    * Unescape a string of characters and return a string containing
3299    * the result.  The following escape sequences are recognized:
3300    *
3301    * \\uhhhh       4 hex digits; h in [0-9A-Fa-f]
3302    * \\Uhhhhhhhh   8 hex digits
3303    * \\xhh         1-2 hex digits
3304    * \\ooo         1-3 octal digits; o in [0-7]
3305    * \\cX          control-X; X is masked with 0x1F
3306    *
3307    * as well as the standard ANSI C escapes:
3308    *
3309    * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
3310    * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
3311    * \\&quot; => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
3312    *
3313    * Anything else following a backslash is generically escaped.  For
3314    * example, "[a\\-z]" returns "[a-z]".
3315    *
3316    * If an escape sequence is ill-formed, this method returns an empty
3317    * string.  An example of an ill-formed sequence is "\\u" followed by
3318    * fewer than 4 hex digits.
3319    *
3320    * This function is similar to u_unescape() but not identical to it.
3321    * The latter takes a source char*, so it does escape recognition
3322    * and also invariant conversion.
3323    *
3324    * @return a string with backslash escapes interpreted, or an
3325    * empty string on error.
3326    * @see UnicodeString#unescapeAt()
3327    * @see u_unescape()
3328    * @see u_unescapeAt()
3329    * @stable ICU 2.0
3330    */
3331   UnicodeString unescape() const;
3332 
3333   /**
3334    * Unescape a single escape sequence and return the represented
3335    * character.  See unescape() for a listing of the recognized escape
3336    * sequences.  The character at offset-1 is assumed (without
3337    * checking) to be a backslash.  If the escape sequence is
3338    * ill-formed, or the offset is out of range, U_SENTINEL=-1 is
3339    * returned.
3340    *
3341    * @param offset an input output parameter.  On input, it is the
3342    * offset into this string where the escape sequence is located,
3343    * after the initial backslash.  On output, it is advanced after the
3344    * last character parsed.  On error, it is not advanced at all.
3345    * @return the character represented by the escape sequence at
3346    * offset, or U_SENTINEL=-1 on error.
3347    * @see UnicodeString#unescape()
3348    * @see u_unescape()
3349    * @see u_unescapeAt()
3350    * @stable ICU 2.0
3351    */
3352   UChar32 unescapeAt(int32_t &offset) const;
3353 
3354   /**
3355    * ICU "poor man's RTTI", returns a UClassID for this class.
3356    *
3357    * @stable ICU 2.2
3358    */
3359   static UClassID U_EXPORT2 getStaticClassID();
3360 
3361   /**
3362    * ICU "poor man's RTTI", returns a UClassID for the actual class.
3363    *
3364    * @stable ICU 2.2
3365    */
3366   virtual UClassID getDynamicClassID() const;
3367 
3368   //========================================
3369   // Implementation methods
3370   //========================================
3371 
3372 protected:
3373   /**
3374    * Implement Replaceable::getLength() (see jitterbug 1027).
3375    * @stable ICU 2.4
3376    */
3377   virtual int32_t getLength() const;
3378 
3379   /**
3380    * The change in Replaceable to use virtual getCharAt() allows
3381    * UnicodeString::charAt() to be inline again (see jitterbug 709).
3382    * @stable ICU 2.4
3383    */
3384   virtual UChar getCharAt(int32_t offset) const;
3385 
3386   /**
3387    * The change in Replaceable to use virtual getChar32At() allows
3388    * UnicodeString::char32At() to be inline again (see jitterbug 709).
3389    * @stable ICU 2.4
3390    */
3391   virtual UChar32 getChar32At(int32_t offset) const;
3392 
3393 private:
3394   // For char* constructors. Could be made public.
3395   UnicodeString &setToUTF8(const StringPiece &utf8);
3396   // For extract(char*).
3397   // We could make a toUTF8(target, capacity, errorCode) public but not
3398   // this version: New API will be cleaner if we make callers create substrings
3399   // rather than having start+length on every method,
3400   // and it should take a UErrorCode&.
3401   int32_t
3402   toUTF8(int32_t start, int32_t len,
3403          char *target, int32_t capacity) const;
3404 
3405   /**
3406    * Internal string contents comparison, called by operator==.
3407    * Requires: this & text not bogus and have same lengths.
3408    */
3409   UBool doEquals(const UnicodeString &text, int32_t len) const;
3410 
3411   inline int8_t
3412   doCompare(int32_t start,
3413            int32_t length,
3414            const UnicodeString& srcText,
3415            int32_t srcStart,
3416            int32_t srcLength) const;
3417 
3418   int8_t doCompare(int32_t start,
3419            int32_t length,
3420            const UChar *srcChars,
3421            int32_t srcStart,
3422            int32_t srcLength) const;
3423 
3424   inline int8_t
3425   doCompareCodePointOrder(int32_t start,
3426                           int32_t length,
3427                           const UnicodeString& srcText,
3428                           int32_t srcStart,
3429                           int32_t srcLength) const;
3430 
3431   int8_t doCompareCodePointOrder(int32_t start,
3432                                  int32_t length,
3433                                  const UChar *srcChars,
3434                                  int32_t srcStart,
3435                                  int32_t srcLength) const;
3436 
3437   inline int8_t
3438   doCaseCompare(int32_t start,
3439                 int32_t length,
3440                 const UnicodeString &srcText,
3441                 int32_t srcStart,
3442                 int32_t srcLength,
3443                 uint32_t options) const;
3444 
3445   int8_t
3446   doCaseCompare(int32_t start,
3447                 int32_t length,
3448                 const UChar *srcChars,
3449                 int32_t srcStart,
3450                 int32_t srcLength,
3451                 uint32_t options) const;
3452 
3453   int32_t doIndexOf(UChar c,   // overload taking a single code unit (UChar)
3454             int32_t start,
3455             int32_t length) const;
3456 
3457   int32_t doIndexOf(UChar32 c,   // overload taking a code point (UChar32)
3458                         int32_t start,
3459                         int32_t length) const;
3460 
3461   int32_t doLastIndexOf(UChar c,   // overload taking a single code unit (UChar)
3462                 int32_t start,
3463                 int32_t length) const;
3464 
3465   int32_t doLastIndexOf(UChar32 c,   // overload taking a code point (UChar32)
3466                             int32_t start,
3467                             int32_t length) const;
3468 
3469   void doExtract(int32_t start,
3470          int32_t length,
3471          UChar *dst,
3472          int32_t dstStart) const;
3473 
3474   inline void doExtract(int32_t start,
3475          int32_t length,
3476          UnicodeString& target) const;
3477 
3478   inline UChar doCharAt(int32_t offset)  const;
3479 
3480   UnicodeString& doReplace(int32_t start,
3481                int32_t length,
3482                const UnicodeString& srcText,
3483                int32_t srcStart,
3484                int32_t srcLength);
3485 
3486   UnicodeString& doReplace(int32_t start,
3487                int32_t length,
3488                const UChar *srcChars,
3489                int32_t srcStart,
3490                int32_t srcLength);
3491 
3492   UnicodeString& doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3493   UnicodeString& doAppend(const UChar *srcChars, int32_t srcStart, int32_t srcLength);
3494 
3495   UnicodeString& doReverse(int32_t start,
3496                int32_t length);
3497 
3498   // calculate hash code
3499   int32_t doHashCode(void) const;
3500 
3501   // get pointer to start of array
3502   // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
3503   inline UChar* getArrayStart(void);
3504   inline const UChar* getArrayStart(void) const;
3505 
3506   inline UBool hasShortLength() const;   // NOTE(review): presumably true when the length is stored in fLengthAndFlags rather than fUnion.fFields.fLength (cf. kLengthIsLarge) - confirm
3507   inline int32_t getShortLength() const; // NOTE(review): presumably extracts the length bits above kLengthShift from fLengthAndFlags - confirm
3508 
3509   // A UnicodeString object (not necessarily its current buffer)
3510   // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
3511   inline UBool isWritable() const;
3512 
3513   // Is the current buffer writable?
3514   inline UBool isBufferWritable() const;
3515 
3516   // None of the following does releaseArray().
3517   inline void setZeroLength();
3518   inline void setShortLength(int32_t len);
3519   inline void setLength(int32_t len);
3520   inline void setToEmpty();
3521   inline void setArray(UChar *array, int32_t len, int32_t capacity); // sets length but not flags
3522 
3523   // allocate the array; result may be the stack buffer
3524   // sets refCount to 1 if appropriate
3525   // sets fArray, fCapacity, and flags
3526   // sets length to 0
3527   // returns boolean for success or failure
3528   UBool allocate(int32_t capacity);
3529 
3530   // release the array if owned
3531   void releaseArray(void);
3532 
3533   // turn a bogus string into an empty one
3534   void unBogus();
3535 
3536   // implements assignment operator, copy constructor, and fastCopyFrom()
3537   UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
3538 
3539   // Copies just the fields without memory management. NOTE(review): setSrcToBogus presumably leaves src bogus afterwards (cf. the move constructor) - confirm.
3540   void copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) U_NOEXCEPT;
3541 
3542   // Pin start (and, for pinIndices(), length) to acceptable values; both are adjusted in place.
3543   inline void pinIndex(int32_t& start) const;
3544   inline void pinIndices(int32_t& start,
3545                          int32_t& length) const;
3546 
3547 #if !UCONFIG_NO_CONVERSION
3548 
3549   /* Internal extract() using UConverter. */
3550   int32_t doExtract(int32_t start, int32_t length,
3551                     char *dest, int32_t destCapacity,
3552                     UConverter *cnv,
3553                     UErrorCode &errorCode) const;
3554 
3555   /*
3556    * Real constructor for converting from codepage data.
3557    * It assumes that it is called with !fRefCounted.
3558    *
3559    * If <code>codepage==0</code>, then the default converter
3560    * is used for the platform encoding.
3561    * If <code>codepage</code> is an empty string (<code>""</code>),
3562    * then a simple conversion is performed on the codepage-invariant
3563    * subset ("invariant characters") of the platform encoding. See utypes.h.
3564    */
3565   void doCodepageCreate(const char *codepageData,
3566                         int32_t dataLength,
3567                         const char *codepage);
3568 
3569   /*
3570    * Worker function for creating a UnicodeString from
3571    * a codepage string using a UConverter.
3572    */
3573   void
3574   doCodepageCreate(const char *codepageData,
3575                    int32_t dataLength,
3576                    UConverter *converter,
3577                    UErrorCode &status);
3578 
3579 #endif
3580 
3581   /*
3582    * This function is called when write access to the array
3583    * is necessary.
3584    *
3585    * We need to make a copy of the array if
3586    * the buffer is read-only, or
3587    * the buffer is refCounted (shared), and refCount>1, or
3588    * the buffer is too small.
3589    *
3590    * Return FALSE if memory could not be allocated.
3591    */
3592   UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
3593                             int32_t growCapacity = -1,
3594                             UBool doCopyArray = TRUE,
3595                             int32_t **pBufferToDelete = 0,
3596                             UBool forceClone = FALSE);
3597 
3598   /**
3599    * Common function for UnicodeString case mappings.
3600    * The stringCaseMapper has the same type UStringCaseMapper
3601    * as in ustr_imp.h for ustrcase_map().
3602    */
3603   UnicodeString &
3604   caseMap(const UCaseMap *csm, UStringCaseMapper *stringCaseMapper);
3605 
3606   // ref counting
3607   void addRef(void);
3608   int32_t removeRef(void);
3609   int32_t refCount(void) const;
3610 
3611   // constants: stack-buffer sizing, sentinel values, and the bit layout of fLengthAndFlags
3612   enum {
3613     /**
3614      * Size of stack buffer for short strings, in UChars.
3615      * Must be at least U16_MAX_LENGTH for the single-code point constructor to work.
3616      * @see UNISTR_OBJECT_SIZE
3617      */
3618     US_STACKBUF_SIZE=(int32_t)(UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR,
3619     kInvalidUChar=0xffff, // U+FFFF returned by charAt(invalid index)
3620     kGrowSize=128, // grow size for this buffer
3621     kInvalidHashCode=0, // invalid hash code
3622     kEmptyHashCode=1, // hash code for empty string
3623 
3624     // bit flag values for fLengthAndFlags
3625     kIsBogus=1,         // this string is bogus, i.e., not valid or NULL
3626     kUsingStackBuffer=2,// using fUnion.fStackFields instead of fUnion.fFields
3627     kRefCounted=4,      // there is a refCount field before the characters in fArray
3628     kBufferIsReadonly=8,// do not write to this buffer
3629     kOpenGetBuffer=16,  // getBuffer(minCapacity) was called (is "open"),
3630                         // and releaseBuffer(newLength) must be called
3631     kAllStorageFlags=0x1f, // mask of the five flag bits above (1|2|4|8|16)
3632 
3633     kLengthShift=5,     // remaining 11 bits for non-negative short length, or negative if long
3634     kLength1=1<<kLengthShift, // a short length of 1, shifted into position above the flag bits
3635     kMaxShortLength=0x3ff,  // max non-negative short length (leaves top bit 0)
3636     kLengthIsLarge=0xffe0,  // short length < 0, real length is in fUnion.fFields.fLength
3637 
3638     // combined values for convenience
3639     kShortString=kUsingStackBuffer,
3640     kLongString=kRefCounted,
3641     kReadonlyAlias=kBufferIsReadonly,
3642     kWritableAlias=0
3643   };
3644 
3645   friend class UnicodeStringAppendable;
3646 
3647   union StackBufferOrFields;        // forward declaration necessary before friend declaration
3648   friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
3649 
3650   /*
3651    * The following are all the class fields that are stored
3652    * in each UnicodeString object.
3653    * Note that UnicodeString has virtual functions,
3654    * therefore there is an implicit vtable pointer
3655    * as the first real field.
3656    * The fields should be aligned such that no padding is necessary.
3657    * On 32-bit machines, the size should be 32 bytes,
3658    * on 64-bit machines (8-byte pointers), it should be 40 bytes.
3659    *
3660    * We use a hack to achieve this.
3661    *
3662    * With at least some compilers, each of the following is forced to
3663    * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
3664    * rounded up with additional padding if the fields do not already fit that requirement:
3665    * - sizeof(class UnicodeString)
3666    * - offsetof(UnicodeString, fUnion)
3667    * - sizeof(fUnion)
3668    * - sizeof(fStackFields)
3669    *
3670    * We optimize for the longest possible internal buffer for short strings.
3671    * fUnion.fStackFields begins with 2 bytes for storage flags
3672    * and the length of relatively short strings,
3673    * followed by the buffer for short string contents.
3674    * There is no padding inside fStackFields.
3675    *
3676    * Heap-allocated and aliased strings use fUnion.fFields.
3677    * Both fStackFields and fFields must begin with the same fields for flags and short length,
3678    * that is, those must have the same memory offsets inside the object,
3679    * because the flags must be inspected in order to decide which half of fUnion is being used.
3680    * We assume that the compiler does not reorder the fields.
3681    *
3682    * (Padding at the end of fFields is ok:
3683    * As long as it is no larger than fStackFields, it is not wasted space.)
3684    *
3685    * For some of the history of the UnicodeString class fields layout, see
3686    * - ICU ticket #11551 "longer UnicodeString contents in stack buffer"
3687    * - ICU ticket #11336 "UnicodeString: recombine stack buffer arrays"
3688    * - ICU ticket #8322 "why is sizeof(UnicodeString)==48?"
3689    */
3690   // (implicit) *vtable;
3691   union StackBufferOrFields {
3692     // fStackFields is used iff (fLengthAndFlags&kUsingStackBuffer) else fFields is used.
3693     // Each struct of the union must begin with fLengthAndFlags.
3694     struct {
3695       int16_t fLengthAndFlags;          // bit fields: see constants above
3696       UChar fBuffer[US_STACKBUF_SIZE];  // buffer for short strings
3697     } fStackFields;
3698     struct {
3699       int16_t fLengthAndFlags;          // bit fields: see constants above
3700       int32_t fLength;    // number of characters in fArray if the length does not fit into fLengthAndFlags (> kMaxShortLength); else undefined
3701       int32_t fCapacity;  // capacity of fArray (in UChars)
3702       // array pointer last to minimize padding for machines with P128 data model
3703       // or pointer sizes that are not a power of 2
3704       UChar   *fArray;    // the Unicode data
3705     } fFields;
3706   } fUnion;
3707 };
3708 
3709 /**
3710  * Create a new UnicodeString with the concatenation of two others.
3711  *
3712  * @param s1 The first string to be copied to the new one.
3713  * @param s2 The second string to be copied to the new one, after s1.
3714  * @return UnicodeString(s1).append(s2)
3715  * @stable ICU 2.8
3716  */
3717 U_COMMON_API UnicodeString U_EXPORT2
3718 operator+ (const UnicodeString &s1, const UnicodeString &s2);
3719 
3720 //========================================
3721 // Inline members
3722 //========================================
3723 
3724 //========================================
3725 // Privates
3726 //========================================
3727 
3728 inline void
pinIndex(int32_t & start)3729 UnicodeString::pinIndex(int32_t& start) const
3730 {
3731   // pin index
3732   if(start < 0) {
3733     start = 0;
3734   } else if(start > length()) {
3735     start = length();
3736   }
3737 }
3738 
3739 inline void
pinIndices(int32_t & start,int32_t & _length)3740 UnicodeString::pinIndices(int32_t& start,
3741                           int32_t& _length) const
3742 {
3743   // pin indices
3744   int32_t len = length();
3745   if(start < 0) {
3746     start = 0;
3747   } else if(start > len) {
3748     start = len;
3749   }
3750   if(_length < 0) {
3751     _length = 0;
3752   } else if(_length > (len - start)) {
3753     _length = (len - start);
3754   }
3755 }
3756 
3757 inline UChar*
getArrayStart()3758 UnicodeString::getArrayStart() {
3759   return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
3760     fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
3761 }
3762 
3763 inline const UChar*
getArrayStart()3764 UnicodeString::getArrayStart() const {
3765   return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
3766     fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
3767 }
3768 
3769 //========================================
3770 // Default constructor
3771 //========================================
3772 
3773 inline
UnicodeString()3774 UnicodeString::UnicodeString() {
3775   fUnion.fStackFields.fLengthAndFlags=kShortString;
3776 }
3777 
3778 //========================================
3779 // Read-only implementation methods
3780 //========================================
3781 inline UBool
hasShortLength()3782 UnicodeString::hasShortLength() const {
3783   return fUnion.fFields.fLengthAndFlags>=0;
3784 }
3785 
3786 inline int32_t
getShortLength()3787 UnicodeString::getShortLength() const {
3788   // fLengthAndFlags must be non-negative -> short length >= 0
3789   // and arithmetic or logical shift does not matter.
3790   return fUnion.fFields.fLengthAndFlags>>kLengthShift;
3791 }
3792 
3793 inline int32_t
length()3794 UnicodeString::length() const {
3795   return hasShortLength() ? getShortLength() : fUnion.fFields.fLength;
3796 }
3797 
3798 inline int32_t
getCapacity()3799 UnicodeString::getCapacity() const {
3800   return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
3801     US_STACKBUF_SIZE : fUnion.fFields.fCapacity;
3802 }
3803 
3804 inline int32_t
hashCode()3805 UnicodeString::hashCode() const
3806 { return doHashCode(); }
3807 
3808 inline UBool
isBogus()3809 UnicodeString::isBogus() const
3810 { return (UBool)(fUnion.fFields.fLengthAndFlags & kIsBogus); }
3811 
3812 inline UBool
isWritable()3813 UnicodeString::isWritable() const
3814 { return (UBool)!(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus)); }
3815 
3816 inline UBool
isBufferWritable()3817 UnicodeString::isBufferWritable() const
3818 {
3819   return (UBool)(
3820       !(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
3821       (!(fUnion.fFields.fLengthAndFlags&kRefCounted) || refCount()==1));
3822 }
3823 
3824 inline const UChar *
getBuffer()3825 UnicodeString::getBuffer() const {
3826   if(fUnion.fFields.fLengthAndFlags&(kIsBogus|kOpenGetBuffer)) {
3827     return 0;
3828   } else if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) {
3829     return fUnion.fStackFields.fBuffer;
3830   } else {
3831     return fUnion.fFields.fArray;
3832   }
3833 }
3834 
3835 //========================================
3836 // Read-only alias methods
3837 //========================================
3838 inline int8_t
doCompare(int32_t start,int32_t thisLength,const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)3839 UnicodeString::doCompare(int32_t start,
3840               int32_t thisLength,
3841               const UnicodeString& srcText,
3842               int32_t srcStart,
3843               int32_t srcLength) const
3844 {
3845   if(srcText.isBogus()) {
3846     return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3847   } else {
3848     srcText.pinIndices(srcStart, srcLength);
3849     return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
3850   }
3851 }
3852 
3853 inline UBool
3854 UnicodeString::operator== (const UnicodeString& text) const
3855 {
3856   if(isBogus()) {
3857     return text.isBogus();
3858   } else {
3859     int32_t len = length(), textLength = text.length();
3860     return !text.isBogus() && len == textLength && doEquals(text, len);
3861   }
3862 }
3863 
3864 inline UBool
3865 UnicodeString::operator!= (const UnicodeString& text) const
3866 { return (! operator==(text)); }
3867 
3868 inline UBool
3869 UnicodeString::operator> (const UnicodeString& text) const
3870 { return doCompare(0, length(), text, 0, text.length()) == 1; }
3871 
3872 inline UBool
3873 UnicodeString::operator< (const UnicodeString& text) const
3874 { return doCompare(0, length(), text, 0, text.length()) == -1; }
3875 
3876 inline UBool
3877 UnicodeString::operator>= (const UnicodeString& text) const
3878 { return doCompare(0, length(), text, 0, text.length()) != -1; }
3879 
3880 inline UBool
3881 UnicodeString::operator<= (const UnicodeString& text) const
3882 { return doCompare(0, length(), text, 0, text.length()) != 1; }
3883 
3884 inline int8_t
compare(const UnicodeString & text)3885 UnicodeString::compare(const UnicodeString& text) const
3886 { return doCompare(0, length(), text, 0, text.length()); }
3887 
3888 inline int8_t
compare(int32_t start,int32_t _length,const UnicodeString & srcText)3889 UnicodeString::compare(int32_t start,
3890                int32_t _length,
3891                const UnicodeString& srcText) const
3892 { return doCompare(start, _length, srcText, 0, srcText.length()); }
3893 
3894 inline int8_t
compare(const UChar * srcChars,int32_t srcLength)3895 UnicodeString::compare(const UChar *srcChars,
3896                int32_t srcLength) const
3897 { return doCompare(0, length(), srcChars, 0, srcLength); }
3898 
3899 inline int8_t
compare(int32_t start,int32_t _length,const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)3900 UnicodeString::compare(int32_t start,
3901                int32_t _length,
3902                const UnicodeString& srcText,
3903                int32_t srcStart,
3904                int32_t srcLength) const
3905 { return doCompare(start, _length, srcText, srcStart, srcLength); }
3906 
3907 inline int8_t
compare(int32_t start,int32_t _length,const UChar * srcChars)3908 UnicodeString::compare(int32_t start,
3909                int32_t _length,
3910                const UChar *srcChars) const
3911 { return doCompare(start, _length, srcChars, 0, _length); }
3912 
3913 inline int8_t
compare(int32_t start,int32_t _length,const UChar * srcChars,int32_t srcStart,int32_t srcLength)3914 UnicodeString::compare(int32_t start,
3915                int32_t _length,
3916                const UChar *srcChars,
3917                int32_t srcStart,
3918                int32_t srcLength) const
3919 { return doCompare(start, _length, srcChars, srcStart, srcLength); }
3920 
3921 inline int8_t
compareBetween(int32_t start,int32_t limit,const UnicodeString & srcText,int32_t srcStart,int32_t srcLimit)3922 UnicodeString::compareBetween(int32_t start,
3923                   int32_t limit,
3924                   const UnicodeString& srcText,
3925                   int32_t srcStart,
3926                   int32_t srcLimit) const
3927 { return doCompare(start, limit - start,
3928            srcText, srcStart, srcLimit - srcStart); }
3929 
3930 inline int8_t
doCompareCodePointOrder(int32_t start,int32_t thisLength,const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)3931 UnicodeString::doCompareCodePointOrder(int32_t start,
3932                                        int32_t thisLength,
3933                                        const UnicodeString& srcText,
3934                                        int32_t srcStart,
3935                                        int32_t srcLength) const
3936 {
3937   if(srcText.isBogus()) {
3938     return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3939   } else {
3940     srcText.pinIndices(srcStart, srcLength);
3941     return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
3942   }
3943 }
3944 
3945 inline int8_t
compareCodePointOrder(const UnicodeString & text)3946 UnicodeString::compareCodePointOrder(const UnicodeString& text) const
3947 { return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
3948 
3949 inline int8_t
compareCodePointOrder(int32_t start,int32_t _length,const UnicodeString & srcText)3950 UnicodeString::compareCodePointOrder(int32_t start,
3951                                      int32_t _length,
3952                                      const UnicodeString& srcText) const
3953 { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
3954 
3955 inline int8_t
compareCodePointOrder(const UChar * srcChars,int32_t srcLength)3956 UnicodeString::compareCodePointOrder(const UChar *srcChars,
3957                                      int32_t srcLength) const
3958 { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
3959 
3960 inline int8_t
compareCodePointOrder(int32_t start,int32_t _length,const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)3961 UnicodeString::compareCodePointOrder(int32_t start,
3962                                      int32_t _length,
3963                                      const UnicodeString& srcText,
3964                                      int32_t srcStart,
3965                                      int32_t srcLength) const
3966 { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
3967 
3968 inline int8_t
compareCodePointOrder(int32_t start,int32_t _length,const UChar * srcChars)3969 UnicodeString::compareCodePointOrder(int32_t start,
3970                                      int32_t _length,
3971                                      const UChar *srcChars) const
3972 { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
3973 
3974 inline int8_t
compareCodePointOrder(int32_t start,int32_t _length,const UChar * srcChars,int32_t srcStart,int32_t srcLength)3975 UnicodeString::compareCodePointOrder(int32_t start,
3976                                      int32_t _length,
3977                                      const UChar *srcChars,
3978                                      int32_t srcStart,
3979                                      int32_t srcLength) const
3980 { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
3981 
3982 inline int8_t
compareCodePointOrderBetween(int32_t start,int32_t limit,const UnicodeString & srcText,int32_t srcStart,int32_t srcLimit)3983 UnicodeString::compareCodePointOrderBetween(int32_t start,
3984                                             int32_t limit,
3985                                             const UnicodeString& srcText,
3986                                             int32_t srcStart,
3987                                             int32_t srcLimit) const
3988 { return doCompareCodePointOrder(start, limit - start,
3989            srcText, srcStart, srcLimit - srcStart); }
3990 
3991 inline int8_t
doCaseCompare(int32_t start,int32_t thisLength,const UnicodeString & srcText,int32_t srcStart,int32_t srcLength,uint32_t options)3992 UnicodeString::doCaseCompare(int32_t start,
3993                              int32_t thisLength,
3994                              const UnicodeString &srcText,
3995                              int32_t srcStart,
3996                              int32_t srcLength,
3997                              uint32_t options) const
3998 {
3999   if(srcText.isBogus()) {
4000     return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
4001   } else {
4002     srcText.pinIndices(srcStart, srcLength);
4003     return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
4004   }
4005 }
4006 
4007 inline int8_t
caseCompare(const UnicodeString & text,uint32_t options)4008 UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
4009   return doCaseCompare(0, length(), text, 0, text.length(), options);
4010 }
4011 
4012 inline int8_t
caseCompare(int32_t start,int32_t _length,const UnicodeString & srcText,uint32_t options)4013 UnicodeString::caseCompare(int32_t start,
4014                            int32_t _length,
4015                            const UnicodeString &srcText,
4016                            uint32_t options) const {
4017   return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
4018 }
4019 
4020 inline int8_t
caseCompare(const UChar * srcChars,int32_t srcLength,uint32_t options)4021 UnicodeString::caseCompare(const UChar *srcChars,
4022                            int32_t srcLength,
4023                            uint32_t options) const {
4024   return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
4025 }
4026 
4027 inline int8_t
caseCompare(int32_t start,int32_t _length,const UnicodeString & srcText,int32_t srcStart,int32_t srcLength,uint32_t options)4028 UnicodeString::caseCompare(int32_t start,
4029                            int32_t _length,
4030                            const UnicodeString &srcText,
4031                            int32_t srcStart,
4032                            int32_t srcLength,
4033                            uint32_t options) const {
4034   return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
4035 }
4036 
4037 inline int8_t
caseCompare(int32_t start,int32_t _length,const UChar * srcChars,uint32_t options)4038 UnicodeString::caseCompare(int32_t start,
4039                            int32_t _length,
4040                            const UChar *srcChars,
4041                            uint32_t options) const {
4042   return doCaseCompare(start, _length, srcChars, 0, _length, options);
4043 }
4044 
4045 inline int8_t
caseCompare(int32_t start,int32_t _length,const UChar * srcChars,int32_t srcStart,int32_t srcLength,uint32_t options)4046 UnicodeString::caseCompare(int32_t start,
4047                            int32_t _length,
4048                            const UChar *srcChars,
4049                            int32_t srcStart,
4050                            int32_t srcLength,
4051                            uint32_t options) const {
4052   return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
4053 }
4054 
4055 inline int8_t
caseCompareBetween(int32_t start,int32_t limit,const UnicodeString & srcText,int32_t srcStart,int32_t srcLimit,uint32_t options)4056 UnicodeString::caseCompareBetween(int32_t start,
4057                                   int32_t limit,
4058                                   const UnicodeString &srcText,
4059                                   int32_t srcStart,
4060                                   int32_t srcLimit,
4061                                   uint32_t options) const {
4062   return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
4063 }
4064 
4065 inline int32_t
indexOf(const UnicodeString & srcText,int32_t srcStart,int32_t srcLength,int32_t start,int32_t _length)4066 UnicodeString::indexOf(const UnicodeString& srcText,
4067                int32_t srcStart,
4068                int32_t srcLength,
4069                int32_t start,
4070                int32_t _length) const
4071 {
4072   if(!srcText.isBogus()) {
4073     srcText.pinIndices(srcStart, srcLength);
4074     if(srcLength > 0) {
4075       return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
4076     }
4077   }
4078   return -1;
4079 }
4080 
4081 inline int32_t
indexOf(const UnicodeString & text)4082 UnicodeString::indexOf(const UnicodeString& text) const
4083 { return indexOf(text, 0, text.length(), 0, length()); }
4084 
4085 inline int32_t
indexOf(const UnicodeString & text,int32_t start)4086 UnicodeString::indexOf(const UnicodeString& text,
4087                int32_t start) const {
4088   pinIndex(start);
4089   return indexOf(text, 0, text.length(), start, length() - start);
4090 }
4091 
4092 inline int32_t
indexOf(const UnicodeString & text,int32_t start,int32_t _length)4093 UnicodeString::indexOf(const UnicodeString& text,
4094                int32_t start,
4095                int32_t _length) const
4096 { return indexOf(text, 0, text.length(), start, _length); }
4097 
4098 inline int32_t
indexOf(const UChar * srcChars,int32_t srcLength,int32_t start)4099 UnicodeString::indexOf(const UChar *srcChars,
4100                int32_t srcLength,
4101                int32_t start) const {
4102   pinIndex(start);
4103   return indexOf(srcChars, 0, srcLength, start, length() - start);
4104 }
4105 
4106 inline int32_t
indexOf(const UChar * srcChars,int32_t srcLength,int32_t start,int32_t _length)4107 UnicodeString::indexOf(const UChar *srcChars,
4108                int32_t srcLength,
4109                int32_t start,
4110                int32_t _length) const
4111 { return indexOf(srcChars, 0, srcLength, start, _length); }
4112 
4113 inline int32_t
indexOf(UChar c,int32_t start,int32_t _length)4114 UnicodeString::indexOf(UChar c,
4115                int32_t start,
4116                int32_t _length) const
4117 { return doIndexOf(c, start, _length); }
4118 
4119 inline int32_t
indexOf(UChar32 c,int32_t start,int32_t _length)4120 UnicodeString::indexOf(UChar32 c,
4121                int32_t start,
4122                int32_t _length) const
4123 { return doIndexOf(c, start, _length); }
4124 
4125 inline int32_t
indexOf(UChar c)4126 UnicodeString::indexOf(UChar c) const
4127 { return doIndexOf(c, 0, length()); }
4128 
4129 inline int32_t
indexOf(UChar32 c)4130 UnicodeString::indexOf(UChar32 c) const
4131 { return indexOf(c, 0, length()); }
4132 
4133 inline int32_t
indexOf(UChar c,int32_t start)4134 UnicodeString::indexOf(UChar c,
4135                int32_t start) const {
4136   pinIndex(start);
4137   return doIndexOf(c, start, length() - start);
4138 }
4139 
4140 inline int32_t
indexOf(UChar32 c,int32_t start)4141 UnicodeString::indexOf(UChar32 c,
4142                int32_t start) const {
4143   pinIndex(start);
4144   return indexOf(c, start, length() - start);
4145 }
4146 
4147 inline int32_t
lastIndexOf(const UChar * srcChars,int32_t srcLength,int32_t start,int32_t _length)4148 UnicodeString::lastIndexOf(const UChar *srcChars,
4149                int32_t srcLength,
4150                int32_t start,
4151                int32_t _length) const
4152 { return lastIndexOf(srcChars, 0, srcLength, start, _length); }
4153 
4154 inline int32_t
lastIndexOf(const UChar * srcChars,int32_t srcLength,int32_t start)4155 UnicodeString::lastIndexOf(const UChar *srcChars,
4156                int32_t srcLength,
4157                int32_t start) const {
4158   pinIndex(start);
4159   return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
4160 }
4161 
4162 inline int32_t
lastIndexOf(const UnicodeString & srcText,int32_t srcStart,int32_t srcLength,int32_t start,int32_t _length)4163 UnicodeString::lastIndexOf(const UnicodeString& srcText,
4164                int32_t srcStart,
4165                int32_t srcLength,
4166                int32_t start,
4167                int32_t _length) const
4168 {
4169   if(!srcText.isBogus()) {
4170     srcText.pinIndices(srcStart, srcLength);
4171     if(srcLength > 0) {
4172       return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
4173     }
4174   }
4175   return -1;
4176 }
4177 
4178 inline int32_t
lastIndexOf(const UnicodeString & text,int32_t start,int32_t _length)4179 UnicodeString::lastIndexOf(const UnicodeString& text,
4180                int32_t start,
4181                int32_t _length) const
4182 { return lastIndexOf(text, 0, text.length(), start, _length); }
4183 
4184 inline int32_t
lastIndexOf(const UnicodeString & text,int32_t start)4185 UnicodeString::lastIndexOf(const UnicodeString& text,
4186                int32_t start) const {
4187   pinIndex(start);
4188   return lastIndexOf(text, 0, text.length(), start, length() - start);
4189 }
4190 
4191 inline int32_t
lastIndexOf(const UnicodeString & text)4192 UnicodeString::lastIndexOf(const UnicodeString& text) const
4193 { return lastIndexOf(text, 0, text.length(), 0, length()); }
4194 
4195 inline int32_t
lastIndexOf(UChar c,int32_t start,int32_t _length)4196 UnicodeString::lastIndexOf(UChar c,
4197                int32_t start,
4198                int32_t _length) const
4199 { return doLastIndexOf(c, start, _length); }
4200 
4201 inline int32_t
lastIndexOf(UChar32 c,int32_t start,int32_t _length)4202 UnicodeString::lastIndexOf(UChar32 c,
4203                int32_t start,
4204                int32_t _length) const {
4205   return doLastIndexOf(c, start, _length);
4206 }
4207 
4208 inline int32_t
lastIndexOf(UChar c)4209 UnicodeString::lastIndexOf(UChar c) const
4210 { return doLastIndexOf(c, 0, length()); }
4211 
4212 inline int32_t
lastIndexOf(UChar32 c)4213 UnicodeString::lastIndexOf(UChar32 c) const {
4214   return lastIndexOf(c, 0, length());
4215 }
4216 
4217 inline int32_t
lastIndexOf(UChar c,int32_t start)4218 UnicodeString::lastIndexOf(UChar c,
4219                int32_t start) const {
4220   pinIndex(start);
4221   return doLastIndexOf(c, start, length() - start);
4222 }
4223 
4224 inline int32_t
lastIndexOf(UChar32 c,int32_t start)4225 UnicodeString::lastIndexOf(UChar32 c,
4226                int32_t start) const {
4227   pinIndex(start);
4228   return lastIndexOf(c, start, length() - start);
4229 }
4230 
4231 inline UBool
startsWith(const UnicodeString & text)4232 UnicodeString::startsWith(const UnicodeString& text) const
4233 { return compare(0, text.length(), text, 0, text.length()) == 0; }
4234 
4235 inline UBool
startsWith(const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)4236 UnicodeString::startsWith(const UnicodeString& srcText,
4237               int32_t srcStart,
4238               int32_t srcLength) const
4239 { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }
4240 
4241 inline UBool
startsWith(const UChar * srcChars,int32_t srcLength)4242 UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const {
4243   if(srcLength < 0) {
4244     srcLength = u_strlen(srcChars);
4245   }
4246   return doCompare(0, srcLength, srcChars, 0, srcLength) == 0;
4247 }
4248 
4249 inline UBool
startsWith(const UChar * srcChars,int32_t srcStart,int32_t srcLength)4250 UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const {
4251   if(srcLength < 0) {
4252     srcLength = u_strlen(srcChars);
4253   }
4254   return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;
4255 }
4256 
4257 inline UBool
endsWith(const UnicodeString & text)4258 UnicodeString::endsWith(const UnicodeString& text) const
4259 { return doCompare(length() - text.length(), text.length(),
4260            text, 0, text.length()) == 0; }
4261 
4262 inline UBool
endsWith(const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)4263 UnicodeString::endsWith(const UnicodeString& srcText,
4264             int32_t srcStart,
4265             int32_t srcLength) const {
4266   srcText.pinIndices(srcStart, srcLength);
4267   return doCompare(length() - srcLength, srcLength,
4268                    srcText, srcStart, srcLength) == 0;
4269 }
4270 
4271 inline UBool
endsWith(const UChar * srcChars,int32_t srcLength)4272 UnicodeString::endsWith(const UChar *srcChars,
4273             int32_t srcLength) const {
4274   if(srcLength < 0) {
4275     srcLength = u_strlen(srcChars);
4276   }
4277   return doCompare(length() - srcLength, srcLength,
4278                    srcChars, 0, srcLength) == 0;
4279 }
4280 
4281 inline UBool
endsWith(const UChar * srcChars,int32_t srcStart,int32_t srcLength)4282 UnicodeString::endsWith(const UChar *srcChars,
4283             int32_t srcStart,
4284             int32_t srcLength) const {
4285   if(srcLength < 0) {
4286     srcLength = u_strlen(srcChars + srcStart);
4287   }
4288   return doCompare(length() - srcLength, srcLength,
4289                    srcChars, srcStart, srcLength) == 0;
4290 }
4291 
4292 //========================================
4293 // replace
4294 //========================================
4295 inline UnicodeString&
replace(int32_t start,int32_t _length,const UnicodeString & srcText)4296 UnicodeString::replace(int32_t start,
4297                int32_t _length,
4298                const UnicodeString& srcText)
4299 { return doReplace(start, _length, srcText, 0, srcText.length()); }
4300 
4301 inline UnicodeString&
replace(int32_t start,int32_t _length,const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)4302 UnicodeString::replace(int32_t start,
4303                int32_t _length,
4304                const UnicodeString& srcText,
4305                int32_t srcStart,
4306                int32_t srcLength)
4307 { return doReplace(start, _length, srcText, srcStart, srcLength); }
4308 
4309 inline UnicodeString&
replace(int32_t start,int32_t _length,const UChar * srcChars,int32_t srcLength)4310 UnicodeString::replace(int32_t start,
4311                int32_t _length,
4312                const UChar *srcChars,
4313                int32_t srcLength)
4314 { return doReplace(start, _length, srcChars, 0, srcLength); }
4315 
4316 inline UnicodeString&
replace(int32_t start,int32_t _length,const UChar * srcChars,int32_t srcStart,int32_t srcLength)4317 UnicodeString::replace(int32_t start,
4318                int32_t _length,
4319                const UChar *srcChars,
4320                int32_t srcStart,
4321                int32_t srcLength)
4322 { return doReplace(start, _length, srcChars, srcStart, srcLength); }
4323 
4324 inline UnicodeString&
replace(int32_t start,int32_t _length,UChar srcChar)4325 UnicodeString::replace(int32_t start,
4326                int32_t _length,
4327                UChar srcChar)
4328 { return doReplace(start, _length, &srcChar, 0, 1); }
4329 
4330 inline UnicodeString&
replaceBetween(int32_t start,int32_t limit,const UnicodeString & srcText)4331 UnicodeString::replaceBetween(int32_t start,
4332                   int32_t limit,
4333                   const UnicodeString& srcText)
4334 { return doReplace(start, limit - start, srcText, 0, srcText.length()); }
4335 
4336 inline UnicodeString&
replaceBetween(int32_t start,int32_t limit,const UnicodeString & srcText,int32_t srcStart,int32_t srcLimit)4337 UnicodeString::replaceBetween(int32_t start,
4338                   int32_t limit,
4339                   const UnicodeString& srcText,
4340                   int32_t srcStart,
4341                   int32_t srcLimit)
4342 { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
4343 
4344 inline UnicodeString&
findAndReplace(const UnicodeString & oldText,const UnicodeString & newText)4345 UnicodeString::findAndReplace(const UnicodeString& oldText,
4346                   const UnicodeString& newText)
4347 { return findAndReplace(0, length(), oldText, 0, oldText.length(),
4348             newText, 0, newText.length()); }
4349 
4350 inline UnicodeString&
findAndReplace(int32_t start,int32_t _length,const UnicodeString & oldText,const UnicodeString & newText)4351 UnicodeString::findAndReplace(int32_t start,
4352                   int32_t _length,
4353                   const UnicodeString& oldText,
4354                   const UnicodeString& newText)
4355 { return findAndReplace(start, _length, oldText, 0, oldText.length(),
4356             newText, 0, newText.length()); }
4357 
4358 // ============================
4359 // extract
4360 // ============================
4361 inline void
doExtract(int32_t start,int32_t _length,UnicodeString & target)4362 UnicodeString::doExtract(int32_t start,
4363              int32_t _length,
4364              UnicodeString& target) const
4365 { target.replace(0, target.length(), *this, start, _length); }
4366 
4367 inline void
extract(int32_t start,int32_t _length,UChar * target,int32_t targetStart)4368 UnicodeString::extract(int32_t start,
4369                int32_t _length,
4370                UChar *target,
4371                int32_t targetStart) const
4372 { doExtract(start, _length, target, targetStart); }
4373 
4374 inline void
extract(int32_t start,int32_t _length,UnicodeString & target)4375 UnicodeString::extract(int32_t start,
4376                int32_t _length,
4377                UnicodeString& target) const
4378 { doExtract(start, _length, target); }
4379 
4380 #if !UCONFIG_NO_CONVERSION
4381 
// Convenience overload without a destination-capacity argument. It forwards
// to the five-argument extract(), supplying 0xffffffff as the size when dst
// is non-null and 0 when dst is null.
inline int32_t
UnicodeString::extract(int32_t start,
               int32_t _length,
               char *dst,
               const char *codepage) const

{
  // This dstSize value will be checked explicitly
  return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
}
4392 
4393 #endif
4394 
4395 inline void
extractBetween(int32_t start,int32_t limit,UChar * dst,int32_t dstStart)4396 UnicodeString::extractBetween(int32_t start,
4397                   int32_t limit,
4398                   UChar *dst,
4399                   int32_t dstStart) const {
4400   pinIndex(start);
4401   pinIndex(limit);
4402   doExtract(start, limit - start, dst, dstStart);
4403 }
4404 
4405 inline UnicodeString
tempSubStringBetween(int32_t start,int32_t limit)4406 UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
4407     return tempSubString(start, limit - start);
4408 }
4409 
4410 inline UChar
doCharAt(int32_t offset)4411 UnicodeString::doCharAt(int32_t offset) const
4412 {
4413   if((uint32_t)offset < (uint32_t)length()) {
4414     return getArrayStart()[offset];
4415   } else {
4416     return kInvalidUChar;
4417   }
4418 }
4419 
4420 inline UChar
charAt(int32_t offset)4421 UnicodeString::charAt(int32_t offset) const
4422 { return doCharAt(offset); }
4423 
4424 inline UChar
4425 UnicodeString::operator[] (int32_t offset) const
4426 { return doCharAt(offset); }
4427 
4428 inline UBool
isEmpty()4429 UnicodeString::isEmpty() const {
4430   // Arithmetic or logical right shift does not matter: only testing for 0.
4431   return (fUnion.fFields.fLengthAndFlags>>kLengthShift) == 0;
4432 }
4433 
4434 //========================================
4435 // Write implementation methods
4436 //========================================
// Keeps only the bits of fLengthAndFlags selected by kAllStorageFlags; every
// other bit is cleared.
// NOTE(review): presumably the cleared bits are the length bits stored at
// kLengthShift and above -- confirm against the enum definitions.
inline void
UnicodeString::setZeroLength() {
  fUnion.fFields.fLengthAndFlags &= kAllStorageFlags;
}
4441 
// Stores len directly inside fLengthAndFlags. The new value is the current
// bits selected by kAllStorageFlags combined with len shifted left by
// kLengthShift; the result is narrowed back with the (int16_t) cast.
// The range precondition below is not checked here.
inline void
UnicodeString::setShortLength(int32_t len) {
  // requires 0 <= len <= kMaxShortLength
  fUnion.fFields.fLengthAndFlags =
    (int16_t)((fUnion.fFields.fLengthAndFlags & kAllStorageFlags) | (len << kLengthShift));
}
4448 
4449 inline void
setLength(int32_t len)4450 UnicodeString::setLength(int32_t len) {
4451   if(len <= kMaxShortLength) {
4452     setShortLength(len);
4453   } else {
4454     fUnion.fFields.fLengthAndFlags |= kLengthIsLarge;
4455     fUnion.fFields.fLength = len;
4456   }
4457 }
4458 
// Overwrites fLengthAndFlags with kShortString alone, discarding whatever
// length and flag bits were stored before. No other field is touched.
inline void
UnicodeString::setToEmpty() {
  fUnion.fFields.fLengthAndFlags = kShortString;
}
4463 
// Points this string at a buffer: records len via setLength(), then stores
// the buffer pointer and its capacity in fUnion.fFields. Only field
// assignments happen here; nothing is allocated, copied or released.
inline void
UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) {
  setLength(len);
  fUnion.fFields.fArray = array;
  fUnion.fFields.fCapacity = capacity;
}
4470 
4471 inline UnicodeString&
4472 UnicodeString::operator= (UChar ch)
4473 { return doReplace(0, length(), &ch, 0, 1); }
4474 
4475 inline UnicodeString&
4476 UnicodeString::operator= (UChar32 ch)
4477 { return replace(0, length(), ch); }
4478 
4479 inline UnicodeString&
setTo(const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)4480 UnicodeString::setTo(const UnicodeString& srcText,
4481              int32_t srcStart,
4482              int32_t srcLength)
4483 {
4484   unBogus();
4485   return doReplace(0, length(), srcText, srcStart, srcLength);
4486 }
4487 
4488 inline UnicodeString&
setTo(const UnicodeString & srcText,int32_t srcStart)4489 UnicodeString::setTo(const UnicodeString& srcText,
4490              int32_t srcStart)
4491 {
4492   unBogus();
4493   srcText.pinIndex(srcStart);
4494   return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
4495 }
4496 
4497 inline UnicodeString&
setTo(const UnicodeString & srcText)4498 UnicodeString::setTo(const UnicodeString& srcText)
4499 {
4500   return copyFrom(srcText);
4501 }
4502 
4503 inline UnicodeString&
setTo(const UChar * srcChars,int32_t srcLength)4504 UnicodeString::setTo(const UChar *srcChars,
4505              int32_t srcLength)
4506 {
4507   unBogus();
4508   return doReplace(0, length(), srcChars, 0, srcLength);
4509 }
4510 
4511 inline UnicodeString&
setTo(UChar srcChar)4512 UnicodeString::setTo(UChar srcChar)
4513 {
4514   unBogus();
4515   return doReplace(0, length(), &srcChar, 0, 1);
4516 }
4517 
4518 inline UnicodeString&
setTo(UChar32 srcChar)4519 UnicodeString::setTo(UChar32 srcChar)
4520 {
4521   unBogus();
4522   return replace(0, length(), srcChar);
4523 }
4524 
4525 inline UnicodeString&
append(const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)4526 UnicodeString::append(const UnicodeString& srcText,
4527               int32_t srcStart,
4528               int32_t srcLength)
4529 { return doAppend(srcText, srcStart, srcLength); }
4530 
4531 inline UnicodeString&
append(const UnicodeString & srcText)4532 UnicodeString::append(const UnicodeString& srcText)
4533 { return doAppend(srcText, 0, srcText.length()); }
4534 
4535 inline UnicodeString&
append(const UChar * srcChars,int32_t srcStart,int32_t srcLength)4536 UnicodeString::append(const UChar *srcChars,
4537               int32_t srcStart,
4538               int32_t srcLength)
4539 { return doAppend(srcChars, srcStart, srcLength); }
4540 
4541 inline UnicodeString&
append(const UChar * srcChars,int32_t srcLength)4542 UnicodeString::append(const UChar *srcChars,
4543               int32_t srcLength)
4544 { return doAppend(srcChars, 0, srcLength); }
4545 
4546 inline UnicodeString&
append(UChar srcChar)4547 UnicodeString::append(UChar srcChar)
4548 { return doAppend(&srcChar, 0, 1); }
4549 
4550 inline UnicodeString&
4551 UnicodeString::operator+= (UChar ch)
4552 { return doAppend(&ch, 0, 1); }
4553 
4554 inline UnicodeString&
4555 UnicodeString::operator+= (UChar32 ch) {
4556   return append(ch);
4557 }
4558 
4559 inline UnicodeString&
4560 UnicodeString::operator+= (const UnicodeString& srcText)
4561 { return doAppend(srcText, 0, srcText.length()); }
4562 
4563 inline UnicodeString&
insert(int32_t start,const UnicodeString & srcText,int32_t srcStart,int32_t srcLength)4564 UnicodeString::insert(int32_t start,
4565               const UnicodeString& srcText,
4566               int32_t srcStart,
4567               int32_t srcLength)
4568 { return doReplace(start, 0, srcText, srcStart, srcLength); }
4569 
4570 inline UnicodeString&
insert(int32_t start,const UnicodeString & srcText)4571 UnicodeString::insert(int32_t start,
4572               const UnicodeString& srcText)
4573 { return doReplace(start, 0, srcText, 0, srcText.length()); }
4574 
4575 inline UnicodeString&
insert(int32_t start,const UChar * srcChars,int32_t srcStart,int32_t srcLength)4576 UnicodeString::insert(int32_t start,
4577               const UChar *srcChars,
4578               int32_t srcStart,
4579               int32_t srcLength)
4580 { return doReplace(start, 0, srcChars, srcStart, srcLength); }
4581 
4582 inline UnicodeString&
insert(int32_t start,const UChar * srcChars,int32_t srcLength)4583 UnicodeString::insert(int32_t start,
4584               const UChar *srcChars,
4585               int32_t srcLength)
4586 { return doReplace(start, 0, srcChars, 0, srcLength); }
4587 
4588 inline UnicodeString&
insert(int32_t start,UChar srcChar)4589 UnicodeString::insert(int32_t start,
4590               UChar srcChar)
4591 { return doReplace(start, 0, &srcChar, 0, 1); }
4592 
4593 inline UnicodeString&
insert(int32_t start,UChar32 srcChar)4594 UnicodeString::insert(int32_t start,
4595               UChar32 srcChar)
4596 { return replace(start, 0, srcChar); }
4597 
4598 
4599 inline UnicodeString&
remove()4600 UnicodeString::remove()
4601 {
4602   // remove() of a bogus string makes the string empty and non-bogus
4603   if(isBogus()) {
4604     setToEmpty();
4605   } else {
4606     setZeroLength();
4607   }
4608   return *this;
4609 }
4610 
4611 inline UnicodeString&
remove(int32_t start,int32_t _length)4612 UnicodeString::remove(int32_t start,
4613              int32_t _length)
4614 {
4615     if(start <= 0 && _length == INT32_MAX) {
4616         // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
4617         return remove();
4618     }
4619     return doReplace(start, _length, NULL, 0, 0);
4620 }
4621 
4622 inline UnicodeString&
removeBetween(int32_t start,int32_t limit)4623 UnicodeString::removeBetween(int32_t start,
4624                 int32_t limit)
4625 { return doReplace(start, limit - start, NULL, 0, 0); }
4626 
4627 inline UnicodeString &
retainBetween(int32_t start,int32_t limit)4628 UnicodeString::retainBetween(int32_t start, int32_t limit) {
4629   truncate(limit);
4630   return doReplace(0, start, NULL, 0, 0);
4631 }
4632 
4633 inline UBool
truncate(int32_t targetLength)4634 UnicodeString::truncate(int32_t targetLength)
4635 {
4636   if(isBogus() && targetLength == 0) {
4637     // truncate(0) of a bogus string makes the string empty and non-bogus
4638     unBogus();
4639     return FALSE;
4640   } else if((uint32_t)targetLength < (uint32_t)length()) {
4641     setLength(targetLength);
4642     return TRUE;
4643   } else {
4644     return FALSE;
4645   }
4646 }
4647 
4648 inline UnicodeString&
reverse()4649 UnicodeString::reverse()
4650 { return doReverse(0, length()); }
4651 
4652 inline UnicodeString&
reverse(int32_t start,int32_t _length)4653 UnicodeString::reverse(int32_t start,
4654                int32_t _length)
4655 { return doReverse(start, _length); }
4656 
4657 U_NAMESPACE_END
4658 
4659 #endif
4660