1 /*
2 *******************************************************************************
3 *
4 *   Copyright (C) 1999-2014, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 *******************************************************************************
8 *   file name:  unistr_case.cpp
9 *   encoding:   US-ASCII
10 *   tab size:   8 (not used)
11 *   indentation:2
12 *
13 *   created on: 2004aug19
14 *   created by: Markus W. Scherer
15 *
16 *   Case-mapping functions moved here from unistr.cpp
17 */
18 
19 #include "unicode/utypes.h"
20 #include "unicode/putil.h"
21 #include "cstring.h"
22 #include "cmemory.h"
23 #include "unicode/ustring.h"
24 #include "unicode/unistr.h"
25 #include "unicode/uchar.h"
26 #include "uelement.h"
27 #include "ustr_imp.h"
28 
29 U_NAMESPACE_BEGIN
30 
31 //========================================
32 // Read-only implementation
33 //========================================
34 
35 int8_t
doCaseCompare(int32_t start,int32_t length,const UChar * srcChars,int32_t srcStart,int32_t srcLength,uint32_t options) const36 UnicodeString::doCaseCompare(int32_t start,
37                              int32_t length,
38                              const UChar *srcChars,
39                              int32_t srcStart,
40                              int32_t srcLength,
41                              uint32_t options) const
42 {
43   // compare illegal string values
44   // treat const UChar *srcChars==NULL as an empty string
45   if(isBogus()) {
46     return -1;
47   }
48 
49   // pin indices to legal values
50   pinIndices(start, length);
51 
52   if(srcChars == NULL) {
53     srcStart = srcLength = 0;
54   }
55 
56   // get the correct pointer
57   const UChar *chars = getArrayStart();
58 
59   chars += start;
60   if(srcStart!=0) {
61     srcChars += srcStart;
62   }
63 
64   if(chars != srcChars) {
65     UErrorCode errorCode=U_ZERO_ERROR;
66     int32_t result=u_strcmpFold(chars, length, srcChars, srcLength,
67                                 options|U_COMPARE_IGNORE_CASE, &errorCode);
68     if(result!=0) {
69       return (int8_t)(result >> 24 | 1);
70     }
71   } else {
72     // get the srcLength if necessary
73     if(srcLength < 0) {
74       srcLength = u_strlen(srcChars + srcStart);
75     }
76     if(length != srcLength) {
77       return (int8_t)((length - srcLength) >> 24 | 1);
78     }
79   }
80   return 0;
81 }
82 
83 //========================================
84 // Write implementation
85 //========================================
86 
87 UnicodeString &
caseMap(const UCaseMap * csm,UStringCaseMapper * stringCaseMapper)88 UnicodeString::caseMap(const UCaseMap *csm,
89                        UStringCaseMapper *stringCaseMapper) {
90   if(isEmpty() || !isWritable()) {
91     // nothing to do
92     return *this;
93   }
94 
95   // We need to allocate a new buffer for the internal string case mapping function.
96   // This is very similar to how doReplace() keeps the old array pointer
97   // and deletes the old array itself after it is done.
98   // In addition, we are forcing cloneArrayIfNeeded() to always allocate a new array.
99   UChar oldStackBuffer[US_STACKBUF_SIZE];
100   UChar *oldArray;
101   int32_t oldLength;
102 
103   if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) {
104     // copy the stack buffer contents because it will be overwritten
105     oldArray = oldStackBuffer;
106     oldLength = getShortLength();
107     u_memcpy(oldStackBuffer, fUnion.fStackFields.fBuffer, oldLength);
108   } else {
109     oldArray = getArrayStart();
110     oldLength = length();
111   }
112 
113   int32_t capacity;
114   if(oldLength <= US_STACKBUF_SIZE) {
115     capacity = US_STACKBUF_SIZE;
116   } else {
117     capacity = oldLength + 20;
118   }
119   int32_t *bufferToDelete = 0;
120   if(!cloneArrayIfNeeded(capacity, capacity, FALSE, &bufferToDelete, TRUE)) {
121     return *this;
122   }
123 
124   // Case-map, and if the result is too long, then reallocate and repeat.
125   UErrorCode errorCode;
126   int32_t newLength;
127   do {
128     errorCode = U_ZERO_ERROR;
129     newLength = stringCaseMapper(csm, getArrayStart(), getCapacity(),
130                                  oldArray, oldLength, &errorCode);
131     setLength(newLength);
132   } while(errorCode==U_BUFFER_OVERFLOW_ERROR && cloneArrayIfNeeded(newLength, newLength, FALSE));
133 
134   if (bufferToDelete) {
135     uprv_free(bufferToDelete);
136   }
137   if(U_FAILURE(errorCode)) {
138     setToBogus();
139   }
140   return *this;
141 }
142 
143 UnicodeString &
foldCase(uint32_t options)144 UnicodeString::foldCase(uint32_t options) {
145   UCaseMap csm=UCASEMAP_INITIALIZER;
146   csm.csp=ucase_getSingleton();
147   csm.options=options;
148   return caseMap(&csm, ustrcase_internalFold);
149 }
150 
151 U_NAMESPACE_END
152 
153 // Defined here to reduce dependencies on break iterator
154 U_CAPI int32_t U_EXPORT2
uhash_hashCaselessUnicodeString(const UElement key)155 uhash_hashCaselessUnicodeString(const UElement key) {
156     U_NAMESPACE_USE
157     const UnicodeString *str = (const UnicodeString*) key.pointer;
158     if (str == NULL) {
159         return 0;
160     }
161     // Inefficient; a better way would be to have a hash function in
162     // UnicodeString that does case folding on the fly.
163     UnicodeString copy(*str);
164     return copy.foldCase().hashCode();
165 }
166 
167 // Defined here to reduce dependencies on break iterator
168 U_CAPI UBool U_EXPORT2
uhash_compareCaselessUnicodeString(const UElement key1,const UElement key2)169 uhash_compareCaselessUnicodeString(const UElement key1, const UElement key2) {
170     U_NAMESPACE_USE
171     const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
172     const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
173     if (str1 == str2) {
174         return TRUE;
175     }
176     if (str1 == NULL || str2 == NULL) {
177         return FALSE;
178     }
179     return str1->caseCompare(*str2, U_FOLD_CASE_DEFAULT) == 0;
180 }
181