1 /*
2 *******************************************************************************
3 * Copyright (C) 2013-2015, International Business Machines
4 * Corporation and others.  All Rights Reserved.
5 *******************************************************************************
6 * collationsettings.cpp
7 *
8 * created on: 2013feb07
9 * created by: Markus W. Scherer
10 */
11 
12 #include "unicode/utypes.h"
13 
14 #if !UCONFIG_NO_COLLATION
15 
16 #include "unicode/ucol.h"
17 #include "cmemory.h"
18 #include "collation.h"
19 #include "collationdata.h"
20 #include "collationsettings.h"
21 #include "sharedobject.h"
22 #include "uassert.h"
23 #include "umutex.h"
24 #include "uvectr32.h"
25 
26 U_NAMESPACE_BEGIN
27 
/**
 * Copy constructor.
 * The scalar settings are copied in the initializer list and the reordering
 * fields start out empty; copyReorderingFrom() then aliases or duplicates
 * the reordering data of other.
 * The UErrorCode used for that call is local, so a failure there is not
 * reported to the caller.
 */
CollationSettings::CollationSettings(const CollationSettings &other)
        : SharedObject(other),
          options(other.options), variableTop(other.variableTop),
          reorderTable(NULL),
          minHighNoReorder(other.minHighNoReorder),
          reorderRanges(NULL), reorderRangesLength(0),
          reorderCodes(NULL), reorderCodesLength(0), reorderCodesCapacity(0),
          fastLatinOptions(other.fastLatinOptions) {
    UErrorCode status = U_ZERO_ERROR;
    copyReorderingFrom(other, status);
    // The fast Latin primaries are copied only for non-negative fastLatinOptions.
    if(fastLatinOptions < 0) { return; }
    uprv_memcpy(fastLatinPrimaries, other.fastLatinPrimaries, sizeof(fastLatinPrimaries));
}
42 
~CollationSettings()43 CollationSettings::~CollationSettings() {
44     if(reorderCodesCapacity != 0) {
45         uprv_free(const_cast<int32_t *>(reorderCodes));
46     }
47 }
48 
49 UBool
operator ==(const CollationSettings & other) const50 CollationSettings::operator==(const CollationSettings &other) const {
51     if(options != other.options) { return FALSE; }
52     if((options & ALTERNATE_MASK) != 0 && variableTop != other.variableTop) { return FALSE; }
53     if(reorderCodesLength != other.reorderCodesLength) { return FALSE; }
54     for(int32_t i = 0; i < reorderCodesLength; ++i) {
55         if(reorderCodes[i] != other.reorderCodes[i]) { return FALSE; }
56     }
57     return TRUE;
58 }
59 
60 int32_t
hashCode() const61 CollationSettings::hashCode() const {
62     int32_t h = options << 8;
63     if((options & ALTERNATE_MASK) != 0) { h ^= variableTop; }
64     h ^= reorderCodesLength;
65     for(int32_t i = 0; i < reorderCodesLength; ++i) {
66         h ^= (reorderCodes[i] << i);
67     }
68     return h;
69 }
70 
71 void
resetReordering()72 CollationSettings::resetReordering() {
73     // When we turn off reordering, we want to set a NULL permutation
74     // rather than a no-op permutation.
75     // Keep the memory via reorderCodes and its capacity.
76     reorderTable = NULL;
77     minHighNoReorder = 0;
78     reorderRangesLength = 0;
79     reorderCodesLength = 0;
80 }
81 
82 void
aliasReordering(const CollationData & data,const int32_t * codes,int32_t length,const uint32_t * ranges,int32_t rangesLength,const uint8_t * table,UErrorCode & errorCode)83 CollationSettings::aliasReordering(const CollationData &data, const int32_t *codes, int32_t length,
84                                    const uint32_t *ranges, int32_t rangesLength,
85                                    const uint8_t *table, UErrorCode &errorCode) {
86     if(U_FAILURE(errorCode)) { return; }
87     if(table != NULL &&
88             (rangesLength == 0 ?
89                     !reorderTableHasSplitBytes(table) :
90                     rangesLength >= 2 &&
91                     // The first offset must be 0. The last offset must not be 0.
92                     (ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0)) {
93         // We need to release the memory before setting the alias pointer.
94         if(reorderCodesCapacity != 0) {
95             uprv_free(const_cast<int32_t *>(reorderCodes));
96             reorderCodesCapacity = 0;
97         }
98         reorderTable = table;
99         reorderCodes = codes;
100         reorderCodesLength = length;
101         // Drop ranges before the first split byte. They are reordered by the table.
102         // This then speeds up reordering of the remaining ranges.
103         int32_t firstSplitByteRangeIndex = 0;
104         while(firstSplitByteRangeIndex < rangesLength &&
105                 (ranges[firstSplitByteRangeIndex] & 0xff0000) == 0) {
106             // The second byte of the primary limit is 0.
107             ++firstSplitByteRangeIndex;
108         }
109         if(firstSplitByteRangeIndex == rangesLength) {
110             U_ASSERT(!reorderTableHasSplitBytes(table));
111             minHighNoReorder = 0;
112             reorderRanges = NULL;
113             reorderRangesLength = 0;
114         } else {
115             U_ASSERT(table[ranges[firstSplitByteRangeIndex] >> 24] == 0);
116             minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000;
117             reorderRanges = ranges + firstSplitByteRangeIndex;
118             reorderRangesLength = rangesLength - firstSplitByteRangeIndex;
119         }
120         return;
121     }
122     // Regenerate missing data.
123     setReordering(data, codes, length, errorCode);
124 }
125 
126 void
setReordering(const CollationData & data,const int32_t * codes,int32_t codesLength,UErrorCode & errorCode)127 CollationSettings::setReordering(const CollationData &data,
128                                  const int32_t *codes, int32_t codesLength,
129                                  UErrorCode &errorCode) {
130     if(U_FAILURE(errorCode)) { return; }
131     if(codesLength == 0 || (codesLength == 1 && codes[0] == UCOL_REORDER_CODE_NONE)) {
132         resetReordering();
133         return;
134     }
135     UVector32 rangesList(errorCode);
136     data.makeReorderRanges(codes, codesLength, rangesList, errorCode);
137     if(U_FAILURE(errorCode)) { return; }
138     int32_t rangesLength = rangesList.size();
139     if(rangesLength == 0) {
140         resetReordering();
141         return;
142     }
143     const uint32_t *ranges = reinterpret_cast<uint32_t *>(rangesList.getBuffer());
144     // ranges[] contains at least two (limit, offset) pairs.
145     // The first offset must be 0. The last offset must not be 0.
146     // Separators (at the low end) and trailing weights (at the high end)
147     // are never reordered.
148     U_ASSERT(rangesLength >= 2);
149     U_ASSERT((ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0);
150     minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000;
151 
152     // Write the lead byte permutation table.
153     // Set a 0 for each lead byte that has a range boundary in the middle.
154     uint8_t table[256];
155     int32_t b = 0;
156     int32_t firstSplitByteRangeIndex = -1;
157     for(int32_t i = 0; i < rangesLength; ++i) {
158         uint32_t pair = ranges[i];
159         int32_t limit1 = (int32_t)(pair >> 24);
160         while(b < limit1) {
161             table[b] = (uint8_t)(b + pair);
162             ++b;
163         }
164         // Check the second byte of the limit.
165         if((pair & 0xff0000) != 0) {
166             table[limit1] = 0;
167             b = limit1 + 1;
168             if(firstSplitByteRangeIndex < 0) {
169                 firstSplitByteRangeIndex = i;
170             }
171         }
172     }
173     while(b <= 0xff) {
174         table[b] = (uint8_t)b;
175         ++b;
176     }
177     if(firstSplitByteRangeIndex < 0) {
178         // The lead byte permutation table alone suffices for reordering.
179         rangesLength = 0;
180     } else {
181         // Remove the ranges below the first split byte.
182         ranges += firstSplitByteRangeIndex;
183         rangesLength -= firstSplitByteRangeIndex;
184     }
185     setReorderArrays(codes, codesLength, ranges, rangesLength, table, errorCode);
186 }
187 
188 void
setReorderArrays(const int32_t * codes,int32_t codesLength,const uint32_t * ranges,int32_t rangesLength,const uint8_t * table,UErrorCode & errorCode)189 CollationSettings::setReorderArrays(const int32_t *codes, int32_t codesLength,
190                                     const uint32_t *ranges, int32_t rangesLength,
191                                     const uint8_t *table, UErrorCode &errorCode) {
192     if(U_FAILURE(errorCode)) { return; }
193     int32_t *ownedCodes;
194     int32_t totalLength = codesLength + rangesLength;
195     U_ASSERT(totalLength > 0);
196     if(totalLength <= reorderCodesCapacity) {
197         ownedCodes = const_cast<int32_t *>(reorderCodes);
198     } else {
199         // Allocate one memory block for the codes, the ranges, and the 16-aligned table.
200         int32_t capacity = (totalLength + 3) & ~3;  // round up to a multiple of 4 ints
201         ownedCodes = (int32_t *)uprv_malloc(capacity * 4 + 256);
202         if(ownedCodes == NULL) {
203             resetReordering();
204             errorCode = U_MEMORY_ALLOCATION_ERROR;
205             return;
206         }
207         if(reorderCodesCapacity != 0) {
208             uprv_free(const_cast<int32_t *>(reorderCodes));
209         }
210         reorderCodes = ownedCodes;
211         reorderCodesCapacity = capacity;
212     }
213     uprv_memcpy(ownedCodes + reorderCodesCapacity, table, 256);
214     uprv_memcpy(ownedCodes, codes, codesLength * 4);
215     uprv_memcpy(ownedCodes + codesLength, ranges, rangesLength * 4);
216     reorderTable = reinterpret_cast<const uint8_t *>(reorderCodes + reorderCodesCapacity);
217     reorderCodesLength = codesLength;
218     reorderRanges = reinterpret_cast<uint32_t *>(ownedCodes) + codesLength;
219     reorderRangesLength = rangesLength;
220 }
221 
222 void
copyReorderingFrom(const CollationSettings & other,UErrorCode & errorCode)223 CollationSettings::copyReorderingFrom(const CollationSettings &other, UErrorCode &errorCode) {
224     if(U_FAILURE(errorCode)) { return; }
225     if(!other.hasReordering()) {
226         resetReordering();
227         return;
228     }
229     minHighNoReorder = other.minHighNoReorder;
230     if(other.reorderCodesCapacity == 0) {
231         // The reorder arrays are aliased to memory-mapped data.
232         reorderTable = other.reorderTable;
233         reorderRanges = other.reorderRanges;
234         reorderRangesLength = other.reorderRangesLength;
235         reorderCodes = other.reorderCodes;
236         reorderCodesLength = other.reorderCodesLength;
237     } else {
238         setReorderArrays(other.reorderCodes, other.reorderCodesLength,
239                          other.reorderRanges, other.reorderRangesLength,
240                          other.reorderTable, errorCode);
241     }
242 }
243 
244 UBool
reorderTableHasSplitBytes(const uint8_t table[256])245 CollationSettings::reorderTableHasSplitBytes(const uint8_t table[256]) {
246     U_ASSERT(table[0] == 0);
247     for(int32_t i = 1; i < 256; ++i) {
248         if(table[i] == 0) {
249             return TRUE;
250         }
251     }
252     return FALSE;
253 }
254 
255 uint32_t
reorderEx(uint32_t p) const256 CollationSettings::reorderEx(uint32_t p) const {
257     if(p >= minHighNoReorder) { return p; }
258     // Round up p so that its lower 16 bits are >= any offset bits.
259     // Then compare q directly with (limit, offset) pairs.
260     uint32_t q = p | 0xffff;
261     uint32_t r;
262     const uint32_t *ranges = reorderRanges;
263     while(q >= (r = *ranges)) { ++ranges; }
264     return p + (r << 24);
265 }
266 
267 void
setStrength(int32_t value,int32_t defaultOptions,UErrorCode & errorCode)268 CollationSettings::setStrength(int32_t value, int32_t defaultOptions, UErrorCode &errorCode) {
269     if(U_FAILURE(errorCode)) { return; }
270     int32_t noStrength = options & ~STRENGTH_MASK;
271     switch(value) {
272     case UCOL_PRIMARY:
273     case UCOL_SECONDARY:
274     case UCOL_TERTIARY:
275     case UCOL_QUATERNARY:
276     case UCOL_IDENTICAL:
277         options = noStrength | (value << STRENGTH_SHIFT);
278         break;
279     case UCOL_DEFAULT:
280         options = noStrength | (defaultOptions & STRENGTH_MASK);
281         break;
282     default:
283         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
284         break;
285     }
286 }
287 
288 void
setFlag(int32_t bit,UColAttributeValue value,int32_t defaultOptions,UErrorCode & errorCode)289 CollationSettings::setFlag(int32_t bit, UColAttributeValue value,
290                            int32_t defaultOptions, UErrorCode &errorCode) {
291     if(U_FAILURE(errorCode)) { return; }
292     switch(value) {
293     case UCOL_ON:
294         options |= bit;
295         break;
296     case UCOL_OFF:
297         options &= ~bit;
298         break;
299     case UCOL_DEFAULT:
300         options = (options & ~bit) | (defaultOptions & bit);
301         break;
302     default:
303         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
304         break;
305     }
306 }
307 
308 void
setCaseFirst(UColAttributeValue value,int32_t defaultOptions,UErrorCode & errorCode)309 CollationSettings::setCaseFirst(UColAttributeValue value,
310                                 int32_t defaultOptions, UErrorCode &errorCode) {
311     if(U_FAILURE(errorCode)) { return; }
312     int32_t noCaseFirst = options & ~CASE_FIRST_AND_UPPER_MASK;
313     switch(value) {
314     case UCOL_OFF:
315         options = noCaseFirst;
316         break;
317     case UCOL_LOWER_FIRST:
318         options = noCaseFirst | CASE_FIRST;
319         break;
320     case UCOL_UPPER_FIRST:
321         options = noCaseFirst | CASE_FIRST_AND_UPPER_MASK;
322         break;
323     case UCOL_DEFAULT:
324         options = noCaseFirst | (defaultOptions & CASE_FIRST_AND_UPPER_MASK);
325         break;
326     default:
327         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
328         break;
329     }
330 }
331 
332 void
setAlternateHandling(UColAttributeValue value,int32_t defaultOptions,UErrorCode & errorCode)333 CollationSettings::setAlternateHandling(UColAttributeValue value,
334                                         int32_t defaultOptions, UErrorCode &errorCode) {
335     if(U_FAILURE(errorCode)) { return; }
336     int32_t noAlternate = options & ~ALTERNATE_MASK;
337     switch(value) {
338     case UCOL_NON_IGNORABLE:
339         options = noAlternate;
340         break;
341     case UCOL_SHIFTED:
342         options = noAlternate | SHIFTED;
343         break;
344     case UCOL_DEFAULT:
345         options = noAlternate | (defaultOptions & ALTERNATE_MASK);
346         break;
347     default:
348         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
349         break;
350     }
351 }
352 
353 void
setMaxVariable(int32_t value,int32_t defaultOptions,UErrorCode & errorCode)354 CollationSettings::setMaxVariable(int32_t value, int32_t defaultOptions, UErrorCode &errorCode) {
355     if(U_FAILURE(errorCode)) { return; }
356     int32_t noMax = options & ~MAX_VARIABLE_MASK;
357     switch(value) {
358     case MAX_VAR_SPACE:
359     case MAX_VAR_PUNCT:
360     case MAX_VAR_SYMBOL:
361     case MAX_VAR_CURRENCY:
362         options = noMax | (value << MAX_VARIABLE_SHIFT);
363         break;
364     case UCOL_DEFAULT:
365         options = noMax | (defaultOptions & MAX_VARIABLE_MASK);
366         break;
367     default:
368         errorCode = U_ILLEGAL_ARGUMENT_ERROR;
369         break;
370     }
371 }
372 
373 U_NAMESPACE_END
374 
375 #endif  // !UCONFIG_NO_COLLATION
376