1 /*
2 *******************************************************************************
3 * Copyright (C) 2013-2015, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * collationsettings.cpp
7 *
8 * created on: 2013feb07
9 * created by: Markus W. Scherer
10 */
11
12 #include "unicode/utypes.h"
13
14 #if !UCONFIG_NO_COLLATION
15
16 #include "unicode/ucol.h"
17 #include "cmemory.h"
18 #include "collation.h"
19 #include "collationdata.h"
20 #include "collationsettings.h"
21 #include "sharedobject.h"
22 #include "uassert.h"
23 #include "umutex.h"
24 #include "uvectr32.h"
25
26 U_NAMESPACE_BEGIN
27
/**
 * Copy constructor.
 * Scalar settings are copied in the initializer list; the reordering arrays
 * start out empty and are then filled in via copyReorderingFrom().
 */
CollationSettings::CollationSettings(const CollationSettings &other)
        : SharedObject(other),
          options(other.options), variableTop(other.variableTop),
          reorderTable(NULL),
          minHighNoReorder(other.minHighNoReorder),
          reorderRanges(NULL), reorderRangesLength(0),
          reorderCodes(NULL), reorderCodesLength(0), reorderCodesCapacity(0),
          fastLatinOptions(other.fastLatinOptions) {
    // A constructor has no way to hand an error back to the caller:
    // the status of the reordering copy is collected locally and not inspected.
    UErrorCode status = U_ZERO_ERROR;
    copyReorderingFrom(other, status);
    // The fast-Latin primaries are copied only for non-negative fastLatinOptions.
    if(fastLatinOptions < 0) { return; }
    uprv_memcpy(fastLatinPrimaries, other.fastLatinPrimaries, sizeof(fastLatinPrimaries));
}
42
~CollationSettings()43 CollationSettings::~CollationSettings() {
44 if(reorderCodesCapacity != 0) {
45 uprv_free(const_cast<int32_t *>(reorderCodes));
46 }
47 }
48
49 UBool
operator ==(const CollationSettings & other) const50 CollationSettings::operator==(const CollationSettings &other) const {
51 if(options != other.options) { return FALSE; }
52 if((options & ALTERNATE_MASK) != 0 && variableTop != other.variableTop) { return FALSE; }
53 if(reorderCodesLength != other.reorderCodesLength) { return FALSE; }
54 for(int32_t i = 0; i < reorderCodesLength; ++i) {
55 if(reorderCodes[i] != other.reorderCodes[i]) { return FALSE; }
56 }
57 return TRUE;
58 }
59
60 int32_t
hashCode() const61 CollationSettings::hashCode() const {
62 int32_t h = options << 8;
63 if((options & ALTERNATE_MASK) != 0) { h ^= variableTop; }
64 h ^= reorderCodesLength;
65 for(int32_t i = 0; i < reorderCodesLength; ++i) {
66 h ^= (reorderCodes[i] << i);
67 }
68 return h;
69 }
70
71 void
resetReordering()72 CollationSettings::resetReordering() {
73 // When we turn off reordering, we want to set a NULL permutation
74 // rather than a no-op permutation.
75 // Keep the memory via reorderCodes and its capacity.
76 reorderTable = NULL;
77 minHighNoReorder = 0;
78 reorderRangesLength = 0;
79 reorderCodesLength = 0;
80 }
81
82 void
aliasReordering(const CollationData & data,const int32_t * codes,int32_t length,const uint32_t * ranges,int32_t rangesLength,const uint8_t * table,UErrorCode & errorCode)83 CollationSettings::aliasReordering(const CollationData &data, const int32_t *codes, int32_t length,
84 const uint32_t *ranges, int32_t rangesLength,
85 const uint8_t *table, UErrorCode &errorCode) {
86 if(U_FAILURE(errorCode)) { return; }
87 if(table != NULL &&
88 (rangesLength == 0 ?
89 !reorderTableHasSplitBytes(table) :
90 rangesLength >= 2 &&
91 // The first offset must be 0. The last offset must not be 0.
92 (ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0)) {
93 // We need to release the memory before setting the alias pointer.
94 if(reorderCodesCapacity != 0) {
95 uprv_free(const_cast<int32_t *>(reorderCodes));
96 reorderCodesCapacity = 0;
97 }
98 reorderTable = table;
99 reorderCodes = codes;
100 reorderCodesLength = length;
101 // Drop ranges before the first split byte. They are reordered by the table.
102 // This then speeds up reordering of the remaining ranges.
103 int32_t firstSplitByteRangeIndex = 0;
104 while(firstSplitByteRangeIndex < rangesLength &&
105 (ranges[firstSplitByteRangeIndex] & 0xff0000) == 0) {
106 // The second byte of the primary limit is 0.
107 ++firstSplitByteRangeIndex;
108 }
109 if(firstSplitByteRangeIndex == rangesLength) {
110 U_ASSERT(!reorderTableHasSplitBytes(table));
111 minHighNoReorder = 0;
112 reorderRanges = NULL;
113 reorderRangesLength = 0;
114 } else {
115 U_ASSERT(table[ranges[firstSplitByteRangeIndex] >> 24] == 0);
116 minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000;
117 reorderRanges = ranges + firstSplitByteRangeIndex;
118 reorderRangesLength = rangesLength - firstSplitByteRangeIndex;
119 }
120 return;
121 }
122 // Regenerate missing data.
123 setReordering(data, codes, length, errorCode);
124 }
125
126 void
setReordering(const CollationData & data,const int32_t * codes,int32_t codesLength,UErrorCode & errorCode)127 CollationSettings::setReordering(const CollationData &data,
128 const int32_t *codes, int32_t codesLength,
129 UErrorCode &errorCode) {
130 if(U_FAILURE(errorCode)) { return; }
131 if(codesLength == 0 || (codesLength == 1 && codes[0] == UCOL_REORDER_CODE_NONE)) {
132 resetReordering();
133 return;
134 }
135 UVector32 rangesList(errorCode);
136 data.makeReorderRanges(codes, codesLength, rangesList, errorCode);
137 if(U_FAILURE(errorCode)) { return; }
138 int32_t rangesLength = rangesList.size();
139 if(rangesLength == 0) {
140 resetReordering();
141 return;
142 }
143 const uint32_t *ranges = reinterpret_cast<uint32_t *>(rangesList.getBuffer());
144 // ranges[] contains at least two (limit, offset) pairs.
145 // The first offset must be 0. The last offset must not be 0.
146 // Separators (at the low end) and trailing weights (at the high end)
147 // are never reordered.
148 U_ASSERT(rangesLength >= 2);
149 U_ASSERT((ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0);
150 minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000;
151
152 // Write the lead byte permutation table.
153 // Set a 0 for each lead byte that has a range boundary in the middle.
154 uint8_t table[256];
155 int32_t b = 0;
156 int32_t firstSplitByteRangeIndex = -1;
157 for(int32_t i = 0; i < rangesLength; ++i) {
158 uint32_t pair = ranges[i];
159 int32_t limit1 = (int32_t)(pair >> 24);
160 while(b < limit1) {
161 table[b] = (uint8_t)(b + pair);
162 ++b;
163 }
164 // Check the second byte of the limit.
165 if((pair & 0xff0000) != 0) {
166 table[limit1] = 0;
167 b = limit1 + 1;
168 if(firstSplitByteRangeIndex < 0) {
169 firstSplitByteRangeIndex = i;
170 }
171 }
172 }
173 while(b <= 0xff) {
174 table[b] = (uint8_t)b;
175 ++b;
176 }
177 if(firstSplitByteRangeIndex < 0) {
178 // The lead byte permutation table alone suffices for reordering.
179 rangesLength = 0;
180 } else {
181 // Remove the ranges below the first split byte.
182 ranges += firstSplitByteRangeIndex;
183 rangesLength -= firstSplitByteRangeIndex;
184 }
185 setReorderArrays(codes, codesLength, ranges, rangesLength, table, errorCode);
186 }
187
188 void
setReorderArrays(const int32_t * codes,int32_t codesLength,const uint32_t * ranges,int32_t rangesLength,const uint8_t * table,UErrorCode & errorCode)189 CollationSettings::setReorderArrays(const int32_t *codes, int32_t codesLength,
190 const uint32_t *ranges, int32_t rangesLength,
191 const uint8_t *table, UErrorCode &errorCode) {
192 if(U_FAILURE(errorCode)) { return; }
193 int32_t *ownedCodes;
194 int32_t totalLength = codesLength + rangesLength;
195 U_ASSERT(totalLength > 0);
196 if(totalLength <= reorderCodesCapacity) {
197 ownedCodes = const_cast<int32_t *>(reorderCodes);
198 } else {
199 // Allocate one memory block for the codes, the ranges, and the 16-aligned table.
200 int32_t capacity = (totalLength + 3) & ~3; // round up to a multiple of 4 ints
201 ownedCodes = (int32_t *)uprv_malloc(capacity * 4 + 256);
202 if(ownedCodes == NULL) {
203 resetReordering();
204 errorCode = U_MEMORY_ALLOCATION_ERROR;
205 return;
206 }
207 if(reorderCodesCapacity != 0) {
208 uprv_free(const_cast<int32_t *>(reorderCodes));
209 }
210 reorderCodes = ownedCodes;
211 reorderCodesCapacity = capacity;
212 }
213 uprv_memcpy(ownedCodes + reorderCodesCapacity, table, 256);
214 uprv_memcpy(ownedCodes, codes, codesLength * 4);
215 uprv_memcpy(ownedCodes + codesLength, ranges, rangesLength * 4);
216 reorderTable = reinterpret_cast<const uint8_t *>(reorderCodes + reorderCodesCapacity);
217 reorderCodesLength = codesLength;
218 reorderRanges = reinterpret_cast<uint32_t *>(ownedCodes) + codesLength;
219 reorderRangesLength = rangesLength;
220 }
221
222 void
copyReorderingFrom(const CollationSettings & other,UErrorCode & errorCode)223 CollationSettings::copyReorderingFrom(const CollationSettings &other, UErrorCode &errorCode) {
224 if(U_FAILURE(errorCode)) { return; }
225 if(!other.hasReordering()) {
226 resetReordering();
227 return;
228 }
229 minHighNoReorder = other.minHighNoReorder;
230 if(other.reorderCodesCapacity == 0) {
231 // The reorder arrays are aliased to memory-mapped data.
232 reorderTable = other.reorderTable;
233 reorderRanges = other.reorderRanges;
234 reorderRangesLength = other.reorderRangesLength;
235 reorderCodes = other.reorderCodes;
236 reorderCodesLength = other.reorderCodesLength;
237 } else {
238 setReorderArrays(other.reorderCodes, other.reorderCodesLength,
239 other.reorderRanges, other.reorderRangesLength,
240 other.reorderTable, errorCode);
241 }
242 }
243
244 UBool
reorderTableHasSplitBytes(const uint8_t table[256])245 CollationSettings::reorderTableHasSplitBytes(const uint8_t table[256]) {
246 U_ASSERT(table[0] == 0);
247 for(int32_t i = 1; i < 256; ++i) {
248 if(table[i] == 0) {
249 return TRUE;
250 }
251 }
252 return FALSE;
253 }
254
255 uint32_t
reorderEx(uint32_t p) const256 CollationSettings::reorderEx(uint32_t p) const {
257 if(p >= minHighNoReorder) { return p; }
258 // Round up p so that its lower 16 bits are >= any offset bits.
259 // Then compare q directly with (limit, offset) pairs.
260 uint32_t q = p | 0xffff;
261 uint32_t r;
262 const uint32_t *ranges = reorderRanges;
263 while(q >= (r = *ranges)) { ++ranges; }
264 return p + (r << 24);
265 }
266
267 void
setStrength(int32_t value,int32_t defaultOptions,UErrorCode & errorCode)268 CollationSettings::setStrength(int32_t value, int32_t defaultOptions, UErrorCode &errorCode) {
269 if(U_FAILURE(errorCode)) { return; }
270 int32_t noStrength = options & ~STRENGTH_MASK;
271 switch(value) {
272 case UCOL_PRIMARY:
273 case UCOL_SECONDARY:
274 case UCOL_TERTIARY:
275 case UCOL_QUATERNARY:
276 case UCOL_IDENTICAL:
277 options = noStrength | (value << STRENGTH_SHIFT);
278 break;
279 case UCOL_DEFAULT:
280 options = noStrength | (defaultOptions & STRENGTH_MASK);
281 break;
282 default:
283 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
284 break;
285 }
286 }
287
288 void
setFlag(int32_t bit,UColAttributeValue value,int32_t defaultOptions,UErrorCode & errorCode)289 CollationSettings::setFlag(int32_t bit, UColAttributeValue value,
290 int32_t defaultOptions, UErrorCode &errorCode) {
291 if(U_FAILURE(errorCode)) { return; }
292 switch(value) {
293 case UCOL_ON:
294 options |= bit;
295 break;
296 case UCOL_OFF:
297 options &= ~bit;
298 break;
299 case UCOL_DEFAULT:
300 options = (options & ~bit) | (defaultOptions & bit);
301 break;
302 default:
303 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
304 break;
305 }
306 }
307
308 void
setCaseFirst(UColAttributeValue value,int32_t defaultOptions,UErrorCode & errorCode)309 CollationSettings::setCaseFirst(UColAttributeValue value,
310 int32_t defaultOptions, UErrorCode &errorCode) {
311 if(U_FAILURE(errorCode)) { return; }
312 int32_t noCaseFirst = options & ~CASE_FIRST_AND_UPPER_MASK;
313 switch(value) {
314 case UCOL_OFF:
315 options = noCaseFirst;
316 break;
317 case UCOL_LOWER_FIRST:
318 options = noCaseFirst | CASE_FIRST;
319 break;
320 case UCOL_UPPER_FIRST:
321 options = noCaseFirst | CASE_FIRST_AND_UPPER_MASK;
322 break;
323 case UCOL_DEFAULT:
324 options = noCaseFirst | (defaultOptions & CASE_FIRST_AND_UPPER_MASK);
325 break;
326 default:
327 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
328 break;
329 }
330 }
331
332 void
setAlternateHandling(UColAttributeValue value,int32_t defaultOptions,UErrorCode & errorCode)333 CollationSettings::setAlternateHandling(UColAttributeValue value,
334 int32_t defaultOptions, UErrorCode &errorCode) {
335 if(U_FAILURE(errorCode)) { return; }
336 int32_t noAlternate = options & ~ALTERNATE_MASK;
337 switch(value) {
338 case UCOL_NON_IGNORABLE:
339 options = noAlternate;
340 break;
341 case UCOL_SHIFTED:
342 options = noAlternate | SHIFTED;
343 break;
344 case UCOL_DEFAULT:
345 options = noAlternate | (defaultOptions & ALTERNATE_MASK);
346 break;
347 default:
348 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
349 break;
350 }
351 }
352
353 void
setMaxVariable(int32_t value,int32_t defaultOptions,UErrorCode & errorCode)354 CollationSettings::setMaxVariable(int32_t value, int32_t defaultOptions, UErrorCode &errorCode) {
355 if(U_FAILURE(errorCode)) { return; }
356 int32_t noMax = options & ~MAX_VARIABLE_MASK;
357 switch(value) {
358 case MAX_VAR_SPACE:
359 case MAX_VAR_PUNCT:
360 case MAX_VAR_SYMBOL:
361 case MAX_VAR_CURRENCY:
362 options = noMax | (value << MAX_VARIABLE_SHIFT);
363 break;
364 case UCOL_DEFAULT:
365 options = noMax | (defaultOptions & MAX_VARIABLE_MASK);
366 break;
367 default:
368 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
369 break;
370 }
371 }
372
373 U_NAMESPACE_END
374
375 #endif // !UCONFIG_NO_COLLATION
376