1 /*
2 *******************************************************************************
3 * Copyright (C) 2013-2014, International Business Machines
4 * Corporation and others.  All Rights Reserved.
5 *******************************************************************************
6 * collationrootelements.cpp
7 *
8 * created on: 2013mar05
9 * created by: Markus W. Scherer
10 */
11 
12 #include "unicode/utypes.h"
13 
14 #if !UCONFIG_NO_COLLATION
15 
16 #include "collation.h"
17 #include "collationrootelements.h"
18 #include "uassert.h"
19 
20 U_NAMESPACE_BEGIN
21 
22 int64_t
lastCEWithPrimaryBefore(uint32_t p) const23 CollationRootElements::lastCEWithPrimaryBefore(uint32_t p) const {
24     if(p == 0) { return 0; }
25     U_ASSERT(p > elements[elements[IX_FIRST_PRIMARY_INDEX]]);
26     int32_t index = findP(p);
27     uint32_t q = elements[index];
28     uint32_t secTer;
29     if(p == (q & 0xffffff00)) {
30         // p == elements[index] is a root primary. Find the CE before it.
31         // We must not be in a primary range.
32         U_ASSERT((q & PRIMARY_STEP_MASK) == 0);
33         secTer = elements[index - 1];
34         if((secTer & SEC_TER_DELTA_FLAG) == 0) {
35             // Primary CE just before p.
36             p = secTer & 0xffffff00;
37             secTer = Collation::COMMON_SEC_AND_TER_CE;
38         } else {
39             // secTer = last secondary & tertiary for the previous primary
40             index -= 2;
41             for(;;) {
42                 p = elements[index];
43                 if((p & SEC_TER_DELTA_FLAG) == 0) {
44                     p &= 0xffffff00;
45                     break;
46                 }
47                 --index;
48             }
49         }
50     } else {
51         // p > elements[index] which is the previous primary.
52         // Find the last secondary & tertiary weights for it.
53         p = q & 0xffffff00;
54         secTer = Collation::COMMON_SEC_AND_TER_CE;
55         for(;;) {
56             q = elements[++index];
57             if((q & SEC_TER_DELTA_FLAG) == 0) {
58                 // We must not be in a primary range.
59                 U_ASSERT((q & PRIMARY_STEP_MASK) == 0);
60                 break;
61             }
62             secTer = q;
63         }
64     }
65     return ((int64_t)p << 32) | (secTer & ~SEC_TER_DELTA_FLAG);
66 }
67 
68 int64_t
firstCEWithPrimaryAtLeast(uint32_t p) const69 CollationRootElements::firstCEWithPrimaryAtLeast(uint32_t p) const {
70     if(p == 0) { return 0; }
71     int32_t index = findP(p);
72     if(p != (elements[index] & 0xffffff00)) {
73         for(;;) {
74             p = elements[++index];
75             if((p & SEC_TER_DELTA_FLAG) == 0) {
76                 // First primary after p. We must not be in a primary range.
77                 U_ASSERT((p & PRIMARY_STEP_MASK) == 0);
78                 break;
79             }
80         }
81     }
82     // The code above guarantees that p has at most 3 bytes: (p & 0xff) == 0.
83     return ((int64_t)p << 32) | Collation::COMMON_SEC_AND_TER_CE;
84 }
85 
86 uint32_t
getPrimaryBefore(uint32_t p,UBool isCompressible) const87 CollationRootElements::getPrimaryBefore(uint32_t p, UBool isCompressible) const {
88     int32_t index = findPrimary(p);
89     int32_t step;
90     uint32_t q = elements[index];
91     if(p == (q & 0xffffff00)) {
92         // Found p itself. Return the previous primary.
93         // See if p is at the end of a previous range.
94         step = (int32_t)q & PRIMARY_STEP_MASK;
95         if(step == 0) {
96             // p is not at the end of a range. Look for the previous primary.
97             do {
98                 p = elements[--index];
99             } while((p & SEC_TER_DELTA_FLAG) != 0);
100             return p & 0xffffff00;
101         }
102     } else {
103         // p is in a range, and not at the start.
104         uint32_t nextElement = elements[index + 1];
105         U_ASSERT(isEndOfPrimaryRange(nextElement));
106         step = (int32_t)nextElement & PRIMARY_STEP_MASK;
107     }
108     // Return the previous range primary.
109     if((p & 0xffff) == 0) {
110         return Collation::decTwoBytePrimaryByOneStep(p, isCompressible, step);
111     } else {
112         return Collation::decThreeBytePrimaryByOneStep(p, isCompressible, step);
113     }
114 }
115 
116 uint32_t
getSecondaryBefore(uint32_t p,uint32_t s) const117 CollationRootElements::getSecondaryBefore(uint32_t p, uint32_t s) const {
118     int32_t index;
119     uint32_t previousSec, sec;
120     if(p == 0) {
121         index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX];
122         // Gap at the beginning of the secondary CE range.
123         previousSec = 0;
124         sec = elements[index] >> 16;
125     } else {
126         index = findPrimary(p) + 1;
127         previousSec = Collation::BEFORE_WEIGHT16;
128         sec = getFirstSecTerForPrimary(index) >> 16;
129     }
130     U_ASSERT(s >= sec);
131     while(s > sec) {
132         previousSec = sec;
133         U_ASSERT((elements[index] & SEC_TER_DELTA_FLAG) != 0);
134         sec = elements[index++] >> 16;
135     }
136     U_ASSERT(sec == s);
137     return previousSec;
138 }
139 
140 uint32_t
getTertiaryBefore(uint32_t p,uint32_t s,uint32_t t) const141 CollationRootElements::getTertiaryBefore(uint32_t p, uint32_t s, uint32_t t) const {
142     U_ASSERT((t & ~Collation::ONLY_TERTIARY_MASK) == 0);
143     int32_t index;
144     uint32_t previousTer, secTer;
145     if(p == 0) {
146         if(s == 0) {
147             index = (int32_t)elements[IX_FIRST_TERTIARY_INDEX];
148             // Gap at the beginning of the tertiary CE range.
149             previousTer = 0;
150         } else {
151             index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX];
152             previousTer = Collation::BEFORE_WEIGHT16;
153         }
154         secTer = elements[index] & ~SEC_TER_DELTA_FLAG;
155     } else {
156         index = findPrimary(p) + 1;
157         previousTer = Collation::BEFORE_WEIGHT16;
158         secTer = getFirstSecTerForPrimary(index);
159     }
160     uint32_t st = (s << 16) | t;
161     while(st > secTer) {
162         if((secTer >> 16) == s) { previousTer = secTer; }
163         U_ASSERT((elements[index] & SEC_TER_DELTA_FLAG) != 0);
164         secTer = elements[index++] & ~SEC_TER_DELTA_FLAG;
165     }
166     U_ASSERT(secTer == st);
167     return previousTer & 0xffff;
168 }
169 
170 uint32_t
getPrimaryAfter(uint32_t p,int32_t index,UBool isCompressible) const171 CollationRootElements::getPrimaryAfter(uint32_t p, int32_t index, UBool isCompressible) const {
172     U_ASSERT(p == (elements[index] & 0xffffff00) || isEndOfPrimaryRange(elements[index + 1]));
173     uint32_t q = elements[++index];
174     int32_t step;
175     if((q & SEC_TER_DELTA_FLAG) == 0 && (step = (int32_t)q & PRIMARY_STEP_MASK) != 0) {
176         // Return the next primary in this range.
177         if((p & 0xffff) == 0) {
178             return Collation::incTwoBytePrimaryByOffset(p, isCompressible, step);
179         } else {
180             return Collation::incThreeBytePrimaryByOffset(p, isCompressible, step);
181         }
182     } else {
183         // Return the next primary in the list.
184         while((q & SEC_TER_DELTA_FLAG) != 0) {
185             q = elements[++index];
186         }
187         U_ASSERT((q & PRIMARY_STEP_MASK) == 0);
188         return q;
189     }
190 }
191 
192 uint32_t
getSecondaryAfter(int32_t index,uint32_t s) const193 CollationRootElements::getSecondaryAfter(int32_t index, uint32_t s) const {
194     uint32_t secTer;
195     uint32_t secLimit;
196     if(index == 0) {
197         // primary = 0
198         U_ASSERT(s != 0);
199         index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX];
200         secTer = elements[index];
201         // Gap at the end of the secondary CE range.
202         secLimit = 0x10000;
203     } else {
204         U_ASSERT(index >= (int32_t)elements[IX_FIRST_PRIMARY_INDEX]);
205         secTer = getFirstSecTerForPrimary(index + 1);
206         // If this is an explicit sec/ter unit, then it will be read once more.
207         // Gap for secondaries of primary CEs.
208         secLimit = getSecondaryBoundary();
209     }
210     for(;;) {
211         uint32_t sec = secTer >> 16;
212         if(sec > s) { return sec; }
213         secTer = elements[++index];
214         if((secTer & SEC_TER_DELTA_FLAG) == 0) { return secLimit; }
215     }
216 }
217 
218 uint32_t
getTertiaryAfter(int32_t index,uint32_t s,uint32_t t) const219 CollationRootElements::getTertiaryAfter(int32_t index, uint32_t s, uint32_t t) const {
220     uint32_t secTer;
221     uint32_t terLimit;
222     if(index == 0) {
223         // primary = 0
224         if(s == 0) {
225             U_ASSERT(t != 0);
226             index = (int32_t)elements[IX_FIRST_TERTIARY_INDEX];
227             // Gap at the end of the tertiary CE range.
228             terLimit = 0x4000;
229         } else {
230             index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX];
231             // Gap for tertiaries of primary/secondary CEs.
232             terLimit = getTertiaryBoundary();
233         }
234         secTer = elements[index] & ~SEC_TER_DELTA_FLAG;
235     } else {
236         U_ASSERT(index >= (int32_t)elements[IX_FIRST_PRIMARY_INDEX]);
237         secTer = getFirstSecTerForPrimary(index + 1);
238         // If this is an explicit sec/ter unit, then it will be read once more.
239         terLimit = getTertiaryBoundary();
240     }
241     uint32_t st = (s << 16) | t;
242     for(;;) {
243         if(secTer > st) {
244             U_ASSERT((secTer >> 16) == s);
245             return secTer & 0xffff;
246         }
247         secTer = elements[++index];
248         // No tertiary greater than t for this primary+secondary.
249         if((secTer & SEC_TER_DELTA_FLAG) == 0 || (secTer >> 16) > s) { return terLimit; }
250         secTer &= ~SEC_TER_DELTA_FLAG;
251     }
252 }
253 
254 uint32_t
getFirstSecTerForPrimary(int32_t index) const255 CollationRootElements::getFirstSecTerForPrimary(int32_t index) const {
256     uint32_t secTer = elements[index];
257     if((secTer & SEC_TER_DELTA_FLAG) == 0) {
258         // No sec/ter delta.
259         return Collation::COMMON_SEC_AND_TER_CE;
260     }
261     secTer &= ~SEC_TER_DELTA_FLAG;
262     if(secTer > Collation::COMMON_SEC_AND_TER_CE) {
263         // Implied sec/ter.
264         return Collation::COMMON_SEC_AND_TER_CE;
265     }
266     // Explicit sec/ter below common/common.
267     return secTer;
268 }
269 
270 int32_t
findPrimary(uint32_t p) const271 CollationRootElements::findPrimary(uint32_t p) const {
272     // Requirement: p must occur as a root primary.
273     U_ASSERT((p & 0xff) == 0);  // at most a 3-byte primary
274     int32_t index = findP(p);
275     // If p is in a range, then we just assume that p is an actual primary in this range.
276     // (Too cumbersome/expensive to check.)
277     // Otherwise, it must be an exact match.
278     U_ASSERT(isEndOfPrimaryRange(elements[index + 1]) || p == (elements[index] & 0xffffff00));
279     return index;
280 }
281 
282 int32_t
findP(uint32_t p) const283 CollationRootElements::findP(uint32_t p) const {
284     // p need not occur as a root primary.
285     // For example, it might be a reordering group boundary.
286     U_ASSERT((p >> 24) != Collation::UNASSIGNED_IMPLICIT_BYTE);
287     // modified binary search
288     int32_t start = (int32_t)elements[IX_FIRST_PRIMARY_INDEX];
289     U_ASSERT(p >= elements[start]);
290     int32_t limit = length - 1;
291     U_ASSERT(elements[limit] >= PRIMARY_SENTINEL);
292     U_ASSERT(p < elements[limit]);
293     while((start + 1) < limit) {
294         // Invariant: elements[start] and elements[limit] are primaries,
295         // and elements[start]<=p<=elements[limit].
296         int32_t i = (start + limit) / 2;
297         uint32_t q = elements[i];
298         if((q & SEC_TER_DELTA_FLAG) != 0) {
299             // Find the next primary.
300             int32_t j = i + 1;
301             for(;;) {
302                 if(j == limit) { break; }
303                 q = elements[j];
304                 if((q & SEC_TER_DELTA_FLAG) == 0) {
305                     i = j;
306                     break;
307                 }
308                 ++j;
309             }
310             if((q & SEC_TER_DELTA_FLAG) != 0) {
311                 // Find the preceding primary.
312                 j = i - 1;
313                 for(;;) {
314                     if(j == start) { break; }
315                     q = elements[j];
316                     if((q & SEC_TER_DELTA_FLAG) == 0) {
317                         i = j;
318                         break;
319                     }
320                     --j;
321                 }
322                 if((q & SEC_TER_DELTA_FLAG) != 0) {
323                     // No primary between start and limit.
324                     break;
325                 }
326             }
327         }
328         if(p < (q & 0xffffff00)) {  // Reset the "step" bits of a range end primary.
329             limit = i;
330         } else {
331             start = i;
332         }
333     }
334     return start;
335 }
336 
337 U_NAMESPACE_END
338 
339 #endif  // !UCONFIG_NO_COLLATION
340