• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *******************************************************************************
3  * Copyright (C) 2012-2015, International Business Machines
4  * Corporation and others.  All Rights Reserved.
5  *******************************************************************************
6  * CollationKeys.java, ported from collationkeys.h/.cpp
7  *
8  * C++ version created on: 2012sep02
9  * created by: Markus W. Scherer
10  */
11 
12 package com.ibm.icu.impl.coll;
13 
14 import com.ibm.icu.text.Collator;
15 
16 public final class CollationKeys /* all methods are static */ {
17 
18     // Java porting note: C++ SortKeyByteSink class extends a common class ByteSink,
19     // which is not available in Java. We don't need a super class created for implementing
20     // collation features.
21     public static abstract class SortKeyByteSink {
22         protected byte[] buffer_;
23         // protected int capacity_; == buffer_.length
24         private int appended_ = 0;
25         // not used in Java -- private int ignore_ = 0;
26 
SortKeyByteSink(byte[] dest)27         public SortKeyByteSink(byte[] dest) {
28             buffer_ = dest;
29         }
30 
31         /**
32          * Needed in Java for when we write to the buffer directly.
33          * In C++, the SortKeyByteSink is a subclass of ByteSink and lower-level code can write to that.
34          * TODO: Can we make Java SortKeyByteSink have-a ByteArrayWrapper and write through to it?
35          * Or maybe create interface ByteSink, have SortKeyByteSink implement it, and have BOCSU write to that??
36          */
setBufferAndAppended(byte[] dest, int app)37         public void setBufferAndAppended(byte[] dest, int app) {
38             buffer_ = dest;
39             appended_ = app;
40         }
41 
42         /* not used in Java -- public void IgnoreBytes(int numIgnore) {
43             ignore_ = numIgnore;
44         } */
45 
46         /**
47          * @param bytes
48          *            the array of byte
49          * @param n
50          *            the length of bytes to be appended
51          */
Append(byte[] bytes, int n)52         public void Append(byte[] bytes, int n) {
53             if (n <= 0 || bytes == null) {
54                 return;
55             }
56 
57             /* not used in Java -- if (ignore_ > 0) {
58                 int ignoreRest = ignore_ - n;
59                 if (ignoreRest >= 0) {
60                     ignore_ = ignoreRest;
61                     return;
62                 } else {
63                     start = ignore_;
64                     n = -ignoreRest;
65                     ignore_ = 0;
66                 }
67             } */
68 
69             int length = appended_;
70             appended_ += n;
71 
72             int available = buffer_.length - length;
73             if (n <= available) {
74                 System.arraycopy(bytes, 0, buffer_, length, n);
75             } else {
76                 AppendBeyondCapacity(bytes, 0, n, length);
77             }
78         }
79 
Append(int b)80         public void Append(int b) {
81             /* not used in Java -- if (ignore_ > 0) {
82                 --ignore_;
83             } else */ {
84                 if (appended_ < buffer_.length || Resize(1, appended_)) {
85                     buffer_[appended_] = (byte) b;
86                 }
87                 ++appended_;
88             }
89         }
90 
91         // Java porting note: This method is not used by collator implementation.
92         //
93         // virtual char *GetAppendBuffer(int min_capacity,
94         // int desired_capacity_hint,
95         // char *scratch, int scratch_capacity,
96         // int *result_capacity);
97 
NumberOfBytesAppended()98         public int NumberOfBytesAppended() {
99             return appended_;
100         }
101 
GetRemainingCapacity()102         public int GetRemainingCapacity() {
103             return /* not used in Java -- ignore_ + */ buffer_.length - appended_;
104         }
105 
Overflowed()106         public boolean Overflowed() {
107             return appended_ > buffer_.length;
108         }
109 
110         /* not used in Java -- public boolean IsOk() {
111             return true;
112         } */
113 
114         /**
115          * @param bytes
116          *            the array of byte
117          * @param start
118          *            the start index within the array to be appended
119          * @param n
120          *            the length of bytes to be appended
121          * @param length
122          *            the length of buffer required to store the entire data (i.e. already appended
123          *            bytes + bytes to be appended by this method)
124          */
AppendBeyondCapacity(byte[] bytes, int start, int n, int length)125         protected abstract void AppendBeyondCapacity(byte[] bytes, int start, int n, int length);
126 
Resize(int appendCapacity, int length)127         protected abstract boolean Resize(int appendCapacity, int length);
128     }
129 
130     public static class LevelCallback {
131         /**
132          * @param level
133          *            The next level about to be written to the ByteSink.
134          * @return true if the level is to be written (the base class implementation always returns
135          *         true)
136          */
needToWrite(int level)137         boolean needToWrite(int level) {
138             return true;
139         }
140     }
141     public static final LevelCallback SIMPLE_LEVEL_FALLBACK = new LevelCallback();
142 
143     private static final class SortKeyLevel {
144         private static final int INITIAL_CAPACITY = 40;
145 
146         byte[] buffer = new byte[INITIAL_CAPACITY];
147         int len = 0;
148         // not used in Java -- private static final boolean ok = true;  // In C++ "ok" is reset when memory allocations fail.
149 
SortKeyLevel()150         SortKeyLevel() {
151         }
152 
153         /* not used in Java -- boolean isOk() {
154             return ok;
155         } */
156 
isEmpty()157         boolean isEmpty() {
158             return len == 0;
159         }
160 
length()161         int length() {
162             return len;
163         }
164 
165         // Java porting note: Java uses this instead of C++ operator [] overload
166         // uint8_t operator[](int index)
getAt(int index)167         byte getAt(int index) {
168             return buffer[index];
169         }
170 
data()171         byte[] data() {
172             return buffer;
173         }
174 
appendByte(int b)175         void appendByte(int b) {
176             if (len < buffer.length || ensureCapacity(1)) {
177                 buffer[len++] = (byte) b;
178             }
179         }
180 
appendWeight16(int w)181         void appendWeight16(int w) {
182             assert ((w & 0xffff) != 0);
183             byte b0 = (byte) (w >>> 8);
184             byte b1 = (byte) w;
185             int appendLength = (b1 == 0) ? 1 : 2;
186             if ((len + appendLength) <= buffer.length || ensureCapacity(appendLength)) {
187                 buffer[len++] = b0;
188                 if (b1 != 0) {
189                     buffer[len++] = b1;
190                 }
191             }
192         }
193 
appendWeight32(long w)194         void appendWeight32(long w) {
195             assert (w != 0);
196             byte[] bytes = new byte[] { (byte) (w >>> 24), (byte) (w >>> 16), (byte) (w >>> 8),
197                     (byte) w };
198             int appendLength = (bytes[1] == 0) ? 1 : (bytes[2] == 0) ? 2 : (bytes[3] == 0) ? 3 : 4;
199             if ((len + appendLength) <= buffer.length || ensureCapacity(appendLength)) {
200                 buffer[len++] = bytes[0];
201                 if (bytes[1] != 0) {
202                     buffer[len++] = bytes[1];
203                     if (bytes[2] != 0) {
204                         buffer[len++] = bytes[2];
205                         if (bytes[3] != 0) {
206                             buffer[len++] = bytes[3];
207                         }
208                     }
209                 }
210             }
211         }
212 
appendReverseWeight16(int w)213         void appendReverseWeight16(int w) {
214             assert ((w & 0xffff) != 0);
215             byte b0 = (byte) (w >>> 8);
216             byte b1 = (byte) w;
217             int appendLength = (b1 == 0) ? 1 : 2;
218             if ((len + appendLength) <= buffer.length || ensureCapacity(appendLength)) {
219                 if (b1 == 0) {
220                     buffer[len++] = b0;
221                 } else {
222                     buffer[len] = b1;
223                     buffer[len + 1] = b0;
224                     len += 2;
225                 }
226             }
227         }
228 
229         // Appends all but the last byte to the sink. The last byte should be the 01 terminator.
appendTo(SortKeyByteSink sink)230         void appendTo(SortKeyByteSink sink) {
231             assert (len > 0 && buffer[len - 1] == 1);
232             sink.Append(buffer, len - 1);
233         }
234 
ensureCapacity(int appendCapacity)235         private boolean ensureCapacity(int appendCapacity) {
236             /* not used in Java -- if (!ok) {
237                 return false;
238             } */
239             int newCapacity = 2 * buffer.length;
240             int altCapacity = len + 2 * appendCapacity;
241             if (newCapacity < altCapacity) {
242                 newCapacity = altCapacity;
243             }
244             if (newCapacity < 200) {
245                 newCapacity = 200;
246             }
247             byte[] newbuf = new byte[newCapacity];
248             System.arraycopy(buffer, 0, newbuf, 0, len);
249             buffer = newbuf;
250 
251             return true;
252         }
253     }
254 
getSortKeyLevel(int levels, int level)255     private static SortKeyLevel getSortKeyLevel(int levels, int level) {
256         return (levels & level) != 0 ? new SortKeyLevel() : null;
257     }
258 
CollationKeys()259     private CollationKeys() {
260     } // no instantiation
261 
262     // Secondary level: Compress up to 33 common weights as 05..25 or 25..45.
263     private static final int SEC_COMMON_LOW = Collation.COMMON_BYTE;
264     private static final int SEC_COMMON_MIDDLE = SEC_COMMON_LOW + 0x20;
265     static final int SEC_COMMON_HIGH = SEC_COMMON_LOW + 0x40; // read by CollationDataReader
266     private static final int SEC_COMMON_MAX_COUNT = 0x21;
267 
268     // Case level, lowerFirst: Compress up to 7 common weights as 1..7 or 7..13.
269     private static final int CASE_LOWER_FIRST_COMMON_LOW = 1;
270     private static final int CASE_LOWER_FIRST_COMMON_MIDDLE = 7;
271     private static final int CASE_LOWER_FIRST_COMMON_HIGH = 13;
272     private static final int CASE_LOWER_FIRST_COMMON_MAX_COUNT = 7;
273 
274     // Case level, upperFirst: Compress up to 13 common weights as 3..15.
275     private static final int CASE_UPPER_FIRST_COMMON_LOW = 3;
276     @SuppressWarnings("unused")
277     private static final int CASE_UPPER_FIRST_COMMON_HIGH = 15;
278     private static final int CASE_UPPER_FIRST_COMMON_MAX_COUNT = 13;
279 
280     // Tertiary level only (no case): Compress up to 97 common weights as 05..65 or 65..C5.
281     private static final int TER_ONLY_COMMON_LOW = Collation.COMMON_BYTE;
282     private static final int TER_ONLY_COMMON_MIDDLE = TER_ONLY_COMMON_LOW + 0x60;
283     private static final int TER_ONLY_COMMON_HIGH = TER_ONLY_COMMON_LOW + 0xc0;
284     private static final int TER_ONLY_COMMON_MAX_COUNT = 0x61;
285 
286     // Tertiary with case, lowerFirst: Compress up to 33 common weights as 05..25 or 25..45.
287     private static final int TER_LOWER_FIRST_COMMON_LOW = Collation.COMMON_BYTE;
288     private static final int TER_LOWER_FIRST_COMMON_MIDDLE = TER_LOWER_FIRST_COMMON_LOW + 0x20;
289     private static final int TER_LOWER_FIRST_COMMON_HIGH = TER_LOWER_FIRST_COMMON_LOW + 0x40;
290     private static final int TER_LOWER_FIRST_COMMON_MAX_COUNT = 0x21;
291 
292     // Tertiary with case, upperFirst: Compress up to 33 common weights as 85..A5 or A5..C5.
293     private static final int TER_UPPER_FIRST_COMMON_LOW = Collation.COMMON_BYTE + 0x80;
294     private static final int TER_UPPER_FIRST_COMMON_MIDDLE = TER_UPPER_FIRST_COMMON_LOW + 0x20;
295     private static final int TER_UPPER_FIRST_COMMON_HIGH = TER_UPPER_FIRST_COMMON_LOW + 0x40;
296     private static final int TER_UPPER_FIRST_COMMON_MAX_COUNT = 0x21;
297 
298     // Quaternary level: Compress up to 113 common weights as 1C..8C or 8C..FC.
299     private static final int QUAT_COMMON_LOW = 0x1c;
300     private static final int QUAT_COMMON_MIDDLE = QUAT_COMMON_LOW + 0x70;
301     private static final int QUAT_COMMON_HIGH = QUAT_COMMON_LOW + 0xE0;
302     private static final int QUAT_COMMON_MAX_COUNT = 0x71;
303     // Primary weights shifted to quaternary level must be encoded with
304     // a lead byte below the common-weight compression range.
305     private static final int QUAT_SHIFTED_LIMIT_BYTE = QUAT_COMMON_LOW - 1; // 0x1b
306 
307     /**
308      * Map from collation strength (UColAttributeValue) to a mask of Collation.Level bits up to that
309      * strength, excluding the CASE_LEVEL which is independent of the strength, and excluding
310      * IDENTICAL_LEVEL which this function does not write.
311      */
312     private static final int levelMasks[] = new int[] {
313         2,          // UCOL_PRIMARY -> PRIMARY_LEVEL
314         6,          // UCOL_SECONDARY -> up to SECONDARY_LEVEL
315         0x16,       // UCOL_TERTIARY -> up to TERTIARY_LEVEL
316         0x36,       // UCOL_QUATERNARY -> up to QUATERNARY_LEVEL
317         0, 0, 0, 0,
318         0, 0, 0, 0,
319         0, 0, 0,
320         0x36        // UCOL_IDENTICAL -> up to QUATERNARY_LEVEL
321     };
322 
323     /**
324      * Writes the sort key bytes for minLevel up to the iterator data's strength. Optionally writes
325      * the case level. Stops writing levels when callback.needToWrite(level) returns false.
326      * Separates levels with the LEVEL_SEPARATOR_BYTE but does not write a TERMINATOR_BYTE.
327      */
writeSortKeyUpToQuaternary(CollationIterator iter, boolean[] compressibleBytes, CollationSettings settings, SortKeyByteSink sink, int minLevel, LevelCallback callback, boolean preflight)328     public static void writeSortKeyUpToQuaternary(CollationIterator iter, boolean[] compressibleBytes,
329             CollationSettings settings, SortKeyByteSink sink, int minLevel, LevelCallback callback,
330             boolean preflight) {
331 
332         int options = settings.options;
333         // Set of levels to process and write.
334         int levels = levelMasks[CollationSettings.getStrength(options)];
335         if ((options & CollationSettings.CASE_LEVEL) != 0) {
336             levels |= Collation.CASE_LEVEL_FLAG;
337         }
338         // Minus the levels below minLevel.
339         levels &= ~((1 << minLevel) - 1);
340         if (levels == 0) {
341             return;
342         }
343 
344         long variableTop;
345         if ((options & CollationSettings.ALTERNATE_MASK) == 0) {
346             variableTop = 0;
347         } else {
348             // +1 so that we can use "<" and primary ignorables test out early.
349             variableTop = settings.variableTop + 1;
350         }
351 
352         int tertiaryMask = CollationSettings.getTertiaryMask(options);
353 
354         byte[] p234 = new byte[3];
355         SortKeyLevel cases = getSortKeyLevel(levels, Collation.CASE_LEVEL_FLAG);
356         SortKeyLevel secondaries = getSortKeyLevel(levels, Collation.SECONDARY_LEVEL_FLAG);
357         SortKeyLevel tertiaries = getSortKeyLevel(levels, Collation.TERTIARY_LEVEL_FLAG);
358         SortKeyLevel quaternaries = getSortKeyLevel(levels, Collation.QUATERNARY_LEVEL_FLAG);
359 
360         long prevReorderedPrimary = 0;  // 0==no compression
361         int commonCases = 0;
362         int commonSecondaries = 0;
363         int commonTertiaries = 0;
364         int commonQuaternaries = 0;
365 
366         int prevSecondary = 0;
367         int secSegmentStart = 0;
368 
369         for (;;) {
370             // No need to keep all CEs in the buffer when we write a sort key.
371             iter.clearCEsIfNoneRemaining();
372             long ce = iter.nextCE();
373             long p = ce >>> 32;
374             if (p < variableTop && p > Collation.MERGE_SEPARATOR_PRIMARY) {
375                 // Variable CE, shift it to quaternary level.
376                 // Ignore all following primary ignorables, and shift further variable CEs.
377                 if (commonQuaternaries != 0) {
378                     --commonQuaternaries;
379                     while (commonQuaternaries >= QUAT_COMMON_MAX_COUNT) {
380                         quaternaries.appendByte(QUAT_COMMON_MIDDLE);
381                         commonQuaternaries -= QUAT_COMMON_MAX_COUNT;
382                     }
383                     // Shifted primary weights are lower than the common weight.
384                     quaternaries.appendByte(QUAT_COMMON_LOW + commonQuaternaries);
385                     commonQuaternaries = 0;
386                 }
387                 do {
388                     if ((levels & Collation.QUATERNARY_LEVEL_FLAG) != 0) {
389                         if (settings.hasReordering()) {
390                             p = settings.reorder(p);
391                         }
392                         if (((int) p >>> 24) >= QUAT_SHIFTED_LIMIT_BYTE) {
393                             // Prevent shifted primary lead bytes from
394                             // overlapping with the common compression range.
395                             quaternaries.appendByte(QUAT_SHIFTED_LIMIT_BYTE);
396                         }
397                         quaternaries.appendWeight32(p);
398                     }
399                     do {
400                         ce = iter.nextCE();
401                         p = ce >>> 32;
402                     } while (p == 0);
403                 } while (p < variableTop && p > Collation.MERGE_SEPARATOR_PRIMARY);
404             }
405             // ce could be primary ignorable, or NO_CE, or the merge separator,
406             // or a regular primary CE, but it is not variable.
407             // If ce==NO_CE, then write nothing for the primary level but
408             // terminate compression on all levels and then exit the loop.
409             if (p > Collation.NO_CE_PRIMARY && (levels & Collation.PRIMARY_LEVEL_FLAG) != 0) {
410                 // Test the un-reordered primary for compressibility.
411                 boolean isCompressible = compressibleBytes[(int) p >>> 24];
412                 if(settings.hasReordering()) {
413                     p = settings.reorder(p);
414                 }
415                 int p1 = (int) p >>> 24;
416                 if (!isCompressible || p1 != ((int) prevReorderedPrimary >>> 24)) {
417                     if (prevReorderedPrimary != 0) {
418                         if (p < prevReorderedPrimary) {
419                             // No primary compression terminator
420                             // at the end of the level or merged segment.
421                             if (p1 > Collation.MERGE_SEPARATOR_BYTE) {
422                                 sink.Append(Collation.PRIMARY_COMPRESSION_LOW_BYTE);
423                             }
424                         } else {
425                             sink.Append(Collation.PRIMARY_COMPRESSION_HIGH_BYTE);
426                         }
427                     }
428                     sink.Append(p1);
429                     if(isCompressible) {
430                         prevReorderedPrimary = p;
431                     } else {
432                         prevReorderedPrimary = 0;
433                     }
434                 }
435                 byte p2 = (byte) (p >>> 16);
436                 if (p2 != 0) {
437                     p234[0] = p2;
438                     p234[1] = (byte) (p >>> 8);
439                     p234[2] = (byte) p;
440                     sink.Append(p234, (p234[1] == 0) ? 1 : (p234[2] == 0) ? 2 : 3);
441                 }
442                 // Optimization for internalNextSortKeyPart():
443                 // When the primary level overflows we can stop because we need not
444                 // calculate (preflight) the whole sort key length.
445                 if (!preflight && sink.Overflowed()) {
446                     // not used in Java -- if (!sink.IsOk()) {
447                     // Java porting note: U_MEMORY_ALLOCATION_ERROR is set here in
448                     // C implementation. IsOk() in Java always returns true, so this
449                     // is a dead code.
450                     return;
451                 }
452             }
453 
454             int lower32 = (int) ce;
455             if (lower32 == 0) {
456                 continue;
457             } // completely ignorable, no secondary/case/tertiary/quaternary
458 
459             if ((levels & Collation.SECONDARY_LEVEL_FLAG) != 0) {
460                 int s = lower32 >>> 16;  // 16 bits
461                 if (s == 0) {
462                     // secondary ignorable
463                 } else if (s == Collation.COMMON_WEIGHT16 &&
464                         ((options & CollationSettings.BACKWARD_SECONDARY) == 0 ||
465                             p != Collation.MERGE_SEPARATOR_PRIMARY)) {
466                     // s is a common secondary weight, and
467                     // backwards-secondary is off or the ce is not the merge separator.
468                     ++commonSecondaries;
469                 } else if ((options & CollationSettings.BACKWARD_SECONDARY) == 0) {
470                     if (commonSecondaries != 0) {
471                         --commonSecondaries;
472                         while (commonSecondaries >= SEC_COMMON_MAX_COUNT) {
473                             secondaries.appendByte(SEC_COMMON_MIDDLE);
474                             commonSecondaries -= SEC_COMMON_MAX_COUNT;
475                         }
476                         int b;
477                         if (s < Collation.COMMON_WEIGHT16) {
478                             b = SEC_COMMON_LOW + commonSecondaries;
479                         } else {
480                             b = SEC_COMMON_HIGH - commonSecondaries;
481                         }
482                         secondaries.appendByte(b);
483                         commonSecondaries = 0;
484                     }
485                     secondaries.appendWeight16(s);
486                 } else {
487                     if (commonSecondaries != 0) {
488                         --commonSecondaries;
489                         // Append reverse weights. The level will be re-reversed later.
490                         int remainder = commonSecondaries % SEC_COMMON_MAX_COUNT;
491                         int b;
492                         if (prevSecondary < Collation.COMMON_WEIGHT16) {
493                             b = SEC_COMMON_LOW + remainder;
494                         } else {
495                             b = SEC_COMMON_HIGH - remainder;
496                         }
497                         secondaries.appendByte(b);
498                         commonSecondaries -= remainder;
499                         // commonSecondaries is now a multiple of SEC_COMMON_MAX_COUNT.
500                         while (commonSecondaries > 0) { // same as >= SEC_COMMON_MAX_COUNT
501                             secondaries.appendByte(SEC_COMMON_MIDDLE);
502                             commonSecondaries -= SEC_COMMON_MAX_COUNT;
503                         }
504                         // commonSecondaries == 0
505                     }
506                     if (0 < p && p <= Collation.MERGE_SEPARATOR_PRIMARY) {
507                         // The backwards secondary level compares secondary weights backwards
508                         // within segments separated by the merge separator (U+FFFE).
509                         byte[] secs = secondaries.data();
510                         int last = secondaries.length() - 1;
511                         while (secSegmentStart < last) {
512                             byte b = secs[secSegmentStart];
513                             secs[secSegmentStart++] = secs[last];
514                             secs[last--] = b;
515                         }
516                         secondaries.appendByte(p == Collation.NO_CE_PRIMARY ?
517                             Collation.LEVEL_SEPARATOR_BYTE : Collation.MERGE_SEPARATOR_BYTE);
518                         prevSecondary = 0;
519                         secSegmentStart = secondaries.length();
520                     } else {
521                         secondaries.appendReverseWeight16(s);
522                         prevSecondary = s;
523                     }
524                 }
525             }
526 
527             if ((levels & Collation.CASE_LEVEL_FLAG) != 0) {
528                 if ((CollationSettings.getStrength(options) == Collator.PRIMARY) ? p == 0
529                         : (lower32 >>> 16) == 0) {
530                     // Primary+caseLevel: Ignore case level weights of primary ignorables.
531                     // Otherwise: Ignore case level weights of secondary ignorables.
532                     // For details see the comments in the CollationCompare class.
533                 } else {
534                     int c = (lower32 >>> 8) & 0xff; // case bits & tertiary lead byte
535                     assert ((c & 0xc0) != 0xc0);
536                     if ((c & 0xc0) == 0 && c > Collation.LEVEL_SEPARATOR_BYTE) {
537                         ++commonCases;
538                     } else {
539                         if ((options & CollationSettings.UPPER_FIRST) == 0) {
540                             // lowerFirst: Compress common weights to nibbles 1..7..13, mixed=14,
541                             // upper=15.
542                             // If there are only common (=lowest) weights in the whole level,
543                             // then we need not write anything.
544                             // Level length differences are handled already on the next-higher level.
545                             if (commonCases != 0 &&
546                                     (c > Collation.LEVEL_SEPARATOR_BYTE || !cases.isEmpty())) {
547                                 --commonCases;
548                                 while (commonCases >= CASE_LOWER_FIRST_COMMON_MAX_COUNT) {
549                                     cases.appendByte(CASE_LOWER_FIRST_COMMON_MIDDLE << 4);
550                                     commonCases -= CASE_LOWER_FIRST_COMMON_MAX_COUNT;
551                                 }
552                                 int b;
553                                 if (c <= Collation.LEVEL_SEPARATOR_BYTE) {
554                                     b = CASE_LOWER_FIRST_COMMON_LOW + commonCases;
555                                 } else {
556                                     b = CASE_LOWER_FIRST_COMMON_HIGH - commonCases;
557                                 }
558                                 cases.appendByte(b << 4);
559                                 commonCases = 0;
560                             }
561                             if (c > Collation.LEVEL_SEPARATOR_BYTE) {
562                                 c = (CASE_LOWER_FIRST_COMMON_HIGH + (c >>> 6)) << 4; // 14 or 15
563                             }
564                         } else {
565                             // upperFirst: Compress common weights to nibbles 3..15, mixed=2,
566                             // upper=1.
567                             // The compressed common case weights only go up from the "low" value
568                             // because with upperFirst the common weight is the highest one.
569                             if (commonCases != 0) {
570                                 --commonCases;
571                                 while (commonCases >= CASE_UPPER_FIRST_COMMON_MAX_COUNT) {
572                                     cases.appendByte(CASE_UPPER_FIRST_COMMON_LOW << 4);
573                                     commonCases -= CASE_UPPER_FIRST_COMMON_MAX_COUNT;
574                                 }
575                                 cases.appendByte((CASE_UPPER_FIRST_COMMON_LOW + commonCases) << 4);
576                                 commonCases = 0;
577                             }
578                             if (c > Collation.LEVEL_SEPARATOR_BYTE) {
579                                 c = (CASE_UPPER_FIRST_COMMON_LOW - (c >>> 6)) << 4; // 2 or 1
580                             }
581                         }
582                         // c is a separator byte 01,
583                         // or a left-shifted nibble 0x10, 0x20, ... 0xf0.
584                         cases.appendByte(c);
585                     }
586                 }
587             }
588 
589             if ((levels & Collation.TERTIARY_LEVEL_FLAG) != 0) {
590                 int t = lower32 & tertiaryMask;
591                 assert ((lower32 & 0xc000) != 0xc000);
592                 if (t == Collation.COMMON_WEIGHT16) {
593                     ++commonTertiaries;
594                 } else if ((tertiaryMask & 0x8000) == 0) {
595                     // Tertiary weights without case bits.
596                     // Move lead bytes 06..3F to C6..FF for a large common-weight range.
597                     if (commonTertiaries != 0) {
598                         --commonTertiaries;
599                         while (commonTertiaries >= TER_ONLY_COMMON_MAX_COUNT) {
600                             tertiaries.appendByte(TER_ONLY_COMMON_MIDDLE);
601                             commonTertiaries -= TER_ONLY_COMMON_MAX_COUNT;
602                         }
603                         int b;
604                         if (t < Collation.COMMON_WEIGHT16) {
605                             b = TER_ONLY_COMMON_LOW + commonTertiaries;
606                         } else {
607                             b = TER_ONLY_COMMON_HIGH - commonTertiaries;
608                         }
609                         tertiaries.appendByte(b);
610                         commonTertiaries = 0;
611                     }
612                     if (t > Collation.COMMON_WEIGHT16) {
613                         t += 0xc000;
614                     }
615                     tertiaries.appendWeight16(t);
616                 } else if ((options & CollationSettings.UPPER_FIRST) == 0) {
617                     // Tertiary weights with caseFirst=lowerFirst.
618                     // Move lead bytes 06..BF to 46..FF for the common-weight range.
619                     if (commonTertiaries != 0) {
620                         --commonTertiaries;
621                         while (commonTertiaries >= TER_LOWER_FIRST_COMMON_MAX_COUNT) {
622                             tertiaries.appendByte(TER_LOWER_FIRST_COMMON_MIDDLE);
623                             commonTertiaries -= TER_LOWER_FIRST_COMMON_MAX_COUNT;
624                         }
625                         int b;
626                         if (t < Collation.COMMON_WEIGHT16) {
627                             b = TER_LOWER_FIRST_COMMON_LOW + commonTertiaries;
628                         } else {
629                             b = TER_LOWER_FIRST_COMMON_HIGH - commonTertiaries;
630                         }
631                         tertiaries.appendByte(b);
632                         commonTertiaries = 0;
633                     }
634                     if (t > Collation.COMMON_WEIGHT16) {
635                         t += 0x4000;
636                     }
637                     tertiaries.appendWeight16(t);
638                 } else {
639                     // Tertiary weights with caseFirst=upperFirst.
640                     // Do not change the artificial uppercase weight of a tertiary CE (0.0.ut),
641                     // to keep tertiary CEs well-formed.
642                     // Their case+tertiary weights must be greater than those of
643                     // primary and secondary CEs.
644                     //
645                     // Separator         01 -> 01      (unchanged)
646                     // Lowercase     02..04 -> 82..84  (includes uncased)
647                     // Common weight     05 -> 85..C5  (common-weight compression range)
648                     // Lowercase     06..3F -> C6..FF
649                     // Mixed case    42..7F -> 42..7F
650                     // Uppercase     82..BF -> 02..3F
651                     // Tertiary CE   86..BF -> C6..FF
652                     if (t <= Collation.NO_CE_WEIGHT16) {
653                         // Keep separators unchanged.
654                     } else if ((lower32 >>> 16) != 0) {
655                         // Invert case bits of primary & secondary CEs.
656                         t ^= 0xc000;
657                         if (t < (TER_UPPER_FIRST_COMMON_HIGH << 8)) {
658                             t -= 0x4000;
659                         }
660                     } else {
661                         // Keep uppercase bits of tertiary CEs.
662                         assert (0x8600 <= t && t <= 0xbfff);
663                         t += 0x4000;
664                     }
665                     if (commonTertiaries != 0) {
666                         --commonTertiaries;
667                         while (commonTertiaries >= TER_UPPER_FIRST_COMMON_MAX_COUNT) {
668                             tertiaries.appendByte(TER_UPPER_FIRST_COMMON_MIDDLE);
669                             commonTertiaries -= TER_UPPER_FIRST_COMMON_MAX_COUNT;
670                         }
671                         int b;
672                         if (t < (TER_UPPER_FIRST_COMMON_LOW << 8)) {
673                             b = TER_UPPER_FIRST_COMMON_LOW + commonTertiaries;
674                         } else {
675                             b = TER_UPPER_FIRST_COMMON_HIGH - commonTertiaries;
676                         }
677                         tertiaries.appendByte(b);
678                         commonTertiaries = 0;
679                     }
680                     tertiaries.appendWeight16(t);
681                 }
682             }
683 
684             if ((levels & Collation.QUATERNARY_LEVEL_FLAG) != 0) {
685                 int q = lower32 & 0xffff;
686                 if ((q & 0xc0) == 0 && q > Collation.NO_CE_WEIGHT16) {
687                     ++commonQuaternaries;
688                 } else if (q == Collation.NO_CE_WEIGHT16
689                         && (options & CollationSettings.ALTERNATE_MASK) == 0
690                         && quaternaries.isEmpty()) {
691                     // If alternate=non-ignorable and there are only common quaternary weights,
692                     // then we need not write anything.
693                     // The only weights greater than the merge separator and less than the common
694                     // weight
695                     // are shifted primary weights, which are not generated for
696                     // alternate=non-ignorable.
697                     // There are also exactly as many quaternary weights as tertiary weights,
698                     // so level length differences are handled already on tertiary level.
699                     // Any above-common quaternary weight will compare greater regardless.
700                     quaternaries.appendByte(Collation.LEVEL_SEPARATOR_BYTE);
701                 } else {
702                     if (q == Collation.NO_CE_WEIGHT16) {
703                         q = Collation.LEVEL_SEPARATOR_BYTE;
704                     } else {
705                         q = 0xfc + ((q >>> 6) & 3);
706                     }
707                     if (commonQuaternaries != 0) {
708                         --commonQuaternaries;
709                         while (commonQuaternaries >= QUAT_COMMON_MAX_COUNT) {
710                             quaternaries.appendByte(QUAT_COMMON_MIDDLE);
711                             commonQuaternaries -= QUAT_COMMON_MAX_COUNT;
712                         }
713                         int b;
714                         if (q < QUAT_COMMON_LOW) {
715                             b = QUAT_COMMON_LOW + commonQuaternaries;
716                         } else {
717                             b = QUAT_COMMON_HIGH - commonQuaternaries;
718                         }
719                         quaternaries.appendByte(b);
720                         commonQuaternaries = 0;
721                     }
722                     quaternaries.appendByte(q);
723                 }
724             }
725 
726             if ((lower32 >>> 24) == Collation.LEVEL_SEPARATOR_BYTE) {
727                 break;
728             } // ce == NO_CE
729         }
730 
731         // Append the beyond-primary levels.
732         // not used in Java -- boolean ok = true;
733         if ((levels & Collation.SECONDARY_LEVEL_FLAG) != 0) {
734             if (!callback.needToWrite(Collation.SECONDARY_LEVEL)) {
735                 return;
736             }
737             // not used in Java -- ok &= secondaries.isOk();
738             sink.Append(Collation.LEVEL_SEPARATOR_BYTE);
739             secondaries.appendTo(sink);
740         }
741 
742         if ((levels & Collation.CASE_LEVEL_FLAG) != 0) {
743             if (!callback.needToWrite(Collation.CASE_LEVEL)) {
744                 return;
745             }
746             // not used in Java -- ok &= cases.isOk();
747             sink.Append(Collation.LEVEL_SEPARATOR_BYTE);
748             // Write pairs of nibbles as bytes, except separator bytes as themselves.
749             int length = cases.length() - 1; // Ignore the trailing NO_CE.
750             byte b = 0;
751             for (int i = 0; i < length; ++i) {
752                 byte c = cases.getAt(i);
753                 assert ((c & 0xf) == 0 && c != 0);
754                 if (b == 0) {
755                     b = c;
756                 } else {
757                     sink.Append(b | ((c >> 4) & 0xf));
758                     b = 0;
759                 }
760             }
761             if (b != 0) {
762                 sink.Append(b);
763             }
764         }
765 
766         if ((levels & Collation.TERTIARY_LEVEL_FLAG) != 0) {
767             if (!callback.needToWrite(Collation.TERTIARY_LEVEL)) {
768                 return;
769             }
770             // not used in Java -- ok &= tertiaries.isOk();
771             sink.Append(Collation.LEVEL_SEPARATOR_BYTE);
772             tertiaries.appendTo(sink);
773         }
774 
775         if ((levels & Collation.QUATERNARY_LEVEL_FLAG) != 0) {
776             if (!callback.needToWrite(Collation.QUATERNARY_LEVEL)) {
777                 return;
778             }
779             // not used in Java -- ok &= quaternaries.isOk();
780             sink.Append(Collation.LEVEL_SEPARATOR_BYTE);
781             quaternaries.appendTo(sink);
782         }
783 
784         // not used in Java -- if (!ok || !sink.IsOk()) {
785         // Java porting note: U_MEMORY_ALLOCATION_ERROR is set here in
786         // C implementation. IsOk() in Java always returns true, so this
787         // is a dead code.
788     }
789 }
790