1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html#License 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2012-2015, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ******************************************************************************* 8 * CollationKeys.java, ported from collationkeys.h/.cpp 9 * 10 * C++ version created on: 2012sep02 11 * created by: Markus W. Scherer 12 */ 13 14 package com.ibm.icu.impl.coll; 15 16 import com.ibm.icu.text.Collator; 17 18 public final class CollationKeys /* all methods are static */ { 19 20 // Java porting note: C++ SortKeyByteSink class extends a common class ByteSink, 21 // which is not available in Java. We don't need a super class created for implementing 22 // collation features. 23 public static abstract class SortKeyByteSink { 24 protected byte[] buffer_; 25 // protected int capacity_; == buffer_.length 26 private int appended_ = 0; 27 // not used in Java -- private int ignore_ = 0; 28 SortKeyByteSink(byte[] dest)29 public SortKeyByteSink(byte[] dest) { 30 buffer_ = dest; 31 } 32 33 /** 34 * Needed in Java for when we write to the buffer directly. 35 * In C++, the SortKeyByteSink is a subclass of ByteSink and lower-level code can write to that. 36 * TODO: Can we make Java SortKeyByteSink have-a ByteArrayWrapper and write through to it? 37 * Or maybe create interface ByteSink, have SortKeyByteSink implement it, and have BOCSU write to that?? 38 */ setBufferAndAppended(byte[] dest, int app)39 public void setBufferAndAppended(byte[] dest, int app) { 40 buffer_ = dest; 41 appended_ = app; 42 } 43 44 /* not used in Java -- public void IgnoreBytes(int numIgnore) { 45 ignore_ = numIgnore; 46 } */ 47 48 /** 49 * @param bytes 50 * the array of byte 51 * @param n 52 * the length of bytes to be appended 53 */ Append(byte[] bytes, int n)54 public void Append(byte[] bytes, int n) { 55 if (n <= 0 || bytes == null) { 56 return; 57 } 58 59 /* not used in Java -- if (ignore_ > 0) { 60 int ignoreRest = ignore_ - n; 61 if (ignoreRest >= 0) { 62 ignore_ = ignoreRest; 63 return; 64 } else { 65 start = ignore_; 66 n = -ignoreRest; 67 ignore_ = 0; 68 } 69 } */ 70 71 int length = appended_; 72 appended_ += n; 73 74 int available = buffer_.length - length; 75 if (n <= available) { 76 System.arraycopy(bytes, 0, buffer_, length, n); 77 } else { 78 AppendBeyondCapacity(bytes, 0, n, length); 79 } 80 } 81 Append(int b)82 public void Append(int b) { 83 /* not used in Java -- if (ignore_ > 0) { 84 --ignore_; 85 } else */ { 86 if (appended_ < buffer_.length || Resize(1, appended_)) { 87 buffer_[appended_] = (byte) b; 88 } 89 ++appended_; 90 } 91 } 92 93 // Java porting note: This method is not used by collator implementation. 94 // 95 // virtual char *GetAppendBuffer(int min_capacity, 96 // int desired_capacity_hint, 97 // char *scratch, int scratch_capacity, 98 // int *result_capacity); 99 NumberOfBytesAppended()100 public int NumberOfBytesAppended() { 101 return appended_; 102 } 103 GetRemainingCapacity()104 public int GetRemainingCapacity() { 105 return /* not used in Java -- ignore_ + */ buffer_.length - appended_; 106 } 107 Overflowed()108 public boolean Overflowed() { 109 return appended_ > buffer_.length; 110 } 111 112 /* not used in Java -- public boolean IsOk() { 113 return true; 114 } */ 115 116 /** 117 * @param bytes 118 * the array of byte 119 * @param start 120 * the start index within the array to be appended 121 * @param n 122 * the length of bytes to be appended 123 * @param length 124 * the length of buffer required to store the entire data (i.e. already appended 125 * bytes + bytes to be appended by this method) 126 */ AppendBeyondCapacity(byte[] bytes, int start, int n, int length)127 protected abstract void AppendBeyondCapacity(byte[] bytes, int start, int n, int length); 128 Resize(int appendCapacity, int length)129 protected abstract boolean Resize(int appendCapacity, int length); 130 } 131 132 public static class LevelCallback { 133 /** 134 * @param level 135 * The next level about to be written to the ByteSink. 136 * @return true if the level is to be written (the base class implementation always returns 137 * true) 138 */ needToWrite(int level)139 boolean needToWrite(int level) { 140 return true; 141 } 142 } 143 public static final LevelCallback SIMPLE_LEVEL_FALLBACK = new LevelCallback(); 144 145 private static final class SortKeyLevel { 146 private static final int INITIAL_CAPACITY = 40; 147 148 byte[] buffer = new byte[INITIAL_CAPACITY]; 149 int len = 0; 150 // not used in Java -- private static final boolean ok = true; // In C++ "ok" is reset when memory allocations fail. 151 SortKeyLevel()152 SortKeyLevel() { 153 } 154 155 /* not used in Java -- boolean isOk() { 156 return ok; 157 } */ 158 isEmpty()159 boolean isEmpty() { 160 return len == 0; 161 } 162 length()163 int length() { 164 return len; 165 } 166 167 // Java porting note: Java uses this instead of C++ operator [] overload 168 // uint8_t operator[](int index) getAt(int index)169 byte getAt(int index) { 170 return buffer[index]; 171 } 172 data()173 byte[] data() { 174 return buffer; 175 } 176 appendByte(int b)177 void appendByte(int b) { 178 if (len < buffer.length || ensureCapacity(1)) { 179 buffer[len++] = (byte) b; 180 } 181 } 182 appendWeight16(int w)183 void appendWeight16(int w) { 184 assert ((w & 0xffff) != 0); 185 byte b0 = (byte) (w >>> 8); 186 byte b1 = (byte) w; 187 int appendLength = (b1 == 0) ? 1 : 2; 188 if ((len + appendLength) <= buffer.length || ensureCapacity(appendLength)) { 189 buffer[len++] = b0; 190 if (b1 != 0) { 191 buffer[len++] = b1; 192 } 193 } 194 } 195 appendWeight32(long w)196 void appendWeight32(long w) { 197 assert (w != 0); 198 byte[] bytes = new byte[] { (byte) (w >>> 24), (byte) (w >>> 16), (byte) (w >>> 8), 199 (byte) w }; 200 int appendLength = (bytes[1] == 0) ? 1 : (bytes[2] == 0) ? 2 : (bytes[3] == 0) ? 3 : 4; 201 if ((len + appendLength) <= buffer.length || ensureCapacity(appendLength)) { 202 buffer[len++] = bytes[0]; 203 if (bytes[1] != 0) { 204 buffer[len++] = bytes[1]; 205 if (bytes[2] != 0) { 206 buffer[len++] = bytes[2]; 207 if (bytes[3] != 0) { 208 buffer[len++] = bytes[3]; 209 } 210 } 211 } 212 } 213 } 214 appendReverseWeight16(int w)215 void appendReverseWeight16(int w) { 216 assert ((w & 0xffff) != 0); 217 byte b0 = (byte) (w >>> 8); 218 byte b1 = (byte) w; 219 int appendLength = (b1 == 0) ? 1 : 2; 220 if ((len + appendLength) <= buffer.length || ensureCapacity(appendLength)) { 221 if (b1 == 0) { 222 buffer[len++] = b0; 223 } else { 224 buffer[len] = b1; 225 buffer[len + 1] = b0; 226 len += 2; 227 } 228 } 229 } 230 231 // Appends all but the last byte to the sink. The last byte should be the 01 terminator. appendTo(SortKeyByteSink sink)232 void appendTo(SortKeyByteSink sink) { 233 assert (len > 0 && buffer[len - 1] == 1); 234 sink.Append(buffer, len - 1); 235 } 236 ensureCapacity(int appendCapacity)237 private boolean ensureCapacity(int appendCapacity) { 238 /* not used in Java -- if (!ok) { 239 return false; 240 } */ 241 int newCapacity = 2 * buffer.length; 242 int altCapacity = len + 2 * appendCapacity; 243 if (newCapacity < altCapacity) { 244 newCapacity = altCapacity; 245 } 246 if (newCapacity < 200) { 247 newCapacity = 200; 248 } 249 byte[] newbuf = new byte[newCapacity]; 250 System.arraycopy(buffer, 0, newbuf, 0, len); 251 buffer = newbuf; 252 253 return true; 254 } 255 } 256 getSortKeyLevel(int levels, int level)257 private static SortKeyLevel getSortKeyLevel(int levels, int level) { 258 return (levels & level) != 0 ? new SortKeyLevel() : null; 259 } 260 CollationKeys()261 private CollationKeys() { 262 } // no instantiation 263 264 // Secondary level: Compress up to 33 common weights as 05..25 or 25..45. 265 private static final int SEC_COMMON_LOW = Collation.COMMON_BYTE; 266 private static final int SEC_COMMON_MIDDLE = SEC_COMMON_LOW + 0x20; 267 static final int SEC_COMMON_HIGH = SEC_COMMON_LOW + 0x40; // read by CollationDataReader 268 private static final int SEC_COMMON_MAX_COUNT = 0x21; 269 270 // Case level, lowerFirst: Compress up to 7 common weights as 1..7 or 7..13. 271 private static final int CASE_LOWER_FIRST_COMMON_LOW = 1; 272 private static final int CASE_LOWER_FIRST_COMMON_MIDDLE = 7; 273 private static final int CASE_LOWER_FIRST_COMMON_HIGH = 13; 274 private static final int CASE_LOWER_FIRST_COMMON_MAX_COUNT = 7; 275 276 // Case level, upperFirst: Compress up to 13 common weights as 3..15. 277 private static final int CASE_UPPER_FIRST_COMMON_LOW = 3; 278 @SuppressWarnings("unused") 279 private static final int CASE_UPPER_FIRST_COMMON_HIGH = 15; 280 private static final int CASE_UPPER_FIRST_COMMON_MAX_COUNT = 13; 281 282 // Tertiary level only (no case): Compress up to 97 common weights as 05..65 or 65..C5. 283 private static final int TER_ONLY_COMMON_LOW = Collation.COMMON_BYTE; 284 private static final int TER_ONLY_COMMON_MIDDLE = TER_ONLY_COMMON_LOW + 0x60; 285 private static final int TER_ONLY_COMMON_HIGH = TER_ONLY_COMMON_LOW + 0xc0; 286 private static final int TER_ONLY_COMMON_MAX_COUNT = 0x61; 287 288 // Tertiary with case, lowerFirst: Compress up to 33 common weights as 05..25 or 25..45. 289 private static final int TER_LOWER_FIRST_COMMON_LOW = Collation.COMMON_BYTE; 290 private static final int TER_LOWER_FIRST_COMMON_MIDDLE = TER_LOWER_FIRST_COMMON_LOW + 0x20; 291 private static final int TER_LOWER_FIRST_COMMON_HIGH = TER_LOWER_FIRST_COMMON_LOW + 0x40; 292 private static final int TER_LOWER_FIRST_COMMON_MAX_COUNT = 0x21; 293 294 // Tertiary with case, upperFirst: Compress up to 33 common weights as 85..A5 or A5..C5. 295 private static final int TER_UPPER_FIRST_COMMON_LOW = Collation.COMMON_BYTE + 0x80; 296 private static final int TER_UPPER_FIRST_COMMON_MIDDLE = TER_UPPER_FIRST_COMMON_LOW + 0x20; 297 private static final int TER_UPPER_FIRST_COMMON_HIGH = TER_UPPER_FIRST_COMMON_LOW + 0x40; 298 private static final int TER_UPPER_FIRST_COMMON_MAX_COUNT = 0x21; 299 300 // Quaternary level: Compress up to 113 common weights as 1C..8C or 8C..FC. 301 private static final int QUAT_COMMON_LOW = 0x1c; 302 private static final int QUAT_COMMON_MIDDLE = QUAT_COMMON_LOW + 0x70; 303 private static final int QUAT_COMMON_HIGH = QUAT_COMMON_LOW + 0xE0; 304 private static final int QUAT_COMMON_MAX_COUNT = 0x71; 305 // Primary weights shifted to quaternary level must be encoded with 306 // a lead byte below the common-weight compression range. 307 private static final int QUAT_SHIFTED_LIMIT_BYTE = QUAT_COMMON_LOW - 1; // 0x1b 308 309 /** 310 * Map from collation strength (UColAttributeValue) to a mask of Collation.Level bits up to that 311 * strength, excluding the CASE_LEVEL which is independent of the strength, and excluding 312 * IDENTICAL_LEVEL which this function does not write. 313 */ 314 private static final int levelMasks[] = new int[] { 315 2, // UCOL_PRIMARY -> PRIMARY_LEVEL 316 6, // UCOL_SECONDARY -> up to SECONDARY_LEVEL 317 0x16, // UCOL_TERTIARY -> up to TERTIARY_LEVEL 318 0x36, // UCOL_QUATERNARY -> up to QUATERNARY_LEVEL 319 0, 0, 0, 0, 320 0, 0, 0, 0, 321 0, 0, 0, 322 0x36 // UCOL_IDENTICAL -> up to QUATERNARY_LEVEL 323 }; 324 325 /** 326 * Writes the sort key bytes for minLevel up to the iterator data's strength. Optionally writes 327 * the case level. Stops writing levels when callback.needToWrite(level) returns false. 328 * Separates levels with the LEVEL_SEPARATOR_BYTE but does not write a TERMINATOR_BYTE. 329 */ writeSortKeyUpToQuaternary(CollationIterator iter, boolean[] compressibleBytes, CollationSettings settings, SortKeyByteSink sink, int minLevel, LevelCallback callback, boolean preflight)330 public static void writeSortKeyUpToQuaternary(CollationIterator iter, boolean[] compressibleBytes, 331 CollationSettings settings, SortKeyByteSink sink, int minLevel, LevelCallback callback, 332 boolean preflight) { 333 334 int options = settings.options; 335 // Set of levels to process and write. 336 int levels = levelMasks[CollationSettings.getStrength(options)]; 337 if ((options & CollationSettings.CASE_LEVEL) != 0) { 338 levels |= Collation.CASE_LEVEL_FLAG; 339 } 340 // Minus the levels below minLevel. 341 levels &= ~((1 << minLevel) - 1); 342 if (levels == 0) { 343 return; 344 } 345 346 long variableTop; 347 if ((options & CollationSettings.ALTERNATE_MASK) == 0) { 348 variableTop = 0; 349 } else { 350 // +1 so that we can use "<" and primary ignorables test out early. 351 variableTop = settings.variableTop + 1; 352 } 353 354 int tertiaryMask = CollationSettings.getTertiaryMask(options); 355 356 byte[] p234 = new byte[3]; 357 SortKeyLevel cases = getSortKeyLevel(levels, Collation.CASE_LEVEL_FLAG); 358 SortKeyLevel secondaries = getSortKeyLevel(levels, Collation.SECONDARY_LEVEL_FLAG); 359 SortKeyLevel tertiaries = getSortKeyLevel(levels, Collation.TERTIARY_LEVEL_FLAG); 360 SortKeyLevel quaternaries = getSortKeyLevel(levels, Collation.QUATERNARY_LEVEL_FLAG); 361 362 long prevReorderedPrimary = 0; // 0==no compression 363 int commonCases = 0; 364 int commonSecondaries = 0; 365 int commonTertiaries = 0; 366 int commonQuaternaries = 0; 367 368 int prevSecondary = 0; 369 int secSegmentStart = 0; 370 371 for (;;) { 372 // No need to keep all CEs in the buffer when we write a sort key. 373 iter.clearCEsIfNoneRemaining(); 374 long ce = iter.nextCE(); 375 long p = ce >>> 32; 376 if (p < variableTop && p > Collation.MERGE_SEPARATOR_PRIMARY) { 377 // Variable CE, shift it to quaternary level. 378 // Ignore all following primary ignorables, and shift further variable CEs. 379 if (commonQuaternaries != 0) { 380 --commonQuaternaries; 381 while (commonQuaternaries >= QUAT_COMMON_MAX_COUNT) { 382 quaternaries.appendByte(QUAT_COMMON_MIDDLE); 383 commonQuaternaries -= QUAT_COMMON_MAX_COUNT; 384 } 385 // Shifted primary weights are lower than the common weight. 386 quaternaries.appendByte(QUAT_COMMON_LOW + commonQuaternaries); 387 commonQuaternaries = 0; 388 } 389 do { 390 if ((levels & Collation.QUATERNARY_LEVEL_FLAG) != 0) { 391 if (settings.hasReordering()) { 392 p = settings.reorder(p); 393 } 394 if (((int) p >>> 24) >= QUAT_SHIFTED_LIMIT_BYTE) { 395 // Prevent shifted primary lead bytes from 396 // overlapping with the common compression range. 397 quaternaries.appendByte(QUAT_SHIFTED_LIMIT_BYTE); 398 } 399 quaternaries.appendWeight32(p); 400 } 401 do { 402 ce = iter.nextCE(); 403 p = ce >>> 32; 404 } while (p == 0); 405 } while (p < variableTop && p > Collation.MERGE_SEPARATOR_PRIMARY); 406 } 407 // ce could be primary ignorable, or NO_CE, or the merge separator, 408 // or a regular primary CE, but it is not variable. 409 // If ce==NO_CE, then write nothing for the primary level but 410 // terminate compression on all levels and then exit the loop. 411 if (p > Collation.NO_CE_PRIMARY && (levels & Collation.PRIMARY_LEVEL_FLAG) != 0) { 412 // Test the un-reordered primary for compressibility. 413 boolean isCompressible = compressibleBytes[(int) p >>> 24]; 414 if(settings.hasReordering()) { 415 p = settings.reorder(p); 416 } 417 int p1 = (int) p >>> 24; 418 if (!isCompressible || p1 != ((int) prevReorderedPrimary >>> 24)) { 419 if (prevReorderedPrimary != 0) { 420 if (p < prevReorderedPrimary) { 421 // No primary compression terminator 422 // at the end of the level or merged segment. 423 if (p1 > Collation.MERGE_SEPARATOR_BYTE) { 424 sink.Append(Collation.PRIMARY_COMPRESSION_LOW_BYTE); 425 } 426 } else { 427 sink.Append(Collation.PRIMARY_COMPRESSION_HIGH_BYTE); 428 } 429 } 430 sink.Append(p1); 431 if(isCompressible) { 432 prevReorderedPrimary = p; 433 } else { 434 prevReorderedPrimary = 0; 435 } 436 } 437 byte p2 = (byte) (p >>> 16); 438 if (p2 != 0) { 439 p234[0] = p2; 440 p234[1] = (byte) (p >>> 8); 441 p234[2] = (byte) p; 442 sink.Append(p234, (p234[1] == 0) ? 1 : (p234[2] == 0) ? 2 : 3); 443 } 444 // Optimization for internalNextSortKeyPart(): 445 // When the primary level overflows we can stop because we need not 446 // calculate (preflight) the whole sort key length. 447 if (!preflight && sink.Overflowed()) { 448 // not used in Java -- if (!sink.IsOk()) { 449 // Java porting note: U_MEMORY_ALLOCATION_ERROR is set here in 450 // C implementation. IsOk() in Java always returns true, so this 451 // is a dead code. 452 return; 453 } 454 } 455 456 int lower32 = (int) ce; 457 if (lower32 == 0) { 458 continue; 459 } // completely ignorable, no secondary/case/tertiary/quaternary 460 461 if ((levels & Collation.SECONDARY_LEVEL_FLAG) != 0) { 462 int s = lower32 >>> 16; // 16 bits 463 if (s == 0) { 464 // secondary ignorable 465 } else if (s == Collation.COMMON_WEIGHT16 && 466 ((options & CollationSettings.BACKWARD_SECONDARY) == 0 || 467 p != Collation.MERGE_SEPARATOR_PRIMARY)) { 468 // s is a common secondary weight, and 469 // backwards-secondary is off or the ce is not the merge separator. 470 ++commonSecondaries; 471 } else if ((options & CollationSettings.BACKWARD_SECONDARY) == 0) { 472 if (commonSecondaries != 0) { 473 --commonSecondaries; 474 while (commonSecondaries >= SEC_COMMON_MAX_COUNT) { 475 secondaries.appendByte(SEC_COMMON_MIDDLE); 476 commonSecondaries -= SEC_COMMON_MAX_COUNT; 477 } 478 int b; 479 if (s < Collation.COMMON_WEIGHT16) { 480 b = SEC_COMMON_LOW + commonSecondaries; 481 } else { 482 b = SEC_COMMON_HIGH - commonSecondaries; 483 } 484 secondaries.appendByte(b); 485 commonSecondaries = 0; 486 } 487 secondaries.appendWeight16(s); 488 } else { 489 if (commonSecondaries != 0) { 490 --commonSecondaries; 491 // Append reverse weights. The level will be re-reversed later. 492 int remainder = commonSecondaries % SEC_COMMON_MAX_COUNT; 493 int b; 494 if (prevSecondary < Collation.COMMON_WEIGHT16) { 495 b = SEC_COMMON_LOW + remainder; 496 } else { 497 b = SEC_COMMON_HIGH - remainder; 498 } 499 secondaries.appendByte(b); 500 commonSecondaries -= remainder; 501 // commonSecondaries is now a multiple of SEC_COMMON_MAX_COUNT. 502 while (commonSecondaries > 0) { // same as >= SEC_COMMON_MAX_COUNT 503 secondaries.appendByte(SEC_COMMON_MIDDLE); 504 commonSecondaries -= SEC_COMMON_MAX_COUNT; 505 } 506 // commonSecondaries == 0 507 } 508 if (0 < p && p <= Collation.MERGE_SEPARATOR_PRIMARY) { 509 // The backwards secondary level compares secondary weights backwards 510 // within segments separated by the merge separator (U+FFFE). 511 byte[] secs = secondaries.data(); 512 int last = secondaries.length() - 1; 513 while (secSegmentStart < last) { 514 byte b = secs[secSegmentStart]; 515 secs[secSegmentStart++] = secs[last]; 516 secs[last--] = b; 517 } 518 secondaries.appendByte(p == Collation.NO_CE_PRIMARY ? 519 Collation.LEVEL_SEPARATOR_BYTE : Collation.MERGE_SEPARATOR_BYTE); 520 prevSecondary = 0; 521 secSegmentStart = secondaries.length(); 522 } else { 523 secondaries.appendReverseWeight16(s); 524 prevSecondary = s; 525 } 526 } 527 } 528 529 if ((levels & Collation.CASE_LEVEL_FLAG) != 0) { 530 if ((CollationSettings.getStrength(options) == Collator.PRIMARY) ? p == 0 531 : (lower32 >>> 16) == 0) { 532 // Primary+caseLevel: Ignore case level weights of primary ignorables. 533 // Otherwise: Ignore case level weights of secondary ignorables. 534 // For details see the comments in the CollationCompare class. 535 } else { 536 int c = (lower32 >>> 8) & 0xff; // case bits & tertiary lead byte 537 assert ((c & 0xc0) != 0xc0); 538 if ((c & 0xc0) == 0 && c > Collation.LEVEL_SEPARATOR_BYTE) { 539 ++commonCases; 540 } else { 541 if ((options & CollationSettings.UPPER_FIRST) == 0) { 542 // lowerFirst: Compress common weights to nibbles 1..7..13, mixed=14, 543 // upper=15. 544 // If there are only common (=lowest) weights in the whole level, 545 // then we need not write anything. 546 // Level length differences are handled already on the next-higher level. 547 if (commonCases != 0 && 548 (c > Collation.LEVEL_SEPARATOR_BYTE || !cases.isEmpty())) { 549 --commonCases; 550 while (commonCases >= CASE_LOWER_FIRST_COMMON_MAX_COUNT) { 551 cases.appendByte(CASE_LOWER_FIRST_COMMON_MIDDLE << 4); 552 commonCases -= CASE_LOWER_FIRST_COMMON_MAX_COUNT; 553 } 554 int b; 555 if (c <= Collation.LEVEL_SEPARATOR_BYTE) { 556 b = CASE_LOWER_FIRST_COMMON_LOW + commonCases; 557 } else { 558 b = CASE_LOWER_FIRST_COMMON_HIGH - commonCases; 559 } 560 cases.appendByte(b << 4); 561 commonCases = 0; 562 } 563 if (c > Collation.LEVEL_SEPARATOR_BYTE) { 564 c = (CASE_LOWER_FIRST_COMMON_HIGH + (c >>> 6)) << 4; // 14 or 15 565 } 566 } else { 567 // upperFirst: Compress common weights to nibbles 3..15, mixed=2, 568 // upper=1. 569 // The compressed common case weights only go up from the "low" value 570 // because with upperFirst the common weight is the highest one. 571 if (commonCases != 0) { 572 --commonCases; 573 while (commonCases >= CASE_UPPER_FIRST_COMMON_MAX_COUNT) { 574 cases.appendByte(CASE_UPPER_FIRST_COMMON_LOW << 4); 575 commonCases -= CASE_UPPER_FIRST_COMMON_MAX_COUNT; 576 } 577 cases.appendByte((CASE_UPPER_FIRST_COMMON_LOW + commonCases) << 4); 578 commonCases = 0; 579 } 580 if (c > Collation.LEVEL_SEPARATOR_BYTE) { 581 c = (CASE_UPPER_FIRST_COMMON_LOW - (c >>> 6)) << 4; // 2 or 1 582 } 583 } 584 // c is a separator byte 01, 585 // or a left-shifted nibble 0x10, 0x20, ... 0xf0. 586 cases.appendByte(c); 587 } 588 } 589 } 590 591 if ((levels & Collation.TERTIARY_LEVEL_FLAG) != 0) { 592 int t = lower32 & tertiaryMask; 593 assert ((lower32 & 0xc000) != 0xc000); 594 if (t == Collation.COMMON_WEIGHT16) { 595 ++commonTertiaries; 596 } else if ((tertiaryMask & 0x8000) == 0) { 597 // Tertiary weights without case bits. 598 // Move lead bytes 06..3F to C6..FF for a large common-weight range. 599 if (commonTertiaries != 0) { 600 --commonTertiaries; 601 while (commonTertiaries >= TER_ONLY_COMMON_MAX_COUNT) { 602 tertiaries.appendByte(TER_ONLY_COMMON_MIDDLE); 603 commonTertiaries -= TER_ONLY_COMMON_MAX_COUNT; 604 } 605 int b; 606 if (t < Collation.COMMON_WEIGHT16) { 607 b = TER_ONLY_COMMON_LOW + commonTertiaries; 608 } else { 609 b = TER_ONLY_COMMON_HIGH - commonTertiaries; 610 } 611 tertiaries.appendByte(b); 612 commonTertiaries = 0; 613 } 614 if (t > Collation.COMMON_WEIGHT16) { 615 t += 0xc000; 616 } 617 tertiaries.appendWeight16(t); 618 } else if ((options & CollationSettings.UPPER_FIRST) == 0) { 619 // Tertiary weights with caseFirst=lowerFirst. 620 // Move lead bytes 06..BF to 46..FF for the common-weight range. 621 if (commonTertiaries != 0) { 622 --commonTertiaries; 623 while (commonTertiaries >= TER_LOWER_FIRST_COMMON_MAX_COUNT) { 624 tertiaries.appendByte(TER_LOWER_FIRST_COMMON_MIDDLE); 625 commonTertiaries -= TER_LOWER_FIRST_COMMON_MAX_COUNT; 626 } 627 int b; 628 if (t < Collation.COMMON_WEIGHT16) { 629 b = TER_LOWER_FIRST_COMMON_LOW + commonTertiaries; 630 } else { 631 b = TER_LOWER_FIRST_COMMON_HIGH - commonTertiaries; 632 } 633 tertiaries.appendByte(b); 634 commonTertiaries = 0; 635 } 636 if (t > Collation.COMMON_WEIGHT16) { 637 t += 0x4000; 638 } 639 tertiaries.appendWeight16(t); 640 } else { 641 // Tertiary weights with caseFirst=upperFirst. 642 // Do not change the artificial uppercase weight of a tertiary CE (0.0.ut), 643 // to keep tertiary CEs well-formed. 644 // Their case+tertiary weights must be greater than those of 645 // primary and secondary CEs. 646 // 647 // Separator 01 -> 01 (unchanged) 648 // Lowercase 02..04 -> 82..84 (includes uncased) 649 // Common weight 05 -> 85..C5 (common-weight compression range) 650 // Lowercase 06..3F -> C6..FF 651 // Mixed case 42..7F -> 42..7F 652 // Uppercase 82..BF -> 02..3F 653 // Tertiary CE 86..BF -> C6..FF 654 if (t <= Collation.NO_CE_WEIGHT16) { 655 // Keep separators unchanged. 656 } else if ((lower32 >>> 16) != 0) { 657 // Invert case bits of primary & secondary CEs. 658 t ^= 0xc000; 659 if (t < (TER_UPPER_FIRST_COMMON_HIGH << 8)) { 660 t -= 0x4000; 661 } 662 } else { 663 // Keep uppercase bits of tertiary CEs. 664 assert (0x8600 <= t && t <= 0xbfff); 665 t += 0x4000; 666 } 667 if (commonTertiaries != 0) { 668 --commonTertiaries; 669 while (commonTertiaries >= TER_UPPER_FIRST_COMMON_MAX_COUNT) { 670 tertiaries.appendByte(TER_UPPER_FIRST_COMMON_MIDDLE); 671 commonTertiaries -= TER_UPPER_FIRST_COMMON_MAX_COUNT; 672 } 673 int b; 674 if (t < (TER_UPPER_FIRST_COMMON_LOW << 8)) { 675 b = TER_UPPER_FIRST_COMMON_LOW + commonTertiaries; 676 } else { 677 b = TER_UPPER_FIRST_COMMON_HIGH - commonTertiaries; 678 } 679 tertiaries.appendByte(b); 680 commonTertiaries = 0; 681 } 682 tertiaries.appendWeight16(t); 683 } 684 } 685 686 if ((levels & Collation.QUATERNARY_LEVEL_FLAG) != 0) { 687 int q = lower32 & 0xffff; 688 if ((q & 0xc0) == 0 && q > Collation.NO_CE_WEIGHT16) { 689 ++commonQuaternaries; 690 } else if (q == Collation.NO_CE_WEIGHT16 691 && (options & CollationSettings.ALTERNATE_MASK) == 0 692 && quaternaries.isEmpty()) { 693 // If alternate=non-ignorable and there are only common quaternary weights, 694 // then we need not write anything. 695 // The only weights greater than the merge separator and less than the common 696 // weight 697 // are shifted primary weights, which are not generated for 698 // alternate=non-ignorable. 699 // There are also exactly as many quaternary weights as tertiary weights, 700 // so level length differences are handled already on tertiary level. 701 // Any above-common quaternary weight will compare greater regardless. 702 quaternaries.appendByte(Collation.LEVEL_SEPARATOR_BYTE); 703 } else { 704 if (q == Collation.NO_CE_WEIGHT16) { 705 q = Collation.LEVEL_SEPARATOR_BYTE; 706 } else { 707 q = 0xfc + ((q >>> 6) & 3); 708 } 709 if (commonQuaternaries != 0) { 710 --commonQuaternaries; 711 while (commonQuaternaries >= QUAT_COMMON_MAX_COUNT) { 712 quaternaries.appendByte(QUAT_COMMON_MIDDLE); 713 commonQuaternaries -= QUAT_COMMON_MAX_COUNT; 714 } 715 int b; 716 if (q < QUAT_COMMON_LOW) { 717 b = QUAT_COMMON_LOW + commonQuaternaries; 718 } else { 719 b = QUAT_COMMON_HIGH - commonQuaternaries; 720 } 721 quaternaries.appendByte(b); 722 commonQuaternaries = 0; 723 } 724 quaternaries.appendByte(q); 725 } 726 } 727 728 if ((lower32 >>> 24) == Collation.LEVEL_SEPARATOR_BYTE) { 729 break; 730 } // ce == NO_CE 731 } 732 733 // Append the beyond-primary levels. 734 // not used in Java -- boolean ok = true; 735 if ((levels & Collation.SECONDARY_LEVEL_FLAG) != 0) { 736 if (!callback.needToWrite(Collation.SECONDARY_LEVEL)) { 737 return; 738 } 739 // not used in Java -- ok &= secondaries.isOk(); 740 sink.Append(Collation.LEVEL_SEPARATOR_BYTE); 741 secondaries.appendTo(sink); 742 } 743 744 if ((levels & Collation.CASE_LEVEL_FLAG) != 0) { 745 if (!callback.needToWrite(Collation.CASE_LEVEL)) { 746 return; 747 } 748 // not used in Java -- ok &= cases.isOk(); 749 sink.Append(Collation.LEVEL_SEPARATOR_BYTE); 750 // Write pairs of nibbles as bytes, except separator bytes as themselves. 751 int length = cases.length() - 1; // Ignore the trailing NO_CE. 752 byte b = 0; 753 for (int i = 0; i < length; ++i) { 754 byte c = cases.getAt(i); 755 assert ((c & 0xf) == 0 && c != 0); 756 if (b == 0) { 757 b = c; 758 } else { 759 sink.Append(b | ((c >> 4) & 0xf)); 760 b = 0; 761 } 762 } 763 if (b != 0) { 764 sink.Append(b); 765 } 766 } 767 768 if ((levels & Collation.TERTIARY_LEVEL_FLAG) != 0) { 769 if (!callback.needToWrite(Collation.TERTIARY_LEVEL)) { 770 return; 771 } 772 // not used in Java -- ok &= tertiaries.isOk(); 773 sink.Append(Collation.LEVEL_SEPARATOR_BYTE); 774 tertiaries.appendTo(sink); 775 } 776 777 if ((levels & Collation.QUATERNARY_LEVEL_FLAG) != 0) { 778 if (!callback.needToWrite(Collation.QUATERNARY_LEVEL)) { 779 return; 780 } 781 // not used in Java -- ok &= quaternaries.isOk(); 782 sink.Append(Collation.LEVEL_SEPARATOR_BYTE); 783 quaternaries.appendTo(sink); 784 } 785 786 // not used in Java -- if (!ok || !sink.IsOk()) { 787 // Java porting note: U_MEMORY_ALLOCATION_ERROR is set here in 788 // C implementation. IsOk() in Java always returns true, so this 789 // is a dead code. 790 } 791 } 792