1 /* 2 ******************************************************************************* 3 * Copyright (C) 2012-2015, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6 * CollationKeys.java, ported from collationkeys.h/.cpp 7 * 8 * C++ version created on: 2012sep02 9 * created by: Markus W. Scherer 10 */ 11 12 package com.ibm.icu.impl.coll; 13 14 import com.ibm.icu.text.Collator; 15 16 public final class CollationKeys /* all methods are static */ { 17 18 // Java porting note: C++ SortKeyByteSink class extends a common class ByteSink, 19 // which is not available in Java. We don't need a super class created for implementing 20 // collation features. 21 public static abstract class SortKeyByteSink { 22 protected byte[] buffer_; 23 // protected int capacity_; == buffer_.length 24 private int appended_ = 0; 25 // not used in Java -- private int ignore_ = 0; 26 SortKeyByteSink(byte[] dest)27 public SortKeyByteSink(byte[] dest) { 28 buffer_ = dest; 29 } 30 31 /** 32 * Needed in Java for when we write to the buffer directly. 33 * In C++, the SortKeyByteSink is a subclass of ByteSink and lower-level code can write to that. 34 * TODO: Can we make Java SortKeyByteSink have-a ByteArrayWrapper and write through to it? 35 * Or maybe create interface ByteSink, have SortKeyByteSink implement it, and have BOCSU write to that?? 36 */ setBufferAndAppended(byte[] dest, int app)37 public void setBufferAndAppended(byte[] dest, int app) { 38 buffer_ = dest; 39 appended_ = app; 40 } 41 42 /* not used in Java -- public void IgnoreBytes(int numIgnore) { 43 ignore_ = numIgnore; 44 } */ 45 46 /** 47 * @param bytes 48 * the array of byte 49 * @param n 50 * the length of bytes to be appended 51 */ Append(byte[] bytes, int n)52 public void Append(byte[] bytes, int n) { 53 if (n <= 0 || bytes == null) { 54 return; 55 } 56 57 /* not used in Java -- if (ignore_ > 0) { 58 int ignoreRest = ignore_ - n; 59 if (ignoreRest >= 0) { 60 ignore_ = ignoreRest; 61 return; 62 } else { 63 start = ignore_; 64 n = -ignoreRest; 65 ignore_ = 0; 66 } 67 } */ 68 69 int length = appended_; 70 appended_ += n; 71 72 int available = buffer_.length - length; 73 if (n <= available) { 74 System.arraycopy(bytes, 0, buffer_, length, n); 75 } else { 76 AppendBeyondCapacity(bytes, 0, n, length); 77 } 78 } 79 Append(int b)80 public void Append(int b) { 81 /* not used in Java -- if (ignore_ > 0) { 82 --ignore_; 83 } else */ { 84 if (appended_ < buffer_.length || Resize(1, appended_)) { 85 buffer_[appended_] = (byte) b; 86 } 87 ++appended_; 88 } 89 } 90 91 // Java porting note: This method is not used by collator implementation. 92 // 93 // virtual char *GetAppendBuffer(int min_capacity, 94 // int desired_capacity_hint, 95 // char *scratch, int scratch_capacity, 96 // int *result_capacity); 97 NumberOfBytesAppended()98 public int NumberOfBytesAppended() { 99 return appended_; 100 } 101 GetRemainingCapacity()102 public int GetRemainingCapacity() { 103 return /* not used in Java -- ignore_ + */ buffer_.length - appended_; 104 } 105 Overflowed()106 public boolean Overflowed() { 107 return appended_ > buffer_.length; 108 } 109 110 /* not used in Java -- public boolean IsOk() { 111 return true; 112 } */ 113 114 /** 115 * @param bytes 116 * the array of byte 117 * @param start 118 * the start index within the array to be appended 119 * @param n 120 * the length of bytes to be appended 121 * @param length 122 * the length of buffer required to store the entire data (i.e. already appended 123 * bytes + bytes to be appended by this method) 124 */ AppendBeyondCapacity(byte[] bytes, int start, int n, int length)125 protected abstract void AppendBeyondCapacity(byte[] bytes, int start, int n, int length); 126 Resize(int appendCapacity, int length)127 protected abstract boolean Resize(int appendCapacity, int length); 128 } 129 130 public static class LevelCallback { 131 /** 132 * @param level 133 * The next level about to be written to the ByteSink. 134 * @return true if the level is to be written (the base class implementation always returns 135 * true) 136 */ needToWrite(int level)137 boolean needToWrite(int level) { 138 return true; 139 } 140 } 141 public static final LevelCallback SIMPLE_LEVEL_FALLBACK = new LevelCallback(); 142 143 private static final class SortKeyLevel { 144 private static final int INITIAL_CAPACITY = 40; 145 146 byte[] buffer = new byte[INITIAL_CAPACITY]; 147 int len = 0; 148 // not used in Java -- private static final boolean ok = true; // In C++ "ok" is reset when memory allocations fail. 149 SortKeyLevel()150 SortKeyLevel() { 151 } 152 153 /* not used in Java -- boolean isOk() { 154 return ok; 155 } */ 156 isEmpty()157 boolean isEmpty() { 158 return len == 0; 159 } 160 length()161 int length() { 162 return len; 163 } 164 165 // Java porting note: Java uses this instead of C++ operator [] overload 166 // uint8_t operator[](int index) getAt(int index)167 byte getAt(int index) { 168 return buffer[index]; 169 } 170 data()171 byte[] data() { 172 return buffer; 173 } 174 appendByte(int b)175 void appendByte(int b) { 176 if (len < buffer.length || ensureCapacity(1)) { 177 buffer[len++] = (byte) b; 178 } 179 } 180 appendWeight16(int w)181 void appendWeight16(int w) { 182 assert ((w & 0xffff) != 0); 183 byte b0 = (byte) (w >>> 8); 184 byte b1 = (byte) w; 185 int appendLength = (b1 == 0) ? 1 : 2; 186 if ((len + appendLength) <= buffer.length || ensureCapacity(appendLength)) { 187 buffer[len++] = b0; 188 if (b1 != 0) { 189 buffer[len++] = b1; 190 } 191 } 192 } 193 appendWeight32(long w)194 void appendWeight32(long w) { 195 assert (w != 0); 196 byte[] bytes = new byte[] { (byte) (w >>> 24), (byte) (w >>> 16), (byte) (w >>> 8), 197 (byte) w }; 198 int appendLength = (bytes[1] == 0) ? 1 : (bytes[2] == 0) ? 2 : (bytes[3] == 0) ? 3 : 4; 199 if ((len + appendLength) <= buffer.length || ensureCapacity(appendLength)) { 200 buffer[len++] = bytes[0]; 201 if (bytes[1] != 0) { 202 buffer[len++] = bytes[1]; 203 if (bytes[2] != 0) { 204 buffer[len++] = bytes[2]; 205 if (bytes[3] != 0) { 206 buffer[len++] = bytes[3]; 207 } 208 } 209 } 210 } 211 } 212 appendReverseWeight16(int w)213 void appendReverseWeight16(int w) { 214 assert ((w & 0xffff) != 0); 215 byte b0 = (byte) (w >>> 8); 216 byte b1 = (byte) w; 217 int appendLength = (b1 == 0) ? 1 : 2; 218 if ((len + appendLength) <= buffer.length || ensureCapacity(appendLength)) { 219 if (b1 == 0) { 220 buffer[len++] = b0; 221 } else { 222 buffer[len] = b1; 223 buffer[len + 1] = b0; 224 len += 2; 225 } 226 } 227 } 228 229 // Appends all but the last byte to the sink. The last byte should be the 01 terminator. appendTo(SortKeyByteSink sink)230 void appendTo(SortKeyByteSink sink) { 231 assert (len > 0 && buffer[len - 1] == 1); 232 sink.Append(buffer, len - 1); 233 } 234 ensureCapacity(int appendCapacity)235 private boolean ensureCapacity(int appendCapacity) { 236 /* not used in Java -- if (!ok) { 237 return false; 238 } */ 239 int newCapacity = 2 * buffer.length; 240 int altCapacity = len + 2 * appendCapacity; 241 if (newCapacity < altCapacity) { 242 newCapacity = altCapacity; 243 } 244 if (newCapacity < 200) { 245 newCapacity = 200; 246 } 247 byte[] newbuf = new byte[newCapacity]; 248 System.arraycopy(buffer, 0, newbuf, 0, len); 249 buffer = newbuf; 250 251 return true; 252 } 253 } 254 getSortKeyLevel(int levels, int level)255 private static SortKeyLevel getSortKeyLevel(int levels, int level) { 256 return (levels & level) != 0 ? new SortKeyLevel() : null; 257 } 258 CollationKeys()259 private CollationKeys() { 260 } // no instantiation 261 262 // Secondary level: Compress up to 33 common weights as 05..25 or 25..45. 263 private static final int SEC_COMMON_LOW = Collation.COMMON_BYTE; 264 private static final int SEC_COMMON_MIDDLE = SEC_COMMON_LOW + 0x20; 265 static final int SEC_COMMON_HIGH = SEC_COMMON_LOW + 0x40; // read by CollationDataReader 266 private static final int SEC_COMMON_MAX_COUNT = 0x21; 267 268 // Case level, lowerFirst: Compress up to 7 common weights as 1..7 or 7..13. 269 private static final int CASE_LOWER_FIRST_COMMON_LOW = 1; 270 private static final int CASE_LOWER_FIRST_COMMON_MIDDLE = 7; 271 private static final int CASE_LOWER_FIRST_COMMON_HIGH = 13; 272 private static final int CASE_LOWER_FIRST_COMMON_MAX_COUNT = 7; 273 274 // Case level, upperFirst: Compress up to 13 common weights as 3..15. 275 private static final int CASE_UPPER_FIRST_COMMON_LOW = 3; 276 @SuppressWarnings("unused") 277 private static final int CASE_UPPER_FIRST_COMMON_HIGH = 15; 278 private static final int CASE_UPPER_FIRST_COMMON_MAX_COUNT = 13; 279 280 // Tertiary level only (no case): Compress up to 97 common weights as 05..65 or 65..C5. 281 private static final int TER_ONLY_COMMON_LOW = Collation.COMMON_BYTE; 282 private static final int TER_ONLY_COMMON_MIDDLE = TER_ONLY_COMMON_LOW + 0x60; 283 private static final int TER_ONLY_COMMON_HIGH = TER_ONLY_COMMON_LOW + 0xc0; 284 private static final int TER_ONLY_COMMON_MAX_COUNT = 0x61; 285 286 // Tertiary with case, lowerFirst: Compress up to 33 common weights as 05..25 or 25..45. 287 private static final int TER_LOWER_FIRST_COMMON_LOW = Collation.COMMON_BYTE; 288 private static final int TER_LOWER_FIRST_COMMON_MIDDLE = TER_LOWER_FIRST_COMMON_LOW + 0x20; 289 private static final int TER_LOWER_FIRST_COMMON_HIGH = TER_LOWER_FIRST_COMMON_LOW + 0x40; 290 private static final int TER_LOWER_FIRST_COMMON_MAX_COUNT = 0x21; 291 292 // Tertiary with case, upperFirst: Compress up to 33 common weights as 85..A5 or A5..C5. 293 private static final int TER_UPPER_FIRST_COMMON_LOW = Collation.COMMON_BYTE + 0x80; 294 private static final int TER_UPPER_FIRST_COMMON_MIDDLE = TER_UPPER_FIRST_COMMON_LOW + 0x20; 295 private static final int TER_UPPER_FIRST_COMMON_HIGH = TER_UPPER_FIRST_COMMON_LOW + 0x40; 296 private static final int TER_UPPER_FIRST_COMMON_MAX_COUNT = 0x21; 297 298 // Quaternary level: Compress up to 113 common weights as 1C..8C or 8C..FC. 299 private static final int QUAT_COMMON_LOW = 0x1c; 300 private static final int QUAT_COMMON_MIDDLE = QUAT_COMMON_LOW + 0x70; 301 private static final int QUAT_COMMON_HIGH = QUAT_COMMON_LOW + 0xE0; 302 private static final int QUAT_COMMON_MAX_COUNT = 0x71; 303 // Primary weights shifted to quaternary level must be encoded with 304 // a lead byte below the common-weight compression range. 305 private static final int QUAT_SHIFTED_LIMIT_BYTE = QUAT_COMMON_LOW - 1; // 0x1b 306 307 /** 308 * Map from collation strength (UColAttributeValue) to a mask of Collation.Level bits up to that 309 * strength, excluding the CASE_LEVEL which is independent of the strength, and excluding 310 * IDENTICAL_LEVEL which this function does not write. 311 */ 312 private static final int levelMasks[] = new int[] { 313 2, // UCOL_PRIMARY -> PRIMARY_LEVEL 314 6, // UCOL_SECONDARY -> up to SECONDARY_LEVEL 315 0x16, // UCOL_TERTIARY -> up to TERTIARY_LEVEL 316 0x36, // UCOL_QUATERNARY -> up to QUATERNARY_LEVEL 317 0, 0, 0, 0, 318 0, 0, 0, 0, 319 0, 0, 0, 320 0x36 // UCOL_IDENTICAL -> up to QUATERNARY_LEVEL 321 }; 322 323 /** 324 * Writes the sort key bytes for minLevel up to the iterator data's strength. Optionally writes 325 * the case level. Stops writing levels when callback.needToWrite(level) returns false. 326 * Separates levels with the LEVEL_SEPARATOR_BYTE but does not write a TERMINATOR_BYTE. 327 */ writeSortKeyUpToQuaternary(CollationIterator iter, boolean[] compressibleBytes, CollationSettings settings, SortKeyByteSink sink, int minLevel, LevelCallback callback, boolean preflight)328 public static void writeSortKeyUpToQuaternary(CollationIterator iter, boolean[] compressibleBytes, 329 CollationSettings settings, SortKeyByteSink sink, int minLevel, LevelCallback callback, 330 boolean preflight) { 331 332 int options = settings.options; 333 // Set of levels to process and write. 334 int levels = levelMasks[CollationSettings.getStrength(options)]; 335 if ((options & CollationSettings.CASE_LEVEL) != 0) { 336 levels |= Collation.CASE_LEVEL_FLAG; 337 } 338 // Minus the levels below minLevel. 339 levels &= ~((1 << minLevel) - 1); 340 if (levels == 0) { 341 return; 342 } 343 344 long variableTop; 345 if ((options & CollationSettings.ALTERNATE_MASK) == 0) { 346 variableTop = 0; 347 } else { 348 // +1 so that we can use "<" and primary ignorables test out early. 349 variableTop = settings.variableTop + 1; 350 } 351 352 int tertiaryMask = CollationSettings.getTertiaryMask(options); 353 354 byte[] p234 = new byte[3]; 355 SortKeyLevel cases = getSortKeyLevel(levels, Collation.CASE_LEVEL_FLAG); 356 SortKeyLevel secondaries = getSortKeyLevel(levels, Collation.SECONDARY_LEVEL_FLAG); 357 SortKeyLevel tertiaries = getSortKeyLevel(levels, Collation.TERTIARY_LEVEL_FLAG); 358 SortKeyLevel quaternaries = getSortKeyLevel(levels, Collation.QUATERNARY_LEVEL_FLAG); 359 360 long prevReorderedPrimary = 0; // 0==no compression 361 int commonCases = 0; 362 int commonSecondaries = 0; 363 int commonTertiaries = 0; 364 int commonQuaternaries = 0; 365 366 int prevSecondary = 0; 367 int secSegmentStart = 0; 368 369 for (;;) { 370 // No need to keep all CEs in the buffer when we write a sort key. 371 iter.clearCEsIfNoneRemaining(); 372 long ce = iter.nextCE(); 373 long p = ce >>> 32; 374 if (p < variableTop && p > Collation.MERGE_SEPARATOR_PRIMARY) { 375 // Variable CE, shift it to quaternary level. 376 // Ignore all following primary ignorables, and shift further variable CEs. 377 if (commonQuaternaries != 0) { 378 --commonQuaternaries; 379 while (commonQuaternaries >= QUAT_COMMON_MAX_COUNT) { 380 quaternaries.appendByte(QUAT_COMMON_MIDDLE); 381 commonQuaternaries -= QUAT_COMMON_MAX_COUNT; 382 } 383 // Shifted primary weights are lower than the common weight. 384 quaternaries.appendByte(QUAT_COMMON_LOW + commonQuaternaries); 385 commonQuaternaries = 0; 386 } 387 do { 388 if ((levels & Collation.QUATERNARY_LEVEL_FLAG) != 0) { 389 if (settings.hasReordering()) { 390 p = settings.reorder(p); 391 } 392 if (((int) p >>> 24) >= QUAT_SHIFTED_LIMIT_BYTE) { 393 // Prevent shifted primary lead bytes from 394 // overlapping with the common compression range. 395 quaternaries.appendByte(QUAT_SHIFTED_LIMIT_BYTE); 396 } 397 quaternaries.appendWeight32(p); 398 } 399 do { 400 ce = iter.nextCE(); 401 p = ce >>> 32; 402 } while (p == 0); 403 } while (p < variableTop && p > Collation.MERGE_SEPARATOR_PRIMARY); 404 } 405 // ce could be primary ignorable, or NO_CE, or the merge separator, 406 // or a regular primary CE, but it is not variable. 407 // If ce==NO_CE, then write nothing for the primary level but 408 // terminate compression on all levels and then exit the loop. 409 if (p > Collation.NO_CE_PRIMARY && (levels & Collation.PRIMARY_LEVEL_FLAG) != 0) { 410 // Test the un-reordered primary for compressibility. 411 boolean isCompressible = compressibleBytes[(int) p >>> 24]; 412 if(settings.hasReordering()) { 413 p = settings.reorder(p); 414 } 415 int p1 = (int) p >>> 24; 416 if (!isCompressible || p1 != ((int) prevReorderedPrimary >>> 24)) { 417 if (prevReorderedPrimary != 0) { 418 if (p < prevReorderedPrimary) { 419 // No primary compression terminator 420 // at the end of the level or merged segment. 421 if (p1 > Collation.MERGE_SEPARATOR_BYTE) { 422 sink.Append(Collation.PRIMARY_COMPRESSION_LOW_BYTE); 423 } 424 } else { 425 sink.Append(Collation.PRIMARY_COMPRESSION_HIGH_BYTE); 426 } 427 } 428 sink.Append(p1); 429 if(isCompressible) { 430 prevReorderedPrimary = p; 431 } else { 432 prevReorderedPrimary = 0; 433 } 434 } 435 byte p2 = (byte) (p >>> 16); 436 if (p2 != 0) { 437 p234[0] = p2; 438 p234[1] = (byte) (p >>> 8); 439 p234[2] = (byte) p; 440 sink.Append(p234, (p234[1] == 0) ? 1 : (p234[2] == 0) ? 2 : 3); 441 } 442 // Optimization for internalNextSortKeyPart(): 443 // When the primary level overflows we can stop because we need not 444 // calculate (preflight) the whole sort key length. 445 if (!preflight && sink.Overflowed()) { 446 // not used in Java -- if (!sink.IsOk()) { 447 // Java porting note: U_MEMORY_ALLOCATION_ERROR is set here in 448 // C implementation. IsOk() in Java always returns true, so this 449 // is a dead code. 450 return; 451 } 452 } 453 454 int lower32 = (int) ce; 455 if (lower32 == 0) { 456 continue; 457 } // completely ignorable, no secondary/case/tertiary/quaternary 458 459 if ((levels & Collation.SECONDARY_LEVEL_FLAG) != 0) { 460 int s = lower32 >>> 16; // 16 bits 461 if (s == 0) { 462 // secondary ignorable 463 } else if (s == Collation.COMMON_WEIGHT16 && 464 ((options & CollationSettings.BACKWARD_SECONDARY) == 0 || 465 p != Collation.MERGE_SEPARATOR_PRIMARY)) { 466 // s is a common secondary weight, and 467 // backwards-secondary is off or the ce is not the merge separator. 468 ++commonSecondaries; 469 } else if ((options & CollationSettings.BACKWARD_SECONDARY) == 0) { 470 if (commonSecondaries != 0) { 471 --commonSecondaries; 472 while (commonSecondaries >= SEC_COMMON_MAX_COUNT) { 473 secondaries.appendByte(SEC_COMMON_MIDDLE); 474 commonSecondaries -= SEC_COMMON_MAX_COUNT; 475 } 476 int b; 477 if (s < Collation.COMMON_WEIGHT16) { 478 b = SEC_COMMON_LOW + commonSecondaries; 479 } else { 480 b = SEC_COMMON_HIGH - commonSecondaries; 481 } 482 secondaries.appendByte(b); 483 commonSecondaries = 0; 484 } 485 secondaries.appendWeight16(s); 486 } else { 487 if (commonSecondaries != 0) { 488 --commonSecondaries; 489 // Append reverse weights. The level will be re-reversed later. 490 int remainder = commonSecondaries % SEC_COMMON_MAX_COUNT; 491 int b; 492 if (prevSecondary < Collation.COMMON_WEIGHT16) { 493 b = SEC_COMMON_LOW + remainder; 494 } else { 495 b = SEC_COMMON_HIGH - remainder; 496 } 497 secondaries.appendByte(b); 498 commonSecondaries -= remainder; 499 // commonSecondaries is now a multiple of SEC_COMMON_MAX_COUNT. 500 while (commonSecondaries > 0) { // same as >= SEC_COMMON_MAX_COUNT 501 secondaries.appendByte(SEC_COMMON_MIDDLE); 502 commonSecondaries -= SEC_COMMON_MAX_COUNT; 503 } 504 // commonSecondaries == 0 505 } 506 if (0 < p && p <= Collation.MERGE_SEPARATOR_PRIMARY) { 507 // The backwards secondary level compares secondary weights backwards 508 // within segments separated by the merge separator (U+FFFE). 509 byte[] secs = secondaries.data(); 510 int last = secondaries.length() - 1; 511 while (secSegmentStart < last) { 512 byte b = secs[secSegmentStart]; 513 secs[secSegmentStart++] = secs[last]; 514 secs[last--] = b; 515 } 516 secondaries.appendByte(p == Collation.NO_CE_PRIMARY ? 517 Collation.LEVEL_SEPARATOR_BYTE : Collation.MERGE_SEPARATOR_BYTE); 518 prevSecondary = 0; 519 secSegmentStart = secondaries.length(); 520 } else { 521 secondaries.appendReverseWeight16(s); 522 prevSecondary = s; 523 } 524 } 525 } 526 527 if ((levels & Collation.CASE_LEVEL_FLAG) != 0) { 528 if ((CollationSettings.getStrength(options) == Collator.PRIMARY) ? p == 0 529 : (lower32 >>> 16) == 0) { 530 // Primary+caseLevel: Ignore case level weights of primary ignorables. 531 // Otherwise: Ignore case level weights of secondary ignorables. 532 // For details see the comments in the CollationCompare class. 533 } else { 534 int c = (lower32 >>> 8) & 0xff; // case bits & tertiary lead byte 535 assert ((c & 0xc0) != 0xc0); 536 if ((c & 0xc0) == 0 && c > Collation.LEVEL_SEPARATOR_BYTE) { 537 ++commonCases; 538 } else { 539 if ((options & CollationSettings.UPPER_FIRST) == 0) { 540 // lowerFirst: Compress common weights to nibbles 1..7..13, mixed=14, 541 // upper=15. 542 // If there are only common (=lowest) weights in the whole level, 543 // then we need not write anything. 544 // Level length differences are handled already on the next-higher level. 545 if (commonCases != 0 && 546 (c > Collation.LEVEL_SEPARATOR_BYTE || !cases.isEmpty())) { 547 --commonCases; 548 while (commonCases >= CASE_LOWER_FIRST_COMMON_MAX_COUNT) { 549 cases.appendByte(CASE_LOWER_FIRST_COMMON_MIDDLE << 4); 550 commonCases -= CASE_LOWER_FIRST_COMMON_MAX_COUNT; 551 } 552 int b; 553 if (c <= Collation.LEVEL_SEPARATOR_BYTE) { 554 b = CASE_LOWER_FIRST_COMMON_LOW + commonCases; 555 } else { 556 b = CASE_LOWER_FIRST_COMMON_HIGH - commonCases; 557 } 558 cases.appendByte(b << 4); 559 commonCases = 0; 560 } 561 if (c > Collation.LEVEL_SEPARATOR_BYTE) { 562 c = (CASE_LOWER_FIRST_COMMON_HIGH + (c >>> 6)) << 4; // 14 or 15 563 } 564 } else { 565 // upperFirst: Compress common weights to nibbles 3..15, mixed=2, 566 // upper=1. 567 // The compressed common case weights only go up from the "low" value 568 // because with upperFirst the common weight is the highest one. 569 if (commonCases != 0) { 570 --commonCases; 571 while (commonCases >= CASE_UPPER_FIRST_COMMON_MAX_COUNT) { 572 cases.appendByte(CASE_UPPER_FIRST_COMMON_LOW << 4); 573 commonCases -= CASE_UPPER_FIRST_COMMON_MAX_COUNT; 574 } 575 cases.appendByte((CASE_UPPER_FIRST_COMMON_LOW + commonCases) << 4); 576 commonCases = 0; 577 } 578 if (c > Collation.LEVEL_SEPARATOR_BYTE) { 579 c = (CASE_UPPER_FIRST_COMMON_LOW - (c >>> 6)) << 4; // 2 or 1 580 } 581 } 582 // c is a separator byte 01, 583 // or a left-shifted nibble 0x10, 0x20, ... 0xf0. 584 cases.appendByte(c); 585 } 586 } 587 } 588 589 if ((levels & Collation.TERTIARY_LEVEL_FLAG) != 0) { 590 int t = lower32 & tertiaryMask; 591 assert ((lower32 & 0xc000) != 0xc000); 592 if (t == Collation.COMMON_WEIGHT16) { 593 ++commonTertiaries; 594 } else if ((tertiaryMask & 0x8000) == 0) { 595 // Tertiary weights without case bits. 596 // Move lead bytes 06..3F to C6..FF for a large common-weight range. 597 if (commonTertiaries != 0) { 598 --commonTertiaries; 599 while (commonTertiaries >= TER_ONLY_COMMON_MAX_COUNT) { 600 tertiaries.appendByte(TER_ONLY_COMMON_MIDDLE); 601 commonTertiaries -= TER_ONLY_COMMON_MAX_COUNT; 602 } 603 int b; 604 if (t < Collation.COMMON_WEIGHT16) { 605 b = TER_ONLY_COMMON_LOW + commonTertiaries; 606 } else { 607 b = TER_ONLY_COMMON_HIGH - commonTertiaries; 608 } 609 tertiaries.appendByte(b); 610 commonTertiaries = 0; 611 } 612 if (t > Collation.COMMON_WEIGHT16) { 613 t += 0xc000; 614 } 615 tertiaries.appendWeight16(t); 616 } else if ((options & CollationSettings.UPPER_FIRST) == 0) { 617 // Tertiary weights with caseFirst=lowerFirst. 618 // Move lead bytes 06..BF to 46..FF for the common-weight range. 619 if (commonTertiaries != 0) { 620 --commonTertiaries; 621 while (commonTertiaries >= TER_LOWER_FIRST_COMMON_MAX_COUNT) { 622 tertiaries.appendByte(TER_LOWER_FIRST_COMMON_MIDDLE); 623 commonTertiaries -= TER_LOWER_FIRST_COMMON_MAX_COUNT; 624 } 625 int b; 626 if (t < Collation.COMMON_WEIGHT16) { 627 b = TER_LOWER_FIRST_COMMON_LOW + commonTertiaries; 628 } else { 629 b = TER_LOWER_FIRST_COMMON_HIGH - commonTertiaries; 630 } 631 tertiaries.appendByte(b); 632 commonTertiaries = 0; 633 } 634 if (t > Collation.COMMON_WEIGHT16) { 635 t += 0x4000; 636 } 637 tertiaries.appendWeight16(t); 638 } else { 639 // Tertiary weights with caseFirst=upperFirst. 640 // Do not change the artificial uppercase weight of a tertiary CE (0.0.ut), 641 // to keep tertiary CEs well-formed. 642 // Their case+tertiary weights must be greater than those of 643 // primary and secondary CEs. 644 // 645 // Separator 01 -> 01 (unchanged) 646 // Lowercase 02..04 -> 82..84 (includes uncased) 647 // Common weight 05 -> 85..C5 (common-weight compression range) 648 // Lowercase 06..3F -> C6..FF 649 // Mixed case 42..7F -> 42..7F 650 // Uppercase 82..BF -> 02..3F 651 // Tertiary CE 86..BF -> C6..FF 652 if (t <= Collation.NO_CE_WEIGHT16) { 653 // Keep separators unchanged. 654 } else if ((lower32 >>> 16) != 0) { 655 // Invert case bits of primary & secondary CEs. 656 t ^= 0xc000; 657 if (t < (TER_UPPER_FIRST_COMMON_HIGH << 8)) { 658 t -= 0x4000; 659 } 660 } else { 661 // Keep uppercase bits of tertiary CEs. 662 assert (0x8600 <= t && t <= 0xbfff); 663 t += 0x4000; 664 } 665 if (commonTertiaries != 0) { 666 --commonTertiaries; 667 while (commonTertiaries >= TER_UPPER_FIRST_COMMON_MAX_COUNT) { 668 tertiaries.appendByte(TER_UPPER_FIRST_COMMON_MIDDLE); 669 commonTertiaries -= TER_UPPER_FIRST_COMMON_MAX_COUNT; 670 } 671 int b; 672 if (t < (TER_UPPER_FIRST_COMMON_LOW << 8)) { 673 b = TER_UPPER_FIRST_COMMON_LOW + commonTertiaries; 674 } else { 675 b = TER_UPPER_FIRST_COMMON_HIGH - commonTertiaries; 676 } 677 tertiaries.appendByte(b); 678 commonTertiaries = 0; 679 } 680 tertiaries.appendWeight16(t); 681 } 682 } 683 684 if ((levels & Collation.QUATERNARY_LEVEL_FLAG) != 0) { 685 int q = lower32 & 0xffff; 686 if ((q & 0xc0) == 0 && q > Collation.NO_CE_WEIGHT16) { 687 ++commonQuaternaries; 688 } else if (q == Collation.NO_CE_WEIGHT16 689 && (options & CollationSettings.ALTERNATE_MASK) == 0 690 && quaternaries.isEmpty()) { 691 // If alternate=non-ignorable and there are only common quaternary weights, 692 // then we need not write anything. 693 // The only weights greater than the merge separator and less than the common 694 // weight 695 // are shifted primary weights, which are not generated for 696 // alternate=non-ignorable. 697 // There are also exactly as many quaternary weights as tertiary weights, 698 // so level length differences are handled already on tertiary level. 699 // Any above-common quaternary weight will compare greater regardless. 700 quaternaries.appendByte(Collation.LEVEL_SEPARATOR_BYTE); 701 } else { 702 if (q == Collation.NO_CE_WEIGHT16) { 703 q = Collation.LEVEL_SEPARATOR_BYTE; 704 } else { 705 q = 0xfc + ((q >>> 6) & 3); 706 } 707 if (commonQuaternaries != 0) { 708 --commonQuaternaries; 709 while (commonQuaternaries >= QUAT_COMMON_MAX_COUNT) { 710 quaternaries.appendByte(QUAT_COMMON_MIDDLE); 711 commonQuaternaries -= QUAT_COMMON_MAX_COUNT; 712 } 713 int b; 714 if (q < QUAT_COMMON_LOW) { 715 b = QUAT_COMMON_LOW + commonQuaternaries; 716 } else { 717 b = QUAT_COMMON_HIGH - commonQuaternaries; 718 } 719 quaternaries.appendByte(b); 720 commonQuaternaries = 0; 721 } 722 quaternaries.appendByte(q); 723 } 724 } 725 726 if ((lower32 >>> 24) == Collation.LEVEL_SEPARATOR_BYTE) { 727 break; 728 } // ce == NO_CE 729 } 730 731 // Append the beyond-primary levels. 732 // not used in Java -- boolean ok = true; 733 if ((levels & Collation.SECONDARY_LEVEL_FLAG) != 0) { 734 if (!callback.needToWrite(Collation.SECONDARY_LEVEL)) { 735 return; 736 } 737 // not used in Java -- ok &= secondaries.isOk(); 738 sink.Append(Collation.LEVEL_SEPARATOR_BYTE); 739 secondaries.appendTo(sink); 740 } 741 742 if ((levels & Collation.CASE_LEVEL_FLAG) != 0) { 743 if (!callback.needToWrite(Collation.CASE_LEVEL)) { 744 return; 745 } 746 // not used in Java -- ok &= cases.isOk(); 747 sink.Append(Collation.LEVEL_SEPARATOR_BYTE); 748 // Write pairs of nibbles as bytes, except separator bytes as themselves. 749 int length = cases.length() - 1; // Ignore the trailing NO_CE. 750 byte b = 0; 751 for (int i = 0; i < length; ++i) { 752 byte c = cases.getAt(i); 753 assert ((c & 0xf) == 0 && c != 0); 754 if (b == 0) { 755 b = c; 756 } else { 757 sink.Append(b | ((c >> 4) & 0xf)); 758 b = 0; 759 } 760 } 761 if (b != 0) { 762 sink.Append(b); 763 } 764 } 765 766 if ((levels & Collation.TERTIARY_LEVEL_FLAG) != 0) { 767 if (!callback.needToWrite(Collation.TERTIARY_LEVEL)) { 768 return; 769 } 770 // not used in Java -- ok &= tertiaries.isOk(); 771 sink.Append(Collation.LEVEL_SEPARATOR_BYTE); 772 tertiaries.appendTo(sink); 773 } 774 775 if ((levels & Collation.QUATERNARY_LEVEL_FLAG) != 0) { 776 if (!callback.needToWrite(Collation.QUATERNARY_LEVEL)) { 777 return; 778 } 779 // not used in Java -- ok &= quaternaries.isOk(); 780 sink.Append(Collation.LEVEL_SEPARATOR_BYTE); 781 quaternaries.appendTo(sink); 782 } 783 784 // not used in Java -- if (!ok || !sink.IsOk()) { 785 // Java porting note: U_MEMORY_ALLOCATION_ERROR is set here in 786 // C implementation. IsOk() in Java always returns true, so this 787 // is a dead code. 788 } 789 } 790