1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2010-2014, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ******************************************************************************* 8 * collation.cpp 9 * 10 * created on: 2010oct27 11 * created by: Markus W. Scherer 12 */ 13 14 #include "unicode/utypes.h" 15 16 #if !UCONFIG_NO_COLLATION 17 18 #include "collation.h" 19 #include "uassert.h" 20 21 U_NAMESPACE_BEGIN 22 23 // Some compilers don't care if constants are defined in the .cpp file. 24 // MS Visual C++ does not like it, but gcc requires it. clang does not care. 25 #ifndef _MSC_VER 26 const uint8_t Collation::LEVEL_SEPARATOR_BYTE; 27 const uint8_t Collation::MERGE_SEPARATOR_BYTE; 28 const uint32_t Collation::ONLY_TERTIARY_MASK; 29 const uint32_t Collation::CASE_AND_TERTIARY_MASK; 30 #endif 31 32 uint32_t 33 Collation::incTwoBytePrimaryByOffset(uint32_t basePrimary, UBool isCompressible, int32_t offset) { 34 // Extract the second byte, minus the minimum byte value, 35 // plus the offset, modulo the number of usable byte values, plus the minimum. 36 // Reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary. 37 uint32_t primary; 38 if(isCompressible) { 39 offset += ((int32_t)(basePrimary >> 16) & 0xff) - 4; 40 primary = (uint32_t)((offset % 251) + 4) << 16; 41 offset /= 251; 42 } else { 43 offset += ((int32_t)(basePrimary >> 16) & 0xff) - 2; 44 primary = (uint32_t)((offset % 254) + 2) << 16; 45 offset /= 254; 46 } 47 // First byte, assume no further overflow. 48 return primary | ((basePrimary & 0xff000000) + (uint32_t)(offset << 24)); 49 } 50 51 uint32_t 52 Collation::incThreeBytePrimaryByOffset(uint32_t basePrimary, UBool isCompressible, int32_t offset) { 53 // Extract the third byte, minus the minimum byte value, 54 // plus the offset, modulo the number of usable byte values, plus the minimum. 55 offset += ((int32_t)(basePrimary >> 8) & 0xff) - 2; 56 uint32_t primary = (uint32_t)((offset % 254) + 2) << 8; 57 offset /= 254; 58 // Same with the second byte, 59 // but reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary. 60 if(isCompressible) { 61 offset += ((int32_t)(basePrimary >> 16) & 0xff) - 4; 62 primary |= (uint32_t)((offset % 251) + 4) << 16; 63 offset /= 251; 64 } else { 65 offset += ((int32_t)(basePrimary >> 16) & 0xff) - 2; 66 primary |= (uint32_t)((offset % 254) + 2) << 16; 67 offset /= 254; 68 } 69 // First byte, assume no further overflow. 70 return primary | ((basePrimary & 0xff000000) + (uint32_t)(offset << 24)); 71 } 72 73 uint32_t 74 Collation::decTwoBytePrimaryByOneStep(uint32_t basePrimary, UBool isCompressible, int32_t step) { 75 // Extract the second byte, minus the minimum byte value, 76 // minus the step, modulo the number of usable byte values, plus the minimum. 77 // Reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary. 78 // Assume no further underflow for the first byte. 79 U_ASSERT(0 < step && step <= 0x7f); 80 int32_t byte2 = ((int32_t)(basePrimary >> 16) & 0xff) - step; 81 if(isCompressible) { 82 if(byte2 < 4) { 83 byte2 += 251; 84 basePrimary -= 0x1000000; 85 } 86 } else { 87 if(byte2 < 2) { 88 byte2 += 254; 89 basePrimary -= 0x1000000; 90 } 91 } 92 return (basePrimary & 0xff000000) | ((uint32_t)byte2 << 16); 93 } 94 95 uint32_t 96 Collation::decThreeBytePrimaryByOneStep(uint32_t basePrimary, UBool isCompressible, int32_t step) { 97 // Extract the third byte, minus the minimum byte value, 98 // minus the step, modulo the number of usable byte values, plus the minimum. 99 U_ASSERT(0 < step && step <= 0x7f); 100 int32_t byte3 = ((int32_t)(basePrimary >> 8) & 0xff) - step; 101 if(byte3 >= 2) { 102 return (basePrimary & 0xffff0000) | ((uint32_t)byte3 << 8); 103 } 104 byte3 += 254; 105 // Same with the second byte, 106 // but reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary. 107 int32_t byte2 = ((int32_t)(basePrimary >> 16) & 0xff) - 1; 108 if(isCompressible) { 109 if(byte2 < 4) { 110 byte2 = 0xfe; 111 basePrimary -= 0x1000000; 112 } 113 } else { 114 if(byte2 < 2) { 115 byte2 = 0xff; 116 basePrimary -= 0x1000000; 117 } 118 } 119 // First byte, assume no further underflow. 120 return (basePrimary & 0xff000000) | ((uint32_t)byte2 << 16) | ((uint32_t)byte3 << 8); 121 } 122 123 uint32_t 124 Collation::getThreeBytePrimaryForOffsetData(UChar32 c, int64_t dataCE) { 125 uint32_t p = (uint32_t)(dataCE >> 32); // three-byte primary pppppp00 126 int32_t lower32 = (int32_t)dataCE; // base code point b & step s: bbbbbbss (bit 7: isCompressible) 127 int32_t offset = (c - (lower32 >> 8)) * (lower32 & 0x7f); // delta * increment 128 UBool isCompressible = (lower32 & 0x80) != 0; 129 return Collation::incThreeBytePrimaryByOffset(p, isCompressible, offset); 130 } 131 132 uint32_t 133 Collation::unassignedPrimaryFromCodePoint(UChar32 c) { 134 // Create a gap before U+0000. Use c=-1 for [first unassigned]. 135 ++c; 136 // Fourth byte: 18 values, every 14th byte value (gap of 13). 137 uint32_t primary = 2 + (c % 18) * 14; 138 c /= 18; 139 // Third byte: 254 values. 140 primary |= (2 + (c % 254)) << 8; 141 c /= 254; 142 // Second byte: 251 values 04..FE excluding the primary compression bytes. 143 primary |= (4 + (c % 251)) << 16; 144 // One lead byte covers all code points (c < 0x1182B4 = 1*251*254*18). 145 return primary | (UNASSIGNED_IMPLICIT_BYTE << 24); 146 } 147 148 U_NAMESPACE_END 149 150 #endif // !UCONFIG_NO_COLLATION 151