1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ********************************************************************** 5 * Copyright (C) 2014, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ********************************************************************** 8 * 9 * scriptset.cpp 10 * 11 * created on: 2013 Jan 7 12 * created by: Andy Heninger 13 */ 14 15 #include "unicode/utypes.h" 16 17 #include "unicode/uchar.h" 18 #include "unicode/unistr.h" 19 20 #include "scriptset.h" 21 #include "uassert.h" 22 #include "cmemory.h" 23 24 U_NAMESPACE_BEGIN 25 26 //---------------------------------------------------------------------------- 27 // 28 // ScriptSet implementation 29 // 30 //---------------------------------------------------------------------------- 31 ScriptSet::ScriptSet() { 32 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 33 bits[i] = 0; 34 } 35 } 36 37 ScriptSet::~ScriptSet() { 38 } 39 40 ScriptSet::ScriptSet(const ScriptSet &other) { 41 *this = other; 42 } 43 44 45 ScriptSet & ScriptSet::operator =(const ScriptSet &other) { 46 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 47 bits[i] = other.bits[i]; 48 } 49 return *this; 50 } 51 52 53 UBool ScriptSet::operator == (const ScriptSet &other) const { 54 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 55 if (bits[i] != other.bits[i]) { 56 return FALSE; 57 } 58 } 59 return TRUE; 60 } 61 62 UBool ScriptSet::test(UScriptCode script, UErrorCode &status) const { 63 if (U_FAILURE(status)) { 64 return FALSE; 65 } 66 if (script < 0 || script >= (int32_t)sizeof(bits) * 8) { 67 status = U_ILLEGAL_ARGUMENT_ERROR; 68 return FALSE; 69 } 70 uint32_t index = script / 32; 71 uint32_t bit = 1 << (script & 31); 72 return ((bits[index] & bit) != 0); 73 } 74 75 76 ScriptSet &ScriptSet::set(UScriptCode script, UErrorCode &status) { 77 if (U_FAILURE(status)) { 78 return *this; 79 } 80 if (script < 0 || script >= (int32_t)sizeof(bits) * 8) { 81 status = U_ILLEGAL_ARGUMENT_ERROR; 82 return *this; 83 } 84 uint32_t index = script / 32; 85 uint32_t bit = 1 << (script & 31); 86 bits[index] |= bit; 87 return *this; 88 } 89 90 ScriptSet &ScriptSet::reset(UScriptCode script, UErrorCode &status) { 91 if (U_FAILURE(status)) { 92 return *this; 93 } 94 if (script < 0 || script >= (int32_t)sizeof(bits) * 8) { 95 status = U_ILLEGAL_ARGUMENT_ERROR; 96 return *this; 97 } 98 uint32_t index = script / 32; 99 uint32_t bit = 1 << (script & 31); 100 bits[index] &= ~bit; 101 return *this; 102 } 103 104 105 106 ScriptSet &ScriptSet::Union(const ScriptSet &other) { 107 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 108 bits[i] |= other.bits[i]; 109 } 110 return *this; 111 } 112 113 ScriptSet &ScriptSet::intersect(const ScriptSet &other) { 114 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 115 bits[i] &= other.bits[i]; 116 } 117 return *this; 118 } 119 120 ScriptSet &ScriptSet::intersect(UScriptCode script, UErrorCode &status) { 121 ScriptSet t; 122 t.set(script, status); 123 if (U_SUCCESS(status)) { 124 this->intersect(t); 125 } 126 return *this; 127 } 128 129 UBool ScriptSet::intersects(const ScriptSet &other) const { 130 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 131 if ((bits[i] & other.bits[i]) != 0) { 132 return true; 133 } 134 } 135 return false; 136 } 137 138 UBool ScriptSet::contains(const ScriptSet &other) const { 139 ScriptSet t(*this); 140 t.intersect(other); 141 return (t == other); 142 } 143 144 145 ScriptSet &ScriptSet::setAll() { 146 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 147 bits[i] = 0xffffffffu; 148 } 149 return *this; 150 } 151 152 153 ScriptSet &ScriptSet::resetAll() { 154 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 155 bits[i] = 0; 156 } 157 return *this; 158 } 159 160 int32_t ScriptSet::countMembers() const { 161 // This bit counter is good for sparse numbers of '1's, which is 162 // very much the case that we will usually have. 163 int32_t count = 0; 164 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 165 uint32_t x = bits[i]; 166 while (x > 0) { 167 count++; 168 x &= (x - 1); // and off the least significant one bit. 169 } 170 } 171 return count; 172 } 173 174 int32_t ScriptSet::hashCode() const { 175 int32_t hash = 0; 176 for (int32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 177 hash ^= bits[i]; 178 } 179 return hash; 180 } 181 182 int32_t ScriptSet::nextSetBit(int32_t fromIndex) const { 183 // TODO: Wants a better implementation. 184 if (fromIndex < 0) { 185 return -1; 186 } 187 UErrorCode status = U_ZERO_ERROR; 188 for (int32_t scriptIndex = fromIndex; scriptIndex < (int32_t)sizeof(bits)*8; scriptIndex++) { 189 if (test((UScriptCode)scriptIndex, status)) { 190 return scriptIndex; 191 } 192 } 193 return -1; 194 } 195 196 UBool ScriptSet::isEmpty() const { 197 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 198 if (bits[i] != 0) { 199 return FALSE; 200 } 201 } 202 return TRUE; 203 } 204 205 UnicodeString &ScriptSet::displayScripts(UnicodeString &dest) const { 206 UBool firstTime = TRUE; 207 for (int32_t i = nextSetBit(0); i >= 0; i = nextSetBit(i + 1)) { 208 if (!firstTime) { 209 dest.append((UChar)0x20); 210 } 211 firstTime = FALSE; 212 const char *scriptName = uscript_getShortName((UScriptCode(i))); 213 dest.append(UnicodeString(scriptName, -1, US_INV)); 214 } 215 return dest; 216 } 217 218 ScriptSet &ScriptSet::parseScripts(const UnicodeString &scriptString, UErrorCode &status) { 219 resetAll(); 220 if (U_FAILURE(status)) { 221 return *this; 222 } 223 UnicodeString oneScriptName; 224 for (int32_t i=0; i<scriptString.length();) { 225 UChar32 c = scriptString.char32At(i); 226 i = scriptString.moveIndex32(i, 1); 227 if (!u_isUWhiteSpace(c)) { 228 oneScriptName.append(c); 229 if (i < scriptString.length()) { 230 continue; 231 } 232 } 233 if (oneScriptName.length() > 0) { 234 char buf[40]; 235 oneScriptName.extract(0, oneScriptName.length(), buf, sizeof(buf)-1, US_INV); 236 buf[sizeof(buf)-1] = 0; 237 int32_t sc = u_getPropertyValueEnum(UCHAR_SCRIPT, buf); 238 if (sc == UCHAR_INVALID_CODE) { 239 status = U_ILLEGAL_ARGUMENT_ERROR; 240 } else { 241 this->set((UScriptCode)sc, status); 242 } 243 if (U_FAILURE(status)) { 244 return *this; 245 } 246 oneScriptName.remove(); 247 } 248 } 249 return *this; 250 } 251 252 void ScriptSet::setScriptExtensions(UChar32 codePoint, UErrorCode& status) { 253 if (U_FAILURE(status)) { return; } 254 static const int32_t FIRST_GUESS_SCRIPT_CAPACITY = 5; 255 MaybeStackArray<UScriptCode,FIRST_GUESS_SCRIPT_CAPACITY> scripts; 256 UErrorCode internalStatus = U_ZERO_ERROR; 257 int32_t script_count = -1; 258 259 while (TRUE) { 260 script_count = uscript_getScriptExtensions( 261 codePoint, scripts.getAlias(), scripts.getCapacity(), &internalStatus); 262 if (internalStatus == U_BUFFER_OVERFLOW_ERROR) { 263 // Need to allocate more space 264 if (scripts.resize(script_count) == NULL) { 265 status = U_MEMORY_ALLOCATION_ERROR; 266 return; 267 } 268 internalStatus = U_ZERO_ERROR; 269 } else { 270 break; 271 } 272 } 273 274 // Check if we failed for some reason other than buffer overflow 275 if (U_FAILURE(internalStatus)) { 276 status = internalStatus; 277 return; 278 } 279 280 // Load the scripts into the ScriptSet and return 281 for (int32_t i = 0; i < script_count; i++) { 282 this->set(scripts[i], status); 283 if (U_FAILURE(status)) { return; } 284 } 285 } 286 287 U_NAMESPACE_END 288 289 U_CAPI UBool U_EXPORT2 290 uhash_equalsScriptSet(const UElement key1, const UElement key2) { 291 icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer); 292 icu::ScriptSet *s2 = static_cast<icu::ScriptSet *>(key2.pointer); 293 return (*s1 == *s2); 294 } 295 296 U_CAPI int8_t U_EXPORT2 297 uhash_compareScriptSet(UElement key0, UElement key1) { 298 icu::ScriptSet *s0 = static_cast<icu::ScriptSet *>(key0.pointer); 299 icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer); 300 int32_t diff = s0->countMembers() - s1->countMembers(); 301 if (diff != 0) return diff; 302 int32_t i0 = s0->nextSetBit(0); 303 int32_t i1 = s1->nextSetBit(0); 304 while ((diff = i0-i1) == 0 && i0 > 0) { 305 i0 = s0->nextSetBit(i0+1); 306 i1 = s1->nextSetBit(i1+1); 307 } 308 return (int8_t)diff; 309 } 310 311 U_CAPI int32_t U_EXPORT2 312 uhash_hashScriptSet(const UElement key) { 313 icu::ScriptSet *s = static_cast<icu::ScriptSet *>(key.pointer); 314 return s->hashCode(); 315 } 316 317 U_CAPI void U_EXPORT2 318 uhash_deleteScriptSet(void *obj) { 319 icu::ScriptSet *s = static_cast<icu::ScriptSet *>(obj); 320 delete s; 321 } 322