1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2014, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ******************************************************************************* 8 * loadednormalizer2impl.cpp 9 * 10 * created on: 2014sep03 11 * created by: Markus W. Scherer 12 */ 13 14 #include "unicode/utypes.h" 15 16 #if !UCONFIG_NO_NORMALIZATION 17 18 #include "unicode/udata.h" 19 #include "unicode/localpointer.h" 20 #include "unicode/normalizer2.h" 21 #include "unicode/unistr.h" 22 #include "unicode/unorm.h" 23 #include "cstring.h" 24 #include "mutex.h" 25 #include "norm2allmodes.h" 26 #include "normalizer2impl.h" 27 #include "uassert.h" 28 #include "ucln_cmn.h" 29 #include "uhash.h" 30 31 U_NAMESPACE_BEGIN 32 33 class LoadedNormalizer2Impl : public Normalizer2Impl { 34 public: 35 LoadedNormalizer2Impl() : memory(NULL), ownedTrie(NULL) {} 36 virtual ~LoadedNormalizer2Impl(); 37 38 void load(const char *packageName, const char *name, UErrorCode &errorCode); 39 40 private: 41 static UBool U_CALLCONV 42 isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo); 43 44 UDataMemory *memory; 45 UTrie2 *ownedTrie; 46 }; 47 48 LoadedNormalizer2Impl::~LoadedNormalizer2Impl() { 49 udata_close(memory); 50 utrie2_close(ownedTrie); 51 } 52 53 UBool U_CALLCONV 54 LoadedNormalizer2Impl::isAcceptable(void * /*context*/, 55 const char * /* type */, const char * /*name*/, 56 const UDataInfo *pInfo) { 57 if( 58 pInfo->size>=20 && 59 pInfo->isBigEndian==U_IS_BIG_ENDIAN && 60 pInfo->charsetFamily==U_CHARSET_FAMILY && 61 pInfo->dataFormat[0]==0x4e && /* dataFormat="Nrm2" */ 62 pInfo->dataFormat[1]==0x72 && 63 pInfo->dataFormat[2]==0x6d && 64 pInfo->dataFormat[3]==0x32 && 65 pInfo->formatVersion[0]==3 66 ) { 67 // Normalizer2Impl *me=(Normalizer2Impl *)context; 68 // uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4); 69 return TRUE; 70 } else { 71 return FALSE; 72 } 73 } 74 75 void 76 LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) { 77 if(U_FAILURE(errorCode)) { 78 return; 79 } 80 memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &errorCode); 81 if(U_FAILURE(errorCode)) { 82 return; 83 } 84 const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory); 85 const int32_t *inIndexes=(const int32_t *)inBytes; 86 int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4; 87 if(indexesLength<=IX_MIN_LCCC_CP) { 88 errorCode=U_INVALID_FORMAT_ERROR; // Not enough indexes. 89 return; 90 } 91 92 int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET]; 93 int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET]; 94 ownedTrie=utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS, 95 inBytes+offset, nextOffset-offset, NULL, 96 &errorCode); 97 if(U_FAILURE(errorCode)) { 98 return; 99 } 100 101 offset=nextOffset; 102 nextOffset=inIndexes[IX_SMALL_FCD_OFFSET]; 103 const uint16_t *inExtraData=(const uint16_t *)(inBytes+offset); 104 105 // smallFCD: new in formatVersion 2 106 offset=nextOffset; 107 const uint8_t *inSmallFCD=inBytes+offset; 108 109 init(inIndexes, ownedTrie, inExtraData, inSmallFCD); 110 } 111 112 // instance cache ---------------------------------------------------------- *** 113 114 Norm2AllModes * 115 Norm2AllModes::createInstance(const char *packageName, 116 const char *name, 117 UErrorCode &errorCode) { 118 if(U_FAILURE(errorCode)) { 119 return NULL; 120 } 121 LoadedNormalizer2Impl *impl=new LoadedNormalizer2Impl; 122 if(impl==NULL) { 123 errorCode=U_MEMORY_ALLOCATION_ERROR; 124 return NULL; 125 } 126 impl->load(packageName, name, errorCode); 127 return createInstance(impl, errorCode); 128 } 129 130 U_CDECL_BEGIN 131 static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup(); 132 U_CDECL_END 133 134 static Norm2AllModes *nfkcSingleton; 135 static Norm2AllModes *nfkc_cfSingleton; 136 static UHashtable *cache=NULL; 137 138 static icu::UInitOnce nfkcInitOnce = U_INITONCE_INITIALIZER; 139 static icu::UInitOnce nfkc_cfInitOnce = U_INITONCE_INITIALIZER; 140 141 // UInitOnce singleton initialization function 142 static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) { 143 if (uprv_strcmp(what, "nfkc") == 0) { 144 nfkcSingleton = Norm2AllModes::createInstance(NULL, "nfkc", errorCode); 145 } else if (uprv_strcmp(what, "nfkc_cf") == 0) { 146 nfkc_cfSingleton = Norm2AllModes::createInstance(NULL, "nfkc_cf", errorCode); 147 } else { 148 U_ASSERT(FALSE); // Unknown singleton 149 } 150 ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup); 151 } 152 153 U_CDECL_BEGIN 154 155 static void U_CALLCONV deleteNorm2AllModes(void *allModes) { 156 delete (Norm2AllModes *)allModes; 157 } 158 159 static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() { 160 delete nfkcSingleton; 161 nfkcSingleton = NULL; 162 delete nfkc_cfSingleton; 163 nfkc_cfSingleton = NULL; 164 uhash_close(cache); 165 cache=NULL; 166 nfkcInitOnce.reset(); 167 nfkc_cfInitOnce.reset(); 168 return TRUE; 169 } 170 171 U_CDECL_END 172 173 const Norm2AllModes * 174 Norm2AllModes::getNFKCInstance(UErrorCode &errorCode) { 175 if(U_FAILURE(errorCode)) { return NULL; } 176 umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode); 177 return nfkcSingleton; 178 } 179 180 const Norm2AllModes * 181 Norm2AllModes::getNFKC_CFInstance(UErrorCode &errorCode) { 182 if(U_FAILURE(errorCode)) { return NULL; } 183 umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode); 184 return nfkc_cfSingleton; 185 } 186 187 const Normalizer2 * 188 Normalizer2::getNFKCInstance(UErrorCode &errorCode) { 189 const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode); 190 return allModes!=NULL ? &allModes->comp : NULL; 191 } 192 193 const Normalizer2 * 194 Normalizer2::getNFKDInstance(UErrorCode &errorCode) { 195 const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode); 196 return allModes!=NULL ? &allModes->decomp : NULL; 197 } 198 199 const Normalizer2 * 200 Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) { 201 const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode); 202 return allModes!=NULL ? &allModes->comp : NULL; 203 } 204 205 const Normalizer2 * 206 Normalizer2::getInstance(const char *packageName, 207 const char *name, 208 UNormalization2Mode mode, 209 UErrorCode &errorCode) { 210 if(U_FAILURE(errorCode)) { 211 return NULL; 212 } 213 if(name==NULL || *name==0) { 214 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 215 return NULL; 216 } 217 const Norm2AllModes *allModes=NULL; 218 if(packageName==NULL) { 219 if(0==uprv_strcmp(name, "nfc")) { 220 allModes=Norm2AllModes::getNFCInstance(errorCode); 221 } else if(0==uprv_strcmp(name, "nfkc")) { 222 allModes=Norm2AllModes::getNFKCInstance(errorCode); 223 } else if(0==uprv_strcmp(name, "nfkc_cf")) { 224 allModes=Norm2AllModes::getNFKC_CFInstance(errorCode); 225 } 226 } 227 if(allModes==NULL && U_SUCCESS(errorCode)) { 228 { 229 Mutex lock; 230 if(cache!=NULL) { 231 allModes=(Norm2AllModes *)uhash_get(cache, name); 232 } 233 } 234 if(allModes==NULL) { 235 ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup); 236 LocalPointer<Norm2AllModes> localAllModes( 237 Norm2AllModes::createInstance(packageName, name, errorCode)); 238 if(U_SUCCESS(errorCode)) { 239 Mutex lock; 240 if(cache==NULL) { 241 cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode); 242 if(U_FAILURE(errorCode)) { 243 return NULL; 244 } 245 uhash_setKeyDeleter(cache, uprv_free); 246 uhash_setValueDeleter(cache, deleteNorm2AllModes); 247 } 248 void *temp=uhash_get(cache, name); 249 if(temp==NULL) { 250 int32_t keyLength=uprv_strlen(name)+1; 251 char *nameCopy=(char *)uprv_malloc(keyLength); 252 if(nameCopy==NULL) { 253 errorCode=U_MEMORY_ALLOCATION_ERROR; 254 return NULL; 255 } 256 uprv_memcpy(nameCopy, name, keyLength); 257 allModes=localAllModes.getAlias(); 258 uhash_put(cache, nameCopy, localAllModes.orphan(), &errorCode); 259 } else { 260 // race condition 261 allModes=(Norm2AllModes *)temp; 262 } 263 } 264 } 265 } 266 if(allModes!=NULL && U_SUCCESS(errorCode)) { 267 switch(mode) { 268 case UNORM2_COMPOSE: 269 return &allModes->comp; 270 case UNORM2_DECOMPOSE: 271 return &allModes->decomp; 272 case UNORM2_FCD: 273 return &allModes->fcd; 274 case UNORM2_COMPOSE_CONTIGUOUS: 275 return &allModes->fcc; 276 default: 277 break; // do nothing 278 } 279 } 280 return NULL; 281 } 282 283 const Normalizer2 * 284 Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) { 285 if(U_FAILURE(errorCode)) { 286 return NULL; 287 } 288 switch(mode) { 289 case UNORM_NFD: 290 return Normalizer2::getNFDInstance(errorCode); 291 case UNORM_NFKD: 292 return Normalizer2::getNFKDInstance(errorCode); 293 case UNORM_NFC: 294 return Normalizer2::getNFCInstance(errorCode); 295 case UNORM_NFKC: 296 return Normalizer2::getNFKCInstance(errorCode); 297 case UNORM_FCD: 298 return getFCDInstance(errorCode); 299 default: // UNORM_NONE 300 return getNoopInstance(errorCode); 301 } 302 } 303 304 const Normalizer2Impl * 305 Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) { 306 const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode); 307 return allModes!=NULL ? allModes->impl : NULL; 308 } 309 310 const Normalizer2Impl * 311 Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) { 312 const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode); 313 return allModes!=NULL ? allModes->impl : NULL; 314 } 315 316 U_NAMESPACE_END 317 318 // C API ------------------------------------------------------------------- *** 319 320 U_NAMESPACE_USE 321 322 U_CAPI const UNormalizer2 * U_EXPORT2 323 unorm2_getNFKCInstance(UErrorCode *pErrorCode) { 324 return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode); 325 } 326 327 U_CAPI const UNormalizer2 * U_EXPORT2 328 unorm2_getNFKDInstance(UErrorCode *pErrorCode) { 329 return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode); 330 } 331 332 U_CAPI const UNormalizer2 * U_EXPORT2 333 unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) { 334 return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode); 335 } 336 337 U_CAPI const UNormalizer2 * U_EXPORT2 338 unorm2_getInstance(const char *packageName, 339 const char *name, 340 UNormalization2Mode mode, 341 UErrorCode *pErrorCode) { 342 return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode); 343 } 344 345 U_CFUNC UNormalizationCheckResult 346 unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) { 347 if(mode<=UNORM_NONE || UNORM_FCD<=mode) { 348 return UNORM_YES; 349 } 350 UErrorCode errorCode=U_ZERO_ERROR; 351 const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode); 352 if(U_SUCCESS(errorCode)) { 353 return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c); 354 } else { 355 return UNORM_MAYBE; 356 } 357 } 358 359 #endif // !UCONFIG_NO_NORMALIZATION 360