1 /*
2 *******************************************************************************
3 * Copyright (C) 2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * loadednormalizer2impl.cpp
7 *
8 * created on: 2014sep03
9 * created by: Markus W. Scherer
10 */
11
12 #include "unicode/utypes.h"
13
14 #if !UCONFIG_NO_NORMALIZATION
15
16 #include "unicode/udata.h"
17 #include "unicode/localpointer.h"
18 #include "unicode/normalizer2.h"
19 #include "unicode/unistr.h"
20 #include "unicode/unorm.h"
21 #include "cstring.h"
22 #include "mutex.h"
23 #include "norm2allmodes.h"
24 #include "normalizer2impl.h"
25 #include "uassert.h"
26 #include "ucln_cmn.h"
27 #include "uhash.h"
28
29 U_NAMESPACE_BEGIN
30
31 class LoadedNormalizer2Impl : public Normalizer2Impl {
32 public:
LoadedNormalizer2Impl()33 LoadedNormalizer2Impl() : memory(NULL), ownedTrie(NULL) {}
34 virtual ~LoadedNormalizer2Impl();
35
36 void load(const char *packageName, const char *name, UErrorCode &errorCode);
37
38 private:
39 static UBool U_CALLCONV
40 isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo);
41
42 UDataMemory *memory;
43 UTrie2 *ownedTrie;
44 };
45
~LoadedNormalizer2Impl()46 LoadedNormalizer2Impl::~LoadedNormalizer2Impl() {
47 udata_close(memory);
48 utrie2_close(ownedTrie);
49 }
50
51 UBool U_CALLCONV
isAcceptable(void *,const char *,const char *,const UDataInfo * pInfo)52 LoadedNormalizer2Impl::isAcceptable(void * /*context*/,
53 const char * /* type */, const char * /*name*/,
54 const UDataInfo *pInfo) {
55 if(
56 pInfo->size>=20 &&
57 pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
58 pInfo->charsetFamily==U_CHARSET_FAMILY &&
59 pInfo->dataFormat[0]==0x4e && /* dataFormat="Nrm2" */
60 pInfo->dataFormat[1]==0x72 &&
61 pInfo->dataFormat[2]==0x6d &&
62 pInfo->dataFormat[3]==0x32 &&
63 pInfo->formatVersion[0]==2
64 ) {
65 // Normalizer2Impl *me=(Normalizer2Impl *)context;
66 // uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4);
67 return TRUE;
68 } else {
69 return FALSE;
70 }
71 }
72
73 void
load(const char * packageName,const char * name,UErrorCode & errorCode)74 LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) {
75 if(U_FAILURE(errorCode)) {
76 return;
77 }
78 memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &errorCode);
79 if(U_FAILURE(errorCode)) {
80 return;
81 }
82 const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory);
83 const int32_t *inIndexes=(const int32_t *)inBytes;
84 int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4;
85 if(indexesLength<=IX_MIN_MAYBE_YES) {
86 errorCode=U_INVALID_FORMAT_ERROR; // Not enough indexes.
87 return;
88 }
89
90 int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET];
91 int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
92 ownedTrie=utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS,
93 inBytes+offset, nextOffset-offset, NULL,
94 &errorCode);
95 if(U_FAILURE(errorCode)) {
96 return;
97 }
98
99 offset=nextOffset;
100 nextOffset=inIndexes[IX_SMALL_FCD_OFFSET];
101 const uint16_t *inExtraData=(const uint16_t *)(inBytes+offset);
102
103 // smallFCD: new in formatVersion 2
104 offset=nextOffset;
105 const uint8_t *inSmallFCD=inBytes+offset;
106
107 init(inIndexes, ownedTrie, inExtraData, inSmallFCD);
108 }
109
110 // instance cache ---------------------------------------------------------- ***
111
112 Norm2AllModes *
createInstance(const char * packageName,const char * name,UErrorCode & errorCode)113 Norm2AllModes::createInstance(const char *packageName,
114 const char *name,
115 UErrorCode &errorCode) {
116 if(U_FAILURE(errorCode)) {
117 return NULL;
118 }
119 LoadedNormalizer2Impl *impl=new LoadedNormalizer2Impl;
120 if(impl==NULL) {
121 errorCode=U_MEMORY_ALLOCATION_ERROR;
122 return NULL;
123 }
124 impl->load(packageName, name, errorCode);
125 return createInstance(impl, errorCode);
126 }
127
128 U_CDECL_BEGIN
129 static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup();
130 U_CDECL_END
131
132 static Norm2AllModes *nfkcSingleton;
133 static Norm2AllModes *nfkc_cfSingleton;
134 static UHashtable *cache=NULL;
135
136 static icu::UInitOnce nfkcInitOnce = U_INITONCE_INITIALIZER;
137 static icu::UInitOnce nfkc_cfInitOnce = U_INITONCE_INITIALIZER;
138
139 // UInitOnce singleton initialization function
initSingletons(const char * what,UErrorCode & errorCode)140 static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) {
141 if (uprv_strcmp(what, "nfkc") == 0) {
142 nfkcSingleton = Norm2AllModes::createInstance(NULL, "nfkc", errorCode);
143 } else if (uprv_strcmp(what, "nfkc_cf") == 0) {
144 nfkc_cfSingleton = Norm2AllModes::createInstance(NULL, "nfkc_cf", errorCode);
145 } else {
146 U_ASSERT(FALSE); // Unknown singleton
147 }
148 ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
149 }
150
151 U_CDECL_BEGIN
152
deleteNorm2AllModes(void * allModes)153 static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
154 delete (Norm2AllModes *)allModes;
155 }
156
uprv_loaded_normalizer2_cleanup()157 static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() {
158 delete nfkcSingleton;
159 nfkcSingleton = NULL;
160 delete nfkc_cfSingleton;
161 nfkc_cfSingleton = NULL;
162 uhash_close(cache);
163 cache=NULL;
164 nfkcInitOnce.reset();
165 nfkc_cfInitOnce.reset();
166 return TRUE;
167 }
168
169 U_CDECL_END
170
171 const Norm2AllModes *
getNFKCInstance(UErrorCode & errorCode)172 Norm2AllModes::getNFKCInstance(UErrorCode &errorCode) {
173 if(U_FAILURE(errorCode)) { return NULL; }
174 umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
175 return nfkcSingleton;
176 }
177
178 const Norm2AllModes *
getNFKC_CFInstance(UErrorCode & errorCode)179 Norm2AllModes::getNFKC_CFInstance(UErrorCode &errorCode) {
180 if(U_FAILURE(errorCode)) { return NULL; }
181 umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
182 return nfkc_cfSingleton;
183 }
184
185 const Normalizer2 *
getNFKCInstance(UErrorCode & errorCode)186 Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
187 const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
188 return allModes!=NULL ? &allModes->comp : NULL;
189 }
190
191 const Normalizer2 *
getNFKDInstance(UErrorCode & errorCode)192 Normalizer2::getNFKDInstance(UErrorCode &errorCode) {
193 const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
194 return allModes!=NULL ? &allModes->decomp : NULL;
195 }
196
197 const Normalizer2 *
getNFKCCasefoldInstance(UErrorCode & errorCode)198 Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
199 const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
200 return allModes!=NULL ? &allModes->comp : NULL;
201 }
202
203 const Normalizer2 *
getInstance(const char * packageName,const char * name,UNormalization2Mode mode,UErrorCode & errorCode)204 Normalizer2::getInstance(const char *packageName,
205 const char *name,
206 UNormalization2Mode mode,
207 UErrorCode &errorCode) {
208 if(U_FAILURE(errorCode)) {
209 return NULL;
210 }
211 if(name==NULL || *name==0) {
212 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
213 return NULL;
214 }
215 const Norm2AllModes *allModes=NULL;
216 if(packageName==NULL) {
217 if(0==uprv_strcmp(name, "nfc")) {
218 allModes=Norm2AllModes::getNFCInstance(errorCode);
219 } else if(0==uprv_strcmp(name, "nfkc")) {
220 allModes=Norm2AllModes::getNFKCInstance(errorCode);
221 } else if(0==uprv_strcmp(name, "nfkc_cf")) {
222 allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
223 }
224 }
225 if(allModes==NULL && U_SUCCESS(errorCode)) {
226 {
227 Mutex lock;
228 if(cache!=NULL) {
229 allModes=(Norm2AllModes *)uhash_get(cache, name);
230 }
231 }
232 if(allModes==NULL) {
233 LocalPointer<Norm2AllModes> localAllModes(
234 Norm2AllModes::createInstance(packageName, name, errorCode));
235 if(U_SUCCESS(errorCode)) {
236 Mutex lock;
237 if(cache==NULL) {
238 cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode);
239 if(U_FAILURE(errorCode)) {
240 return NULL;
241 }
242 uhash_setKeyDeleter(cache, uprv_free);
243 uhash_setValueDeleter(cache, deleteNorm2AllModes);
244 }
245 void *temp=uhash_get(cache, name);
246 if(temp==NULL) {
247 int32_t keyLength=uprv_strlen(name)+1;
248 char *nameCopy=(char *)uprv_malloc(keyLength);
249 if(nameCopy==NULL) {
250 errorCode=U_MEMORY_ALLOCATION_ERROR;
251 return NULL;
252 }
253 uprv_memcpy(nameCopy, name, keyLength);
254 allModes=localAllModes.getAlias();
255 uhash_put(cache, nameCopy, localAllModes.orphan(), &errorCode);
256 } else {
257 // race condition
258 allModes=(Norm2AllModes *)temp;
259 }
260 }
261 }
262 }
263 if(allModes!=NULL && U_SUCCESS(errorCode)) {
264 switch(mode) {
265 case UNORM2_COMPOSE:
266 return &allModes->comp;
267 case UNORM2_DECOMPOSE:
268 return &allModes->decomp;
269 case UNORM2_FCD:
270 return &allModes->fcd;
271 case UNORM2_COMPOSE_CONTIGUOUS:
272 return &allModes->fcc;
273 default:
274 break; // do nothing
275 }
276 }
277 return NULL;
278 }
279
280 const Normalizer2 *
getInstance(UNormalizationMode mode,UErrorCode & errorCode)281 Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
282 if(U_FAILURE(errorCode)) {
283 return NULL;
284 }
285 switch(mode) {
286 case UNORM_NFD:
287 return Normalizer2::getNFDInstance(errorCode);
288 case UNORM_NFKD:
289 return Normalizer2::getNFKDInstance(errorCode);
290 case UNORM_NFC:
291 return Normalizer2::getNFCInstance(errorCode);
292 case UNORM_NFKC:
293 return Normalizer2::getNFKCInstance(errorCode);
294 case UNORM_FCD:
295 return getFCDInstance(errorCode);
296 default: // UNORM_NONE
297 return getNoopInstance(errorCode);
298 }
299 }
300
301 const Normalizer2Impl *
getNFKCImpl(UErrorCode & errorCode)302 Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
303 const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
304 return allModes!=NULL ? allModes->impl : NULL;
305 }
306
307 const Normalizer2Impl *
getNFKC_CFImpl(UErrorCode & errorCode)308 Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
309 const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
310 return allModes!=NULL ? allModes->impl : NULL;
311 }
312
313 U_NAMESPACE_END
314
315 // C API ------------------------------------------------------------------- ***
316
317 U_NAMESPACE_USE
318
319 U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKCInstance(UErrorCode * pErrorCode)320 unorm2_getNFKCInstance(UErrorCode *pErrorCode) {
321 return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode);
322 }
323
324 U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKDInstance(UErrorCode * pErrorCode)325 unorm2_getNFKDInstance(UErrorCode *pErrorCode) {
326 return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode);
327 }
328
329 U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKCCasefoldInstance(UErrorCode * pErrorCode)330 unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
331 return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
332 }
333
334 U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getInstance(const char * packageName,const char * name,UNormalization2Mode mode,UErrorCode * pErrorCode)335 unorm2_getInstance(const char *packageName,
336 const char *name,
337 UNormalization2Mode mode,
338 UErrorCode *pErrorCode) {
339 return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
340 }
341
342 U_CFUNC UNormalizationCheckResult
unorm_getQuickCheck(UChar32 c,UNormalizationMode mode)343 unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
344 if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
345 return UNORM_YES;
346 }
347 UErrorCode errorCode=U_ZERO_ERROR;
348 const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);
349 if(U_SUCCESS(errorCode)) {
350 return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c);
351 } else {
352 return UNORM_MAYBE;
353 }
354 }
355
356 #endif // !UCONFIG_NO_NORMALIZATION
357