1 // Copyright (C) 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2014, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ******************************************************************************* 8 * loadednormalizer2impl.h 9 * 10 * created on: 2014sep07 11 * created by: Markus W. Scherer 12 */ 13 14 #ifndef __NORM2ALLMODES_H__ 15 #define __NORM2ALLMODES_H__ 16 17 #include "unicode/utypes.h" 18 19 #if !UCONFIG_NO_NORMALIZATION 20 21 #include "unicode/normalizer2.h" 22 #include "unicode/unistr.h" 23 #include "cpputils.h" 24 #include "normalizer2impl.h" 25 26 U_NAMESPACE_BEGIN 27 28 // Intermediate class: 29 // Has Normalizer2Impl and does boilerplate argument checking and setup. 30 class Normalizer2WithImpl : public Normalizer2 { 31 public: Normalizer2WithImpl(const Normalizer2Impl & ni)32 Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {} 33 virtual ~Normalizer2WithImpl(); 34 35 // normalize 36 virtual UnicodeString & normalize(const UnicodeString & src,UnicodeString & dest,UErrorCode & errorCode)37 normalize(const UnicodeString &src, 38 UnicodeString &dest, 39 UErrorCode &errorCode) const { 40 if(U_FAILURE(errorCode)) { 41 dest.setToBogus(); 42 return dest; 43 } 44 const UChar *sArray=src.getBuffer(); 45 if(&dest==&src || sArray==NULL) { 46 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 47 dest.setToBogus(); 48 return dest; 49 } 50 dest.remove(); 51 ReorderingBuffer buffer(impl, dest); 52 if(buffer.init(src.length(), errorCode)) { 53 normalize(sArray, sArray+src.length(), buffer, errorCode); 54 } 55 return dest; 56 } 57 virtual void 58 normalize(const UChar *src, const UChar *limit, 59 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; 60 61 // normalize and append 62 virtual UnicodeString & normalizeSecondAndAppend(UnicodeString & first,const UnicodeString & second,UErrorCode & errorCode)63 normalizeSecondAndAppend(UnicodeString &first, 64 const UnicodeString &second, 65 UErrorCode &errorCode) const { 66 return normalizeSecondAndAppend(first, second, TRUE, errorCode); 67 } 68 virtual UnicodeString & append(UnicodeString & first,const UnicodeString & second,UErrorCode & errorCode)69 append(UnicodeString &first, 70 const UnicodeString &second, 71 UErrorCode &errorCode) const { 72 return normalizeSecondAndAppend(first, second, FALSE, errorCode); 73 } 74 UnicodeString & normalizeSecondAndAppend(UnicodeString & first,const UnicodeString & second,UBool doNormalize,UErrorCode & errorCode)75 normalizeSecondAndAppend(UnicodeString &first, 76 const UnicodeString &second, 77 UBool doNormalize, 78 UErrorCode &errorCode) const { 79 uprv_checkCanGetBuffer(first, errorCode); 80 if(U_FAILURE(errorCode)) { 81 return first; 82 } 83 const UChar *secondArray=second.getBuffer(); 84 if(&first==&second || secondArray==NULL) { 85 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 86 return first; 87 } 88 int32_t firstLength=first.length(); 89 UnicodeString safeMiddle; 90 { 91 ReorderingBuffer buffer(impl, first); 92 if(buffer.init(firstLength+second.length(), errorCode)) { 93 normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize, 94 safeMiddle, buffer, errorCode); 95 } 96 } // The ReorderingBuffer destructor finalizes the first string. 97 if(U_FAILURE(errorCode)) { 98 // Restore the modified suffix of the first string. 99 first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle); 100 } 101 return first; 102 } 103 virtual void 104 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 105 UnicodeString &safeMiddle, 106 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; 107 virtual UBool getDecomposition(UChar32 c,UnicodeString & decomposition)108 getDecomposition(UChar32 c, UnicodeString &decomposition) const { 109 UChar buffer[4]; 110 int32_t length; 111 const UChar *d=impl.getDecomposition(c, buffer, length); 112 if(d==NULL) { 113 return FALSE; 114 } 115 if(d==buffer) { 116 decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c) 117 } else { 118 decomposition.setTo(FALSE, d, length); // read-only alias 119 } 120 return TRUE; 121 } 122 virtual UBool getRawDecomposition(UChar32 c,UnicodeString & decomposition)123 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const { 124 UChar buffer[30]; 125 int32_t length; 126 const UChar *d=impl.getRawDecomposition(c, buffer, length); 127 if(d==NULL) { 128 return FALSE; 129 } 130 if(d==buffer) { 131 decomposition.setTo(buffer, length); // copy the string (algorithmic decomposition) 132 } else { 133 decomposition.setTo(FALSE, d, length); // read-only alias 134 } 135 return TRUE; 136 } 137 virtual UChar32 composePair(UChar32 a,UChar32 b)138 composePair(UChar32 a, UChar32 b) const { 139 return impl.composePair(a, b); 140 } 141 142 virtual uint8_t getCombiningClass(UChar32 c)143 getCombiningClass(UChar32 c) const { 144 return impl.getCC(impl.getNorm16(c)); 145 } 146 147 // quick checks 148 virtual UBool isNormalized(const UnicodeString & s,UErrorCode & errorCode)149 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { 150 if(U_FAILURE(errorCode)) { 151 return FALSE; 152 } 153 const UChar *sArray=s.getBuffer(); 154 if(sArray==NULL) { 155 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 156 return FALSE; 157 } 158 const UChar *sLimit=sArray+s.length(); 159 return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode); 160 } 161 virtual UNormalizationCheckResult quickCheck(const UnicodeString & s,UErrorCode & errorCode)162 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { 163 return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO; 164 } 165 virtual int32_t spanQuickCheckYes(const UnicodeString & s,UErrorCode & errorCode)166 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const { 167 if(U_FAILURE(errorCode)) { 168 return 0; 169 } 170 const UChar *sArray=s.getBuffer(); 171 if(sArray==NULL) { 172 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 173 return 0; 174 } 175 return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray); 176 } 177 virtual const UChar * 178 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0; 179 getQuickCheck(UChar32)180 virtual UNormalizationCheckResult getQuickCheck(UChar32) const { 181 return UNORM_YES; 182 } 183 184 const Normalizer2Impl &impl; 185 }; 186 187 class DecomposeNormalizer2 : public Normalizer2WithImpl { 188 public: DecomposeNormalizer2(const Normalizer2Impl & ni)189 DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} 190 virtual ~DecomposeNormalizer2(); 191 192 private: 193 virtual void normalize(const UChar * src,const UChar * limit,ReorderingBuffer & buffer,UErrorCode & errorCode)194 normalize(const UChar *src, const UChar *limit, 195 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 196 impl.decompose(src, limit, &buffer, errorCode); 197 } 198 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. 199 virtual void normalizeAndAppend(const UChar * src,const UChar * limit,UBool doNormalize,UnicodeString & safeMiddle,ReorderingBuffer & buffer,UErrorCode & errorCode)200 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 201 UnicodeString &safeMiddle, 202 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 203 impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); 204 } 205 virtual const UChar * spanQuickCheckYes(const UChar * src,const UChar * limit,UErrorCode & errorCode)206 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { 207 return impl.decompose(src, limit, NULL, errorCode); 208 } 209 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. getQuickCheck(UChar32 c)210 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { 211 return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO; 212 } hasBoundaryBefore(UChar32 c)213 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); } hasBoundaryAfter(UChar32 c)214 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); } isInert(UChar32 c)215 virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); } 216 }; 217 218 class ComposeNormalizer2 : public Normalizer2WithImpl { 219 public: ComposeNormalizer2(const Normalizer2Impl & ni,UBool fcc)220 ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) : 221 Normalizer2WithImpl(ni), onlyContiguous(fcc) {} 222 virtual ~ComposeNormalizer2(); 223 224 private: 225 virtual void normalize(const UChar * src,const UChar * limit,ReorderingBuffer & buffer,UErrorCode & errorCode)226 normalize(const UChar *src, const UChar *limit, 227 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 228 impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode); 229 } 230 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. 231 virtual void normalizeAndAppend(const UChar * src,const UChar * limit,UBool doNormalize,UnicodeString & safeMiddle,ReorderingBuffer & buffer,UErrorCode & errorCode)232 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 233 UnicodeString &safeMiddle, 234 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 235 impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode); 236 } 237 238 virtual UBool isNormalized(const UnicodeString & s,UErrorCode & errorCode)239 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { 240 if(U_FAILURE(errorCode)) { 241 return FALSE; 242 } 243 const UChar *sArray=s.getBuffer(); 244 if(sArray==NULL) { 245 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 246 return FALSE; 247 } 248 UnicodeString temp; 249 ReorderingBuffer buffer(impl, temp); 250 if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization 251 return FALSE; 252 } 253 return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode); 254 } 255 virtual UNormalizationCheckResult quickCheck(const UnicodeString & s,UErrorCode & errorCode)256 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { 257 if(U_FAILURE(errorCode)) { 258 return UNORM_MAYBE; 259 } 260 const UChar *sArray=s.getBuffer(); 261 if(sArray==NULL) { 262 errorCode=U_ILLEGAL_ARGUMENT_ERROR; 263 return UNORM_MAYBE; 264 } 265 UNormalizationCheckResult qcResult=UNORM_YES; 266 impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult); 267 return qcResult; 268 } 269 virtual const UChar * spanQuickCheckYes(const UChar * src,const UChar * limit,UErrorCode &)270 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const { 271 return impl.composeQuickCheck(src, limit, onlyContiguous, NULL); 272 } 273 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. getQuickCheck(UChar32 c)274 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { 275 return impl.getCompQuickCheck(impl.getNorm16(c)); 276 } hasBoundaryBefore(UChar32 c)277 virtual UBool hasBoundaryBefore(UChar32 c) const { 278 return impl.hasCompBoundaryBefore(c); 279 } hasBoundaryAfter(UChar32 c)280 virtual UBool hasBoundaryAfter(UChar32 c) const { 281 return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE); 282 } isInert(UChar32 c)283 virtual UBool isInert(UChar32 c) const { 284 return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE); 285 } 286 287 const UBool onlyContiguous; 288 }; 289 290 class FCDNormalizer2 : public Normalizer2WithImpl { 291 public: FCDNormalizer2(const Normalizer2Impl & ni)292 FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} 293 virtual ~FCDNormalizer2(); 294 295 private: 296 virtual void normalize(const UChar * src,const UChar * limit,ReorderingBuffer & buffer,UErrorCode & errorCode)297 normalize(const UChar *src, const UChar *limit, 298 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 299 impl.makeFCD(src, limit, &buffer, errorCode); 300 } 301 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. 302 virtual void normalizeAndAppend(const UChar * src,const UChar * limit,UBool doNormalize,UnicodeString & safeMiddle,ReorderingBuffer & buffer,UErrorCode & errorCode)303 normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, 304 UnicodeString &safeMiddle, 305 ReorderingBuffer &buffer, UErrorCode &errorCode) const { 306 impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); 307 } 308 virtual const UChar * spanQuickCheckYes(const UChar * src,const UChar * limit,UErrorCode & errorCode)309 spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { 310 return impl.makeFCD(src, limit, NULL, errorCode); 311 } 312 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. hasBoundaryBefore(UChar32 c)313 virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); } hasBoundaryAfter(UChar32 c)314 virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); } isInert(UChar32 c)315 virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); } 316 }; 317 318 struct Norm2AllModes : public UMemory { Norm2AllModesNorm2AllModes319 Norm2AllModes(Normalizer2Impl *i) 320 : impl(i), comp(*i, FALSE), decomp(*i), fcd(*i), fcc(*i, TRUE) {} 321 ~Norm2AllModes(); 322 323 static Norm2AllModes *createInstance(Normalizer2Impl *impl, UErrorCode &errorCode); 324 static Norm2AllModes *createNFCInstance(UErrorCode &errorCode); 325 static Norm2AllModes *createInstance(const char *packageName, 326 const char *name, 327 UErrorCode &errorCode); 328 329 static const Norm2AllModes *getNFCInstance(UErrorCode &errorCode); 330 static const Norm2AllModes *getNFKCInstance(UErrorCode &errorCode); 331 static const Norm2AllModes *getNFKC_CFInstance(UErrorCode &errorCode); 332 333 Normalizer2Impl *impl; 334 ComposeNormalizer2 comp; 335 DecomposeNormalizer2 decomp; 336 FCDNormalizer2 fcd; 337 ComposeNormalizer2 fcc; 338 }; 339 340 U_NAMESPACE_END 341 342 #endif // !UCONFIG_NO_NORMALIZATION 343 #endif // __NORM2ALLMODES_H__ 344