1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 2014, International Business Machines
6 * Corporation and others.  All Rights Reserved.
7 *******************************************************************************
* norm2allmodes.h
9 *
10 * created on: 2014sep07
11 * created by: Markus W. Scherer
12 */
13 
14 #ifndef __NORM2ALLMODES_H__
15 #define __NORM2ALLMODES_H__
16 
17 #include "unicode/utypes.h"
18 
19 #if !UCONFIG_NO_NORMALIZATION
20 
21 #include "unicode/normalizer2.h"
22 #include "unicode/unistr.h"
23 #include "cpputils.h"
24 #include "normalizer2impl.h"
25 
26 U_NAMESPACE_BEGIN
27 
28 // Intermediate class:
29 // Has Normalizer2Impl and does boilerplate argument checking and setup.
30 class Normalizer2WithImpl : public Normalizer2 {
31 public:
Normalizer2WithImpl(const Normalizer2Impl & ni)32     Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
33     virtual ~Normalizer2WithImpl();
34 
35     // normalize
36     virtual UnicodeString &
normalize(const UnicodeString & src,UnicodeString & dest,UErrorCode & errorCode)37     normalize(const UnicodeString &src,
38               UnicodeString &dest,
39               UErrorCode &errorCode) const {
40         if(U_FAILURE(errorCode)) {
41             dest.setToBogus();
42             return dest;
43         }
44         const UChar *sArray=src.getBuffer();
45         if(&dest==&src || sArray==NULL) {
46             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
47             dest.setToBogus();
48             return dest;
49         }
50         dest.remove();
51         ReorderingBuffer buffer(impl, dest);
52         if(buffer.init(src.length(), errorCode)) {
53             normalize(sArray, sArray+src.length(), buffer, errorCode);
54         }
55         return dest;
56     }
57     virtual void
58     normalize(const UChar *src, const UChar *limit,
59               ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
60 
61     // normalize and append
62     virtual UnicodeString &
normalizeSecondAndAppend(UnicodeString & first,const UnicodeString & second,UErrorCode & errorCode)63     normalizeSecondAndAppend(UnicodeString &first,
64                              const UnicodeString &second,
65                              UErrorCode &errorCode) const {
66         return normalizeSecondAndAppend(first, second, TRUE, errorCode);
67     }
68     virtual UnicodeString &
append(UnicodeString & first,const UnicodeString & second,UErrorCode & errorCode)69     append(UnicodeString &first,
70            const UnicodeString &second,
71            UErrorCode &errorCode) const {
72         return normalizeSecondAndAppend(first, second, FALSE, errorCode);
73     }
74     UnicodeString &
normalizeSecondAndAppend(UnicodeString & first,const UnicodeString & second,UBool doNormalize,UErrorCode & errorCode)75     normalizeSecondAndAppend(UnicodeString &first,
76                              const UnicodeString &second,
77                              UBool doNormalize,
78                              UErrorCode &errorCode) const {
79         uprv_checkCanGetBuffer(first, errorCode);
80         if(U_FAILURE(errorCode)) {
81             return first;
82         }
83         const UChar *secondArray=second.getBuffer();
84         if(&first==&second || secondArray==NULL) {
85             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
86             return first;
87         }
88         int32_t firstLength=first.length();
89         UnicodeString safeMiddle;
90         {
91             ReorderingBuffer buffer(impl, first);
92             if(buffer.init(firstLength+second.length(), errorCode)) {
93                 normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
94                                    safeMiddle, buffer, errorCode);
95             }
96         }  // The ReorderingBuffer destructor finalizes the first string.
97         if(U_FAILURE(errorCode)) {
98             // Restore the modified suffix of the first string.
99             first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle);
100         }
101         return first;
102     }
103     virtual void
104     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
105                        UnicodeString &safeMiddle,
106                        ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
107     virtual UBool
getDecomposition(UChar32 c,UnicodeString & decomposition)108     getDecomposition(UChar32 c, UnicodeString &decomposition) const {
109         UChar buffer[4];
110         int32_t length;
111         const UChar *d=impl.getDecomposition(c, buffer, length);
112         if(d==NULL) {
113             return FALSE;
114         }
115         if(d==buffer) {
116             decomposition.setTo(buffer, length);  // copy the string (Jamos from Hangul syllable c)
117         } else {
118             decomposition.setTo(FALSE, d, length);  // read-only alias
119         }
120         return TRUE;
121     }
122     virtual UBool
getRawDecomposition(UChar32 c,UnicodeString & decomposition)123     getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
124         UChar buffer[30];
125         int32_t length;
126         const UChar *d=impl.getRawDecomposition(c, buffer, length);
127         if(d==NULL) {
128             return FALSE;
129         }
130         if(d==buffer) {
131             decomposition.setTo(buffer, length);  // copy the string (algorithmic decomposition)
132         } else {
133             decomposition.setTo(FALSE, d, length);  // read-only alias
134         }
135         return TRUE;
136     }
137     virtual UChar32
composePair(UChar32 a,UChar32 b)138     composePair(UChar32 a, UChar32 b) const {
139         return impl.composePair(a, b);
140     }
141 
142     virtual uint8_t
getCombiningClass(UChar32 c)143     getCombiningClass(UChar32 c) const {
144         return impl.getCC(impl.getNorm16(c));
145     }
146 
147     // quick checks
148     virtual UBool
isNormalized(const UnicodeString & s,UErrorCode & errorCode)149     isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
150         if(U_FAILURE(errorCode)) {
151             return FALSE;
152         }
153         const UChar *sArray=s.getBuffer();
154         if(sArray==NULL) {
155             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
156             return FALSE;
157         }
158         const UChar *sLimit=sArray+s.length();
159         return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
160     }
161     virtual UNormalizationCheckResult
quickCheck(const UnicodeString & s,UErrorCode & errorCode)162     quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
163         return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
164     }
165     virtual int32_t
spanQuickCheckYes(const UnicodeString & s,UErrorCode & errorCode)166     spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
167         if(U_FAILURE(errorCode)) {
168             return 0;
169         }
170         const UChar *sArray=s.getBuffer();
171         if(sArray==NULL) {
172             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
173             return 0;
174         }
175         return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
176     }
177     virtual const UChar *
178     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0;
179 
getQuickCheck(UChar32)180     virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
181         return UNORM_YES;
182     }
183 
184     const Normalizer2Impl &impl;
185 };
186 
187 class DecomposeNormalizer2 : public Normalizer2WithImpl {
188 public:
DecomposeNormalizer2(const Normalizer2Impl & ni)189     DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
190     virtual ~DecomposeNormalizer2();
191 
192 private:
193     virtual void
normalize(const UChar * src,const UChar * limit,ReorderingBuffer & buffer,UErrorCode & errorCode)194     normalize(const UChar *src, const UChar *limit,
195               ReorderingBuffer &buffer, UErrorCode &errorCode) const {
196         impl.decompose(src, limit, &buffer, errorCode);
197     }
198     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
199     virtual void
normalizeAndAppend(const UChar * src,const UChar * limit,UBool doNormalize,UnicodeString & safeMiddle,ReorderingBuffer & buffer,UErrorCode & errorCode)200     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
201                        UnicodeString &safeMiddle,
202                        ReorderingBuffer &buffer, UErrorCode &errorCode) const {
203         impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
204     }
205     virtual const UChar *
spanQuickCheckYes(const UChar * src,const UChar * limit,UErrorCode & errorCode)206     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
207         return impl.decompose(src, limit, NULL, errorCode);
208     }
209     using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
getQuickCheck(UChar32 c)210     virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
211         return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
212     }
hasBoundaryBefore(UChar32 c)213     virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); }
hasBoundaryAfter(UChar32 c)214     virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); }
isInert(UChar32 c)215     virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
216 };
217 
218 class ComposeNormalizer2 : public Normalizer2WithImpl {
219 public:
ComposeNormalizer2(const Normalizer2Impl & ni,UBool fcc)220     ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
221         Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
222     virtual ~ComposeNormalizer2();
223 
224 private:
225     virtual void
normalize(const UChar * src,const UChar * limit,ReorderingBuffer & buffer,UErrorCode & errorCode)226     normalize(const UChar *src, const UChar *limit,
227               ReorderingBuffer &buffer, UErrorCode &errorCode) const {
228         impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
229     }
230     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
231     virtual void
normalizeAndAppend(const UChar * src,const UChar * limit,UBool doNormalize,UnicodeString & safeMiddle,ReorderingBuffer & buffer,UErrorCode & errorCode)232     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
233                        UnicodeString &safeMiddle,
234                        ReorderingBuffer &buffer, UErrorCode &errorCode) const {
235         impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
236     }
237 
238     virtual UBool
isNormalized(const UnicodeString & s,UErrorCode & errorCode)239     isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
240         if(U_FAILURE(errorCode)) {
241             return FALSE;
242         }
243         const UChar *sArray=s.getBuffer();
244         if(sArray==NULL) {
245             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
246             return FALSE;
247         }
248         UnicodeString temp;
249         ReorderingBuffer buffer(impl, temp);
250         if(!buffer.init(5, errorCode)) {  // small destCapacity for substring normalization
251             return FALSE;
252         }
253         return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode);
254     }
255     virtual UNormalizationCheckResult
quickCheck(const UnicodeString & s,UErrorCode & errorCode)256     quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
257         if(U_FAILURE(errorCode)) {
258             return UNORM_MAYBE;
259         }
260         const UChar *sArray=s.getBuffer();
261         if(sArray==NULL) {
262             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
263             return UNORM_MAYBE;
264         }
265         UNormalizationCheckResult qcResult=UNORM_YES;
266         impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
267         return qcResult;
268     }
269     virtual const UChar *
spanQuickCheckYes(const UChar * src,const UChar * limit,UErrorCode &)270     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const {
271         return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
272     }
273     using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
getQuickCheck(UChar32 c)274     virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
275         return impl.getCompQuickCheck(impl.getNorm16(c));
276     }
hasBoundaryBefore(UChar32 c)277     virtual UBool hasBoundaryBefore(UChar32 c) const {
278         return impl.hasCompBoundaryBefore(c);
279     }
hasBoundaryAfter(UChar32 c)280     virtual UBool hasBoundaryAfter(UChar32 c) const {
281         return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE);
282     }
isInert(UChar32 c)283     virtual UBool isInert(UChar32 c) const {
284         return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE);
285     }
286 
287     const UBool onlyContiguous;
288 };
289 
290 class FCDNormalizer2 : public Normalizer2WithImpl {
291 public:
FCDNormalizer2(const Normalizer2Impl & ni)292     FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
293     virtual ~FCDNormalizer2();
294 
295 private:
296     virtual void
normalize(const UChar * src,const UChar * limit,ReorderingBuffer & buffer,UErrorCode & errorCode)297     normalize(const UChar *src, const UChar *limit,
298               ReorderingBuffer &buffer, UErrorCode &errorCode) const {
299         impl.makeFCD(src, limit, &buffer, errorCode);
300     }
301     using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
302     virtual void
normalizeAndAppend(const UChar * src,const UChar * limit,UBool doNormalize,UnicodeString & safeMiddle,ReorderingBuffer & buffer,UErrorCode & errorCode)303     normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
304                        UnicodeString &safeMiddle,
305                        ReorderingBuffer &buffer, UErrorCode &errorCode) const {
306         impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
307     }
308     virtual const UChar *
spanQuickCheckYes(const UChar * src,const UChar * limit,UErrorCode & errorCode)309     spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
310         return impl.makeFCD(src, limit, NULL, errorCode);
311     }
312     using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
hasBoundaryBefore(UChar32 c)313     virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); }
hasBoundaryAfter(UChar32 c)314     virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); }
isInert(UChar32 c)315     virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); }
316 };
317 
318 struct Norm2AllModes : public UMemory {
Norm2AllModesNorm2AllModes319     Norm2AllModes(Normalizer2Impl *i)
320             : impl(i), comp(*i, FALSE), decomp(*i), fcd(*i), fcc(*i, TRUE) {}
321     ~Norm2AllModes();
322 
323     static Norm2AllModes *createInstance(Normalizer2Impl *impl, UErrorCode &errorCode);
324     static Norm2AllModes *createNFCInstance(UErrorCode &errorCode);
325     static Norm2AllModes *createInstance(const char *packageName,
326                                          const char *name,
327                                          UErrorCode &errorCode);
328 
329     static const Norm2AllModes *getNFCInstance(UErrorCode &errorCode);
330     static const Norm2AllModes *getNFKCInstance(UErrorCode &errorCode);
331     static const Norm2AllModes *getNFKC_CFInstance(UErrorCode &errorCode);
332 
333     Normalizer2Impl *impl;
334     ComposeNormalizer2 comp;
335     DecomposeNormalizer2 decomp;
336     FCDNormalizer2 fcd;
337     ComposeNormalizer2 fcc;
338 };
339 
340 U_NAMESPACE_END
341 
342 #endif  // !UCONFIG_NO_NORMALIZATION
343 #endif  // __NORM2ALLMODES_H__
344