/*
*******************************************************************************
*
*   Copyright (C) 2009-2013, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  normalizer2.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2009nov22
*   created by: Markus W. Scherer
*/

17 #ifndef __NORMALIZER2_H__
18 #define __NORMALIZER2_H__
19 
20 /**
21  * \file
22  * \brief C++ API: New API for Unicode Normalization.
23  */
24 
25 #include "unicode/utypes.h"
26 
27 #if !UCONFIG_NO_NORMALIZATION
28 
29 #include "unicode/uniset.h"
30 #include "unicode/unistr.h"
31 #include "unicode/unorm2.h"
32 
33 U_NAMESPACE_BEGIN
34 
35 /**
36  * Unicode normalization functionality for standard Unicode normalization or
37  * for using custom mapping tables.
38  * All instances of this class are unmodifiable/immutable.
39  * Instances returned by getInstance() are singletons that must not be deleted by the caller.
40  * The Normalizer2 class is not intended for public subclassing.
41  *
42  * The primary functions are to produce a normalized string and to detect whether
43  * a string is already normalized.
44  * The most commonly used normalization forms are those defined in
45  * http://www.unicode.org/unicode/reports/tr15/
46  * However, this API supports additional normalization forms for specialized purposes.
47  * For example, NFKC_Casefold is provided via getInstance("nfkc_cf", COMPOSE)
48  * and can be used in implementations of UTS #46.
49  *
50  * Not only are the standard compose and decompose modes supplied,
51  * but additional modes are provided as documented in the Mode enum.
52  *
53  * Some of the functions in this class identify normalization boundaries.
54  * At a normalization boundary, the portions of the string
55  * before it and starting from it do not interact and can be handled independently.
56  *
57  * The spanQuickCheckYes() stops at a normalization boundary.
58  * When the goal is a normalized string, then the text before the boundary
59  * can be copied, and the remainder can be processed with normalizeSecondAndAppend().
60  *
61  * The hasBoundaryBefore(), hasBoundaryAfter() and isInert() functions test whether
62  * a character is guaranteed to be at a normalization boundary,
63  * regardless of context.
64  * This is used for moving from one normalization boundary to the next
65  * or preceding boundary, and for performing iterative normalization.
66  *
67  * Iterative normalization is useful when only a small portion of a
68  * longer string needs to be processed.
69  * For example, in ICU, iterative normalization is used by the NormalizationTransliterator
70  * (to avoid replacing already-normalized text) and ucol_nextSortKeyPart()
71  * (to process only the substring for which sort key bytes are computed).
72  *
73  * The set of normalization boundaries returned by these functions may not be
74  * complete: There may be more boundaries that could be returned.
75  * Different functions may return different boundaries.
76  * @stable ICU 4.4
77  */
78 class U_COMMON_API Normalizer2 : public UObject {
79 public:
80     /**
81      * Destructor.
82      * @stable ICU 4.4
83      */
84     ~Normalizer2();
85 
86     /**
87      * Returns a Normalizer2 instance for Unicode NFC normalization.
88      * Same as getInstance(NULL, "nfc", UNORM2_COMPOSE, errorCode).
89      * Returns an unmodifiable singleton instance. Do not delete it.
90      * @param errorCode Standard ICU error code. Its input value must
91      *                  pass the U_SUCCESS() test, or else the function returns
92      *                  immediately. Check for U_FAILURE() on output or use with
93      *                  function chaining. (See User Guide for details.)
94      * @return the requested Normalizer2, if successful
95      * @stable ICU 49
96      */
97     static const Normalizer2 *
98     getNFCInstance(UErrorCode &errorCode);
99 
100     /**
101      * Returns a Normalizer2 instance for Unicode NFD normalization.
102      * Same as getInstance(NULL, "nfc", UNORM2_DECOMPOSE, errorCode).
103      * Returns an unmodifiable singleton instance. Do not delete it.
104      * @param errorCode Standard ICU error code. Its input value must
105      *                  pass the U_SUCCESS() test, or else the function returns
106      *                  immediately. Check for U_FAILURE() on output or use with
107      *                  function chaining. (See User Guide for details.)
108      * @return the requested Normalizer2, if successful
109      * @stable ICU 49
110      */
111     static const Normalizer2 *
112     getNFDInstance(UErrorCode &errorCode);
113 
114     /**
115      * Returns a Normalizer2 instance for Unicode NFKC normalization.
116      * Same as getInstance(NULL, "nfkc", UNORM2_COMPOSE, errorCode).
117      * Returns an unmodifiable singleton instance. Do not delete it.
118      * @param errorCode Standard ICU error code. Its input value must
119      *                  pass the U_SUCCESS() test, or else the function returns
120      *                  immediately. Check for U_FAILURE() on output or use with
121      *                  function chaining. (See User Guide for details.)
122      * @return the requested Normalizer2, if successful
123      * @stable ICU 49
124      */
125     static const Normalizer2 *
126     getNFKCInstance(UErrorCode &errorCode);
127 
128     /**
129      * Returns a Normalizer2 instance for Unicode NFKD normalization.
130      * Same as getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, errorCode).
131      * Returns an unmodifiable singleton instance. Do not delete it.
132      * @param errorCode Standard ICU error code. Its input value must
133      *                  pass the U_SUCCESS() test, or else the function returns
134      *                  immediately. Check for U_FAILURE() on output or use with
135      *                  function chaining. (See User Guide for details.)
136      * @return the requested Normalizer2, if successful
137      * @stable ICU 49
138      */
139     static const Normalizer2 *
140     getNFKDInstance(UErrorCode &errorCode);
141 
142     /**
143      * Returns a Normalizer2 instance for Unicode NFKC_Casefold normalization.
144      * Same as getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, errorCode).
145      * Returns an unmodifiable singleton instance. Do not delete it.
146      * @param errorCode Standard ICU error code. Its input value must
147      *                  pass the U_SUCCESS() test, or else the function returns
148      *                  immediately. Check for U_FAILURE() on output or use with
149      *                  function chaining. (See User Guide for details.)
150      * @return the requested Normalizer2, if successful
151      * @stable ICU 49
152      */
153     static const Normalizer2 *
154     getNFKCCasefoldInstance(UErrorCode &errorCode);
155 
156     /**
157      * Returns a Normalizer2 instance which uses the specified data file
158      * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle)
159      * and which composes or decomposes text according to the specified mode.
160      * Returns an unmodifiable singleton instance. Do not delete it.
161      *
162      * Use packageName=NULL for data files that are part of ICU's own data.
163      * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD.
164      * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD.
165      * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold.
166      *
167      * @param packageName NULL for ICU built-in data, otherwise application data package name
168      * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file
169      * @param mode normalization mode (compose or decompose etc.)
170      * @param errorCode Standard ICU error code. Its input value must
171      *                  pass the U_SUCCESS() test, or else the function returns
172      *                  immediately. Check for U_FAILURE() on output or use with
173      *                  function chaining. (See User Guide for details.)
174      * @return the requested Normalizer2, if successful
175      * @stable ICU 4.4
176      */
177     static const Normalizer2 *
178     getInstance(const char *packageName,
179                 const char *name,
180                 UNormalization2Mode mode,
181                 UErrorCode &errorCode);
182 
183     /**
184      * Returns the normalized form of the source string.
185      * @param src source string
186      * @param errorCode Standard ICU error code. Its input value must
187      *                  pass the U_SUCCESS() test, or else the function returns
188      *                  immediately. Check for U_FAILURE() on output or use with
189      *                  function chaining. (See User Guide for details.)
190      * @return normalized src
191      * @stable ICU 4.4
192      */
193     UnicodeString
normalize(const UnicodeString & src,UErrorCode & errorCode)194     normalize(const UnicodeString &src, UErrorCode &errorCode) const {
195         UnicodeString result;
196         normalize(src, result, errorCode);
197         return result;
198     }
199     /**
200      * Writes the normalized form of the source string to the destination string
201      * (replacing its contents) and returns the destination string.
202      * The source and destination strings must be different objects.
203      * @param src source string
204      * @param dest destination string; its contents is replaced with normalized src
205      * @param errorCode Standard ICU error code. Its input value must
206      *                  pass the U_SUCCESS() test, or else the function returns
207      *                  immediately. Check for U_FAILURE() on output or use with
208      *                  function chaining. (See User Guide for details.)
209      * @return dest
210      * @stable ICU 4.4
211      */
212     virtual UnicodeString &
213     normalize(const UnicodeString &src,
214               UnicodeString &dest,
215               UErrorCode &errorCode) const = 0;
216     /**
217      * Appends the normalized form of the second string to the first string
218      * (merging them at the boundary) and returns the first string.
219      * The result is normalized if the first string was normalized.
220      * The first and second strings must be different objects.
221      * @param first string, should be normalized
222      * @param second string, will be normalized
223      * @param errorCode Standard ICU error code. Its input value must
224      *                  pass the U_SUCCESS() test, or else the function returns
225      *                  immediately. Check for U_FAILURE() on output or use with
226      *                  function chaining. (See User Guide for details.)
227      * @return first
228      * @stable ICU 4.4
229      */
230     virtual UnicodeString &
231     normalizeSecondAndAppend(UnicodeString &first,
232                              const UnicodeString &second,
233                              UErrorCode &errorCode) const = 0;
234     /**
235      * Appends the second string to the first string
236      * (merging them at the boundary) and returns the first string.
237      * The result is normalized if both the strings were normalized.
238      * The first and second strings must be different objects.
239      * @param first string, should be normalized
240      * @param second string, should be normalized
241      * @param errorCode Standard ICU error code. Its input value must
242      *                  pass the U_SUCCESS() test, or else the function returns
243      *                  immediately. Check for U_FAILURE() on output or use with
244      *                  function chaining. (See User Guide for details.)
245      * @return first
246      * @stable ICU 4.4
247      */
248     virtual UnicodeString &
249     append(UnicodeString &first,
250            const UnicodeString &second,
251            UErrorCode &errorCode) const = 0;
252 
253     /**
254      * Gets the decomposition mapping of c.
255      * Roughly equivalent to normalizing the String form of c
256      * on a UNORM2_DECOMPOSE Normalizer2 instance, but much faster, and except that this function
257      * returns FALSE and does not write a string
258      * if c does not have a decomposition mapping in this instance's data.
259      * This function is independent of the mode of the Normalizer2.
260      * @param c code point
261      * @param decomposition String object which will be set to c's
262      *                      decomposition mapping, if there is one.
263      * @return TRUE if c has a decomposition, otherwise FALSE
264      * @stable ICU 4.6
265      */
266     virtual UBool
267     getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0;
268 
269     /**
270      * Gets the raw decomposition mapping of c.
271      *
272      * This is similar to the getDecomposition() method but returns the
273      * raw decomposition mapping as specified in UnicodeData.txt or
274      * (for custom data) in the mapping files processed by the gennorm2 tool.
275      * By contrast, getDecomposition() returns the processed,
276      * recursively-decomposed version of this mapping.
277      *
278      * When used on a standard NFKC Normalizer2 instance,
279      * getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property.
280      *
281      * When used on a standard NFC Normalizer2 instance,
282      * it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can);
283      * in this case, the result contains either one or two code points (=1..4 UChars).
284      *
285      * This function is independent of the mode of the Normalizer2.
286      * The default implementation returns FALSE.
287      * @param c code point
288      * @param decomposition String object which will be set to c's
289      *                      raw decomposition mapping, if there is one.
290      * @return TRUE if c has a decomposition, otherwise FALSE
291      * @stable ICU 49
292      */
293     virtual UBool
294     getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;
295 
296     /**
297      * Performs pairwise composition of a & b and returns the composite if there is one.
298      *
299      * Returns a composite code point c only if c has a two-way mapping to a+b.
300      * In standard Unicode normalization, this means that
301      * c has a canonical decomposition to a+b
302      * and c does not have the Full_Composition_Exclusion property.
303      *
304      * This function is independent of the mode of the Normalizer2.
305      * The default implementation returns a negative value.
306      * @param a A (normalization starter) code point.
307      * @param b Another code point.
308      * @return The non-negative composite code point if there is one; otherwise a negative value.
309      * @stable ICU 49
310      */
311     virtual UChar32
312     composePair(UChar32 a, UChar32 b) const;
313 
314     /**
315      * Gets the combining class of c.
316      * The default implementation returns 0
317      * but all standard implementations return the Unicode Canonical_Combining_Class value.
318      * @param c code point
319      * @return c's combining class
320      * @stable ICU 49
321      */
322     virtual uint8_t
323     getCombiningClass(UChar32 c) const;
324 
325     /**
326      * Tests if the string is normalized.
327      * Internally, in cases where the quickCheck() method would return "maybe"
328      * (which is only possible for the two COMPOSE modes) this method
329      * resolves to "yes" or "no" to provide a definitive result,
330      * at the cost of doing more work in those cases.
331      * @param s input string
332      * @param errorCode Standard ICU error code. Its input value must
333      *                  pass the U_SUCCESS() test, or else the function returns
334      *                  immediately. Check for U_FAILURE() on output or use with
335      *                  function chaining. (See User Guide for details.)
336      * @return TRUE if s is normalized
337      * @stable ICU 4.4
338      */
339     virtual UBool
340     isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0;
341 
342     /**
343      * Tests if the string is normalized.
344      * For the two COMPOSE modes, the result could be "maybe" in cases that
345      * would take a little more work to resolve definitively.
346      * Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster
347      * combination of quick check + normalization, to avoid
348      * re-checking the "yes" prefix.
349      * @param s input string
350      * @param errorCode Standard ICU error code. Its input value must
351      *                  pass the U_SUCCESS() test, or else the function returns
352      *                  immediately. Check for U_FAILURE() on output or use with
353      *                  function chaining. (See User Guide for details.)
354      * @return UNormalizationCheckResult
355      * @stable ICU 4.4
356      */
357     virtual UNormalizationCheckResult
358     quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0;
359 
360     /**
361      * Returns the end of the normalized substring of the input string.
362      * In other words, with <code>end=spanQuickCheckYes(s, ec);</code>
363      * the substring <code>UnicodeString(s, 0, end)</code>
364      * will pass the quick check with a "yes" result.
365      *
366      * The returned end index is usually one or more characters before the
367      * "no" or "maybe" character: The end index is at a normalization boundary.
368      * (See the class documentation for more about normalization boundaries.)
369      *
370      * When the goal is a normalized string and most input strings are expected
371      * to be normalized already, then call this method,
372      * and if it returns a prefix shorter than the input string,
373      * copy that prefix and use normalizeSecondAndAppend() for the remainder.
374      * @param s input string
375      * @param errorCode Standard ICU error code. Its input value must
376      *                  pass the U_SUCCESS() test, or else the function returns
377      *                  immediately. Check for U_FAILURE() on output or use with
378      *                  function chaining. (See User Guide for details.)
379      * @return "yes" span end index
380      * @stable ICU 4.4
381      */
382     virtual int32_t
383     spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0;
384 
385     /**
386      * Tests if the character always has a normalization boundary before it,
387      * regardless of context.
388      * If true, then the character does not normalization-interact with
389      * preceding characters.
390      * In other words, a string containing this character can be normalized
391      * by processing portions before this character and starting from this
392      * character independently.
393      * This is used for iterative normalization. See the class documentation for details.
394      * @param c character to test
395      * @return TRUE if c has a normalization boundary before it
396      * @stable ICU 4.4
397      */
398     virtual UBool hasBoundaryBefore(UChar32 c) const = 0;
399 
400     /**
401      * Tests if the character always has a normalization boundary after it,
402      * regardless of context.
403      * If true, then the character does not normalization-interact with
404      * following characters.
405      * In other words, a string containing this character can be normalized
406      * by processing portions up to this character and after this
407      * character independently.
408      * This is used for iterative normalization. See the class documentation for details.
409      * Note that this operation may be significantly slower than hasBoundaryBefore().
410      * @param c character to test
411      * @return TRUE if c has a normalization boundary after it
412      * @stable ICU 4.4
413      */
414     virtual UBool hasBoundaryAfter(UChar32 c) const = 0;
415 
416     /**
417      * Tests if the character is normalization-inert.
418      * If true, then the character does not change, nor normalization-interact with
419      * preceding or following characters.
420      * In other words, a string containing this character can be normalized
421      * by processing portions before this character and after this
422      * character independently.
423      * This is used for iterative normalization. See the class documentation for details.
424      * Note that this operation may be significantly slower than hasBoundaryBefore().
425      * @param c character to test
426      * @return TRUE if c is normalization-inert
427      * @stable ICU 4.4
428      */
429     virtual UBool isInert(UChar32 c) const = 0;
430 };
431 
432 /**
433  * Normalization filtered by a UnicodeSet.
434  * Normalizes portions of the text contained in the filter set and leaves
435  * portions not contained in the filter set unchanged.
436  * Filtering is done via UnicodeSet::span(..., USET_SPAN_SIMPLE).
437  * Not-in-the-filter text is treated as "is normalized" and "quick check yes".
438  * This class implements all of (and only) the Normalizer2 API.
439  * An instance of this class is unmodifiable/immutable but is constructed and
440  * must be destructed by the owner.
441  * @stable ICU 4.4
442  */
443 class U_COMMON_API FilteredNormalizer2 : public Normalizer2 {
444 public:
445     /**
446      * Constructs a filtered normalizer wrapping any Normalizer2 instance
447      * and a filter set.
448      * Both are aliased and must not be modified or deleted while this object
449      * is used.
450      * The filter set should be frozen; otherwise the performance will suffer greatly.
451      * @param n2 wrapped Normalizer2 instance
452      * @param filterSet UnicodeSet which determines the characters to be normalized
453      * @stable ICU 4.4
454      */
FilteredNormalizer2(const Normalizer2 & n2,const UnicodeSet & filterSet)455     FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) :
456             norm2(n2), set(filterSet) {}
457 
458     /**
459      * Destructor.
460      * @stable ICU 4.4
461      */
462     ~FilteredNormalizer2();
463 
464     /**
465      * Writes the normalized form of the source string to the destination string
466      * (replacing its contents) and returns the destination string.
467      * The source and destination strings must be different objects.
468      * @param src source string
469      * @param dest destination string; its contents is replaced with normalized src
470      * @param errorCode Standard ICU error code. Its input value must
471      *                  pass the U_SUCCESS() test, or else the function returns
472      *                  immediately. Check for U_FAILURE() on output or use with
473      *                  function chaining. (See User Guide for details.)
474      * @return dest
475      * @stable ICU 4.4
476      */
477     virtual UnicodeString &
478     normalize(const UnicodeString &src,
479               UnicodeString &dest,
480               UErrorCode &errorCode) const;
481     /**
482      * Appends the normalized form of the second string to the first string
483      * (merging them at the boundary) and returns the first string.
484      * The result is normalized if the first string was normalized.
485      * The first and second strings must be different objects.
486      * @param first string, should be normalized
487      * @param second string, will be normalized
488      * @param errorCode Standard ICU error code. Its input value must
489      *                  pass the U_SUCCESS() test, or else the function returns
490      *                  immediately. Check for U_FAILURE() on output or use with
491      *                  function chaining. (See User Guide for details.)
492      * @return first
493      * @stable ICU 4.4
494      */
495     virtual UnicodeString &
496     normalizeSecondAndAppend(UnicodeString &first,
497                              const UnicodeString &second,
498                              UErrorCode &errorCode) const;
499     /**
500      * Appends the second string to the first string
501      * (merging them at the boundary) and returns the first string.
502      * The result is normalized if both the strings were normalized.
503      * The first and second strings must be different objects.
504      * @param first string, should be normalized
505      * @param second string, should be normalized
506      * @param errorCode Standard ICU error code. Its input value must
507      *                  pass the U_SUCCESS() test, or else the function returns
508      *                  immediately. Check for U_FAILURE() on output or use with
509      *                  function chaining. (See User Guide for details.)
510      * @return first
511      * @stable ICU 4.4
512      */
513     virtual UnicodeString &
514     append(UnicodeString &first,
515            const UnicodeString &second,
516            UErrorCode &errorCode) const;
517 
518     /**
519      * Gets the decomposition mapping of c.
520      * For details see the base class documentation.
521      *
522      * This function is independent of the mode of the Normalizer2.
523      * @param c code point
524      * @param decomposition String object which will be set to c's
525      *                      decomposition mapping, if there is one.
526      * @return TRUE if c has a decomposition, otherwise FALSE
527      * @stable ICU 4.6
528      */
529     virtual UBool
530     getDecomposition(UChar32 c, UnicodeString &decomposition) const;
531 
532     /**
533      * Gets the raw decomposition mapping of c.
534      * For details see the base class documentation.
535      *
536      * This function is independent of the mode of the Normalizer2.
537      * @param c code point
538      * @param decomposition String object which will be set to c's
539      *                      raw decomposition mapping, if there is one.
540      * @return TRUE if c has a decomposition, otherwise FALSE
541      * @stable ICU 49
542      */
543     virtual UBool
544     getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;
545 
546     /**
547      * Performs pairwise composition of a & b and returns the composite if there is one.
548      * For details see the base class documentation.
549      *
550      * This function is independent of the mode of the Normalizer2.
551      * @param a A (normalization starter) code point.
552      * @param b Another code point.
553      * @return The non-negative composite code point if there is one; otherwise a negative value.
554      * @stable ICU 49
555      */
556     virtual UChar32
557     composePair(UChar32 a, UChar32 b) const;
558 
559     /**
560      * Gets the combining class of c.
561      * The default implementation returns 0
562      * but all standard implementations return the Unicode Canonical_Combining_Class value.
563      * @param c code point
564      * @return c's combining class
565      * @stable ICU 49
566      */
567     virtual uint8_t
568     getCombiningClass(UChar32 c) const;
569 
570     /**
571      * Tests if the string is normalized.
572      * For details see the Normalizer2 base class documentation.
573      * @param s input string
574      * @param errorCode Standard ICU error code. Its input value must
575      *                  pass the U_SUCCESS() test, or else the function returns
576      *                  immediately. Check for U_FAILURE() on output or use with
577      *                  function chaining. (See User Guide for details.)
578      * @return TRUE if s is normalized
579      * @stable ICU 4.4
580      */
581     virtual UBool
582     isNormalized(const UnicodeString &s, UErrorCode &errorCode) const;
583     /**
584      * Tests if the string is normalized.
585      * For details see the Normalizer2 base class documentation.
586      * @param s input string
587      * @param errorCode Standard ICU error code. Its input value must
588      *                  pass the U_SUCCESS() test, or else the function returns
589      *                  immediately. Check for U_FAILURE() on output or use with
590      *                  function chaining. (See User Guide for details.)
591      * @return UNormalizationCheckResult
592      * @stable ICU 4.4
593      */
594     virtual UNormalizationCheckResult
595     quickCheck(const UnicodeString &s, UErrorCode &errorCode) const;
596     /**
597      * Returns the end of the normalized substring of the input string.
598      * For details see the Normalizer2 base class documentation.
599      * @param s input string
600      * @param errorCode Standard ICU error code. Its input value must
601      *                  pass the U_SUCCESS() test, or else the function returns
602      *                  immediately. Check for U_FAILURE() on output or use with
603      *                  function chaining. (See User Guide for details.)
604      * @return "yes" span end index
605      * @stable ICU 4.4
606      */
607     virtual int32_t
608     spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const;
609 
610     /**
611      * Tests if the character always has a normalization boundary before it,
612      * regardless of context.
613      * For details see the Normalizer2 base class documentation.
614      * @param c character to test
615      * @return TRUE if c has a normalization boundary before it
616      * @stable ICU 4.4
617      */
618     virtual UBool hasBoundaryBefore(UChar32 c) const;
619 
620     /**
621      * Tests if the character always has a normalization boundary after it,
622      * regardless of context.
623      * For details see the Normalizer2 base class documentation.
624      * @param c character to test
625      * @return TRUE if c has a normalization boundary after it
626      * @stable ICU 4.4
627      */
628     virtual UBool hasBoundaryAfter(UChar32 c) const;
629 
630     /**
631      * Tests if the character is normalization-inert.
632      * For details see the Normalizer2 base class documentation.
633      * @param c character to test
634      * @return TRUE if c is normalization-inert
635      * @stable ICU 4.4
636      */
637     virtual UBool isInert(UChar32 c) const;
638 private:
639     UnicodeString &
640     normalize(const UnicodeString &src,
641               UnicodeString &dest,
642               USetSpanCondition spanCondition,
643               UErrorCode &errorCode) const;
644 
645     UnicodeString &
646     normalizeSecondAndAppend(UnicodeString &first,
647                              const UnicodeString &second,
648                              UBool doNormalize,
649                              UErrorCode &errorCode) const;
650 
651     const Normalizer2 &norm2;
652     const UnicodeSet &set;
653 };
654 
655 U_NAMESPACE_END
656 
657 #endif  // !UCONFIG_NO_NORMALIZATION
658 #endif  // __NORMALIZER2_H__
659