1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2009-2013, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: normalizer2.h 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2009nov22 14 * created by: Markus W. Scherer 15 */ 16 17 #ifndef __NORMALIZER2_H__ 18 #define __NORMALIZER2_H__ 19 20 /** 21 * \file 22 * \brief C++ API: New API for Unicode Normalization. 23 */ 24 25 #include "unicode/utypes.h" 26 27 #if !UCONFIG_NO_NORMALIZATION 28 29 #include "unicode/uniset.h" 30 #include "unicode/unistr.h" 31 #include "unicode/unorm2.h" 32 33 U_NAMESPACE_BEGIN 34 35 /** 36 * Unicode normalization functionality for standard Unicode normalization or 37 * for using custom mapping tables. 38 * All instances of this class are unmodifiable/immutable. 39 * Instances returned by getInstance() are singletons that must not be deleted by the caller. 40 * The Normalizer2 class is not intended for public subclassing. 41 * 42 * The primary functions are to produce a normalized string and to detect whether 43 * a string is already normalized. 44 * The most commonly used normalization forms are those defined in 45 * http://www.unicode.org/unicode/reports/tr15/ 46 * However, this API supports additional normalization forms for specialized purposes. 47 * For example, NFKC_Casefold is provided via getInstance("nfkc_cf", COMPOSE) 48 * and can be used in implementations of UTS #46. 49 * 50 * Not only are the standard compose and decompose modes supplied, 51 * but additional modes are provided as documented in the Mode enum. 52 * 53 * Some of the functions in this class identify normalization boundaries. 54 * At a normalization boundary, the portions of the string 55 * before it and starting from it do not interact and can be handled independently. 56 * 57 * The spanQuickCheckYes() stops at a normalization boundary. 58 * When the goal is a normalized string, then the text before the boundary 59 * can be copied, and the remainder can be processed with normalizeSecondAndAppend(). 60 * 61 * The hasBoundaryBefore(), hasBoundaryAfter() and isInert() functions test whether 62 * a character is guaranteed to be at a normalization boundary, 63 * regardless of context. 64 * This is used for moving from one normalization boundary to the next 65 * or preceding boundary, and for performing iterative normalization. 66 * 67 * Iterative normalization is useful when only a small portion of a 68 * longer string needs to be processed. 69 * For example, in ICU, iterative normalization is used by the NormalizationTransliterator 70 * (to avoid replacing already-normalized text) and ucol_nextSortKeyPart() 71 * (to process only the substring for which sort key bytes are computed). 72 * 73 * The set of normalization boundaries returned by these functions may not be 74 * complete: There may be more boundaries that could be returned. 75 * Different functions may return different boundaries. 76 * @stable ICU 4.4 77 */ 78 class U_COMMON_API Normalizer2 : public UObject { 79 public: 80 /** 81 * Destructor. 82 * @stable ICU 4.4 83 */ 84 ~Normalizer2(); 85 86 /** 87 * Returns a Normalizer2 instance for Unicode NFC normalization. 88 * Same as getInstance(NULL, "nfc", UNORM2_COMPOSE, errorCode). 89 * Returns an unmodifiable singleton instance. Do not delete it. 90 * @param errorCode Standard ICU error code. Its input value must 91 * pass the U_SUCCESS() test, or else the function returns 92 * immediately. Check for U_FAILURE() on output or use with 93 * function chaining. (See User Guide for details.) 94 * @return the requested Normalizer2, if successful 95 * @stable ICU 49 96 */ 97 static const Normalizer2 * 98 getNFCInstance(UErrorCode &errorCode); 99 100 /** 101 * Returns a Normalizer2 instance for Unicode NFD normalization. 102 * Same as getInstance(NULL, "nfc", UNORM2_DECOMPOSE, errorCode). 103 * Returns an unmodifiable singleton instance. Do not delete it. 104 * @param errorCode Standard ICU error code. Its input value must 105 * pass the U_SUCCESS() test, or else the function returns 106 * immediately. Check for U_FAILURE() on output or use with 107 * function chaining. (See User Guide for details.) 108 * @return the requested Normalizer2, if successful 109 * @stable ICU 49 110 */ 111 static const Normalizer2 * 112 getNFDInstance(UErrorCode &errorCode); 113 114 /** 115 * Returns a Normalizer2 instance for Unicode NFKC normalization. 116 * Same as getInstance(NULL, "nfkc", UNORM2_COMPOSE, errorCode). 117 * Returns an unmodifiable singleton instance. Do not delete it. 118 * @param errorCode Standard ICU error code. Its input value must 119 * pass the U_SUCCESS() test, or else the function returns 120 * immediately. Check for U_FAILURE() on output or use with 121 * function chaining. (See User Guide for details.) 122 * @return the requested Normalizer2, if successful 123 * @stable ICU 49 124 */ 125 static const Normalizer2 * 126 getNFKCInstance(UErrorCode &errorCode); 127 128 /** 129 * Returns a Normalizer2 instance for Unicode NFKD normalization. 130 * Same as getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, errorCode). 131 * Returns an unmodifiable singleton instance. Do not delete it. 132 * @param errorCode Standard ICU error code. Its input value must 133 * pass the U_SUCCESS() test, or else the function returns 134 * immediately. Check for U_FAILURE() on output or use with 135 * function chaining. (See User Guide for details.) 136 * @return the requested Normalizer2, if successful 137 * @stable ICU 49 138 */ 139 static const Normalizer2 * 140 getNFKDInstance(UErrorCode &errorCode); 141 142 /** 143 * Returns a Normalizer2 instance for Unicode NFKC_Casefold normalization. 144 * Same as getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, errorCode). 145 * Returns an unmodifiable singleton instance. Do not delete it. 146 * @param errorCode Standard ICU error code. Its input value must 147 * pass the U_SUCCESS() test, or else the function returns 148 * immediately. Check for U_FAILURE() on output or use with 149 * function chaining. (See User Guide for details.) 150 * @return the requested Normalizer2, if successful 151 * @stable ICU 49 152 */ 153 static const Normalizer2 * 154 getNFKCCasefoldInstance(UErrorCode &errorCode); 155 156 /** 157 * Returns a Normalizer2 instance which uses the specified data file 158 * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle) 159 * and which composes or decomposes text according to the specified mode. 160 * Returns an unmodifiable singleton instance. Do not delete it. 161 * 162 * Use packageName=NULL for data files that are part of ICU's own data. 163 * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD. 164 * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD. 165 * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold. 166 * 167 * @param packageName NULL for ICU built-in data, otherwise application data package name 168 * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file 169 * @param mode normalization mode (compose or decompose etc.) 170 * @param errorCode Standard ICU error code. Its input value must 171 * pass the U_SUCCESS() test, or else the function returns 172 * immediately. Check for U_FAILURE() on output or use with 173 * function chaining. (See User Guide for details.) 174 * @return the requested Normalizer2, if successful 175 * @stable ICU 4.4 176 */ 177 static const Normalizer2 * 178 getInstance(const char *packageName, 179 const char *name, 180 UNormalization2Mode mode, 181 UErrorCode &errorCode); 182 183 /** 184 * Returns the normalized form of the source string. 185 * @param src source string 186 * @param errorCode Standard ICU error code. Its input value must 187 * pass the U_SUCCESS() test, or else the function returns 188 * immediately. Check for U_FAILURE() on output or use with 189 * function chaining. (See User Guide for details.) 190 * @return normalized src 191 * @stable ICU 4.4 192 */ 193 UnicodeString normalize(const UnicodeString & src,UErrorCode & errorCode)194 normalize(const UnicodeString &src, UErrorCode &errorCode) const { 195 UnicodeString result; 196 normalize(src, result, errorCode); 197 return result; 198 } 199 /** 200 * Writes the normalized form of the source string to the destination string 201 * (replacing its contents) and returns the destination string. 202 * The source and destination strings must be different objects. 203 * @param src source string 204 * @param dest destination string; its contents is replaced with normalized src 205 * @param errorCode Standard ICU error code. Its input value must 206 * pass the U_SUCCESS() test, or else the function returns 207 * immediately. Check for U_FAILURE() on output or use with 208 * function chaining. (See User Guide for details.) 209 * @return dest 210 * @stable ICU 4.4 211 */ 212 virtual UnicodeString & 213 normalize(const UnicodeString &src, 214 UnicodeString &dest, 215 UErrorCode &errorCode) const = 0; 216 /** 217 * Appends the normalized form of the second string to the first string 218 * (merging them at the boundary) and returns the first string. 219 * The result is normalized if the first string was normalized. 220 * The first and second strings must be different objects. 221 * @param first string, should be normalized 222 * @param second string, will be normalized 223 * @param errorCode Standard ICU error code. Its input value must 224 * pass the U_SUCCESS() test, or else the function returns 225 * immediately. Check for U_FAILURE() on output or use with 226 * function chaining. (See User Guide for details.) 227 * @return first 228 * @stable ICU 4.4 229 */ 230 virtual UnicodeString & 231 normalizeSecondAndAppend(UnicodeString &first, 232 const UnicodeString &second, 233 UErrorCode &errorCode) const = 0; 234 /** 235 * Appends the second string to the first string 236 * (merging them at the boundary) and returns the first string. 237 * The result is normalized if both the strings were normalized. 238 * The first and second strings must be different objects. 239 * @param first string, should be normalized 240 * @param second string, should be normalized 241 * @param errorCode Standard ICU error code. Its input value must 242 * pass the U_SUCCESS() test, or else the function returns 243 * immediately. Check for U_FAILURE() on output or use with 244 * function chaining. (See User Guide for details.) 245 * @return first 246 * @stable ICU 4.4 247 */ 248 virtual UnicodeString & 249 append(UnicodeString &first, 250 const UnicodeString &second, 251 UErrorCode &errorCode) const = 0; 252 253 /** 254 * Gets the decomposition mapping of c. 255 * Roughly equivalent to normalizing the String form of c 256 * on a UNORM2_DECOMPOSE Normalizer2 instance, but much faster, and except that this function 257 * returns FALSE and does not write a string 258 * if c does not have a decomposition mapping in this instance's data. 259 * This function is independent of the mode of the Normalizer2. 260 * @param c code point 261 * @param decomposition String object which will be set to c's 262 * decomposition mapping, if there is one. 263 * @return TRUE if c has a decomposition, otherwise FALSE 264 * @stable ICU 4.6 265 */ 266 virtual UBool 267 getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0; 268 269 /** 270 * Gets the raw decomposition mapping of c. 271 * 272 * This is similar to the getDecomposition() method but returns the 273 * raw decomposition mapping as specified in UnicodeData.txt or 274 * (for custom data) in the mapping files processed by the gennorm2 tool. 275 * By contrast, getDecomposition() returns the processed, 276 * recursively-decomposed version of this mapping. 277 * 278 * When used on a standard NFKC Normalizer2 instance, 279 * getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property. 280 * 281 * When used on a standard NFC Normalizer2 instance, 282 * it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can); 283 * in this case, the result contains either one or two code points (=1..4 UChars). 284 * 285 * This function is independent of the mode of the Normalizer2. 286 * The default implementation returns FALSE. 287 * @param c code point 288 * @param decomposition String object which will be set to c's 289 * raw decomposition mapping, if there is one. 290 * @return TRUE if c has a decomposition, otherwise FALSE 291 * @stable ICU 49 292 */ 293 virtual UBool 294 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const; 295 296 /** 297 * Performs pairwise composition of a & b and returns the composite if there is one. 298 * 299 * Returns a composite code point c only if c has a two-way mapping to a+b. 300 * In standard Unicode normalization, this means that 301 * c has a canonical decomposition to a+b 302 * and c does not have the Full_Composition_Exclusion property. 303 * 304 * This function is independent of the mode of the Normalizer2. 305 * The default implementation returns a negative value. 306 * @param a A (normalization starter) code point. 307 * @param b Another code point. 308 * @return The non-negative composite code point if there is one; otherwise a negative value. 309 * @stable ICU 49 310 */ 311 virtual UChar32 312 composePair(UChar32 a, UChar32 b) const; 313 314 /** 315 * Gets the combining class of c. 316 * The default implementation returns 0 317 * but all standard implementations return the Unicode Canonical_Combining_Class value. 318 * @param c code point 319 * @return c's combining class 320 * @stable ICU 49 321 */ 322 virtual uint8_t 323 getCombiningClass(UChar32 c) const; 324 325 /** 326 * Tests if the string is normalized. 327 * Internally, in cases where the quickCheck() method would return "maybe" 328 * (which is only possible for the two COMPOSE modes) this method 329 * resolves to "yes" or "no" to provide a definitive result, 330 * at the cost of doing more work in those cases. 331 * @param s input string 332 * @param errorCode Standard ICU error code. Its input value must 333 * pass the U_SUCCESS() test, or else the function returns 334 * immediately. Check for U_FAILURE() on output or use with 335 * function chaining. (See User Guide for details.) 336 * @return TRUE if s is normalized 337 * @stable ICU 4.4 338 */ 339 virtual UBool 340 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0; 341 342 /** 343 * Tests if the string is normalized. 344 * For the two COMPOSE modes, the result could be "maybe" in cases that 345 * would take a little more work to resolve definitively. 346 * Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster 347 * combination of quick check + normalization, to avoid 348 * re-checking the "yes" prefix. 349 * @param s input string 350 * @param errorCode Standard ICU error code. Its input value must 351 * pass the U_SUCCESS() test, or else the function returns 352 * immediately. Check for U_FAILURE() on output or use with 353 * function chaining. (See User Guide for details.) 354 * @return UNormalizationCheckResult 355 * @stable ICU 4.4 356 */ 357 virtual UNormalizationCheckResult 358 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0; 359 360 /** 361 * Returns the end of the normalized substring of the input string. 362 * In other words, with <code>end=spanQuickCheckYes(s, ec);</code> 363 * the substring <code>UnicodeString(s, 0, end)</code> 364 * will pass the quick check with a "yes" result. 365 * 366 * The returned end index is usually one or more characters before the 367 * "no" or "maybe" character: The end index is at a normalization boundary. 368 * (See the class documentation for more about normalization boundaries.) 369 * 370 * When the goal is a normalized string and most input strings are expected 371 * to be normalized already, then call this method, 372 * and if it returns a prefix shorter than the input string, 373 * copy that prefix and use normalizeSecondAndAppend() for the remainder. 374 * @param s input string 375 * @param errorCode Standard ICU error code. Its input value must 376 * pass the U_SUCCESS() test, or else the function returns 377 * immediately. Check for U_FAILURE() on output or use with 378 * function chaining. (See User Guide for details.) 379 * @return "yes" span end index 380 * @stable ICU 4.4 381 */ 382 virtual int32_t 383 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0; 384 385 /** 386 * Tests if the character always has a normalization boundary before it, 387 * regardless of context. 388 * If true, then the character does not normalization-interact with 389 * preceding characters. 390 * In other words, a string containing this character can be normalized 391 * by processing portions before this character and starting from this 392 * character independently. 393 * This is used for iterative normalization. See the class documentation for details. 394 * @param c character to test 395 * @return TRUE if c has a normalization boundary before it 396 * @stable ICU 4.4 397 */ 398 virtual UBool hasBoundaryBefore(UChar32 c) const = 0; 399 400 /** 401 * Tests if the character always has a normalization boundary after it, 402 * regardless of context. 403 * If true, then the character does not normalization-interact with 404 * following characters. 405 * In other words, a string containing this character can be normalized 406 * by processing portions up to this character and after this 407 * character independently. 408 * This is used for iterative normalization. See the class documentation for details. 409 * Note that this operation may be significantly slower than hasBoundaryBefore(). 410 * @param c character to test 411 * @return TRUE if c has a normalization boundary after it 412 * @stable ICU 4.4 413 */ 414 virtual UBool hasBoundaryAfter(UChar32 c) const = 0; 415 416 /** 417 * Tests if the character is normalization-inert. 418 * If true, then the character does not change, nor normalization-interact with 419 * preceding or following characters. 420 * In other words, a string containing this character can be normalized 421 * by processing portions before this character and after this 422 * character independently. 423 * This is used for iterative normalization. See the class documentation for details. 424 * Note that this operation may be significantly slower than hasBoundaryBefore(). 425 * @param c character to test 426 * @return TRUE if c is normalization-inert 427 * @stable ICU 4.4 428 */ 429 virtual UBool isInert(UChar32 c) const = 0; 430 }; 431 432 /** 433 * Normalization filtered by a UnicodeSet. 434 * Normalizes portions of the text contained in the filter set and leaves 435 * portions not contained in the filter set unchanged. 436 * Filtering is done via UnicodeSet::span(..., USET_SPAN_SIMPLE). 437 * Not-in-the-filter text is treated as "is normalized" and "quick check yes". 438 * This class implements all of (and only) the Normalizer2 API. 439 * An instance of this class is unmodifiable/immutable but is constructed and 440 * must be destructed by the owner. 441 * @stable ICU 4.4 442 */ 443 class U_COMMON_API FilteredNormalizer2 : public Normalizer2 { 444 public: 445 /** 446 * Constructs a filtered normalizer wrapping any Normalizer2 instance 447 * and a filter set. 448 * Both are aliased and must not be modified or deleted while this object 449 * is used. 450 * The filter set should be frozen; otherwise the performance will suffer greatly. 451 * @param n2 wrapped Normalizer2 instance 452 * @param filterSet UnicodeSet which determines the characters to be normalized 453 * @stable ICU 4.4 454 */ FilteredNormalizer2(const Normalizer2 & n2,const UnicodeSet & filterSet)455 FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) : 456 norm2(n2), set(filterSet) {} 457 458 /** 459 * Destructor. 460 * @stable ICU 4.4 461 */ 462 ~FilteredNormalizer2(); 463 464 /** 465 * Writes the normalized form of the source string to the destination string 466 * (replacing its contents) and returns the destination string. 467 * The source and destination strings must be different objects. 468 * @param src source string 469 * @param dest destination string; its contents is replaced with normalized src 470 * @param errorCode Standard ICU error code. Its input value must 471 * pass the U_SUCCESS() test, or else the function returns 472 * immediately. Check for U_FAILURE() on output or use with 473 * function chaining. (See User Guide for details.) 474 * @return dest 475 * @stable ICU 4.4 476 */ 477 virtual UnicodeString & 478 normalize(const UnicodeString &src, 479 UnicodeString &dest, 480 UErrorCode &errorCode) const; 481 /** 482 * Appends the normalized form of the second string to the first string 483 * (merging them at the boundary) and returns the first string. 484 * The result is normalized if the first string was normalized. 485 * The first and second strings must be different objects. 486 * @param first string, should be normalized 487 * @param second string, will be normalized 488 * @param errorCode Standard ICU error code. Its input value must 489 * pass the U_SUCCESS() test, or else the function returns 490 * immediately. Check for U_FAILURE() on output or use with 491 * function chaining. (See User Guide for details.) 492 * @return first 493 * @stable ICU 4.4 494 */ 495 virtual UnicodeString & 496 normalizeSecondAndAppend(UnicodeString &first, 497 const UnicodeString &second, 498 UErrorCode &errorCode) const; 499 /** 500 * Appends the second string to the first string 501 * (merging them at the boundary) and returns the first string. 502 * The result is normalized if both the strings were normalized. 503 * The first and second strings must be different objects. 504 * @param first string, should be normalized 505 * @param second string, should be normalized 506 * @param errorCode Standard ICU error code. Its input value must 507 * pass the U_SUCCESS() test, or else the function returns 508 * immediately. Check for U_FAILURE() on output or use with 509 * function chaining. (See User Guide for details.) 510 * @return first 511 * @stable ICU 4.4 512 */ 513 virtual UnicodeString & 514 append(UnicodeString &first, 515 const UnicodeString &second, 516 UErrorCode &errorCode) const; 517 518 /** 519 * Gets the decomposition mapping of c. 520 * For details see the base class documentation. 521 * 522 * This function is independent of the mode of the Normalizer2. 523 * @param c code point 524 * @param decomposition String object which will be set to c's 525 * decomposition mapping, if there is one. 526 * @return TRUE if c has a decomposition, otherwise FALSE 527 * @stable ICU 4.6 528 */ 529 virtual UBool 530 getDecomposition(UChar32 c, UnicodeString &decomposition) const; 531 532 /** 533 * Gets the raw decomposition mapping of c. 534 * For details see the base class documentation. 535 * 536 * This function is independent of the mode of the Normalizer2. 537 * @param c code point 538 * @param decomposition String object which will be set to c's 539 * raw decomposition mapping, if there is one. 540 * @return TRUE if c has a decomposition, otherwise FALSE 541 * @stable ICU 49 542 */ 543 virtual UBool 544 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const; 545 546 /** 547 * Performs pairwise composition of a & b and returns the composite if there is one. 548 * For details see the base class documentation. 549 * 550 * This function is independent of the mode of the Normalizer2. 551 * @param a A (normalization starter) code point. 552 * @param b Another code point. 553 * @return The non-negative composite code point if there is one; otherwise a negative value. 554 * @stable ICU 49 555 */ 556 virtual UChar32 557 composePair(UChar32 a, UChar32 b) const; 558 559 /** 560 * Gets the combining class of c. 561 * The default implementation returns 0 562 * but all standard implementations return the Unicode Canonical_Combining_Class value. 563 * @param c code point 564 * @return c's combining class 565 * @stable ICU 49 566 */ 567 virtual uint8_t 568 getCombiningClass(UChar32 c) const; 569 570 /** 571 * Tests if the string is normalized. 572 * For details see the Normalizer2 base class documentation. 573 * @param s input string 574 * @param errorCode Standard ICU error code. Its input value must 575 * pass the U_SUCCESS() test, or else the function returns 576 * immediately. Check for U_FAILURE() on output or use with 577 * function chaining. (See User Guide for details.) 578 * @return TRUE if s is normalized 579 * @stable ICU 4.4 580 */ 581 virtual UBool 582 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const; 583 /** 584 * Tests if the string is normalized. 585 * For details see the Normalizer2 base class documentation. 586 * @param s input string 587 * @param errorCode Standard ICU error code. Its input value must 588 * pass the U_SUCCESS() test, or else the function returns 589 * immediately. Check for U_FAILURE() on output or use with 590 * function chaining. (See User Guide for details.) 591 * @return UNormalizationCheckResult 592 * @stable ICU 4.4 593 */ 594 virtual UNormalizationCheckResult 595 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const; 596 /** 597 * Returns the end of the normalized substring of the input string. 598 * For details see the Normalizer2 base class documentation. 599 * @param s input string 600 * @param errorCode Standard ICU error code. Its input value must 601 * pass the U_SUCCESS() test, or else the function returns 602 * immediately. Check for U_FAILURE() on output or use with 603 * function chaining. (See User Guide for details.) 604 * @return "yes" span end index 605 * @stable ICU 4.4 606 */ 607 virtual int32_t 608 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const; 609 610 /** 611 * Tests if the character always has a normalization boundary before it, 612 * regardless of context. 613 * For details see the Normalizer2 base class documentation. 614 * @param c character to test 615 * @return TRUE if c has a normalization boundary before it 616 * @stable ICU 4.4 617 */ 618 virtual UBool hasBoundaryBefore(UChar32 c) const; 619 620 /** 621 * Tests if the character always has a normalization boundary after it, 622 * regardless of context. 623 * For details see the Normalizer2 base class documentation. 624 * @param c character to test 625 * @return TRUE if c has a normalization boundary after it 626 * @stable ICU 4.4 627 */ 628 virtual UBool hasBoundaryAfter(UChar32 c) const; 629 630 /** 631 * Tests if the character is normalization-inert. 632 * For details see the Normalizer2 base class documentation. 633 * @param c character to test 634 * @return TRUE if c is normalization-inert 635 * @stable ICU 4.4 636 */ 637 virtual UBool isInert(UChar32 c) const; 638 private: 639 UnicodeString & 640 normalize(const UnicodeString &src, 641 UnicodeString &dest, 642 USetSpanCondition spanCondition, 643 UErrorCode &errorCode) const; 644 645 UnicodeString & 646 normalizeSecondAndAppend(UnicodeString &first, 647 const UnicodeString &second, 648 UBool doNormalize, 649 UErrorCode &errorCode) const; 650 651 const Normalizer2 &norm2; 652 const UnicodeSet &set; 653 }; 654 655 U_NAMESPACE_END 656 657 #endif // !UCONFIG_NO_NORMALIZATION 658 #endif // __NORMALIZER2_H__ 659