1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 *   Copyright (c) 2001-2007, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 **********************************************************************
8 *   Date        Name        Description
9 *   11/20/2001  aliu        Creation.
10 **********************************************************************
11 */
12 #ifndef ESCTRN_H
13 #define ESCTRN_H
14 
15 #include "unicode/utypes.h"
16 
17 #if !UCONFIG_NO_TRANSLITERATION
18 
19 #include "unicode/translit.h"
20 
21 U_NAMESPACE_BEGIN
22 
23 /**
24  * A transliterator that converts Unicode characters to an escape
25  * form.  Examples of escape forms are "U+4E01" and "&#x10FFFF;".
26  * Escape forms have a prefix and suffix, either of which may be
27  * empty, a radix, typically 16 or 10, a minimum digit count,
28  * typically 1, 4, or 8, and a boolean that specifies whether
29  * supplemental characters are handled as 32-bit code points or as two
30  * 16-bit code units.  Most escape forms handle 32-bit code points,
31  * but some, such as the Java form, intentionally break them into a
32  * surrogate pair (two 16-bit code units), for backward compatibility.
33  *
34  * <p>Some escape forms actually have two different patterns, one for
35  * BMP characters (0..FFFF) and one for supplementals (>FFFF).  To
36  * handle this, a second EscapeTransliterator may be defined that
37  * specifies the pattern to be produced for supplementals.  An example
38  * of a form that requires this is the C form, which uses "\\uFFFF"
39  * for BMP characters and "\\U0010FFFF" for supplementals.
40  *
41  * <p>This class is package private.  It registers several standard
42  * variants with the system which are then accessed via their IDs.
43  *
44  * @author Alan Liu
45  */
46 class EscapeTransliterator : public Transliterator {
47 
48  private:
49 
50     /**
51      * The prefix of the escape form; may be empty, but usually isn't.
52      */
53     UnicodeString prefix;
54 
55     /**
56      * The suffix of the escape form; often empty.
57      */
58     UnicodeString suffix;
59 
60     /**
61      * The radix to display the number in.  Typically 16 or 10.  Must
62      * be in the range 2 to 36.
63      */
64     int32_t radix;
65 
66     /**
67      * The minimum number of digits.  Typically 1, 4, or 8.  Values
68      * less than 1 are equivalent to 1.
69      */
70     int32_t minDigits;
71 
72     /**
73      * If true, supplementals are handled as 32-bit code points.  If
74      * false, they are handled as two 16-bit code units.
75      */
76     UBool grokSupplementals;
77 
78     /**
79      * The form to be used for supplementals.  If this is null then
80      * the same form is used for BMP characters and supplementals.  If
81      * this is not null and if grokSupplementals is true then the
82      * prefix, suffix, radix, and minDigits of this object are used
83      * for supplementals.  This pointer is owned.
84      */
85     EscapeTransliterator* supplementalHandler;
86 
87  public:
88 
89     /**
90      * Registers standard variants with the system.  Called by
91      * Transliterator during initialization.
92      */
93     static void registerIDs();
94 
95     /**
96      * Constructs an escape transliterator with the given ID and
97      * parameters (see the class member documentation).  The supplemental
98      * handler, which may be null, is adopted (owned) by this object. */
99     EscapeTransliterator(const UnicodeString& ID,
100                          const UnicodeString& prefix, const UnicodeString& suffix,
101                          int32_t radix, int32_t minDigits,
102                          UBool grokSupplementals,
103                          EscapeTransliterator* adoptedSupplementalHandler);
104 
105     /**
106      * Copy constructor.
107      */
108     EscapeTransliterator(const EscapeTransliterator&);
109 
110     /**
111      * Destructor.
112      */
113     virtual ~EscapeTransliterator();
114 
115     /**
116      * Transliterator API.
117      */
118     virtual Transliterator* clone() const;
119 
120     /**
121      * ICU "poor man's RTTI", returns a UClassID for the actual class.
122      */
123     virtual UClassID getDynamicClassID() const;
124 
125     /**
126      * ICU "poor man's RTTI", returns a UClassID for this class.
127      */
128     U_I18N_API static UClassID U_EXPORT2 getStaticClassID();
129 
130  protected:
131 
132     /**
133      * Implements {@link Transliterator#handleTransliterate}.
134      */
135     virtual void handleTransliterate(Replaceable& text, UTransPosition& offset,
136                              UBool isIncremental) const;
137 
138 };
139 
140 U_NAMESPACE_END
141 
142 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
143 
144 #endif
145