1 /*
2 **********************************************************************
3 *   Copyright (c) 2001-2007, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 **********************************************************************
6 *   Date        Name        Description
7 *   11/20/2001  aliu        Creation.
8 **********************************************************************
9 */
10 #ifndef ESCTRN_H
11 #define ESCTRN_H
12 
13 #include "unicode/utypes.h"
14 
15 #if !UCONFIG_NO_TRANSLITERATION
16 
17 #include "unicode/translit.h"
18 
19 U_NAMESPACE_BEGIN
20 
21 /**
22  * A transliterator that converts Unicode characters to an escape
23  * form.  Examples of escape forms are "U+4E01" and "&#x10FFFF;".
24  * Escape forms have a prefix and suffix, either of which may be
25  * empty, a radix, typically 16 or 10, a minimum digit count,
26  * typically 1, 4, or 8, and a boolean that specifies whether
27  * supplemental characters are handled as 32-bit code points or as two
28  * 16-bit code units.  Most escape forms handle 32-bit code points,
29  * but some, such as the Java form, intentionally break them into a
30  * surrogate pair (two 16-bit code units), for backward compatibility.
31  *
32  * <p>Some escape forms actually have two different patterns, one for
33  * BMP characters (0..FFFF) and one for supplementals (>FFFF).  To
34  * handle this, a second EscapeTransliterator may be defined that
35  * specifies the pattern to be produced for supplementals.  An example
36  * of a form that requires this is the C form, which uses "\\uFFFF"
37  * for BMP characters and "\\U0010FFFF" for supplementals.
38  *
39  * <p>This class is package private.  It registers several standard
40  * variants with the system which are then accessed via their IDs.
41  *
42  * @author Alan Liu
43  */
44 class EscapeTransliterator : public Transliterator {
45 
46  private:
47 
48     /**
49      * The prefix of the escape form; may be empty, but usually isn't.
50      */
51     UnicodeString prefix;
52 
53     /**
54      * The suffix of the escape form; often empty.
55      */
56     UnicodeString suffix;
57 
58     /**
59      * The radix to display the number in.  Typically 16 or 10.  Must
60      * be in the range 2 to 36.
61      */
62     int32_t radix;
63 
64     /**
65      * The minimum number of digits.  Typically 1, 4, or 8.  Values
66      * less than 1 are equivalent to 1.
67      */
68     int32_t minDigits;
69 
70     /**
71      * If true, supplementals are handled as 32-bit code points.  If
72      * false, they are handled as two 16-bit code units.
73      */
74     UBool grokSupplementals;
75 
76     /**
77      * The form to be used for supplementals.  If this is null then
78      * the same form is used for BMP characters and supplementals.  If
79      * this is not null and if grokSupplementals is true then the
80      * prefix, suffix, radix, and minDigits of this object are used
81      * for supplementals.  This pointer is owned.
82      */
83     EscapeTransliterator* supplementalHandler;
84 
85  public:
86 
87     /**
88      * Registers standard variants with the system.  Called by
89      * Transliterator during initialization.
90      */
91     static void registerIDs();
92 
93     /**
94      * Constructs an escape transliterator with the given ID and
95      * parameters.  See the class member documentation for details.
        * NOTE(review): adoptedSupplementalHandler presumably becomes the
        * owned supplementalHandler member (and so may be null) -- confirm
        * against the implementation file.
96      */
97     EscapeTransliterator(const UnicodeString& ID,
98                          const UnicodeString& prefix, const UnicodeString& suffix,
99                          int32_t radix, int32_t minDigits,
100                          UBool grokSupplementals,
101                          EscapeTransliterator* adoptedSupplementalHandler);
102 
103     /**
104      * Copy constructor.
         * NOTE(review): this class declares a copy constructor and a
         * destructor but no copy-assignment operator, while
         * supplementalHandler is documented as owned -- confirm that
         * assignment is never used on objects of this type.
105      */
106     EscapeTransliterator(const EscapeTransliterator&);
107 
108     /**
109      * Destructor.
110      */
111     virtual ~EscapeTransliterator();
112 
113     /**
114      * Transliterator API.
115      */
116     virtual Transliterator* clone() const;
117 
118     /**
119      * ICU "poor man's RTTI", returns a UClassID for the actual class.
120      */
121     virtual UClassID getDynamicClassID() const;
122 
123     /**
124      * ICU "poor man's RTTI", returns a UClassID for this class.
125      */
126     U_I18N_API static UClassID U_EXPORT2 getStaticClassID();
127 
128  protected:
129 
130     /**
131      * Implements {@link Transliterator#handleTransliterate}.
132      */
133     virtual void handleTransliterate(Replaceable& text, UTransPosition& offset,
134                              UBool isIncremental) const;
135 
136 };
137 
138 U_NAMESPACE_END
139 
140 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
141 
142 #endif
143