1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 *   Copyright (c) 2001-2011, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 **********************************************************************
8 *   Date        Name        Description
9 *   11/19/2001  aliu        Creation.
10 **********************************************************************
11 */
12 
13 #include "unicode/utypes.h"
14 
15 #if !UCONFIG_NO_TRANSLITERATION
16 
17 #include "unicode/utf16.h"
18 #include "esctrn.h"
19 #include "util.h"
20 
21 U_NAMESPACE_BEGIN
22 
23 static const UChar UNIPRE[] = {85,43,0}; // "U+"
24 static const UChar BS_u[] = {92,117,0}; // "\\u"
25 static const UChar BS_U[] = {92,85,0}; // "\\U"
26 static const UChar XMLPRE[] = {38,35,120,0}; // "&#x"
27 static const UChar XML10PRE[] = {38,35,0}; // "&#"
28 static const UChar PERLPRE[] = {92,120,123,0}; // "\\x{"
29 static const UChar SEMI[] = {59,0}; // ";"
30 static const UChar RBRACE[] = {125,0}; // "}"
31 
// Expands the RTTI implementation macro for EscapeTransliterator.
// NOTE(review): presumably this supplies the class-ID member definitions
// declared in esctrn.h -- confirm against the macro's definition.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(EscapeTransliterator)
33 
34 /**
35  * Factory methods
36  */
37 static Transliterator* _createEscUnicode(const UnicodeString& ID, Transliterator::Token /*context*/) {
38     // Unicode: "U+10FFFF" hex, min=4, max=6
39     return new EscapeTransliterator(ID, UnicodeString(TRUE, UNIPRE, 2), UnicodeString(), 16, 4, TRUE, NULL);
40 }
_createEscJava(const UnicodeString & ID,Transliterator::Token)41 static Transliterator* _createEscJava(const UnicodeString& ID, Transliterator::Token /*context*/) {
42     // Java: "\\uFFFF" hex, min=4, max=4
43     return new EscapeTransliterator(ID, UnicodeString(TRUE, BS_u, 2), UnicodeString(), 16, 4, FALSE, NULL);
44 }
_createEscC(const UnicodeString & ID,Transliterator::Token)45 static Transliterator* _createEscC(const UnicodeString& ID, Transliterator::Token /*context*/) {
46     // C: "\\uFFFF" hex, min=4, max=4; \\U0010FFFF hex, min=8, max=8
47     return new EscapeTransliterator(ID, UnicodeString(TRUE, BS_u, 2), UnicodeString(), 16, 4, TRUE,
48              new EscapeTransliterator(UnicodeString(), UnicodeString(TRUE, BS_U, 2), UnicodeString(), 16, 8, TRUE, NULL));
49 }
_createEscXML(const UnicodeString & ID,Transliterator::Token)50 static Transliterator* _createEscXML(const UnicodeString& ID, Transliterator::Token /*context*/) {
51     // XML: "" hex, min=1, max=6
52     return new EscapeTransliterator(ID, UnicodeString(TRUE, XMLPRE, 3), UnicodeString(SEMI[0]), 16, 1, TRUE, NULL);
53 }
_createEscXML10(const UnicodeString & ID,Transliterator::Token)54 static Transliterator* _createEscXML10(const UnicodeString& ID, Transliterator::Token /*context*/) {
55     // XML10: "&1114111;" dec, min=1, max=7 (not really "Any-Hex")
56     return new EscapeTransliterator(ID, UnicodeString(TRUE, XML10PRE, 2), UnicodeString(SEMI[0]), 10, 1, TRUE, NULL);
57 }
_createEscPerl(const UnicodeString & ID,Transliterator::Token)58 static Transliterator* _createEscPerl(const UnicodeString& ID, Transliterator::Token /*context*/) {
59     // Perl: "\\x{263A}" hex, min=1, max=6
60     return new EscapeTransliterator(ID, UnicodeString(TRUE, PERLPRE, 3), UnicodeString(RBRACE[0]), 16, 1, TRUE, NULL);
61 }
62 
63 /**
64  * Registers standard variants with the system.  Called by
65  * Transliterator during initialization.
66  */
registerIDs()67 void EscapeTransliterator::registerIDs() {
68     Token t = integerToken(0);
69 
70     Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Unicode"), _createEscUnicode, t);
71 
72     Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Java"), _createEscJava, t);
73 
74     Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/C"), _createEscC, t);
75 
76     Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML"), _createEscXML, t);
77 
78     Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML10"), _createEscXML10, t);
79 
80     Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Perl"), _createEscPerl, t);
81 
82     Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex"), _createEscJava, t);
83 }
84 
85 /**
86  * Constructs an escape transliterator with the given ID and
87  * parameters.  See the class member documentation for details.
88  */
EscapeTransliterator(const UnicodeString & newID,const UnicodeString & _prefix,const UnicodeString & _suffix,int32_t _radix,int32_t _minDigits,UBool _grokSupplementals,EscapeTransliterator * adoptedSupplementalHandler)89 EscapeTransliterator::EscapeTransliterator(const UnicodeString& newID,
90                          const UnicodeString& _prefix, const UnicodeString& _suffix,
91                          int32_t _radix, int32_t _minDigits,
92                          UBool _grokSupplementals,
93                          EscapeTransliterator* adoptedSupplementalHandler) :
94     Transliterator(newID, NULL)
95 {
96     this->prefix = _prefix;
97     this->suffix = _suffix;
98     this->radix = _radix;
99     this->minDigits = _minDigits;
100     this->grokSupplementals = _grokSupplementals;
101     this->supplementalHandler = adoptedSupplementalHandler;
102 }
103 
104 /**
105  * Copy constructor.
106  */
EscapeTransliterator(const EscapeTransliterator & o)107 EscapeTransliterator::EscapeTransliterator(const EscapeTransliterator& o) :
108     Transliterator(o),
109     prefix(o.prefix),
110     suffix(o.suffix),
111     radix(o.radix),
112     minDigits(o.minDigits),
113     grokSupplementals(o.grokSupplementals) {
114     supplementalHandler = (o.supplementalHandler != 0) ?
115         new EscapeTransliterator(*o.supplementalHandler) : NULL;
116 }
117 
~EscapeTransliterator()118 EscapeTransliterator::~EscapeTransliterator() {
119     delete supplementalHandler;
120 }
121 
122 /**
123  * Transliterator API.
124  */
clone() const125 Transliterator* EscapeTransliterator::clone() const {
126     return new EscapeTransliterator(*this);
127 }
128 
129 /**
130  * Implements {@link Transliterator#handleTransliterate}.
131  */
handleTransliterate(Replaceable & text,UTransPosition & pos,UBool) const132 void EscapeTransliterator::handleTransliterate(Replaceable& text,
133                                                UTransPosition& pos,
134                                                UBool /*isIncremental*/) const
135 {
136     /* TODO: Verify that isIncremental can be ignored */
137     int32_t start = pos.start;
138     int32_t limit = pos.limit;
139 
140     UnicodeString buf(prefix);
141     int32_t prefixLen = prefix.length();
142     UBool redoPrefix = FALSE;
143 
144     while (start < limit) {
145         int32_t c = grokSupplementals ? text.char32At(start) : text.charAt(start);
146         int32_t charLen = grokSupplementals ? U16_LENGTH(c) : 1;
147 
148         if ((c & 0xFFFF0000) != 0 && supplementalHandler != NULL) {
149             buf.truncate(0);
150             buf.append(supplementalHandler->prefix);
151             ICU_Utility::appendNumber(buf, c, supplementalHandler->radix,
152                                   supplementalHandler->minDigits);
153             buf.append(supplementalHandler->suffix);
154             redoPrefix = TRUE;
155         } else {
156             if (redoPrefix) {
157                 buf.truncate(0);
158                 buf.append(prefix);
159                 redoPrefix = FALSE;
160             } else {
161                 buf.truncate(prefixLen);
162             }
163             ICU_Utility::appendNumber(buf, c, radix, minDigits);
164             buf.append(suffix);
165         }
166 
167         text.handleReplaceBetween(start, start + charLen, buf);
168         start += buf.length();
169         limit += buf.length() - charLen;
170     }
171 
172     pos.contextLimit += limit - pos.limit;
173     pos.limit = limit;
174     pos.start = start;
175 }
176 
177 U_NAMESPACE_END
178 
179 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
180 
181 //eof
182