1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4  *******************************************************************************
5  *   Copyright (C) 1997-2009,2014 International Business Machines
6  *   Corporation and others.  All Rights Reserved.
7  *******************************************************************************
8  *   Date        Name        Description
9  *   06/21/00    aliu        Creation.
10  *******************************************************************************
11  */
12 
13 #include "unicode/utypes.h"
14 
15 #if !UCONFIG_NO_TRANSLITERATION
16 
17 #include "unicode/utrans.h"
18 #include "unicode/putil.h"
19 #include "unicode/rep.h"
20 #include "unicode/translit.h"
21 #include "unicode/unifilt.h"
22 #include "unicode/uniset.h"
23 #include "unicode/ustring.h"
24 #include "unicode/uenum.h"
25 #include "unicode/uset.h"
26 #include "uenumimp.h"
27 #include "cpputils.h"
28 #include "rbt.h"
29 
// Entry guard for functions that take a UErrorCode*: expands to an `if`
// that returns when the status pointer is NULL or already holds a failure.
// The expansion ends in a bare `return`, so the
// following macro is to be followed by <return value>';' or just ';'
#define utrans_ENTRY(s) if ((s)==NULL || U_FAILURE(*(s))) return
32 
33 /********************************************************************
34  * Replaceable-UReplaceableCallbacks glue
35  ********************************************************************/
36 
37 /**
38  * Make a UReplaceable + UReplaceableCallbacks into a Replaceable object.
39  */
40 U_NAMESPACE_BEGIN
41 class ReplaceableGlue : public Replaceable {
42 
43     UReplaceable *rep;
44     UReplaceableCallbacks *func;
45 
46 public:
47 
48     ReplaceableGlue(UReplaceable *replaceable,
49                     UReplaceableCallbacks *funcCallback);
50 
51     virtual ~ReplaceableGlue();
52 
53     virtual void handleReplaceBetween(int32_t start,
54                                       int32_t limit,
55                                       const UnicodeString& text);
56 
57     virtual void extractBetween(int32_t start,
58                                 int32_t limit,
59                                 UnicodeString& target) const;
60 
61     virtual void copy(int32_t start, int32_t limit, int32_t dest);
62 
63     // virtual Replaceable *clone() const { return NULL; } same as default
64 
65     /**
66      * ICU "poor man's RTTI", returns a UClassID for the actual class.
67      *
68      * @draft ICU 2.2
69      */
70     virtual UClassID getDynamicClassID() const;
71 
72     /**
73      * ICU "poor man's RTTI", returns a UClassID for this class.
74      *
75      * @draft ICU 2.2
76      */
77     static UClassID U_EXPORT2 getStaticClassID();
78 
79 protected:
80 
81     virtual int32_t getLength() const;
82 
83     virtual UChar getCharAt(int32_t offset) const;
84 
85     virtual UChar32 getChar32At(int32_t offset) const;
86 };
87 
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ReplaceableGlue)88 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ReplaceableGlue)
89 
90 ReplaceableGlue::ReplaceableGlue(UReplaceable *replaceable,
91                                  UReplaceableCallbacks *funcCallback)
92   : Replaceable()
93 {
94     this->rep = replaceable;
95     this->func = funcCallback;
96 }
97 
~ReplaceableGlue()98 ReplaceableGlue::~ReplaceableGlue() {}
99 
getLength() const100 int32_t ReplaceableGlue::getLength() const {
101     return (*func->length)(rep);
102 }
103 
getCharAt(int32_t offset) const104 UChar ReplaceableGlue::getCharAt(int32_t offset) const {
105     return (*func->charAt)(rep, offset);
106 }
107 
getChar32At(int32_t offset) const108 UChar32 ReplaceableGlue::getChar32At(int32_t offset) const {
109     return (*func->char32At)(rep, offset);
110 }
111 
handleReplaceBetween(int32_t start,int32_t limit,const UnicodeString & text)112 void ReplaceableGlue::handleReplaceBetween(int32_t start,
113                           int32_t limit,
114                           const UnicodeString& text) {
115     (*func->replace)(rep, start, limit, text.getBuffer(), text.length());
116 }
117 
extractBetween(int32_t start,int32_t limit,UnicodeString & target) const118 void ReplaceableGlue::extractBetween(int32_t start,
119                                      int32_t limit,
120                                      UnicodeString& target) const {
121     (*func->extract)(rep, start, limit, target.getBuffer(limit-start));
122     target.releaseBuffer(limit-start);
123 }
124 
copy(int32_t start,int32_t limit,int32_t dest)125 void ReplaceableGlue::copy(int32_t start, int32_t limit, int32_t dest) {
126     (*func->copy)(rep, start, limit, dest);
127 }
128 U_NAMESPACE_END
129 /********************************************************************
130  * General API
131  ********************************************************************/
132 U_NAMESPACE_USE
133 
134 U_CAPI UTransliterator* U_EXPORT2
utrans_openU(const UChar * id,int32_t idLength,UTransDirection dir,const UChar * rules,int32_t rulesLength,UParseError * parseError,UErrorCode * status)135 utrans_openU(const UChar *id,
136              int32_t idLength,
137              UTransDirection dir,
138              const UChar *rules,
139              int32_t rulesLength,
140              UParseError *parseError,
141              UErrorCode *status) {
142     if(status==NULL || U_FAILURE(*status)) {
143         return NULL;
144     }
145     if (id == NULL) {
146         *status = U_ILLEGAL_ARGUMENT_ERROR;
147         return NULL;
148     }
149     UParseError temp;
150 
151     if(parseError == NULL){
152         parseError = &temp;
153     }
154 
155     UnicodeString ID(idLength<0, id, idLength); // r-o alias
156 
157     if(rules==NULL){
158 
159         Transliterator *trans = NULL;
160 
161         trans = Transliterator::createInstance(ID, dir, *parseError, *status);
162 
163         if(U_FAILURE(*status)){
164             return NULL;
165         }
166         return (UTransliterator*) trans;
167     }else{
168         UnicodeString ruleStr(rulesLength < 0,
169                               rules,
170                               rulesLength); // r-o alias
171 
172         Transliterator *trans = NULL;
173         trans = Transliterator::createFromRules(ID, ruleStr, dir, *parseError, *status);
174         if(U_FAILURE(*status)) {
175             return NULL;
176         }
177 
178         return (UTransliterator*) trans;
179     }
180 }
181 
182 U_CAPI UTransliterator* U_EXPORT2
utrans_open(const char * id,UTransDirection dir,const UChar * rules,int32_t rulesLength,UParseError * parseError,UErrorCode * status)183 utrans_open(const char* id,
184             UTransDirection dir,
185             const UChar* rules,         /* may be Null */
186             int32_t rulesLength,        /* -1 if null-terminated */
187             UParseError* parseError,    /* may be Null */
188             UErrorCode* status) {
189     UnicodeString ID(id, -1, US_INV); // use invariant converter
190     return utrans_openU(ID.getBuffer(), ID.length(), dir,
191                         rules, rulesLength,
192                         parseError, status);
193 }
194 
195 U_CAPI UTransliterator* U_EXPORT2
utrans_openInverse(const UTransliterator * trans,UErrorCode * status)196 utrans_openInverse(const UTransliterator* trans,
197                    UErrorCode* status) {
198 
199     utrans_ENTRY(status) NULL;
200 
201     UTransliterator* result =
202         (UTransliterator*) ((Transliterator*) trans)->createInverse(*status);
203 
204     return result;
205 }
206 
207 U_CAPI UTransliterator* U_EXPORT2
utrans_clone(const UTransliterator * trans,UErrorCode * status)208 utrans_clone(const UTransliterator* trans,
209              UErrorCode* status) {
210 
211     utrans_ENTRY(status) NULL;
212 
213     if (trans == NULL) {
214         *status = U_ILLEGAL_ARGUMENT_ERROR;
215         return NULL;
216     }
217 
218     Transliterator *t = ((Transliterator*) trans)->clone();
219     if (t == NULL) {
220         *status = U_MEMORY_ALLOCATION_ERROR;
221     }
222     return (UTransliterator*) t;
223 }
224 
225 U_CAPI void U_EXPORT2
utrans_close(UTransliterator * trans)226 utrans_close(UTransliterator* trans) {
227     delete (Transliterator*) trans;
228 }
229 
230 U_CAPI const UChar * U_EXPORT2
utrans_getUnicodeID(const UTransliterator * trans,int32_t * resultLength)231 utrans_getUnicodeID(const UTransliterator *trans,
232                     int32_t *resultLength) {
233     // Transliterator keeps its ID NUL-terminated
234     const UnicodeString &ID=((Transliterator*) trans)->getID();
235     if(resultLength!=NULL) {
236         *resultLength=ID.length();
237     }
238     return ID.getBuffer();
239 }
240 
241 U_CAPI int32_t U_EXPORT2
utrans_getID(const UTransliterator * trans,char * buf,int32_t bufCapacity)242 utrans_getID(const UTransliterator* trans,
243              char* buf,
244              int32_t bufCapacity) {
245     return ((Transliterator*) trans)->getID().extract(0, 0x7fffffff, buf, bufCapacity, US_INV);
246 }
247 
248 U_CAPI void U_EXPORT2
utrans_register(UTransliterator * adoptedTrans,UErrorCode * status)249 utrans_register(UTransliterator* adoptedTrans,
250                 UErrorCode* status) {
251     utrans_ENTRY(status);
252     // status currently ignored; may remove later
253     Transliterator::registerInstance((Transliterator*) adoptedTrans);
254 }
255 
256 U_CAPI void U_EXPORT2
utrans_unregisterID(const UChar * id,int32_t idLength)257 utrans_unregisterID(const UChar* id, int32_t idLength) {
258     UnicodeString ID(idLength<0, id, idLength); // r-o alias
259     Transliterator::unregister(ID);
260 }
261 
262 U_CAPI void U_EXPORT2
utrans_unregister(const char * id)263 utrans_unregister(const char* id) {
264     UnicodeString ID(id, -1, US_INV); // use invariant converter
265     Transliterator::unregister(ID);
266 }
267 
268 U_CAPI void U_EXPORT2
utrans_setFilter(UTransliterator * trans,const UChar * filterPattern,int32_t filterPatternLen,UErrorCode * status)269 utrans_setFilter(UTransliterator* trans,
270                  const UChar* filterPattern,
271                  int32_t filterPatternLen,
272                  UErrorCode* status) {
273 
274     utrans_ENTRY(status);
275     UnicodeFilter* filter = NULL;
276     if (filterPattern != NULL && *filterPattern != 0) {
277         // Create read only alias of filterPattern:
278         UnicodeString pat(filterPatternLen < 0, filterPattern, filterPatternLen);
279         filter = new UnicodeSet(pat, *status);
280         /* test for NULL */
281         if (filter == NULL) {
282             *status = U_MEMORY_ALLOCATION_ERROR;
283             return;
284         }
285         if (U_FAILURE(*status)) {
286             delete filter;
287             filter = NULL;
288         }
289     }
290     ((Transliterator*) trans)->adoptFilter(filter);
291 }
292 
293 U_CAPI int32_t U_EXPORT2
utrans_countAvailableIDs(void)294 utrans_countAvailableIDs(void) {
295     return Transliterator::countAvailableIDs();
296 }
297 
298 U_CAPI int32_t U_EXPORT2
utrans_getAvailableID(int32_t index,char * buf,int32_t bufCapacity)299 utrans_getAvailableID(int32_t index,
300                       char* buf, // may be NULL
301                       int32_t bufCapacity) {
302     return Transliterator::getAvailableID(index).extract(0, 0x7fffffff, buf, bufCapacity, US_INV);
303 }
304 
305 /* Transliterator UEnumeration ---------------------------------------------- */
306 
307 typedef struct UTransEnumeration {
308     UEnumeration uenum;
309     int32_t index, count;
310 } UTransEnumeration;
311 
312 U_CDECL_BEGIN
313 static int32_t U_CALLCONV
utrans_enum_count(UEnumeration * uenum,UErrorCode * pErrorCode)314 utrans_enum_count(UEnumeration *uenum, UErrorCode *pErrorCode) {
315     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
316         return 0;
317     }
318     return ((UTransEnumeration *)uenum)->count;
319 }
320 
321 static const UChar* U_CALLCONV
utrans_enum_unext(UEnumeration * uenum,int32_t * resultLength,UErrorCode * pErrorCode)322 utrans_enum_unext(UEnumeration *uenum,
323                   int32_t* resultLength,
324                   UErrorCode *pErrorCode) {
325     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
326         return 0;
327     }
328 
329     UTransEnumeration *ute=(UTransEnumeration *)uenum;
330     int32_t index=ute->index;
331     if(index<ute->count) {
332         const UnicodeString &ID=Transliterator::getAvailableID(index);
333         ute->index=index+1;
334         if(resultLength!=NULL) {
335             *resultLength=ID.length();
336         }
337         // Transliterator keeps its ID NUL-terminated
338         return ID.getBuffer();
339     }
340 
341     if(resultLength!=NULL) {
342         *resultLength=0;
343     }
344     return NULL;
345 }
346 
347 static void U_CALLCONV
utrans_enum_reset(UEnumeration * uenum,UErrorCode * pErrorCode)348 utrans_enum_reset(UEnumeration *uenum, UErrorCode *pErrorCode) {
349     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
350         return;
351     }
352 
353     UTransEnumeration *ute=(UTransEnumeration *)uenum;
354     ute->index=0;
355     ute->count=Transliterator::countAvailableIDs();
356 }
357 
358 static void U_CALLCONV
utrans_enum_close(UEnumeration * uenum)359 utrans_enum_close(UEnumeration *uenum) {
360     uprv_free(uenum);
361 }
362 U_CDECL_END
363 
364 static const UEnumeration utransEnumeration={
365     NULL,
366     NULL,
367     utrans_enum_close,
368     utrans_enum_count,
369     utrans_enum_unext,
370     uenum_nextDefault,
371     utrans_enum_reset
372 };
373 
374 U_CAPI UEnumeration * U_EXPORT2
utrans_openIDs(UErrorCode * pErrorCode)375 utrans_openIDs(UErrorCode *pErrorCode) {
376     UTransEnumeration *ute;
377 
378     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
379         return NULL;
380     }
381 
382     ute=(UTransEnumeration *)uprv_malloc(sizeof(UTransEnumeration));
383     if(ute==NULL) {
384         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
385         return NULL;
386     }
387 
388     ute->uenum=utransEnumeration;
389     ute->index=0;
390     ute->count=Transliterator::countAvailableIDs();
391     return (UEnumeration *)ute;
392 }
393 
394 /********************************************************************
395  * Transliteration API
396  ********************************************************************/
397 
398 U_CAPI void U_EXPORT2
utrans_trans(const UTransliterator * trans,UReplaceable * rep,UReplaceableCallbacks * repFunc,int32_t start,int32_t * limit,UErrorCode * status)399 utrans_trans(const UTransliterator* trans,
400              UReplaceable* rep,
401              UReplaceableCallbacks* repFunc,
402              int32_t start,
403              int32_t* limit,
404              UErrorCode* status) {
405 
406     utrans_ENTRY(status);
407 
408     if (trans == 0 || rep == 0 || repFunc == 0 || limit == 0) {
409         *status = U_ILLEGAL_ARGUMENT_ERROR;
410         return;
411     }
412 
413     ReplaceableGlue r(rep, repFunc);
414 
415     *limit = ((Transliterator*) trans)->transliterate(r, start, *limit);
416 }
417 
418 U_CAPI void U_EXPORT2
utrans_transIncremental(const UTransliterator * trans,UReplaceable * rep,UReplaceableCallbacks * repFunc,UTransPosition * pos,UErrorCode * status)419 utrans_transIncremental(const UTransliterator* trans,
420                         UReplaceable* rep,
421                         UReplaceableCallbacks* repFunc,
422                         UTransPosition* pos,
423                         UErrorCode* status) {
424 
425     utrans_ENTRY(status);
426 
427     if (trans == 0 || rep == 0 || repFunc == 0 || pos == 0) {
428         *status = U_ILLEGAL_ARGUMENT_ERROR;
429         return;
430     }
431 
432     ReplaceableGlue r(rep, repFunc);
433 
434     ((Transliterator*) trans)->transliterate(r, *pos, *status);
435 }
436 
437 U_CAPI void U_EXPORT2
utrans_transUChars(const UTransliterator * trans,UChar * text,int32_t * textLength,int32_t textCapacity,int32_t start,int32_t * limit,UErrorCode * status)438 utrans_transUChars(const UTransliterator* trans,
439                    UChar* text,
440                    int32_t* textLength,
441                    int32_t textCapacity,
442                    int32_t start,
443                    int32_t* limit,
444                    UErrorCode* status) {
445 
446     utrans_ENTRY(status);
447 
448     if (trans == 0 || text == 0 || limit == 0) {
449         *status = U_ILLEGAL_ARGUMENT_ERROR;
450         return;
451     }
452 
453     int32_t textLen = (textLength == NULL || *textLength < 0)
454         ? u_strlen(text) : *textLength;
455     // writeable alias: for this ct, len CANNOT be -1 (why?)
456     UnicodeString str(text, textLen, textCapacity);
457 
458     *limit = ((Transliterator*) trans)->transliterate(str, start, *limit);
459 
460     // Copy the string buffer back to text (only if necessary)
461     // and fill in *neededCapacity (if neededCapacity != NULL).
462     textLen = str.extract(text, textCapacity, *status);
463     if(textLength != NULL) {
464         *textLength = textLen;
465     }
466 }
467 
468 U_CAPI void U_EXPORT2
utrans_transIncrementalUChars(const UTransliterator * trans,UChar * text,int32_t * textLength,int32_t textCapacity,UTransPosition * pos,UErrorCode * status)469 utrans_transIncrementalUChars(const UTransliterator* trans,
470                               UChar* text,
471                               int32_t* textLength,
472                               int32_t textCapacity,
473                               UTransPosition* pos,
474                               UErrorCode* status) {
475 
476     utrans_ENTRY(status);
477 
478     if (trans == 0 || text == 0 || pos == 0) {
479         *status = U_ILLEGAL_ARGUMENT_ERROR;
480         return;
481     }
482 
483     int32_t textLen = (textLength == NULL || *textLength < 0)
484         ? u_strlen(text) : *textLength;
485     // writeable alias: for this ct, len CANNOT be -1 (why?)
486     UnicodeString str(text, textLen, textCapacity);
487 
488     ((Transliterator*) trans)->transliterate(str, *pos, *status);
489 
490     // Copy the string buffer back to text (only if necessary)
491     // and fill in *neededCapacity (if neededCapacity != NULL).
492     textLen = str.extract(text, textCapacity, *status);
493     if(textLength != NULL) {
494         *textLength = textLen;
495     }
496 }
497 
498 U_CAPI int32_t U_EXPORT2
utrans_toRules(const UTransliterator * trans,UBool escapeUnprintable,UChar * result,int32_t resultLength,UErrorCode * status)499 utrans_toRules(     const UTransliterator* trans,
500                     UBool escapeUnprintable,
501                     UChar* result, int32_t resultLength,
502                     UErrorCode* status) {
503     utrans_ENTRY(status) 0;
504     if ( (result==NULL)? resultLength!=0: resultLength<0 ) {
505         *status = U_ILLEGAL_ARGUMENT_ERROR;
506         return 0;
507     }
508 
509     UnicodeString res;
510     res.setTo(result, 0, resultLength);
511     ((Transliterator*) trans)->toRules(res, escapeUnprintable);
512     return res.extract(result, resultLength, *status);
513 }
514 
515 U_CAPI USet* U_EXPORT2
utrans_getSourceSet(const UTransliterator * trans,UBool ignoreFilter,USet * fillIn,UErrorCode * status)516 utrans_getSourceSet(const UTransliterator* trans,
517                     UBool ignoreFilter,
518                     USet* fillIn,
519                     UErrorCode* status) {
520     utrans_ENTRY(status) fillIn;
521 
522     if (fillIn == NULL) {
523         fillIn = uset_openEmpty();
524     }
525     if (ignoreFilter) {
526         ((Transliterator*) trans)->handleGetSourceSet(*((UnicodeSet*)fillIn));
527     } else {
528         ((Transliterator*) trans)->getSourceSet(*((UnicodeSet*)fillIn));
529     }
530     return fillIn;
531 }
532 
533 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
534