1 /*
2 **********************************************************************
3 *   Copyright (c) 2001-2014, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 **********************************************************************
6 *   Date        Name        Description
7 *   08/10/2001  aliu        Creation.
8 **********************************************************************
9 */
10 
11 #include "unicode/utypes.h"
12 
13 #if !UCONFIG_NO_TRANSLITERATION
14 
15 #include "unicode/translit.h"
16 #include "unicode/resbund.h"
17 #include "unicode/uniset.h"
18 #include "unicode/uscript.h"
19 #include "rbt.h"
20 #include "cpdtrans.h"
21 #include "nultrans.h"
22 #include "transreg.h"
23 #include "rbt_data.h"
24 #include "rbt_pars.h"
25 #include "tridpars.h"
26 #include "charstr.h"
27 #include "uassert.h"
28 #include "locutil.h"
29 
30 // Enable the following symbol to add debugging code that tracks the
31 // allocation, deletion, and use of Entry objects.  BoundsChecker has
32 // reported dangling pointer errors with these objects, but I have
33 // been unable to confirm them.  I suspect BoundsChecker is getting
34 // confused with pointers going into and coming out of a UHashtable,
35 // despite the hinting code that is designed to help it.
36 // #define DEBUG_MEM
37 #ifdef DEBUG_MEM
38 #include <stdio.h>
39 #endif
40 
41 // UChar constants
42 static const UChar LOCALE_SEP  = 95; // '_'
43 //static const UChar ID_SEP      = 0x002D; /*-*/
44 //static const UChar VARIANT_SEP = 0x002F; // '/'
45 
46 // String constants
47 static const UChar ANY[] = { 65, 110, 121, 0 }; // Any
48 
49 // empty string
50 #define NO_VARIANT UnicodeString()
51 
52 /**
53  * Resource bundle key for the RuleBasedTransliterator rule.
54  */
55 //static const char RB_RULE[] = "Rule";
56 
57 U_NAMESPACE_BEGIN
58 
59 //------------------------------------------------------------------
60 // Alias
61 //------------------------------------------------------------------
62 
TransliteratorAlias(const UnicodeString & theAliasID,const UnicodeSet * cpdFilter)63 TransliteratorAlias::TransliteratorAlias(const UnicodeString& theAliasID,
64                                          const UnicodeSet* cpdFilter) :
65     ID(),
66     aliasesOrRules(theAliasID),
67     transes(0),
68     compoundFilter(cpdFilter),
69     direction(UTRANS_FORWARD),
70     type(TransliteratorAlias::SIMPLE) {
71 }
72 
TransliteratorAlias(const UnicodeString & theID,const UnicodeString & idBlocks,UVector * adoptedTransliterators,const UnicodeSet * cpdFilter)73 TransliteratorAlias::TransliteratorAlias(const UnicodeString& theID,
74                                          const UnicodeString& idBlocks,
75                                          UVector* adoptedTransliterators,
76                                          const UnicodeSet* cpdFilter) :
77     ID(theID),
78     aliasesOrRules(idBlocks),
79     transes(adoptedTransliterators),
80     compoundFilter(cpdFilter),
81     direction(UTRANS_FORWARD),
82     type(TransliteratorAlias::COMPOUND) {
83 }
84 
TransliteratorAlias(const UnicodeString & theID,const UnicodeString & rules,UTransDirection dir)85 TransliteratorAlias::TransliteratorAlias(const UnicodeString& theID,
86                                          const UnicodeString& rules,
87                                          UTransDirection dir) :
88     ID(theID),
89     aliasesOrRules(rules),
90     transes(0),
91     compoundFilter(0),
92     direction(dir),
93     type(TransliteratorAlias::RULES) {
94 }
95 
~TransliteratorAlias()96 TransliteratorAlias::~TransliteratorAlias() {
97     delete transes;
98 }
99 
100 
create(UParseError & pe,UErrorCode & ec)101 Transliterator* TransliteratorAlias::create(UParseError& pe,
102                                             UErrorCode& ec) {
103     if (U_FAILURE(ec)) {
104         return 0;
105     }
106     Transliterator *t = NULL;
107     switch (type) {
108     case SIMPLE:
109         t = Transliterator::createInstance(aliasesOrRules, UTRANS_FORWARD, pe, ec);
110         if(U_FAILURE(ec)){
111             return 0;
112         }
113         if (compoundFilter != 0)
114             t->adoptFilter((UnicodeSet*)compoundFilter->clone());
115         break;
116     case COMPOUND:
117         {
118             // the total number of transliterators in the compound is the total number of anonymous transliterators
119             // plus the total number of ID blocks-- we start by assuming the list begins and ends with an ID
120             // block and that each pair anonymous transliterators has an ID block between them.  Then we go back
121             // to see whether there really are ID blocks at the beginning and end (by looking for U+FFFF, which
122             // marks the position where an anonymous transliterator goes) and adjust accordingly
123             int32_t anonymousRBTs = transes->size();
124             int32_t transCount = anonymousRBTs * 2 + 1;
125             if (!aliasesOrRules.isEmpty() && aliasesOrRules[0] == (UChar)(0xffff))
126                 --transCount;
127             if (aliasesOrRules.length() >= 2 && aliasesOrRules[aliasesOrRules.length() - 1] == (UChar)(0xffff))
128                 --transCount;
129             UnicodeString noIDBlock((UChar)(0xffff));
130             noIDBlock += ((UChar)(0xffff));
131             int32_t pos = aliasesOrRules.indexOf(noIDBlock);
132             while (pos >= 0) {
133                 --transCount;
134                 pos = aliasesOrRules.indexOf(noIDBlock, pos + 1);
135             }
136 
137             UVector transliterators(ec);
138             UnicodeString idBlock;
139             int32_t blockSeparatorPos = aliasesOrRules.indexOf((UChar)(0xffff));
140             while (blockSeparatorPos >= 0) {
141                 aliasesOrRules.extract(0, blockSeparatorPos, idBlock);
142                 aliasesOrRules.remove(0, blockSeparatorPos + 1);
143                 if (!idBlock.isEmpty())
144                     transliterators.addElement(Transliterator::createInstance(idBlock, UTRANS_FORWARD, pe, ec), ec);
145                 if (!transes->isEmpty())
146                     transliterators.addElement(transes->orphanElementAt(0), ec);
147                 blockSeparatorPos = aliasesOrRules.indexOf((UChar)(0xffff));
148             }
149             if (!aliasesOrRules.isEmpty())
150                 transliterators.addElement(Transliterator::createInstance(aliasesOrRules, UTRANS_FORWARD, pe, ec), ec);
151             while (!transes->isEmpty())
152                 transliterators.addElement(transes->orphanElementAt(0), ec);
153 
154             if (U_SUCCESS(ec)) {
155                 t = new CompoundTransliterator(ID, transliterators,
156                     (compoundFilter ? (UnicodeSet*)(compoundFilter->clone()) : 0),
157                     anonymousRBTs, pe, ec);
158                 if (t == 0) {
159                     ec = U_MEMORY_ALLOCATION_ERROR;
160                     return 0;
161                 }
162             } else {
163                 for (int32_t i = 0; i < transliterators.size(); i++)
164                     delete (Transliterator*)(transliterators.elementAt(i));
165             }
166         }
167         break;
168     case RULES:
169         U_ASSERT(FALSE); // don't call create() if isRuleBased() returns TRUE!
170         break;
171     }
172     return t;
173 }
174 
isRuleBased() const175 UBool TransliteratorAlias::isRuleBased() const {
176     return type == RULES;
177 }
178 
parse(TransliteratorParser & parser,UParseError & pe,UErrorCode & ec) const179 void TransliteratorAlias::parse(TransliteratorParser& parser,
180                                 UParseError& pe, UErrorCode& ec) const {
181     U_ASSERT(type == RULES);
182     if (U_FAILURE(ec)) {
183         return;
184     }
185 
186     parser.parse(aliasesOrRules, direction, pe, ec);
187 }
188 
189 //----------------------------------------------------------------------
190 // class TransliteratorSpec
191 //----------------------------------------------------------------------
192 
193 /**
194  * A TransliteratorSpec is a string specifying either a source or a target.  In more
195  * general terms, it may also specify a variant, but we only use the
196  * Spec class for sources and targets.
197  *
198  * A Spec may be a locale or a script.  If it is a locale, it has a
199  * fallback chain that goes xx_YY_ZZZ -> xx_YY -> xx -> ssss, where
200  * ssss is the script mapping of xx_YY_ZZZ.  The Spec API methods
201  * hasFallback(), next(), and reset() iterate over this fallback
202  * sequence.
203  *
204  * The Spec class canonicalizes itself, so the locale is put into
205  * canonical form, or the script is transformed from an abbreviation
206  * to a full name.
207  */
208 class TransliteratorSpec : public UMemory {
209  public:
210     TransliteratorSpec(const UnicodeString& spec);
211     ~TransliteratorSpec();
212 
213     const UnicodeString& get() const;
214     UBool hasFallback() const;
215     const UnicodeString& next();
216     void reset();
217 
218     UBool isLocale() const;
219     ResourceBundle& getBundle() const;
220 
operator const UnicodeString&() const221     operator const UnicodeString&() const { return get(); }
getTop() const222     const UnicodeString& getTop() const { return top; }
223 
224  private:
225     void setupNext();
226 
227     UnicodeString top;
228     UnicodeString spec;
229     UnicodeString nextSpec;
230     UnicodeString scriptName;
231     UBool isSpecLocale; // TRUE if spec is a locale
232     UBool isNextLocale; // TRUE if nextSpec is a locale
233     ResourceBundle* res;
234 
235     TransliteratorSpec(const TransliteratorSpec &other); // forbid copying of this class
236     TransliteratorSpec &operator=(const TransliteratorSpec &other); // forbid copying of this class
237 };
238 
TransliteratorSpec(const UnicodeString & theSpec)239 TransliteratorSpec::TransliteratorSpec(const UnicodeString& theSpec)
240 : top(theSpec),
241   res(0)
242 {
243     UErrorCode status = U_ZERO_ERROR;
244     Locale topLoc("");
245     LocaleUtility::initLocaleFromName(theSpec, topLoc);
246     if (!topLoc.isBogus()) {
247         res = new ResourceBundle(U_ICUDATA_TRANSLIT, topLoc, status);
248         /* test for NULL */
249         if (res == 0) {
250             return;
251         }
252         if (U_FAILURE(status) || status == U_USING_DEFAULT_WARNING) {
253             delete res;
254             res = 0;
255         }
256     }
257 
258     // Canonicalize script name -or- do locale->script mapping
259     status = U_ZERO_ERROR;
260     static const int32_t capacity = 10;
261     UScriptCode script[capacity]={USCRIPT_INVALID_CODE};
262     int32_t num = uscript_getCode(CharString().appendInvariantChars(theSpec, status).data(),
263                                   script, capacity, &status);
264     if (num > 0 && script[0] != USCRIPT_INVALID_CODE) {
265         scriptName = UnicodeString(uscript_getName(script[0]), -1, US_INV);
266     }
267 
268     // Canonicalize top
269     if (res != 0) {
270         // Canonicalize locale name
271         UnicodeString locStr;
272         LocaleUtility::initNameFromLocale(topLoc, locStr);
273         if (!locStr.isBogus()) {
274             top = locStr;
275         }
276     } else if (scriptName.length() != 0) {
277         // We are a script; use canonical name
278         top = scriptName;
279     }
280 
281     // assert(spec != top);
282     reset();
283 }
284 
~TransliteratorSpec()285 TransliteratorSpec::~TransliteratorSpec() {
286     delete res;
287 }
288 
hasFallback() const289 UBool TransliteratorSpec::hasFallback() const {
290     return nextSpec.length() != 0;
291 }
292 
reset()293 void TransliteratorSpec::reset() {
294     if (spec != top) {
295         spec = top;
296         isSpecLocale = (res != 0);
297         setupNext();
298     }
299 }
300 
setupNext()301 void TransliteratorSpec::setupNext() {
302     isNextLocale = FALSE;
303     if (isSpecLocale) {
304         nextSpec = spec;
305         int32_t i = nextSpec.lastIndexOf(LOCALE_SEP);
306         // If i == 0 then we have _FOO, so we fall through
307         // to the scriptName.
308         if (i > 0) {
309             nextSpec.truncate(i);
310             isNextLocale = TRUE;
311         } else {
312             nextSpec = scriptName; // scriptName may be empty
313         }
314     } else {
315         // spec is a script, so we are at the end
316         nextSpec.truncate(0);
317     }
318 }
319 
320 // Protocol:
321 // for(const UnicodeString& s(spec.get());
322 //     spec.hasFallback(); s(spec.next())) { ...
323 
next()324 const UnicodeString& TransliteratorSpec::next() {
325     spec = nextSpec;
326     isSpecLocale = isNextLocale;
327     setupNext();
328     return spec;
329 }
330 
get() const331 const UnicodeString& TransliteratorSpec::get() const {
332     return spec;
333 }
334 
isLocale() const335 UBool TransliteratorSpec::isLocale() const {
336     return isSpecLocale;
337 }
338 
getBundle() const339 ResourceBundle& TransliteratorSpec::getBundle() const {
340     return *res;
341 }
342 
343 //----------------------------------------------------------------------
344 
#ifdef DEBUG_MEM

// Vector of Entry pointers currently in use
static UVector* DEBUG_entries = NULL;

// Lazily creates the tracking vector.
static void DEBUG_setup() {
    if (DEBUG_entries == NULL) {
        UErrorCode ec = U_ZERO_ERROR;
        DEBUG_entries = new UVector(ec);
    }
}

// Caller must call DEBUG_setup first.  Return index of given Entry,
// if it is in use (not deleted yet), or -1 if not found.
static int DEBUG_findEntry(TransliteratorEntry* e) {
    for (int i=0; i<DEBUG_entries->size(); ++i) {
        if (e == (TransliteratorEntry*) DEBUG_entries->elementAt(i)) {
            return i;
        }
    }
    return -1;
}

// Track object creation
static void DEBUG_newEntry(TransliteratorEntry* e) {
    DEBUG_setup();
    if (DEBUG_findEntry(e) >= 0) {
        // This should really never happen unless the heap is broken
        // %p with a void* argument is the portable way to print a pointer.
        printf("ERROR DEBUG_newEntry duplicate new pointer %p\n", (void*)e);
        return;
    }
    UErrorCode ec = U_ZERO_ERROR;
    DEBUG_entries->addElement(e, ec);
}

// Track object deletion
static void DEBUG_delEntry(TransliteratorEntry* e) {
    DEBUG_setup();
    int i = DEBUG_findEntry(e);
    if (i < 0) {
        printf("ERROR DEBUG_delEntry possible double deletion %p\n", (void*)e);
        return;
    }
    DEBUG_entries->removeElementAt(i);
}

// Track object usage
static void DEBUG_useEntry(TransliteratorEntry* e) {
    if (e == NULL) return;
    DEBUG_setup();
    int i = DEBUG_findEntry(e);
    if (i < 0) {
        printf("ERROR DEBUG_useEntry possible dangling pointer %p\n", (void*)e);
    }
}

#else
// If we're not debugging then make these macros into NOPs
#define DEBUG_newEntry(x)
#define DEBUG_delEntry(x)
#define DEBUG_useEntry(x)
#endif
407 
408 //----------------------------------------------------------------------
409 // class Entry
410 //----------------------------------------------------------------------
411 
412 /**
413  * The Entry object stores objects of different types and
414  * singleton objects as placeholders for rule-based transliterators to
415  * be built as needed.  Instances of this struct can be placeholders,
416  * can represent prototype transliterators to be cloned, or can
417  * represent TransliteratorData objects.  We don't support storing
418  * classes in the registry because we don't have the rtti infrastructure
419  * for it.  We could easily add this if there is a need for it in the
420  * future.
421  */
422 class TransliteratorEntry : public UMemory {
423 public:
424     enum Type {
425         RULES_FORWARD,
426         RULES_REVERSE,
427         LOCALE_RULES,
428         PROTOTYPE,
429         RBT_DATA,
430         COMPOUND_RBT,
431         ALIAS,
432         FACTORY,
433         NONE // Only used for uninitialized entries
434     } entryType;
435     // NOTE: stringArg cannot go inside the union because
436     // it has a copy constructor
437     UnicodeString stringArg; // For RULES_*, ALIAS, COMPOUND_RBT
438     int32_t intArg; // For COMPOUND_RBT, LOCALE_RULES
439     UnicodeSet* compoundFilter; // For COMPOUND_RBT
440     union {
441         Transliterator* prototype; // For PROTOTYPE
442         TransliterationRuleData* data; // For RBT_DATA
443         UVector* dataVector;    // For COMPOUND_RBT
444         struct {
445             Transliterator::Factory function;
446             Transliterator::Token   context;
447         } factory; // For FACTORY
448     } u;
449     TransliteratorEntry();
450     ~TransliteratorEntry();
451     void adoptPrototype(Transliterator* adopted);
452     void setFactory(Transliterator::Factory factory,
453                     Transliterator::Token context);
454 
455 private:
456 
457     TransliteratorEntry(const TransliteratorEntry &other); // forbid copying of this class
458     TransliteratorEntry &operator=(const TransliteratorEntry &other); // forbid copying of this class
459 };
460 
TransliteratorEntry()461 TransliteratorEntry::TransliteratorEntry() {
462     u.prototype = 0;
463     compoundFilter = NULL;
464     entryType = NONE;
465     DEBUG_newEntry(this);
466 }
467 
~TransliteratorEntry()468 TransliteratorEntry::~TransliteratorEntry() {
469     DEBUG_delEntry(this);
470     if (entryType == PROTOTYPE) {
471         delete u.prototype;
472     } else if (entryType == RBT_DATA) {
473         // The data object is shared between instances of RBT.  The
474         // entry object owns it.  It should only be deleted when the
475         // transliterator component is being cleaned up.  Doing so
476         // invalidates any RBTs that the user has instantiated.
477         delete u.data;
478     } else if (entryType == COMPOUND_RBT) {
479         while (u.dataVector != NULL && !u.dataVector->isEmpty())
480             delete (TransliterationRuleData*)u.dataVector->orphanElementAt(0);
481         delete u.dataVector;
482     }
483     delete compoundFilter;
484 }
485 
adoptPrototype(Transliterator * adopted)486 void TransliteratorEntry::adoptPrototype(Transliterator* adopted) {
487     if (entryType == PROTOTYPE) {
488         delete u.prototype;
489     }
490     entryType = PROTOTYPE;
491     u.prototype = adopted;
492 }
493 
setFactory(Transliterator::Factory factory,Transliterator::Token context)494 void TransliteratorEntry::setFactory(Transliterator::Factory factory,
495                        Transliterator::Token context) {
496     if (entryType == PROTOTYPE) {
497         delete u.prototype;
498     }
499     entryType = FACTORY;
500     u.factory.function = factory;
501     u.factory.context = context;
502 }
503 
504 // UObjectDeleter for Hashtable::setValueDeleter
505 U_CDECL_BEGIN
506 static void U_CALLCONV
deleteEntry(void * obj)507 deleteEntry(void* obj) {
508     delete (TransliteratorEntry*) obj;
509 }
510 U_CDECL_END
511 
512 //----------------------------------------------------------------------
513 // class TransliteratorRegistry: Basic public API
514 //----------------------------------------------------------------------
515 
TransliteratorRegistry(UErrorCode & status)516 TransliteratorRegistry::TransliteratorRegistry(UErrorCode& status) :
517     registry(TRUE, status),
518     specDAG(TRUE, status),
519     availableIDs(status)
520 {
521     registry.setValueDeleter(deleteEntry);
522     availableIDs.setDeleter(uprv_deleteUObject);
523     availableIDs.setComparer(uhash_compareCaselessUnicodeString);
524     specDAG.setValueDeleter(uhash_deleteHashtable);
525 }
526 
~TransliteratorRegistry()527 TransliteratorRegistry::~TransliteratorRegistry() {
528     // Through the magic of C++, everything cleans itself up
529 }
530 
get(const UnicodeString & ID,TransliteratorAlias * & aliasReturn,UErrorCode & status)531 Transliterator* TransliteratorRegistry::get(const UnicodeString& ID,
532                                             TransliteratorAlias*& aliasReturn,
533                                             UErrorCode& status) {
534     U_ASSERT(aliasReturn == NULL);
535     TransliteratorEntry *entry = find(ID);
536     return (entry == 0) ? 0
537         : instantiateEntry(ID, entry, aliasReturn, status);
538 }
539 
reget(const UnicodeString & ID,TransliteratorParser & parser,TransliteratorAlias * & aliasReturn,UErrorCode & status)540 Transliterator* TransliteratorRegistry::reget(const UnicodeString& ID,
541                                               TransliteratorParser& parser,
542                                               TransliteratorAlias*& aliasReturn,
543                                               UErrorCode& status) {
544     U_ASSERT(aliasReturn == NULL);
545     TransliteratorEntry *entry = find(ID);
546 
547     if (entry == 0) {
548         // We get to this point if there are two threads, one of which
549         // is instantiating an ID, and another of which is removing
550         // the same ID from the registry, and the timing is just right.
551         return 0;
552     }
553 
554     // The usage model for the caller is that they will first call
555     // reg->get() inside the mutex, they'll get back an alias, they call
556     // alias->isRuleBased(), and if they get TRUE, they call alias->parse()
557     // outside the mutex, then reg->reget() inside the mutex again.  A real
558     // mess, but it gets things working for ICU 3.0. [alan].
559 
560     // Note: It's possible that in between the caller calling
561     // alias->parse() and reg->reget(), that another thread will have
562     // called reg->reget(), and the entry will already have been fixed up.
563     // We have to detect this so we don't stomp over existing entry
564     // data members and potentially leak memory (u.data and compoundFilter).
565 
566     if (entry->entryType == TransliteratorEntry::RULES_FORWARD ||
567         entry->entryType == TransliteratorEntry::RULES_REVERSE ||
568         entry->entryType == TransliteratorEntry::LOCALE_RULES) {
569 
570         if (parser.idBlockVector.isEmpty() && parser.dataVector.isEmpty()) {
571             entry->u.data = 0;
572             entry->entryType = TransliteratorEntry::ALIAS;
573             entry->stringArg = UNICODE_STRING_SIMPLE("Any-NULL");
574         }
575         else if (parser.idBlockVector.isEmpty() && parser.dataVector.size() == 1) {
576             entry->u.data = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0);
577             entry->entryType = TransliteratorEntry::RBT_DATA;
578         }
579         else if (parser.idBlockVector.size() == 1 && parser.dataVector.isEmpty()) {
580             entry->stringArg = *(UnicodeString*)(parser.idBlockVector.elementAt(0));
581             entry->compoundFilter = parser.orphanCompoundFilter();
582             entry->entryType = TransliteratorEntry::ALIAS;
583         }
584         else {
585             entry->entryType = TransliteratorEntry::COMPOUND_RBT;
586             entry->compoundFilter = parser.orphanCompoundFilter();
587             entry->u.dataVector = new UVector(status);
588             entry->stringArg.remove();
589 
590             int32_t limit = parser.idBlockVector.size();
591             if (parser.dataVector.size() > limit)
592                 limit = parser.dataVector.size();
593 
594             for (int32_t i = 0; i < limit; i++) {
595                 if (i < parser.idBlockVector.size()) {
596                     UnicodeString* idBlock = (UnicodeString*)parser.idBlockVector.elementAt(i);
597                     if (!idBlock->isEmpty())
598                         entry->stringArg += *idBlock;
599                 }
600                 if (!parser.dataVector.isEmpty()) {
601                     TransliterationRuleData* data = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0);
602                     entry->u.dataVector->addElement(data, status);
603                     entry->stringArg += (UChar)0xffff;  // use U+FFFF to mark position of RBTs in ID block
604                 }
605             }
606         }
607     }
608 
609     Transliterator *t =
610         instantiateEntry(ID, entry, aliasReturn, status);
611     return t;
612 }
613 
put(Transliterator * adoptedProto,UBool visible,UErrorCode & ec)614 void TransliteratorRegistry::put(Transliterator* adoptedProto,
615                                  UBool visible,
616                                  UErrorCode& ec)
617 {
618     TransliteratorEntry *entry = new TransliteratorEntry();
619     if (entry == NULL) {
620         ec = U_MEMORY_ALLOCATION_ERROR;
621         return;
622     }
623     entry->adoptPrototype(adoptedProto);
624     registerEntry(adoptedProto->getID(), entry, visible);
625 }
626 
put(const UnicodeString & ID,Transliterator::Factory factory,Transliterator::Token context,UBool visible,UErrorCode & ec)627 void TransliteratorRegistry::put(const UnicodeString& ID,
628                                  Transliterator::Factory factory,
629                                  Transliterator::Token context,
630                                  UBool visible,
631                                  UErrorCode& ec) {
632     TransliteratorEntry *entry = new TransliteratorEntry();
633     if (entry == NULL) {
634         ec = U_MEMORY_ALLOCATION_ERROR;
635         return;
636     }
637     entry->setFactory(factory, context);
638     registerEntry(ID, entry, visible);
639 }
640 
put(const UnicodeString & ID,const UnicodeString & resourceName,UTransDirection dir,UBool readonlyResourceAlias,UBool visible,UErrorCode & ec)641 void TransliteratorRegistry::put(const UnicodeString& ID,
642                                  const UnicodeString& resourceName,
643                                  UTransDirection dir,
644                                  UBool readonlyResourceAlias,
645                                  UBool visible,
646                                  UErrorCode& ec) {
647     TransliteratorEntry *entry = new TransliteratorEntry();
648     if (entry == NULL) {
649         ec = U_MEMORY_ALLOCATION_ERROR;
650         return;
651     }
652     entry->entryType = (dir == UTRANS_FORWARD) ? TransliteratorEntry::RULES_FORWARD
653         : TransliteratorEntry::RULES_REVERSE;
654     if (readonlyResourceAlias) {
655         entry->stringArg.setTo(TRUE, resourceName.getBuffer(), -1);
656     }
657     else {
658         entry->stringArg = resourceName;
659     }
660     registerEntry(ID, entry, visible);
661 }
662 
put(const UnicodeString & ID,const UnicodeString & alias,UBool readonlyAliasAlias,UBool visible,UErrorCode &)663 void TransliteratorRegistry::put(const UnicodeString& ID,
664                                  const UnicodeString& alias,
665                                  UBool readonlyAliasAlias,
666                                  UBool visible,
667                                  UErrorCode& /*ec*/) {
668     TransliteratorEntry *entry = new TransliteratorEntry();
669     // Null pointer check
670     if (entry != NULL) {
671         entry->entryType = TransliteratorEntry::ALIAS;
672         if (readonlyAliasAlias) {
673             entry->stringArg.setTo(TRUE, alias.getBuffer(), -1);
674         }
675         else {
676             entry->stringArg = alias;
677         }
678         registerEntry(ID, entry, visible);
679     }
680 }
681 
remove(const UnicodeString & ID)682 void TransliteratorRegistry::remove(const UnicodeString& ID) {
683     UnicodeString source, target, variant;
684     UBool sawSource;
685     TransliteratorIDParser::IDtoSTV(ID, source, target, variant, sawSource);
686     // Only need to do this if ID.indexOf('-') < 0
687     UnicodeString id;
688     TransliteratorIDParser::STVtoID(source, target, variant, id);
689     registry.remove(id);
690     removeSTV(source, target, variant);
691     availableIDs.removeElement((void*) &id);
692 }
693 
694 //----------------------------------------------------------------------
695 // class TransliteratorRegistry: Public ID and spec management
696 //----------------------------------------------------------------------
697 
698 /**
699  * == OBSOLETE - remove in ICU 3.4 ==
700  * Return the number of IDs currently registered with the system.
701  * To retrieve the actual IDs, call getAvailableID(i) with
702  * i from 0 to countAvailableIDs() - 1.
703  */
countAvailableIDs(void) const704 int32_t TransliteratorRegistry::countAvailableIDs(void) const {
705     return availableIDs.size();
706 }
707 
708 /**
709  * == OBSOLETE - remove in ICU 3.4 ==
710  * Return the index-th available ID.  index must be between 0
711  * and countAvailableIDs() - 1, inclusive.  If index is out of
712  * range, the result of getAvailableID(0) is returned.
713  */
getAvailableID(int32_t index) const714 const UnicodeString& TransliteratorRegistry::getAvailableID(int32_t index) const {
715     if (index < 0 || index >= availableIDs.size()) {
716         index = 0;
717     }
718     return *(const UnicodeString*) availableIDs[index];
719 }
720 
getAvailableIDs() const721 StringEnumeration* TransliteratorRegistry::getAvailableIDs() const {
722     return new Enumeration(*this);
723 }
724 
countAvailableSources(void) const725 int32_t TransliteratorRegistry::countAvailableSources(void) const {
726     return specDAG.count();
727 }
728 
getAvailableSource(int32_t index,UnicodeString & result) const729 UnicodeString& TransliteratorRegistry::getAvailableSource(int32_t index,
730                                                           UnicodeString& result) const {
731     int32_t pos = UHASH_FIRST;
732     const UHashElement *e = 0;
733     while (index-- >= 0) {
734         e = specDAG.nextElement(pos);
735         if (e == 0) {
736             break;
737         }
738     }
739     if (e == 0) {
740         result.truncate(0);
741     } else {
742         result = *(UnicodeString*) e->key.pointer;
743     }
744     return result;
745 }
746 
countAvailableTargets(const UnicodeString & source) const747 int32_t TransliteratorRegistry::countAvailableTargets(const UnicodeString& source) const {
748     Hashtable *targets = (Hashtable*) specDAG.get(source);
749     return (targets == 0) ? 0 : targets->count();
750 }
751 
getAvailableTarget(int32_t index,const UnicodeString & source,UnicodeString & result) const752 UnicodeString& TransliteratorRegistry::getAvailableTarget(int32_t index,
753                                                           const UnicodeString& source,
754                                                           UnicodeString& result) const {
755     Hashtable *targets = (Hashtable*) specDAG.get(source);
756     if (targets == 0) {
757         result.truncate(0); // invalid source
758         return result;
759     }
760     int32_t pos = UHASH_FIRST;
761     const UHashElement *e = 0;
762     while (index-- >= 0) {
763         e = targets->nextElement(pos);
764         if (e == 0) {
765             break;
766         }
767     }
768     if (e == 0) {
769         result.truncate(0); // invalid index
770     } else {
771         result = *(UnicodeString*) e->key.pointer;
772     }
773     return result;
774 }
775 
countAvailableVariants(const UnicodeString & source,const UnicodeString & target) const776 int32_t TransliteratorRegistry::countAvailableVariants(const UnicodeString& source,
777                                                        const UnicodeString& target) const {
778     Hashtable *targets = (Hashtable*) specDAG.get(source);
779     if (targets == 0) {
780         return 0;
781     }
782     UVector *variants = (UVector*) targets->get(target);
783     // variants may be 0 if the source/target are invalid
784     return (variants == 0) ? 0 : variants->size();
785 }
786 
getAvailableVariant(int32_t index,const UnicodeString & source,const UnicodeString & target,UnicodeString & result) const787 UnicodeString& TransliteratorRegistry::getAvailableVariant(int32_t index,
788                                                            const UnicodeString& source,
789                                                            const UnicodeString& target,
790                                                            UnicodeString& result) const {
791     Hashtable *targets = (Hashtable*) specDAG.get(source);
792     if (targets == 0) {
793         result.truncate(0); // invalid source
794         return result;
795     }
796     UVector *variants = (UVector*) targets->get(target);
797     if (variants == 0) {
798         result.truncate(0); // invalid target
799         return result;
800     }
801     UnicodeString *v = (UnicodeString*) variants->elementAt(index);
802     if (v == 0) {
803         result.truncate(0); // invalid index
804     } else {
805         result = *v;
806     }
807     return result;
808 }
809 
810 //----------------------------------------------------------------------
811 // class TransliteratorRegistry::Enumeration
812 //----------------------------------------------------------------------
813 
/**
 * Construct an enumeration over the given registry, positioned at the
 * first available ID.  Only a reference to the registry is kept.
 */
TransliteratorRegistry::Enumeration::Enumeration(const TransliteratorRegistry& _reg)
    : index(0),
      reg(_reg)
{
}
817 
~Enumeration()818 TransliteratorRegistry::Enumeration::~Enumeration() {
819 }
820 
count(UErrorCode &) const821 int32_t TransliteratorRegistry::Enumeration::count(UErrorCode& /*status*/) const {
822     return reg.availableIDs.size();
823 }
824 
snext(UErrorCode & status)825 const UnicodeString* TransliteratorRegistry::Enumeration::snext(UErrorCode& status) {
826     // This is sloppy but safe -- if we get out of sync with the underlying
827     // registry, we will still return legal strings, but they might not
828     // correspond to the snapshot at construction time.  So there could be
829     // duplicate IDs or omitted IDs if insertions or deletions occur in one
830     // thread while another is iterating.  To be more rigorous, add a timestamp,
831     // which is incremented with any modification, and validate this iterator
832     // against the timestamp at construction time.  This probably isn't worth
833     // doing as long as there is some possibility of removing this code in favor
834     // of some new code based on Doug's service framework.
835     if (U_FAILURE(status)) {
836         return NULL;
837     }
838     int32_t n = reg.availableIDs.size();
839     if (index > n) {
840         status = U_ENUM_OUT_OF_SYNC_ERROR;
841     }
842     // index == n is okay -- this means we've reached the end
843     if (index < n) {
844         // Copy the string! This avoids lifetime problems.
845         unistr = *(const UnicodeString*)reg.availableIDs[index++];
846         return &unistr;
847     } else {
848         return NULL;
849     }
850 }
851 
reset(UErrorCode &)852 void TransliteratorRegistry::Enumeration::reset(UErrorCode& /*status*/) {
853     index = 0;
854 }
855 
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TransliteratorRegistry::Enumeration)856 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TransliteratorRegistry::Enumeration)
857 
858 //----------------------------------------------------------------------
859 // class TransliteratorRegistry: internal
860 //----------------------------------------------------------------------
861 
862 /**
863  * Convenience method.  Calls 6-arg registerEntry().
864  */
865 void TransliteratorRegistry::registerEntry(const UnicodeString& source,
866                                            const UnicodeString& target,
867                                            const UnicodeString& variant,
868                                            TransliteratorEntry* adopted,
869                                            UBool visible) {
870     UnicodeString ID;
871     UnicodeString s(source);
872     if (s.length() == 0) {
873         s.setTo(TRUE, ANY, 3);
874     }
875     TransliteratorIDParser::STVtoID(source, target, variant, ID);
876     registerEntry(ID, s, target, variant, adopted, visible);
877 }
878 
879 /**
880  * Convenience method.  Calls 6-arg registerEntry().
881  */
registerEntry(const UnicodeString & ID,TransliteratorEntry * adopted,UBool visible)882 void TransliteratorRegistry::registerEntry(const UnicodeString& ID,
883                                            TransliteratorEntry* adopted,
884                                            UBool visible) {
885     UnicodeString source, target, variant;
886     UBool sawSource;
887     TransliteratorIDParser::IDtoSTV(ID, source, target, variant, sawSource);
888     // Only need to do this if ID.indexOf('-') < 0
889     UnicodeString id;
890     TransliteratorIDParser::STVtoID(source, target, variant, id);
891     registerEntry(id, source, target, variant, adopted, visible);
892 }
893 
894 /**
895  * Register an entry object (adopted) with the given ID, source,
896  * target, and variant strings.
897  */
registerEntry(const UnicodeString & ID,const UnicodeString & source,const UnicodeString & target,const UnicodeString & variant,TransliteratorEntry * adopted,UBool visible)898 void TransliteratorRegistry::registerEntry(const UnicodeString& ID,
899                                            const UnicodeString& source,
900                                            const UnicodeString& target,
901                                            const UnicodeString& variant,
902                                            TransliteratorEntry* adopted,
903                                            UBool visible) {
904     UErrorCode status = U_ZERO_ERROR;
905     registry.put(ID, adopted, status);
906     if (visible) {
907         registerSTV(source, target, variant);
908         if (!availableIDs.contains((void*) &ID)) {
909             UnicodeString *newID = (UnicodeString *)ID.clone();
910             // Check to make sure newID was created.
911             if (newID != NULL) {
912 	            // NUL-terminate the ID string
913 	            newID->getTerminatedBuffer();
914 	            availableIDs.addElement(newID, status);
915             }
916         }
917     } else {
918         removeSTV(source, target, variant);
919         availableIDs.removeElement((void*) &ID);
920     }
921 }
922 
923 /**
924  * Register a source-target/variant in the specDAG.  Variant may be
925  * empty, but source and target must not be.  If variant is empty then
926  * the special variant NO_VARIANT is stored in slot zero of the
927  * UVector of variants.
928  */
registerSTV(const UnicodeString & source,const UnicodeString & target,const UnicodeString & variant)929 void TransliteratorRegistry::registerSTV(const UnicodeString& source,
930                                          const UnicodeString& target,
931                                          const UnicodeString& variant) {
932     // assert(source.length() > 0);
933     // assert(target.length() > 0);
934     UErrorCode status = U_ZERO_ERROR;
935     Hashtable *targets = (Hashtable*) specDAG.get(source);
936     if (targets == 0) {
937         targets = new Hashtable(TRUE, status);
938         if (U_FAILURE(status) || targets == 0) {
939             return;
940         }
941         targets->setValueDeleter(uprv_deleteUObject);
942         specDAG.put(source, targets, status);
943     }
944     UVector *variants = (UVector*) targets->get(target);
945     if (variants == 0) {
946         variants = new UVector(uprv_deleteUObject,
947                                uhash_compareCaselessUnicodeString, status);
948         if (variants == 0) {
949             return;
950         }
951         targets->put(target, variants, status);
952     }
953     // assert(NO_VARIANT == "");
954     // We add the variant string.  If it is the special "no variant"
955     // string, that is, the empty string, we add it at position zero.
956     if (!variants->contains((void*) &variant)) {
957     	UnicodeString *tempus; // Used for null pointer check.
958         if (variant.length() > 0) {
959         	tempus = new UnicodeString(variant);
960         	if (tempus != NULL) {
961         		variants->addElement(tempus, status);
962         	}
963         } else {
964         	tempus = new UnicodeString();  // = NO_VARIANT
965         	if (tempus != NULL) {
966         		variants->insertElementAt(tempus, 0, status);
967         	}
968         }
969     }
970 }
971 
972 /**
973  * Remove a source-target/variant from the specDAG.
974  */
removeSTV(const UnicodeString & source,const UnicodeString & target,const UnicodeString & variant)975 void TransliteratorRegistry::removeSTV(const UnicodeString& source,
976                                        const UnicodeString& target,
977                                        const UnicodeString& variant) {
978     // assert(source.length() > 0);
979     // assert(target.length() > 0);
980 //    UErrorCode status = U_ZERO_ERROR;
981     Hashtable *targets = (Hashtable*) specDAG.get(source);
982     if (targets == 0) {
983         return; // should never happen for valid s-t/v
984     }
985     UVector *variants = (UVector*) targets->get(target);
986     if (variants == 0) {
987         return; // should never happen for valid s-t/v
988     }
989     variants->removeElement((void*) &variant);
990     if (variants->size() == 0) {
991         targets->remove(target); // should delete variants
992         if (targets->count() == 0) {
993             specDAG.remove(source); // should delete targets
994         }
995     }
996 }
997 
998 /**
999  * Attempt to find a source-target/variant in the dynamic registry
1000  * store.  Return 0 on failure.
1001  *
1002  * Caller does NOT own returned object.
1003  */
findInDynamicStore(const TransliteratorSpec & src,const TransliteratorSpec & trg,const UnicodeString & variant) const1004 TransliteratorEntry* TransliteratorRegistry::findInDynamicStore(const TransliteratorSpec& src,
1005                                                   const TransliteratorSpec& trg,
1006                                                   const UnicodeString& variant) const {
1007     UnicodeString ID;
1008     TransliteratorIDParser::STVtoID(src, trg, variant, ID);
1009     TransliteratorEntry *e = (TransliteratorEntry*) registry.get(ID);
1010     DEBUG_useEntry(e);
1011     return e;
1012 }
1013 
1014 /**
1015  * Attempt to find a source-target/variant in the static locale
1016  * resource store.  Do not perform fallback.  Return 0 on failure.
1017  *
1018  * On success, create a new entry object, register it in the dynamic
1019  * store, and return a pointer to it, but do not make it public --
1020  * just because someone requested something, we do not expand the
1021  * available ID list (or spec DAG).
1022  *
1023  * Caller does NOT own returned object.
1024  */
findInStaticStore(const TransliteratorSpec & src,const TransliteratorSpec & trg,const UnicodeString & variant)1025 TransliteratorEntry* TransliteratorRegistry::findInStaticStore(const TransliteratorSpec& src,
1026                                                  const TransliteratorSpec& trg,
1027                                                  const UnicodeString& variant) {
1028     TransliteratorEntry* entry = 0;
1029     if (src.isLocale()) {
1030         entry = findInBundle(src, trg, variant, UTRANS_FORWARD);
1031     } else if (trg.isLocale()) {
1032         entry = findInBundle(trg, src, variant, UTRANS_REVERSE);
1033     }
1034 
1035     // If we found an entry, store it in the Hashtable for next
1036     // time.
1037     if (entry != 0) {
1038         registerEntry(src.getTop(), trg.getTop(), variant, entry, FALSE);
1039     }
1040 
1041     return entry;
1042 }
1043 
// As of 2.0, resource bundle keys cannot contain '_'
//
// NUL-terminated UChar spellings of the resource key prefixes used by
// findInBundle().  The upper-cased name of the spec being looked for is
// appended directly to the prefix, with no separator.
static const UChar TRANSLITERATE_TO[] = {84,114,97,110,115,108,105,116,101,114,97,116,101,84,111,0}; // "TransliterateTo"

static const UChar TRANSLITERATE_FROM[] = {84,114,97,110,115,108,105,116,101,114,97,116,101,70,114,111,109,0}; // "TransliterateFrom"

static const UChar TRANSLITERATE[] = {84,114,97,110,115,108,105,116,101,114,97,116,101,0}; // "Transliterate"
1050 
1051 /**
1052  * Attempt to find an entry in a single resource bundle.  This is
1053  * a one-sided lookup.  findInStaticStore() performs up to two such
1054  * lookups, one for the source, and one for the target.
1055  *
1056  * Do not perform fallback.  Return 0 on failure.
1057  *
1058  * On success, create a new Entry object, populate it, and return it.
1059  * The caller owns the returned object.
1060  */
findInBundle(const TransliteratorSpec & specToOpen,const TransliteratorSpec & specToFind,const UnicodeString & variant,UTransDirection direction)1061 TransliteratorEntry* TransliteratorRegistry::findInBundle(const TransliteratorSpec& specToOpen,
1062                                             const TransliteratorSpec& specToFind,
1063                                             const UnicodeString& variant,
1064                                             UTransDirection direction)
1065 {
1066     UnicodeString utag;
1067     UnicodeString resStr;
1068     int32_t pass;
1069 
1070     for (pass=0; pass<2; ++pass) {
1071         utag.truncate(0);
1072         // First try either TransliteratorTo_xxx or
1073         // TransliterateFrom_xxx, then try the bidirectional
1074         // Transliterate_xxx.  This precedence order is arbitrary
1075         // but must be consistent and documented.
1076         if (pass == 0) {
1077             utag.append(direction == UTRANS_FORWARD ?
1078                         TRANSLITERATE_TO : TRANSLITERATE_FROM, -1);
1079         } else {
1080             utag.append(TRANSLITERATE, -1);
1081         }
1082         UnicodeString s(specToFind.get());
1083         utag.append(s.toUpper(""));
1084         UErrorCode status = U_ZERO_ERROR;
1085         ResourceBundle subres(specToOpen.getBundle().get(
1086             CharString().appendInvariantChars(utag, status).data(), status));
1087         if (U_FAILURE(status) || status == U_USING_DEFAULT_WARNING) {
1088             continue;
1089         }
1090 
1091         s.truncate(0);
1092         if (specToOpen.get() != LocaleUtility::initNameFromLocale(subres.getLocale(), s)) {
1093             continue;
1094         }
1095 
1096         if (variant.length() != 0) {
1097             status = U_ZERO_ERROR;
1098             resStr = subres.getStringEx(
1099                 CharString().appendInvariantChars(variant, status).data(), status);
1100             if (U_SUCCESS(status)) {
1101                 // Exit loop successfully
1102                 break;
1103             }
1104         } else {
1105             // Variant is empty, which means match the first variant listed.
1106             status = U_ZERO_ERROR;
1107             resStr = subres.getStringEx(1, status);
1108             if (U_SUCCESS(status)) {
1109                 // Exit loop successfully
1110                 break;
1111             }
1112         }
1113     }
1114 
1115     if (pass==2) {
1116         // Failed
1117         return NULL;
1118     }
1119 
1120     // We have succeeded in loading a string from the locale
1121     // resources.  Create a new registry entry to hold it and return it.
1122     TransliteratorEntry *entry = new TransliteratorEntry();
1123     if (entry != 0) {
1124         // The direction is always forward for the
1125         // TransliterateTo_xxx and TransliterateFrom_xxx
1126         // items; those are unidirectional forward rules.
1127         // For the bidirectional Transliterate_xxx items,
1128         // the direction is the value passed in to this
1129         // function.
1130         int32_t dir = (pass == 0) ? UTRANS_FORWARD : direction;
1131         entry->entryType = TransliteratorEntry::LOCALE_RULES;
1132         entry->stringArg = resStr;
1133         entry->intArg = dir;
1134     }
1135 
1136     return entry;
1137 }
1138 
1139 /**
1140  * Convenience method.  Calls 3-arg find().
1141  */
find(const UnicodeString & ID)1142 TransliteratorEntry* TransliteratorRegistry::find(const UnicodeString& ID) {
1143     UnicodeString source, target, variant;
1144     UBool sawSource;
1145     TransliteratorIDParser::IDtoSTV(ID, source, target, variant, sawSource);
1146     return find(source, target, variant);
1147 }
1148 
1149 /**
1150  * Top-level find method.  Attempt to find a source-target/variant in
1151  * either the dynamic or the static (locale resource) store.  Perform
1152  * fallback.
1153  *
1154  * Lookup sequence for ss_SS_SSS-tt_TT_TTT/v:
1155  *
1156  *   ss_SS_SSS-tt_TT_TTT/v -- in hashtable
1157  *   ss_SS_SSS-tt_TT_TTT/v -- in ss_SS_SSS (no fallback)
1158  *
1159  *     repeat with t = tt_TT_TTT, tt_TT, tt, and tscript
1160  *
1161  *     ss_SS_SSS-t/ *
1162  *     ss_SS-t/ *
1163  *     ss-t/ *
1164  *     sscript-t/ *
1165  *
1166  * Here * matches the first variant listed.
1167  *
1168  * Caller does NOT own returned object.  Return 0 on failure.
1169  */
find(UnicodeString & source,UnicodeString & target,UnicodeString & variant)1170 TransliteratorEntry* TransliteratorRegistry::find(UnicodeString& source,
1171                                     UnicodeString& target,
1172                                     UnicodeString& variant) {
1173 
1174     TransliteratorSpec src(source);
1175     TransliteratorSpec trg(target);
1176     TransliteratorEntry* entry;
1177 
1178     // Seek exact match in hashtable.  Temporary fix for ICU 4.6.
1179     // TODO: The general logic for finding a matching transliterator needs to be reviewed.
1180     // ICU ticket #8089
1181     UnicodeString ID;
1182     TransliteratorIDParser::STVtoID(source, target, variant, ID);
1183     entry = (TransliteratorEntry*) registry.get(ID);
1184     if (entry != 0) {
1185         // std::string ss;
1186         // std::cout << ID.toUTF8String(ss) << std::endl;
1187         return entry;
1188     }
1189 
1190     if (variant.length() != 0) {
1191 
1192         // Seek exact match in hashtable
1193         entry = findInDynamicStore(src, trg, variant);
1194         if (entry != 0) {
1195             return entry;
1196         }
1197 
1198         // Seek exact match in locale resources
1199         entry = findInStaticStore(src, trg, variant);
1200         if (entry != 0) {
1201             return entry;
1202         }
1203     }
1204 
1205     for (;;) {
1206         src.reset();
1207         for (;;) {
1208             // Seek match in hashtable
1209             entry = findInDynamicStore(src, trg, NO_VARIANT);
1210             if (entry != 0) {
1211                 return entry;
1212             }
1213 
1214             // Seek match in locale resources
1215             entry = findInStaticStore(src, trg, NO_VARIANT);
1216             if (entry != 0) {
1217                 return entry;
1218             }
1219             if (!src.hasFallback()) {
1220                 break;
1221             }
1222             src.next();
1223         }
1224         if (!trg.hasFallback()) {
1225             break;
1226         }
1227         trg.next();
1228     }
1229 
1230     return 0;
1231 }
1232 
1233 /**
1234  * Given an Entry object, instantiate it.  Caller owns result.  Return
1235  * 0 on failure.
1236  *
1237  * Return a non-empty aliasReturn value if the ID points to an alias.
1238  * We cannot instantiate it ourselves because the alias may contain
1239  * filters or compounds, which we do not understand.  Caller should
1240  * make aliasReturn empty before calling.
1241  *
1242  * The entry object is assumed to reside in the dynamic store.  It may be
1243  * modified.
1244  */
instantiateEntry(const UnicodeString & ID,TransliteratorEntry * entry,TransliteratorAlias * & aliasReturn,UErrorCode & status)1245 Transliterator* TransliteratorRegistry::instantiateEntry(const UnicodeString& ID,
1246                                                          TransliteratorEntry *entry,
1247                                                          TransliteratorAlias* &aliasReturn,
1248                                                          UErrorCode& status) {
1249     Transliterator *t = 0;
1250     U_ASSERT(aliasReturn == 0);
1251 
1252     switch (entry->entryType) {
1253     case TransliteratorEntry::RBT_DATA:
1254         t = new RuleBasedTransliterator(ID, entry->u.data);
1255         if (t == 0) {
1256             status = U_MEMORY_ALLOCATION_ERROR;
1257         }
1258         return t;
1259     case TransliteratorEntry::PROTOTYPE:
1260         t = entry->u.prototype->clone();
1261         if (t == 0) {
1262             status = U_MEMORY_ALLOCATION_ERROR;
1263         }
1264         return t;
1265     case TransliteratorEntry::ALIAS:
1266         aliasReturn = new TransliteratorAlias(entry->stringArg, entry->compoundFilter);
1267         if (aliasReturn == 0) {
1268             status = U_MEMORY_ALLOCATION_ERROR;
1269         }
1270         return 0;
1271     case TransliteratorEntry::FACTORY:
1272         t = entry->u.factory.function(ID, entry->u.factory.context);
1273         if (t == 0) {
1274             status = U_MEMORY_ALLOCATION_ERROR;
1275         }
1276         return t;
1277     case TransliteratorEntry::COMPOUND_RBT:
1278         {
1279             UVector* rbts = new UVector(entry->u.dataVector->size(), status);
1280             // Check for null pointer
1281             if (rbts == NULL) {
1282             	status = U_MEMORY_ALLOCATION_ERROR;
1283             	return NULL;
1284             }
1285             int32_t passNumber = 1;
1286             for (int32_t i = 0; U_SUCCESS(status) && i < entry->u.dataVector->size(); i++) {
1287                 // TODO: Should passNumber be turned into a decimal-string representation (1 -> "1")?
1288                 Transliterator* t = new RuleBasedTransliterator(UnicodeString(CompoundTransliterator::PASS_STRING) + UnicodeString(passNumber++),
1289                     (TransliterationRuleData*)(entry->u.dataVector->elementAt(i)), FALSE);
1290                 if (t == 0)
1291                     status = U_MEMORY_ALLOCATION_ERROR;
1292                 else
1293                     rbts->addElement(t, status);
1294             }
1295             if (U_FAILURE(status)) {
1296                 delete rbts;
1297                 return 0;
1298             }
1299             aliasReturn = new TransliteratorAlias(ID, entry->stringArg, rbts, entry->compoundFilter);
1300         }
1301         if (aliasReturn == 0) {
1302             status = U_MEMORY_ALLOCATION_ERROR;
1303         }
1304         return 0;
1305     case TransliteratorEntry::LOCALE_RULES:
1306         aliasReturn = new TransliteratorAlias(ID, entry->stringArg,
1307                                               (UTransDirection) entry->intArg);
1308         if (aliasReturn == 0) {
1309             status = U_MEMORY_ALLOCATION_ERROR;
1310         }
1311         return 0;
1312     case TransliteratorEntry::RULES_FORWARD:
1313     case TransliteratorEntry::RULES_REVERSE:
1314         // Process the rule data into a TransliteratorRuleData object,
1315         // and possibly also into an ::id header and/or footer.  Then
1316         // we modify the registry with the parsed data and retry.
1317         {
1318             TransliteratorParser parser(status);
1319 
1320             // We use the file name, taken from another resource bundle
1321             // 2-d array at static init time, as a locale language.  We're
1322             // just using the locale mechanism to map through to a file
1323             // name; this in no way represents an actual locale.
1324             //CharString ch(entry->stringArg);
1325             //UResourceBundle *bundle = ures_openDirect(0, ch, &status);
1326             UnicodeString rules = entry->stringArg;
1327             //ures_close(bundle);
1328 
1329             //if (U_FAILURE(status)) {
1330                 // We have a failure of some kind.  Remove the ID from the
1331                 // registry so we don't keep trying.  NOTE: This will throw off
1332                 // anyone who is, at the moment, trying to iterate over the
1333                 // available IDs.  That's acceptable since we should never
1334                 // really get here except under installation, configuration,
1335                 // or unrecoverable run time memory failures.
1336             //    remove(ID);
1337             //} else {
1338 
1339                 // If the status indicates a failure, then we don't have any
1340                 // rules -- there is probably an installation error.  The list
1341                 // in the root locale should correspond to all the installed
1342                 // transliterators; if it lists something that's not
1343                 // installed, we'll get an error from ResourceBundle.
1344                 aliasReturn = new TransliteratorAlias(ID, rules,
1345                     ((entry->entryType == TransliteratorEntry::RULES_REVERSE) ?
1346                      UTRANS_REVERSE : UTRANS_FORWARD));
1347                 if (aliasReturn == 0) {
1348                     status = U_MEMORY_ALLOCATION_ERROR;
1349                 }
1350             //}
1351         }
1352         return 0;
1353     default:
1354         U_ASSERT(FALSE); // can't get here
1355         return 0;
1356     }
1357 }
1358 U_NAMESPACE_END
1359 
1360 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
1361 
1362 //eof
1363