1 /*
2 **********************************************************************
3 *   Copyright (c) 2001-2014, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 **********************************************************************
6 *   Date        Name        Description
7 *   08/10/2001  aliu        Creation.
8 **********************************************************************
9 */
10 #ifndef _TRANSREG_H
11 #define _TRANSREG_H
12 
13 #include "unicode/utypes.h"
14 
15 #if !UCONFIG_NO_TRANSLITERATION
16 
17 #include "unicode/uobject.h"
18 #include "unicode/translit.h"
19 #include "hash.h"
20 #include "uvector.h"
21 
22 U_NAMESPACE_BEGIN
23 
24 class TransliteratorEntry;
25 class TransliteratorSpec;
26 class UnicodeString;
27 
28 //------------------------------------------------------------------
29 // TransliteratorAlias
30 //------------------------------------------------------------------
31 
32 /**
33  * A TransliteratorAlias object is returned by TransliteratorRegistry::get()
34  * if the given ID actually translates into something else.  The caller then
35  * invokes create() on the alias -- or, when isRuleBased() is TRUE, parse()
36  * followed by TransliteratorRegistry::reget() -- and deletes the alias.
37  *
38  * Why all the shenanigans?  To prevent circular calls between the
39  * registry code and the transliterator code, which would deadlock.
40  */
41 class TransliteratorAlias : public UMemory {
42  public:
43     /**
44      * Construct a simple alias (type == SIMPLE); compoundFilter is aliased, not adopted.
45      * @param aliasID the alias string.
46      */
47     TransliteratorAlias(const UnicodeString& aliasID, const UnicodeSet* compoundFilter);
48 
49     /**
50      * Construct a compound RBT alias (type == COMPOUND).  adoptedTransliterators is adopted; compoundFilter is not.
51      */
52     TransliteratorAlias(const UnicodeString& ID, const UnicodeString& idBlocks,
53                         UVector* adoptedTransliterators,
54                         const UnicodeSet* compoundFilter);
55 
56     /**
57      * Construct a rules alias (type == RULES) from a rules string and a direction.
58      */
59     TransliteratorAlias(const UnicodeString& theID,
60                         const UnicodeString& rules,
61                         UTransDirection dir);
62 
63     ~TransliteratorAlias();
64 
65     /**
66      * Create the actual transliterator for this alias.  The whole point
67      * of create() is that the caller must invoke it when the registry
68      * mutex is NOT held, to prevent deadlock.  It may only be called once.
69      *
70      * Note: Only call create() if isRuleBased() returns FALSE.
71      *
72      * This method must be called *outside* of the TransliteratorRegistry
73      * mutex.
74      */
75     Transliterator* create(UParseError&, UErrorCode&);
76 
77     /**
78      * Return TRUE if this alias is rule-based.  If so, the caller
79      * must call parse() on it, then call TransliteratorRegistry::reget().
80      */
81     UBool isRuleBased() const;
82 
83     /**
84      * If isRuleBased() returns TRUE, then the caller must call this
85      * method, followed by TransliteratorRegistry::reget().  The latter
86      * method must be called inside the TransliteratorRegistry mutex.
87      *
88      * Note: Only call parse() if isRuleBased() returns TRUE.
89      *
90      * This method must be called *outside* of the TransliteratorRegistry
91      * mutex, because it can instantiate Transliterators embedded in
92      * the rules via the "&Latin-Arabic()" syntax.
93      */
94     void parse(TransliteratorParser& parser,
95                UParseError& pe, UErrorCode& ec) const;
96 
97  private:
98     // We actually come in three flavors, recorded in the `type` member.
99     // NOTE(review): member roles below are inferred from names -- confirm against the implementation.
100     // 1. Simple alias
101     //    Here aliasesOrRules is the alias string.
102     // 2. CompoundRBT
103     //    Here ID is the ID, aliasesOrRules is the idBlocks string, and
104     //    transes holds the adopted transliterators (owned by this object).
105     //    compoundFilter is the
106     //    compound filter, and it is _not_ owned.
107     // 3. Rules
108     //    Here ID is the ID, aliasesOrRules is the rules string, and
109     //    direction is the UTransDirection.
110     UnicodeString ID;
111     UnicodeString aliasesOrRules; // alias string, ID blocks, or rules text, depending on type (presumably -- confirm)
112     UVector* transes; // owned
113     const UnicodeSet* compoundFilter; // alias (borrowed pointer, not owned)
114     UTransDirection direction;
115     enum { SIMPLE, COMPOUND, RULES } type; // which of the three flavors this object is
116 
117     TransliteratorAlias(const TransliteratorAlias &other); // forbid copying of this class
118     TransliteratorAlias &operator=(const TransliteratorAlias &other); // forbid copying of this class
119 };
120 
121 
122 /**
123  * A registry of system transliterators.  This is the data structure
124  * that implements the mapping between transliterator IDs and the data
125  * or function pointers used to create the corresponding
126  * transliterators.  There is one instance of the registry that is
127  * created statically.
128  *
129  * The registry consists of a dynamic component -- a hashtable -- and
130  * a static component -- locale resource bundles.  The dynamic store
131  * is semantically overlaid on the static store, so the static mapping
132  * can be dynamically overridden.
133  *
134  * This is an internal class that is only used by Transliterator.
135  * Transliterator maintains one static instance of this class and
136  * delegates all registry-related operations to it.
137  *
138  * @author Alan Liu
139  */
140 class TransliteratorRegistry : public UMemory {
141 
142  public:
143 
144     /**
145      * Constructor
146      * @param status Output param set to success/failure code.
147      */
148     TransliteratorRegistry(UErrorCode& status);
149 
150     /**
151      * Nonvirtual destructor -- this class is not subclassable.
152      */
153     ~TransliteratorRegistry();
154 
155     //------------------------------------------------------------------
156     // Basic public API
157     //------------------------------------------------------------------
158 
159     /**
160      * Given a simple ID (forward direction, no inline filter, not
161      * compound) attempt to instantiate it from the registry.  Return
162      * 0 on failure.
163      *
164      * Return a non-NULL aliasReturn value if the ID points to an alias.
165      * We cannot instantiate it ourselves because the alias may contain
166      * filters or compounds, which we do not understand.  Caller should
167      * make aliasReturn NULL before calling.
168      * @param ID          the given ID
169      * @param aliasReturn output param to receive TransliteratorAlias;
170      *                    should be NULL on entry
171      * @param status      Output param set to success/failure code.
172      * @return            the instantiated transliterator, or 0 on failure;
173      *                    check aliasReturn as described above.
174      */
175     Transliterator* get(const UnicodeString& ID,
176                         TransliteratorAlias*& aliasReturn,
177                         UErrorCode& status);
178 
179     /**
180      * The caller must call this after calling get(), if [a] calling get()
181      * returns an alias, and [b] the alias is rule based.  In that
182      * situation the caller must call alias->parse() to do the parsing
183      * OUTSIDE THE REGISTRY MUTEX, then call this method to retry
184      * instantiating the transliterator.
185      *
186      * Note: Another alias might be returned by this method.
187      *
188      * This method (like all public methods of this class) must be called
189      * from within the TransliteratorRegistry mutex.
190      *
191      * @param aliasReturn output param to receive TransliteratorAlias;
192      *                    should be NULL on entry
193      */
194     Transliterator* reget(const UnicodeString& ID,
195                           TransliteratorParser& parser,
196                           TransliteratorAlias*& aliasReturn,
197                           UErrorCode& status);
198 
199     /**
200      * Register a prototype (adopted).  This adds an entry to the
201      * dynamic store, or replaces an existing entry.  Any entry in the
202      * underlying static locale resource store is masked.
203      */
204     void put(Transliterator* adoptedProto,
205              UBool visible,
206              UErrorCode& ec);
207 
208     /**
209      * Register an ID and a factory function pointer.  This adds an
210      * entry to the dynamic store, or replaces an existing entry.  Any
211      * entry in the underlying static locale resource store is masked.
212      */
213     void put(const UnicodeString& ID,
214              Transliterator::Factory factory,
215              Transliterator::Token context,
216              UBool visible,
217              UErrorCode& ec);
218 
219     /**
220      * Register an ID and a resource name.  This adds an entry to the
221      * dynamic store, or replaces an existing entry.  Any entry in the
222      * underlying static locale resource store is masked.
223      */
224     void put(const UnicodeString& ID,
225              const UnicodeString& resourceName,
226              UTransDirection dir,
227              UBool readonlyResourceAlias,
228              UBool visible,
229              UErrorCode& ec);
230 
231     /**
232      * Register an ID and an alias ID.  This adds an entry to the
233      * dynamic store, or replaces an existing entry.  Any entry in the
234      * underlying static locale resource store is masked.
235      */
236     void put(const UnicodeString& ID,
237              const UnicodeString& alias,
238              UBool readonlyAliasAlias,
239              UBool visible,
240              UErrorCode& ec);
241 
242     /**
243      * Unregister an ID.  This removes an entry from the dynamic store
244      * if there is one.  The static locale resource store is
245      * unaffected.
246      * @param ID    the given ID.
247      */
248     void remove(const UnicodeString& ID);
249 
250     //------------------------------------------------------------------
251     // Public ID and spec management
252     //------------------------------------------------------------------
253 
254     /**
255      * Return a StringEnumeration over the IDs currently registered
256      * with the system.
257      * @internal
258      */
259     StringEnumeration* getAvailableIDs() const;
260 
261     /**
262      * == OBSOLETE - remove in ICU 3.4 ==
263      * Return the number of IDs currently registered with the system.
264      * To retrieve the actual IDs, call getAvailableID(i) with
265      * i from 0 to countAvailableIDs() - 1.
266      * @return the number of IDs currently registered with the system.
267      * @internal
268      */
269     int32_t countAvailableIDs(void) const;
270 
271     /**
272      * == OBSOLETE - remove in ICU 3.4 ==
273      * Return the index-th available ID.  index must be between 0
274      * and countAvailableIDs() - 1, inclusive.  If index is out of
275      * range, the result of getAvailableID(0) is returned.
276      * @param index the given index.
277      * @return the index-th available ID.  index must be between 0
278      *         and countAvailableIDs() - 1, inclusive.  If index is out of
279      *         range, the result of getAvailableID(0) is returned.
280      * @internal
281      */
282     const UnicodeString& getAvailableID(int32_t index) const;
283 
284     /**
285      * Return the number of registered source specifiers.
286      * @return the number of registered source specifiers.
287      */
288     int32_t countAvailableSources(void) const;
289 
290     /**
291      * Return a registered source specifier.
292      * @param index which specifier to return, from 0 to n-1, where
293      * n = countAvailableSources()
294      * @param result fill-in parameter to receive the source specifier.
295      * If index is out of range, result will be empty.
296      * @return reference to result
297      */
298     UnicodeString& getAvailableSource(int32_t index,
299                                       UnicodeString& result) const;
300 
301     /**
302      * Return the number of registered target specifiers for a given
303      * source specifier.
304      * @param source the given source specifier.
305      * @return the number of registered target specifiers for a given
306      *         source specifier.
307      */
308     int32_t countAvailableTargets(const UnicodeString& source) const;
309 
310     /**
311      * Return a registered target specifier for a given source.
312      * @param index which specifier to return, from 0 to n-1, where
313      * n = countAvailableTargets(source)
314      * @param source the source specifier
315      * @param result fill-in parameter to receive the target specifier.
316      * If source is invalid or if index is out of range, result will
317      * be empty.
318      * @return reference to result
319      */
320     UnicodeString& getAvailableTarget(int32_t index,
321                                       const UnicodeString& source,
322                                       UnicodeString& result) const;
323 
324     /**
325      * Return the number of registered variant specifiers for a given
326      * source-target pair.  There is always at least one variant: If
327      * just source-target is registered, then the single variant
328      * NO_VARIANT is returned.  If source-target/variant is registered
329      * then that variant is returned.
330      * @param source the source specifier
331      * @param target the target specifier
332      * @return the number of registered variant specifiers for a given
333      *         source-target pair.
334      */
335     int32_t countAvailableVariants(const UnicodeString& source,
336                                    const UnicodeString& target) const;
337 
338     /**
339      * Return a registered variant specifier for a given source-target
340      * pair.  If NO_VARIANT is one of the variants, then it will be
341      * at index 0.
342      * @param index which specifier to return, from 0 to n-1, where
343      * n = countAvailableVariants(source, target)
344      * @param source the source specifier
345      * @param target the target specifier
346      * @param result fill-in parameter to receive the variant
347      * specifier.  If source is invalid or if target is invalid or if
348      * index is out of range, result will be empty.
349      * @return reference to result
350      */
351     UnicodeString& getAvailableVariant(int32_t index,
352                                        const UnicodeString& source,
353                                        const UnicodeString& target,
354                                        UnicodeString& result) const;
355 
356  private:
357 
358     //----------------------------------------------------------------
359     // Private implementation (entry lookup, registration, instantiation)
360     //----------------------------------------------------------------
361 
362     TransliteratorEntry* find(const UnicodeString& ID);
363 
364     TransliteratorEntry* find(UnicodeString& source, // NOTE(review): non-const refs; the implementation may modify them -- confirm
365                 UnicodeString& target,
366                 UnicodeString& variant);
367 
368     TransliteratorEntry* findInDynamicStore(const TransliteratorSpec& src,
369                               const TransliteratorSpec& trg,
370                               const UnicodeString& variant) const;
371 
372     TransliteratorEntry* findInStaticStore(const TransliteratorSpec& src, // non-const, unlike findInDynamicStore()
373                              const TransliteratorSpec& trg,
374                              const UnicodeString& variant);
375 
376     static TransliteratorEntry* findInBundle(const TransliteratorSpec& specToOpen,
377                                const TransliteratorSpec& specToFind,
378                                const UnicodeString& variant,
379                                UTransDirection direction);
380 
381     void registerEntry(const UnicodeString& source,
382                        const UnicodeString& target,
383                        const UnicodeString& variant,
384                        TransliteratorEntry* adopted, // presumably ownership passes to the registry (per the name) -- confirm
385                        UBool visible);
386 
387     void registerEntry(const UnicodeString& ID,
388                        TransliteratorEntry* adopted,
389                        UBool visible);
390 
391     void registerEntry(const UnicodeString& ID,
392                        const UnicodeString& source,
393                        const UnicodeString& target,
394                        const UnicodeString& variant,
395                        TransliteratorEntry* adopted,
396                        UBool visible);
397 
398     void registerSTV(const UnicodeString& source, // STV: source, target, variant (the three parameters)
399                      const UnicodeString& target,
400                      const UnicodeString& variant);
401 
402     void removeSTV(const UnicodeString& source,
403                    const UnicodeString& target,
404                    const UnicodeString& variant);
405 
406     Transliterator* instantiateEntry(const UnicodeString& ID,
407                                      TransliteratorEntry *entry,
408                                      TransliteratorAlias*& aliasReturn,
409                                      UErrorCode& status);
410 
411     /**
412      * A StringEnumeration over the registered IDs in this object.
413      */
414     class Enumeration : public StringEnumeration {
415     public:
416         Enumeration(const TransliteratorRegistry& reg);
417         virtual ~Enumeration();
418         virtual int32_t count(UErrorCode& status) const;
419         virtual const UnicodeString* snext(UErrorCode& status);
420         virtual void reset(UErrorCode& status);
421         static UClassID U_EXPORT2 getStaticClassID();
422         virtual UClassID getDynamicClassID() const;
423     private:
424         int32_t index; // current position in the enumeration (presumably into reg.availableIDs -- confirm)
425         const TransliteratorRegistry& reg; // registry being enumerated; held by reference, so it must outlive this object
426     };
427     friend class Enumeration; // gives the nested enumeration access to the private members of the registry
428 
429  private:
430 
431     /**
432      * Dynamic registry mapping full IDs to Entry objects.  This
433      * contains both public and internal entities.  The visibility is
434      * controlled by whether an entry is listed in availableIDs and
435      * specDAG or not.
436      */
437     Hashtable registry;
438 
439     /**
440      * DAG of visible IDs by spec.  Hashtable: source => (Hashtable:
441      * target => (UVector: variant)) The UVector of variants is never
442      * empty.  For a source-target with no variant, the special
443      * variant NO_VARIANT (the empty string) is stored in slot zero of
444      * the UVector.
445      */
446     Hashtable specDAG;
447 
448     /**
449      * Vector of public full IDs.
450      */
451     UVector availableIDs;
452 
453     TransliteratorRegistry(const TransliteratorRegistry &other); // forbid copying of this class
454     TransliteratorRegistry &operator=(const TransliteratorRegistry &other); // forbid copying of this class
455 };
456 
457 U_NAMESPACE_END
458 
459 U_CFUNC UBool utrans_transliterator_cleanup(void); // defined outside this header; NOTE(review): presumably the library-cleanup hook for transliterator statics -- confirm
460 
461 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
462 
463 #endif
464 //eof
465