1 /*
2 **********************************************************************
3 * Copyright (C) 1999-2007, International Business Machines Corporation
4 * and others. All Rights Reserved.
5 **********************************************************************
6 *   Date        Name        Description
7 *   11/17/99    aliu        Creation.
8 **********************************************************************
9 */
10 #ifndef RBT_DATA_H
11 #define RBT_DATA_H
12 
13 #include "unicode/utypes.h"
14 #include "unicode/uclean.h"
15 
16 #if !UCONFIG_NO_TRANSLITERATION
17 
18 #include "unicode/uobject.h"
19 #include "rbt_set.h"
20 #include "hash.h"
21 
22 U_NAMESPACE_BEGIN
23 
24 class UnicodeFunctor;
25 class UnicodeMatcher;
26 class UnicodeReplacer;
27 
28 /**
29  * The rule data for a RuleBasedTransliterator.  RBT objects hold
30  * a const pointer to a TRD object that they do not own.  TRD objects
31  * are essentially the parsed rules in compact, usable form.  The
32  * TRD objects themselves are held for the life of the process in
33  * a static cache owned by Transliterator.
34  *
35  * This class's API is a little asymmetric.  There is a method to define
36  * a variable, but no way to define a set.  This is because the sets are
37  * defined by the parser in a UVector, and the vector is copied into a
38  * fixed-size array here.  Once this is done, no new sets may be defined.
39  * In practice, there is no need to do so, since generating the data and
40  * using it are discrete phases.  When there is a need to access the set
41  * data during the parse phase, another data structure handles this.  See
42  * the parsing code for more details.  NOTE(review): no variable-defining
43  * method is declared below; the first claim may be stale -- confirm.
44  */
45 class TransliterationRuleData : public UMemory {
46 
47 public:
48 
49     // PUBLIC DATA MEMBERS
50 
51     /**
52      * Rule table.  May be empty.
53      */
54     TransliterationRuleSet ruleSet;
55 
56     /**
57      * Map variable name (String) to variable (UnicodeString).  A variable
58      * name corresponds to zero or more characters, stored in a
59      * UnicodeString in this hash.  One or more of these chars may also
60      * correspond to a UnicodeMatcher, in which case the character in the
61      * UnicodeString in this hash is a stand-in: it is an index for a
62      * secondary lookup in the variables array.  The stand-in also
63      * represents the UnicodeMatcher in the stored rules.
64      */
65     Hashtable variableNames;
66 
67     /**
68      * Map category variable (UChar) to set (UnicodeFunctor).  Variables
69      * that correspond to a set of characters are mapped from variable
70      * name to a stand-in character in variableNames.  The stand-in then
71      * serves as an index into this array (offset by variablesBase) to
72      * look up the actual UnicodeFunctor object.  In addition, the
73      * stand-in is stored in the rule text to represent the set of
74      * characters.  variables[i] represents character (variablesBase + i).
75      */
76     UnicodeFunctor** variables;
77 
78     /**
79      * Flag that indicates whether the variables are owned.  If a single
80      * call to Transliterator::createFromRules() produces a
81      * CompoundTransliterator with more than one RuleBasedTransliterator as
82      * children, they all share the same variables list, so only the first
83      * one is considered to own the variables.
84      */
85     UBool variablesAreOwned;
86 
87     /**
88      * The character that represents variables[0].  Characters
89      * variablesBase through variablesBase +
90      * variablesLength - 1 represent UnicodeFunctor objects.
91      */
92     UChar variablesBase;
93 
94     /**
95      * The number of entries in the variables array.
96      */
97     int32_t variablesLength;
98 
99 public:
100     // PUBLIC METHODS
101     /**
102      * Constructor
103      * @param status Output param set to success/failure code on exit.
104      */
105     TransliterationRuleData(UErrorCode& status);
106 
107     /**
108      * Copy constructor.  Public, although copy assignment is private.
109      */
110     TransliterationRuleData(const TransliterationRuleData&);
111 
112     /**
113      * Destructor.
114      */
115     ~TransliterationRuleData();
116 
117     /**
118      * Given a stand-in character, return the UnicodeFunctor that it
119      * represents, or NULL if it doesn't represent anything.
120      * @param standIn    the given stand-in character.
121      * @return           the UnicodeFunctor that 'standIn' represents, or NULL
122      */
123     UnicodeFunctor* lookup(UChar32 standIn) const;
124 
125     /**
126      * Given a stand-in character, return the UnicodeMatcher that it
127      * represents, or NULL if it doesn't represent anything or if it
128      * represents something that is not a matcher.
129      * @param standIn    the given stand-in character.
130      * @return           the UnicodeMatcher that 'standIn' represents, or NULL
131      */
132     UnicodeMatcher* lookupMatcher(UChar32 standIn) const;
133 
134     /**
135      * Given a stand-in character, return the UnicodeReplacer that it
136      * represents, or NULL if it doesn't represent anything or if it
137      * represents something that is not a replacer.
138      * @param standIn    the given stand-in character.
139      * @return           the UnicodeReplacer that 'standIn' represents, or NULL
140      */
141     UnicodeReplacer* lookupReplacer(UChar32 standIn) const;
142 
143 
144 private:
145     TransliterationRuleData &operator=(const TransliterationRuleData &other); // forbid copy assignment only; the copy constructor above remains public
146 };
147 
148 U_NAMESPACE_END
149 
150 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
151 
152 #endif
153