1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (C) 1999-2007, International Business Machines Corporation
6 * and others. All Rights Reserved.
7 **********************************************************************
8 *   Date        Name        Description
9 *   11/17/99    aliu        Creation.
10 **********************************************************************
11 */
12 #ifndef RBT_DATA_H
13 #define RBT_DATA_H
14 
15 #include "unicode/utypes.h"
16 #include "unicode/uclean.h"
17 
18 #if !UCONFIG_NO_TRANSLITERATION
19 
20 #include "unicode/uobject.h"
21 #include "rbt_set.h"
22 #include "hash.h"
23 
24 U_NAMESPACE_BEGIN
25 
26 class UnicodeFunctor;
27 class UnicodeMatcher;
28 class UnicodeReplacer;
29 
30 /**
31  * The rule data for a RuleBasedTransliterators.  RBT objects hold
32  * a const pointer to a TRD object that they do not own.  TRD objects
33  * are essentially the parsed rules in compact, usable form.  The
34  * TRD objects themselves are held for the life of the process in
35  * a static cache owned by Transliterator.
36  *
37  * This class' API is a little asymmetric.  There is a method to
38  * define a variable, but no way to define a set.  This is because the
39  * sets are defined by the parser in a UVector, and the vector is
40  * copied into a fixed-size array here.  Once this is done, no new
41  * sets may be defined.  In practice, there is no need to do so, since
42  * generating the data and using it are discrete phases.  When there
43  * is a need to access the set data during the parse phase, another
44  * data structure handles this.  See the parsing code for more
45  * details.
46  */
47 class TransliterationRuleData : public UMemory {
48 
49 public:
50 
51     // PUBLIC DATA MEMBERS
52 
53     /**
54      * Rule table.  May be empty.
55      */
56     TransliterationRuleSet ruleSet;
57 
58     /**
59      * Map variable name (String) to variable (UnicodeString).  A variable name
60      * corresponds to zero or more characters, stored in a UnicodeString in
61      * this hash.  One or more of these chars may also correspond to a
62      * UnicodeMatcher, in which case the character in the UnicodeString in this hash is
63      * a stand-in: it is an index for a secondary lookup in
64      * data.variables.  The stand-in also represents the UnicodeMatcher in
65      * the stored rules.
66      */
67     Hashtable variableNames;
68 
69     /**
70      * Map category variable (UChar) to set (UnicodeFunctor).
71      * Variables that correspond to a set of characters are mapped
72      * from variable name to a stand-in character in data.variableNames.
73      * The stand-in then serves as a key in this hash to lookup the
74      * actual UnicodeFunctor object.  In addition, the stand-in is
75      * stored in the rule text to represent the set of characters.
76      * variables[i] represents character (variablesBase + i).
77      */
78     UnicodeFunctor** variables;
79 
80     /**
81      * Flag that indicates whether the variables are owned (if a single
82      * call to Transliterator::createFromRules() produces a CompoundTransliterator
83      * with more than one RuleBasedTransliterator as children, they all share
84      * the same variables list, so only the first one is considered to own
85      * the variables)
86      */
87     UBool variablesAreOwned;
88 
89     /**
90      * The character that represents variables[0].  Characters
91      * variablesBase through variablesBase +
92      * variablesLength - 1 represent UnicodeFunctor objects.
93      */
94     UChar variablesBase;
95 
96     /**
97      * The length of variables.
98      */
99     int32_t variablesLength;
100 
101 public:
102 
103     /**
104      * Constructor
105      * @param status Output param set to success/failure code on exit.
106      */
107     TransliterationRuleData(UErrorCode& status);
108 
109     /**
110      * Copy Constructor
111      */
112     TransliterationRuleData(const TransliterationRuleData&);
113 
114     /**
115      * destructor
116      */
117     ~TransliterationRuleData();
118 
119     /**
120      * Given a stand-in character, return the UnicodeFunctor that it
121      * represents, or NULL if it doesn't represent anything.
122      * @param standIn    the given stand-in character.
123      * @return           the UnicodeFunctor that 'standIn' represents
124      */
125     UnicodeFunctor* lookup(UChar32 standIn) const;
126 
127     /**
128      * Given a stand-in character, return the UnicodeMatcher that it
129      * represents, or NULL if it doesn't represent anything or if it
130      * represents something that is not a matcher.
131      * @param standIn    the given stand-in character.
132      * @return           return the UnicodeMatcher that 'standIn' represents
133      */
134     UnicodeMatcher* lookupMatcher(UChar32 standIn) const;
135 
136     /**
137      * Given a stand-in character, return the UnicodeReplacer that it
138      * represents, or NULL if it doesn't represent anything or if it
139      * represents something that is not a replacer.
140      * @param standIn    the given stand-in character.
141      * @return           return the UnicodeReplacer that 'standIn' represents
142      */
143     UnicodeReplacer* lookupReplacer(UChar32 standIn) const;
144 
145 
146 private:
147     TransliterationRuleData &operator=(const TransliterationRuleData &other); // forbid copying of this class
148 };
149 
150 U_NAMESPACE_END
151 
152 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
153 
154 #endif
155