1 /*
2 **********************************************************************
3 * Copyright (C) 1999-2007, International Business Machines Corporation
4 * and others. All Rights Reserved.
5 **********************************************************************
6 *   Date        Name        Description
7 *   11/17/99    aliu        Creation.
8 **********************************************************************
9 */
10 #ifndef RBT_SET_H
11 #define RBT_SET_H
12 
13 #include "unicode/utypes.h"
14 
15 #if !UCONFIG_NO_TRANSLITERATION
16 
17 #include "unicode/uobject.h"
18 #include "unicode/utrans.h"
19 #include "uvector.h"
20 
21 U_NAMESPACE_BEGIN
22 
23 class Replaceable;
24 class TransliterationRule;
25 class TransliterationRuleData;
26 class UnicodeFilter;
27 class UnicodeString;
28 class UnicodeSet;
29 
30 /**
31  * A set of rules for a <code>RuleBasedTransliterator</code>.
    *
    * Intended sequence of use, as described on freeze(): (1) add rules
    * with addRule(); (2) call freeze() to check and index them; (3) use
    * the rule set.
32  * @author Alan Liu
33  */
34 class TransliterationRuleSet : public UMemory {
35     /**
36      * Vector of rules, in the order added.  This is used while the
37      * rule set is getting built.  After that, freeze() reorders and
38      * indexes the rules into rules[].  Any given rule is stored once
39      * in ruleVector, and one or more times in rules[].  ruleVector
40      * owns and deletes the rules.
41      */
42     UVector* ruleVector;
43 
44     /**
45      * Sorted and indexed table of rules.  This is created by freeze()
46      * from the rules in ruleVector.  It contains alias pointers to
47      * the rules in ruleVector.  It is zero before freeze() is called
48      * and non-zero thereafter.
49      */
50     TransliterationRule** rules;
51 
52     /**
53      * Index table.  For text having a first character c, compute x = c&0xFF.
54      * Now use rules[index[x]..index[x+1]-1].  This index table is created by
55      * freeze().  Before freeze() is called it contains garbage.
        * The table has 257 entries because x ranges over 0..0xFF and
        * index[x+1] is read as the end of each range, so index[256] must
        * exist.
56      */
57     int32_t index[257];
58 
59     /**
60      * Length of the longest preceding context
        * (presumably the value reported by getMaximumContextLength();
        * the definition is not part of this header).
61      */
62     int32_t maxContextLength;
63 
64 public:
65 
66     /**
67      * Construct a new empty rule set.
68      * @param status    Output parameter filled in with success or failure status.
69      */
70     TransliterationRuleSet(UErrorCode& status);
71 
72     /**
73      * Copy constructor.
        * The (unnamed) parameter is the rule set to copy from.
        * NOTE(review): how deeply the rules are copied is decided by the
        * implementation, which is not visible in this header.
74      */
75     TransliterationRuleSet(const TransliterationRuleSet&);
76 
77     /**
78      * Destructor.
79      */
80     virtual ~TransliterationRuleSet();
81 
82     /**
83      * Change the data object that this rule belongs to.  Used
84      * internally by the TransliterationRuleData copy constructor.
85      * @param data    the new data value to be set.
86      */
87     void setData(const TransliterationRuleData* data);
88 
89     /**
90      * Return the maximum context length.
91      * @return the length of the longest preceding context.
92      */
93     virtual int32_t getMaximumContextLength(void) const;
94 
95     /**
96      * Add a rule to this set.  Rules are added in order, and order is
97      * significant.  The last call to this method must be followed by
98      * a call to <code>freeze()</code> before the rule set is used.
99      * This method must <em>not</em> be called after freeze() has been
100      * called.
101      *
         * NOTE(review): the documentation of freeze() states that
         * addRule() may be called after freeze() provided freeze() is
         * then called again, which conflicts with the prohibition above.
         * Confirm which contract the implementation honors.
         *
102      * @param adoptedRule the rule to add.  The parameter name and the
         * ruleVector documentation suggest this set takes ownership of
         * the rule -- confirm against the implementation.
         * @param status ICU error code; presumably set to a failure value
         * if the rule cannot be added (implementation not visible here).
103      */
104     virtual void addRule(TransliterationRule* adoptedRule,
105                          UErrorCode& status);
106 
107     /**
108      * Check this for masked rules and index it to optimize performance.
109      * The sequence of operations is: (1) add rules to a set using
110      * <code>addRule()</code>; (2) freeze the set using
111      * <code>freeze()</code>; (3) use the rule set.  If
112      * <code>addRule()</code> is called after calling this method, it
113      * invalidates this object, and this method must be called again.
114      * That is, <code>freeze()</code> may be called multiple times,
115      * although for optimal performance it shouldn't be.
116      * @param parseError A reference to a UParseError that receives information
117      *                   about any errors that occurred.
118      * @param status     Output parameter filled in with success or failure status.
119      */
120     virtual void freeze(UParseError& parseError, UErrorCode& status);
121 
122     /**
123      * Transliterate the given text with the given UTransPosition
124      * indices.  Return TRUE if the transliteration should continue
125      * or FALSE if it should halt (because of a U_PARTIAL_MATCH match).
126      * Note that FALSE is only ever returned if isIncremental is TRUE.
127      * @param text the text to be transliterated
128      * @param index the position indices, which will be updated
129      * @param isIncremental if TRUE, assume new text may be inserted
130      * at index.limit, and return FALSE if there is a partial match.
131      * @return TRUE unless a U_PARTIAL_MATCH has been obtained,
132      * indicating that transliteration should stop until more text
133      * arrives.
134      */
135     UBool transliterate(Replaceable& text,
136                         UTransPosition& index,
137                         UBool isIncremental);
138 
139     /**
140      * Create rule strings that represent this rule set.
141      * @param result string to receive the rule strings.  Current
142      * contents will be deleted.
143      * @param escapeUnprintable  if TRUE, unprintable characters are escaped
144      * @return    A reference to 'result'.
145      */
146     virtual UnicodeString& toRules(UnicodeString& result,
147                                    UBool escapeUnprintable) const;
148 
149     /**
150      * Return the set of all characters that may be modified
151      * (getTarget=false) or emitted (getTarget=true) by this set.
         * @param result presumably the set that receives the characters.
         * NOTE(review): whether its prior contents are cleared or kept
         * is not visible in this header.
         * @param getTarget selects the emitted (target) characters when
         * true, the modifiable (source) characters when false.
         * @return a UnicodeSet reference, presumably 'result' -- confirm
         * against the implementation.
152      */
153     UnicodeSet& getSourceTargetSet(UnicodeSet& result,
154                    UBool getTarget) const;
155 
156 private:
157 
158     TransliterationRuleSet &operator=(const TransliterationRuleSet &other); // forbid assignment: declared private (copy construction remains public)
159 };
160 
161 U_NAMESPACE_END
162 
163 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
164 
165 #endif
166