1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (C) 1999-2007, International Business Machines Corporation
6 * and others. All Rights Reserved.
7 **********************************************************************
8 *   Date        Name        Description
9 *   11/17/99    aliu        Creation.
10 **********************************************************************
11 */
12 #ifndef RBT_SET_H
13 #define RBT_SET_H
14 
15 #include "unicode/utypes.h"
16 
17 #if !UCONFIG_NO_TRANSLITERATION
18 
19 #include "unicode/uobject.h"
20 #include "unicode/utrans.h"
21 #include "uvector.h"
22 
23 U_NAMESPACE_BEGIN
24 
25 class Replaceable;              // named in the declarations below only as Replaceable&
26 class TransliterationRule;      // named in the declarations below only through pointers
27 class TransliterationRuleData;  // named in the declarations below only as a const pointer
28 class UnicodeFilter;            // not referenced by the declarations below
29 class UnicodeString;            // named in the declarations below only as UnicodeString&
30 class UnicodeSet;               // named in the declarations below only as UnicodeSet&
31 
32 /**
33  * An ordered set of rules for a <code>RuleBasedTransliterator</code>: add rules, freeze(), then use.
34  * @author Alan Liu
35  */
36 class TransliterationRuleSet : public UMemory {
37     /**
38      * Rules in the order they were added; used while the rule set
39      * is being built.  Afterwards freeze() reorders and indexes the
40      * rules into rules[].  Each rule is stored exactly once here and
41      * one or more times in rules[].  ruleVector owns the rules and
42      * is the one that deletes them.
43      */
44     UVector* ruleVector;
45 
46     /**
47      * Sorted and indexed table of rules, created by freeze() from the
48      * rules in ruleVector.  Its entries are alias (non-owning) pointers
49      * to the rules in ruleVector.  The pointer is zero before freeze()
50      * is called and non-zero thereafter.
51      */
52     TransliterationRule** rules;
53 
54     /**
55      * Index table.  For text having a first character c, compute x = c&0xFF
56      * and use rules[index[x]..index[x+1]-1].  x is in 0..255 and index[x+1] is
57      * read, hence 257 entries.  Created by freeze(); contains garbage before that.
58      */
59     int32_t index[257];
60 
61     /**
62      * Length of the longest preceding context.
63      */
64     int32_t maxContextLength;
65 
66 public:
67 
68     /**
69      * Constructs a new, empty rule set.
70      * @param status    Output parameter; receives the success or failure status.
71      */
72     TransliterationRuleSet(UErrorCode& status);
73 
74     /**
75      * Copy constructor.  (Copy assignment, by contrast, is declared private.)
76      */
77     TransliterationRuleSet(const TransliterationRuleSet&);
78 
79     /**
80      * Destructor (declared virtual).
81      */
82     virtual ~TransliterationRuleSet();
83 
84     /**
85      * Change the data object that this rule set belongs to.  Used
86      * internally by the TransliterationRuleData copy constructor.
87      * @param data    the new data object to be set.
88      */
89     void setData(const TransliterationRuleData* data);
90 
91     /**
92      * Returns the maximum context length.
93      * @return the length of the longest preceding context.
94      */
95     virtual int32_t getMaximumContextLength(void) const;
96 
97     /**
98      * Adds a rule to this set.  Rules are added in order, and that
99      * order is significant.  The last call to this method must be
100      * followed by <code>freeze()</code> before the rule set is used, and
101      * this method must <em>not</em> be called after freeze() has been called.
102      * NOTE(review): the freeze() comment instead permits addRule() followed by a re-freeze; confirm.
103      * @param adoptedRule the rule to add; per the ruleVector comment, the set owns its rules
104      * @param status      error status argument (UErrorCode&); presumably set on failure - confirm
105      */
106     virtual void addRule(TransliterationRule* adoptedRule,
107                          UErrorCode& status);
108 
109     /**
110      * Check this set for masked rules and index it to optimize performance.
111      * The sequence of operations is: (1) add rules to the set using
112      * <code>addRule()</code>; (2) freeze the set using
113      * <code>freeze()</code>; (3) use the rule set.  If
114      * <code>addRule()</code> is called after calling this method, it
115      * invalidates this object, and this method must be called again.
116      * That is, <code>freeze()</code> may be called multiple times,
117      * although for optimal performance it shouldn't be.
118      * @param parseError A reference to a UParseError that receives information about
119      *                   any error that occurred.
120      * @param status     Output parameter filled in with success or failure status.
121      */
122     virtual void freeze(UParseError& parseError, UErrorCode& status);
123 
124     /**
125      * Transliterate the given text with the given UTransPosition
126      * indices.  Returns TRUE if the transliteration should continue
127      * or FALSE if it should halt (because of a U_PARTIAL_MATCH match).
128      * Note that FALSE is only ever returned if isIncremental is TRUE.
129      * @param text the text to be transliterated
130      * @param index the position indices, which will be updated
131      * @param isIncremental if TRUE, assume new text may be inserted
132      * at index.limit, and return FALSE if there is a partial match.
133      * @return TRUE unless a U_PARTIAL_MATCH has been obtained,
134      * indicating that transliteration should stop until more text
135      * arrives.
136      */
137     UBool transliterate(Replaceable& text,
138                         UTransPosition& index,
139                         UBool isIncremental);
140 
141     /**
142      * Create rule strings that represent this rule set.
143      * @param result string to receive the rule strings.  Current
144      * contents will be deleted.
145      * @param escapeUnprintable  if true, unprintable characters are escaped
146      * @return    A reference to 'result'.
147      */
148     virtual UnicodeString& toRules(UnicodeString& result,
149                                    UBool escapeUnprintable) const;
150 
151     /**
152      * Return the set of all characters that may be modified (getTarget=false)
153      * or emitted (getTarget=true) by this rule set; presumably stored into and returned via 'result' (confirm).
154      */
155     UnicodeSet& getSourceTargetSet(UnicodeSet& result,
156                    UBool getTarget) const;
157 
158 private:
159 
160     TransliterationRuleSet &operator=(const TransliterationRuleSet &other); // declared private: forbids copy assignment from outside the class
161 };
162 
163 U_NAMESPACE_END
164 
165 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
166 
167 #endif
168