1 /*
2 ******************************************************************************
3 *
4 *   Copyright (C) 2008-2011, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 ******************************************************************************
8 *   file name:  uspoof_conf.h
9 *   encoding:   US-ASCII
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 2009Jan05
14 *   created by: Andy Heninger
15 *
16 *   Internal classes for compiling confusable data into its binary (runtime) form.
17 */
18 
19 #ifndef __USPOOF_BUILDCONF_H__
20 #define __USPOOF_BUILDCONF_H__
21 
22 #if !UCONFIG_NO_NORMALIZATION
23 
24 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
25 
26 #include "uspoof_impl.h"
27 
28 U_NAMESPACE_BEGIN
29 
30 // SPUString
31 //              Holds a string that is the result of one of the mappings defined
32 //              by the confusable mapping data (confusables.txt from Unicode.org)
33 //              Instances of SPUString exist during the compilation process only.
34 
35 struct SPUString : public UMemory {
36     UnicodeString  *fStr;             // The actual string.
37     int32_t         fStrTableIndex;   // Index into the final runtime data for this string.
38                                       //  (or, for length 1, the single string char itself,
39                                       //   there being no string table entry for it.)
40     SPUString(UnicodeString *s);
41     ~SPUString();
42 };
43 
44 
45 //  String Pool   A utility class for holding the strings that are the result of
//                the spoof mappings.  These strings will ultimately end up in the
47 //                run-time String Table.
48 //                This is sort of like a sorted set of strings, except that ICU's anemic
49 //                built-in collections don't support those, so it is implemented with a
50 //                combination of a uhash and a UVector.
51 
52 
53 class SPUStringPool : public UMemory {
54   public:
55     SPUStringPool(UErrorCode &status);
56     ~SPUStringPool();
57 
58     // Add a string. Return the string from the table.
59     // If the input parameter string is already in the table, delete the
60     //  input parameter and return the existing string.
61     SPUString *addString(UnicodeString *src, UErrorCode &status);
62 
63 
64     // Get the n-th string in the collection.
65     SPUString *getByIndex(int32_t i);
66 
67     // Sort the contents; affects the ordering of getByIndex().
68     void sort(UErrorCode &status);
69 
70     int32_t size();
71 
72   private:
73     UVector     *fVec;    // Elements are SPUString *
74     UHashtable  *fHash;   // Key: UnicodeString  Value: SPUString
75 };
76 
77 
78 // class ConfusabledataBuilder
79 //     An instance of this class exists while the confusable data is being built from source.
80 //     It encapsulates the intermediate data structures that are used for building.
81 //     It exports one static function, to do a confusable data build.
82 
83 class ConfusabledataBuilder : public UMemory {
84   private:
85     SpoofImpl  *fSpoofImpl;
86     UChar      *fInput;
87     UHashtable *fSLTable;
88     UHashtable *fSATable;
89     UHashtable *fMLTable;
90     UHashtable *fMATable;
91     UnicodeSet *fKeySet;     // A set of all keys (UChar32s) that go into the four mapping tables.
92 
93     // The binary data is first assembled into the following four collections, then
94     //   copied to its final raw-memory destination.
95     UVector            *fKeyVec;
96     UVector            *fValueVec;
97     UnicodeString      *fStringTable;
98     UVector            *fStringLengthsTable;
99 
100     SPUStringPool      *stringPool;
101     URegularExpression *fParseLine;
102     URegularExpression *fParseHexNum;
103     int32_t             fLineNum;
104 
105     ConfusabledataBuilder(SpoofImpl *spImpl, UErrorCode &status);
106     ~ConfusabledataBuilder();
107     void build(const char * confusables, int32_t confusablesLen, UErrorCode &status);
108 
109     // Add an entry to the key and value tables being built
110     //   input:  data from SLTable, MATable, etc.
111     //   outut:  entry added to fKeyVec and fValueVec
112     void addKeyEntry(UChar32     keyChar,     // The key character
113                      UHashtable *table,       // The table, one of SATable, MATable, etc.
114                      int32_t     tableFlag,   // One of USPOOF_SA_TABLE_FLAG, etc.
115                      UErrorCode &status);
116 
117     // From an index into fKeyVec & fValueVec
118     //   get a UnicodeString with the corresponding mapping.
119     UnicodeString getMapping(int32_t index);
120 
121     // Populate the final binary output data array with the compiled data.
122     void outputData(UErrorCode &status);
123 
124   public:
125     static void buildConfusableData(SpoofImpl *spImpl, const char * confusables,
126         int32_t confusablesLen, int32_t *errorType, UParseError *pe, UErrorCode &status);
127 };
128 U_NAMESPACE_END
129 
#endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS
#endif  // !UCONFIG_NO_NORMALIZATION
132 #endif  // __USPOOF_BUILDCONF_H__
133