1 /*
2  ***************************************************************************
3  * Copyright (C) 2008-2009, International Business Machines Corporation
4  * and others. All Rights Reserved.
5  ***************************************************************************
6  *   file name:  uspoof_build.cpp
7  *   encoding:   US-ASCII
8  *   tab size:   8 (not used)
9  *   indentation:4
10  *
11  *   created on: 2008 Dec 8
12  *   created by: Andy Heninger
13  *
14  *   Unicode Spoof Detection Data Builder
15  *   Builder-related functions are kept in separate files so that applications not needing
16  *   the builder can more easily exclude them, typically by means of static linking.
17  *
18  *   There are three relatively independent sets of Spoof data,
19  *      Confusables,
20  *      Whole Script Confusables
21  *      ID character extensions.
22  *
23  *   The data tables for each are built separately, each from its own definitions
24  */
25 
26 #include "unicode/utypes.h"
27 #include "unicode/uspoof.h"
28 #include "unicode/unorm.h"
29 #include "unicode/uregex.h"
30 #include "unicode/ustring.h"
31 #include "cmemory.h"
32 #include "uspoof_impl.h"
33 #include "uhash.h"
34 #include "uvector.h"
35 #include "uassert.h"
36 #include "uarrsort.h"
37 #include "uspoof_conf.h"
38 #include "uspoof_wsconf.h"
39 
40 #if !UCONFIG_NO_NORMALIZATION
41 
42 U_NAMESPACE_USE
43 
44 
45 // The main data building function
46 
47 U_CAPI USpoofChecker * U_EXPORT2
uspoof_openFromSource(const char * confusables,int32_t confusablesLen,const char * confusablesWholeScript,int32_t confusablesWholeScriptLen,int32_t * errorType,UParseError * pe,UErrorCode * status)48 uspoof_openFromSource(const char *confusables,  int32_t confusablesLen,
49                       const char *confusablesWholeScript, int32_t confusablesWholeScriptLen,
50                       int32_t *errorType, UParseError *pe, UErrorCode *status) {
51 
52     if (U_FAILURE(*status)) {
53         return NULL;
54     }
55 #if UCONFIG_NO_REGULAR_EXPRESSIONS
56     *status = U_UNSUPPORTED_ERROR;
57     return NULL;
58 #else
59     if (errorType!=NULL) {
60         *errorType = 0;
61     }
62     if (pe != NULL) {
63         pe->line = 0;
64         pe->offset = 0;
65         pe->preContext[0] = 0;
66         pe->postContext[0] = 0;
67     }
68 
69     // Set up a shell of a spoof detector, with empty data.
70     SpoofData *newSpoofData = new SpoofData(*status);
71     SpoofImpl *This = new SpoofImpl(newSpoofData, *status);
72 
73     // Compile the binary data from the source (text) format.
74     ConfusabledataBuilder::buildConfusableData(This, confusables, confusablesLen, errorType, pe, *status);
75     buildWSConfusableData(This, confusablesWholeScript, confusablesWholeScriptLen, pe, *status);
76 
77     if (U_FAILURE(*status)) {
78         delete This;
79         This = NULL;
80     }
81     return (USpoofChecker *)This;
82 #endif // UCONFIG_NO_REGULAR_EXPRESSIONS
83 }
84 
85 #endif
86