1 /**
2  * Copyright (c) 1999-2015, International Business Machines Corporation and
3  * others. All Rights Reserved.
4  *
5  * Generator for source/i18n/collunsafe.h
6  * see Makefile
7  */
8 
9 #include <stdio.h>
10 #include "unicode/uversion.h"
11 #include "unicode/uniset.h"
12 #include "collationroot.h"
13 #include "collationtailoring.h"
14 
/**
 * Define the type of generator to use. Exactly one of these switches must be
 * set to 1 and the others to 0; the check below rejects any other combination
 * at compile time.
 */
#define SERIALIZE 1   //< Default: use UnicodeSet.serialize() and a new internal c'tor
#define RANGES 0      //< Enumerate ranges (works, not as fast. No support in collationdatareader.cpp)
#define PATTERN 0     //< Generate a UnicodeSet pattern (depends on #11891 AND probably slower. No support in collationdatareader.cpp)

#if (SERIALIZE + RANGES + PATTERN) != 1
#error "Enable exactly one of SERIALIZE, RANGES or PATTERN"
#endif
21 
main(int argc,const char * argv[])22 int main(int argc, const char *argv[]) {
23     UErrorCode errorCode = U_ZERO_ERROR;
24 
25     // Get the unsafeBackwardsSet
26     const CollationCacheEntry *rootEntry = CollationRoot::getRootCacheEntry(errorCode);
27     if(U_FAILURE(errorCode)) {
28       fprintf(stderr, "Err: %s getting root cache entry\n", u_errorName(errorCode));
29       return 1;
30     }
31     const UVersionInfo &version = rootEntry->tailoring->version;
32     const UnicodeSet *unsafeBackwardSet = rootEntry->tailoring->unsafeBackwardSet;
33     char verString[20];
34     u_versionToString(version, verString);
35     fprintf(stderr, "Generating data for ICU %s, Collation %s\n", U_ICU_VERSION, verString);
36     int32_t rangeCount = unsafeBackwardSet->getRangeCount();
37 
38 #if SERIALIZE
39     fprintf(stderr, ".. serializing\n");
40     // UnicodeSet serialization
41 
42     UErrorCode preflightCode = U_ZERO_ERROR;
43     // preflight
44     int32_t serializedCount = unsafeBackwardSet->serialize(NULL,0,preflightCode);
45     if(U_FAILURE(preflightCode) && preflightCode != U_BUFFER_OVERFLOW_ERROR) {
46       fprintf(stderr, "Err: %s preflighting unicode set\n", u_errorName(preflightCode));
47       return 1;
48     }
49     uint16_t *serializedData = new uint16_t[serializedCount];
50     // serialize
51     unsafeBackwardSet->serialize(serializedData, serializedCount, errorCode);
52     if(U_FAILURE(errorCode)) {
53       delete [] serializedData;
54       fprintf(stderr, "Err: %s serializing unicodeset\n", u_errorName(errorCode));
55       return 1;
56     }
57 #endif
58 
59 #if PATTERN
60     fprintf(stderr,".. pattern. (Note: collationdatareader.cpp does not support this form also see #11891)\n");
61     // attempt to use pattern
62 
63     UnicodeString pattern;
64     UnicodeSet set(*unsafeBackwardSet);
65     set.compact();
66     set.toPattern(pattern, FALSE);
67 
68     if(U_SUCCESS(errorCode)) {
69       // This fails (bug# ?) - which is why this method was abandoned.
70 
71       // UnicodeSet usA(pattern, errorCode);
72       // fprintf(stderr, "\n%s:%d: err creating set A %s\n", __FILE__, __LINE__, u_errorName(errorCode));
73       // return 1;
74     }
75 
76 
77     const UChar *buf = pattern.getBuffer();
78     int32_t needed = pattern.length();
79 
80     // print
81     {
82       char buf2[2048];
83       int32_t len2 = pattern.extract(0, pattern.length(), buf2, "utf-8");
84       buf2[len2]=0;
85       fprintf(stderr,"===\n%s\n===\n", buf2);
86     }
87 
88     const UnicodeString unsafeBackwardPattern(FALSE, buf, needed);
89   if(U_SUCCESS(errorCode)) {
90     //UnicodeSet us(unsafeBackwardPattern, errorCode);
91     //    fprintf(stderr, "\n%s:%d: err creating set %s\n", __FILE__, __LINE__, u_errorName(errorCode));
92   } else {
93     fprintf(stderr, "Uset OK - \n");
94   }
95 #endif
96 
97 
98   // Generate the output file.
99 
100   printf("// collunsafe.h\n");
101   printf("// %s\n", U_COPYRIGHT_STRING);
102   printf("\n");
103   printf("// To be included by collationdatareader.cpp, and generated by gencolusb.\n");
104   printf("// Machine generated, do not edit.\n");
105   printf("\n");
106   printf("#ifndef COLLUNSAFE_H\n"
107          "#define COLLUNSAFE_H\n"
108          "\n"
109          "#define COLLUNSAFE_ICU_VERSION \"" U_ICU_VERSION "\"\n");
110   printf("#define COLLUNSAFE_COLL_VERSION \"%s\"\n", verString);
111 
112 
113 
114 #if PATTERN
115   printf("#define COLLUNSAFE_PATTERN 1\n");
116   printf("static const int32_t collunsafe_len = %d;\n", needed);
117   printf("static const UChar collunsafe_pattern[collunsafe_len] = {\n");
118   for(int i=0;i<needed;i++) {
119     if( (i>0) && (i%8 == 0) ) {
120       printf(" // %d\n", i);
121     }
122     printf("0x%04X", buf[i]); // TODO check
123     if(i != (needed-1)) {
124       printf(", ");
125     }
126     }
127   printf(" //%d\n};\n", (needed-1));
128 #endif
129 
130 #if RANGE
131     fprintf(stderr, "COLLUNSAFE_RANGE - no code support in collationdatareader.cpp for this\n");
132     printf("#define COLLUNSAFE_RANGE 1\n");
133     printf("static const int32_t unsafe_rangeCount = %d;\n", rangeCount);
134     printf("static const UChar32 unsafe_ranges[%d] = { \n", rangeCount*2);
135     for(int32_t i=0;i<rangeCount;i++) {
136       printf(" 0x%04X, 0x%04X, // %d\n",
137              unsafeBackwardSet->getRangeStart(i),
138              unsafeBackwardSet->getRangeEnd(i),
139              i);
140     }
141     printf("};\n");
142 #endif
143 
144 #if SERIALIZE
145     printf("#define COLLUNSAFE_SERIALIZE 1\n");
146     printf("static const int32_t unsafe_serializedCount = %d;\n", serializedCount);
147     printf("static const uint16_t unsafe_serializedData[%d] = { \n", serializedCount);
148     for(int32_t i=0;i<serializedCount;i++) {
149       if( (i>0) && (i%8 == 0) ) {
150         printf(" // %d\n", i);
151       }
152       printf("0x%04X", serializedData[i]); // TODO check
153       if(i != (serializedCount-1)) {
154         printf(", ");
155       }
156     }
157     printf("};\n");
158 #endif
159 
160     printf("#endif\n");
161     fflush(stderr);
162     fflush(stdout);
163     return(U_SUCCESS(errorCode)?0:1);
164 }
165