1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 *   Copyright (C) 2005-2012, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 *   file name:  writesrc.c
11 *   encoding:   UTF-8
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 2005apr23
16 *   created by: Markus W. Scherer
17 *
18 *   Helper functions for writing source code for data.
19 */
20 
21 #include <stdio.h>
22 #include <time.h>
23 #include "unicode/utypes.h"
24 #include "unicode/putil.h"
25 #include "unicode/ucptrie.h"
26 #include "utrie2.h"
27 #include "cstring.h"
28 #include "writesrc.h"
29 
30 static FILE *
usrc_createWithHeader(const char * path,const char * filename,const char * header,const char * generator)31 usrc_createWithHeader(const char *path, const char *filename,
32                       const char *header, const char *generator) {
33     char buffer[1024];
34     const char *p;
35     char *q;
36     FILE *f;
37     char c;
38 
39     if(path==NULL) {
40         p=filename;
41     } else {
42         /* concatenate path and filename, with U_FILE_SEP_CHAR in between if necessary */
43         uprv_strcpy(buffer, path);
44         q=buffer+uprv_strlen(buffer);
45         if(q>buffer && (c=*(q-1))!=U_FILE_SEP_CHAR && c!=U_FILE_ALT_SEP_CHAR) {
46             *q++=U_FILE_SEP_CHAR;
47         }
48         uprv_strcpy(q, filename);
49         p=buffer;
50     }
51 
52     f=fopen(p, "w");
53     if(f!=NULL) {
54         const struct tm *lt;
55         time_t t;
56 
57         time(&t);
58         lt=localtime(&t);
59         if(generator==NULL) {
60             strftime(buffer, sizeof(buffer), "%Y-%m-%d", lt);
61             fprintf(f, header, filename, buffer);
62         } else {
63             fprintf(f, header, filename, generator);
64         }
65     } else {
66         fprintf(
67             stderr,
68             "usrc_create(%s, %s): unable to create file\n",
69             path!=NULL ? path : "", filename);
70     }
71     return f;
72 }
73 
74 U_CAPI FILE * U_EXPORT2
usrc_create(const char * path,const char * filename,int32_t copyrightYear,const char * generator)75 usrc_create(const char *path, const char *filename, int32_t copyrightYear, const char *generator) {
76     const char *header;
77     char buffer[200];
78     if(copyrightYear<=2016) {
79         header=
80             "// © 2016 and later: Unicode, Inc. and others.\n"
81             "// License & terms of use: http://www.unicode.org/copyright.html\n"
82             "//\n"
83             "// Copyright (C) 1999-2016, International Business Machines\n"
84             "// Corporation and others.  All Rights Reserved.\n"
85             "//\n"
86             "// file name: %s\n"
87             "//\n"
88             "// machine-generated by: %s\n"
89             "\n\n";
90     } else {
91         sprintf(buffer,
92                 "// © %d and later: Unicode, Inc. and others.\n"
93                 "// License & terms of use: http://www.unicode.org/copyright.html\n"
94                 "//\n"
95                 "// file name: %%s\n"
96                 "//\n"
97                 "// machine-generated by: %%s\n"
98                 "\n\n",
99                 (int)copyrightYear);
100         header=buffer;
101     }
102     return usrc_createWithHeader(path, filename, header, generator);
103 }
104 
105 U_CAPI FILE * U_EXPORT2
usrc_createTextData(const char * path,const char * filename,const char * generator)106 usrc_createTextData(const char *path, const char *filename, const char *generator) {
107     // TODO: Add parameter for the first year this file was generated, not before 2016.
108     static const char *header=
109         "# Copyright (C) 2016 and later: Unicode, Inc. and others.\n"
110         "# License & terms of use: http://www.unicode.org/copyright.html\n"
111         "# Copyright (C) 1999-2016, International Business Machines\n"
112         "# Corporation and others.  All Rights Reserved.\n"
113         "#\n"
114         "# file name: %s\n"
115         "#\n"
116         "# machine-generated by: %s\n"
117         "\n\n";
118     return usrc_createWithHeader(path, filename, header, generator);
119 }
120 
121 U_CAPI void U_EXPORT2
usrc_writeArray(FILE * f,const char * prefix,const void * p,int32_t width,int32_t length,const char * postfix)122 usrc_writeArray(FILE *f,
123                 const char *prefix,
124                 const void *p, int32_t width, int32_t length,
125                 const char *postfix) {
126     const uint8_t *p8;
127     const uint16_t *p16;
128     const uint32_t *p32;
129     uint32_t value;
130     int32_t i, col;
131 
132     p8=NULL;
133     p16=NULL;
134     p32=NULL;
135     switch(width) {
136     case 8:
137         p8=(const uint8_t *)p;
138         break;
139     case 16:
140         p16=(const uint16_t *)p;
141         break;
142     case 32:
143         p32=(const uint32_t *)p;
144         break;
145     default:
146         fprintf(stderr, "usrc_writeArray(width=%ld) unrecognized width\n", (long)width);
147         return;
148     }
149     if(prefix!=NULL) {
150         fprintf(f, prefix, (long)length);
151     }
152     for(i=col=0; i<length; ++i, ++col) {
153         if(i>0) {
154             if(col<16) {
155                 fputc(',', f);
156             } else {
157                 fputs(",\n", f);
158                 col=0;
159             }
160         }
161         switch(width) {
162         case 8:
163             value=p8[i];
164             break;
165         case 16:
166             value=p16[i];
167             break;
168         case 32:
169             value=p32[i];
170             break;
171         default:
172             value=0; /* unreachable */
173             break;
174         }
175         fprintf(f, value<=9 ? "%lu" : "0x%lx", (unsigned long)value);
176     }
177     if(postfix!=NULL) {
178         fputs(postfix, f);
179     }
180 }
181 
182 U_CAPI void U_EXPORT2
usrc_writeUTrie2Arrays(FILE * f,const char * indexPrefix,const char * data32Prefix,const UTrie2 * pTrie,const char * postfix)183 usrc_writeUTrie2Arrays(FILE *f,
184                        const char *indexPrefix, const char *data32Prefix,
185                        const UTrie2 *pTrie,
186                        const char *postfix) {
187     if(pTrie->data32==NULL) {
188         /* 16-bit trie */
189         usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength+pTrie->dataLength, postfix);
190     } else {
191         /* 32-bit trie */
192         usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength, postfix);
193         usrc_writeArray(f, data32Prefix, pTrie->data32, 32, pTrie->dataLength, postfix);
194     }
195 }
196 
197 U_CAPI void U_EXPORT2
usrc_writeUTrie2Struct(FILE * f,const char * prefix,const UTrie2 * pTrie,const char * indexName,const char * data32Name,const char * postfix)198 usrc_writeUTrie2Struct(FILE *f,
199                        const char *prefix,
200                        const UTrie2 *pTrie,
201                        const char *indexName, const char *data32Name,
202                        const char *postfix) {
203     if(prefix!=NULL) {
204         fputs(prefix, f);
205     }
206     if(pTrie->data32==NULL) {
207         /* 16-bit trie */
208         fprintf(
209             f,
210             "    %s,\n"         /* index */
211             "    %s+%ld,\n"     /* data16 */
212             "    NULL,\n",      /* data32 */
213             indexName,
214             indexName,
215             (long)pTrie->indexLength);
216     } else {
217         /* 32-bit trie */
218         fprintf(
219             f,
220             "    %s,\n"         /* index */
221             "    NULL,\n"       /* data16 */
222             "    %s,\n",        /* data32 */
223             indexName,
224             data32Name);
225     }
226     fprintf(
227         f,
228         "    %ld,\n"            /* indexLength */
229         "    %ld,\n"            /* dataLength */
230         "    0x%hx,\n"          /* index2NullOffset */
231         "    0x%hx,\n"          /* dataNullOffset */
232         "    0x%lx,\n"          /* initialValue */
233         "    0x%lx,\n"          /* errorValue */
234         "    0x%lx,\n"          /* highStart */
235         "    0x%lx,\n"          /* highValueIndex */
236         "    NULL, 0, FALSE, FALSE, 0, NULL\n",
237         (long)pTrie->indexLength, (long)pTrie->dataLength,
238         (short)pTrie->index2NullOffset, (short)pTrie->dataNullOffset,
239         (long)pTrie->initialValue, (long)pTrie->errorValue,
240         (long)pTrie->highStart, (long)pTrie->highValueIndex);
241     if(postfix!=NULL) {
242         fputs(postfix, f);
243     }
244 }
245 
246 U_CAPI void U_EXPORT2
usrc_writeUCPTrieArrays(FILE * f,const char * indexPrefix,const char * dataPrefix,const UCPTrie * pTrie,const char * postfix)247 usrc_writeUCPTrieArrays(FILE *f,
248                         const char *indexPrefix, const char *dataPrefix,
249                         const UCPTrie *pTrie,
250                         const char *postfix) {
251     usrc_writeArray(f, indexPrefix, pTrie->index, 16, pTrie->indexLength, postfix);
252     int32_t width=
253         pTrie->valueWidth==UCPTRIE_VALUE_BITS_16 ? 16 :
254         pTrie->valueWidth==UCPTRIE_VALUE_BITS_32 ? 32 :
255         pTrie->valueWidth==UCPTRIE_VALUE_BITS_8 ? 8 : 0;
256     usrc_writeArray(f, dataPrefix, pTrie->data.ptr0, width, pTrie->dataLength, postfix);
257 }
258 
259 U_CAPI void U_EXPORT2
usrc_writeUCPTrieStruct(FILE * f,const char * prefix,const UCPTrie * pTrie,const char * indexName,const char * dataName,const char * postfix)260 usrc_writeUCPTrieStruct(FILE *f,
261                         const char *prefix,
262                         const UCPTrie *pTrie,
263                         const char *indexName, const char *dataName,
264                         const char *postfix) {
265     if(prefix!=NULL) {
266         fputs(prefix, f);
267     }
268     fprintf(
269         f,
270         "    %s,\n"             // index
271         "    { %s },\n",        // data (union)
272         indexName,
273         dataName);
274     fprintf(
275         f,
276         "    %ld, %ld,\n"       // indexLength, dataLength
277         "    0x%lx, 0x%x,\n"    // highStart, shifted12HighStart
278         "    %d, %d,\n"         // type, valueWidth
279         "    0, 0,\n"           // reserved32, reserved16
280         "    0x%x, 0x%lx,\n"    // index3NullOffset, dataNullOffset
281         "    0x%lx,\n",         // nullValue
282         (long)pTrie->indexLength, (long)pTrie->dataLength,
283         (long)pTrie->highStart, pTrie->shifted12HighStart,
284         pTrie->type, pTrie->valueWidth,
285         pTrie->index3NullOffset, (long)pTrie->dataNullOffset,
286         (long)pTrie->nullValue);
287     if(postfix!=NULL) {
288         fputs(postfix, f);
289     }
290 }
291 
292 U_CAPI void U_EXPORT2
usrc_writeUCPTrie(FILE * f,const char * name,const UCPTrie * pTrie)293 usrc_writeUCPTrie(FILE *f, const char *name, const UCPTrie *pTrie) {
294     int32_t width=
295         pTrie->valueWidth==UCPTRIE_VALUE_BITS_16 ? 16 :
296         pTrie->valueWidth==UCPTRIE_VALUE_BITS_32 ? 32 :
297         pTrie->valueWidth==UCPTRIE_VALUE_BITS_8 ? 8 : 0;
298     char line[100], line2[100], line3[100];
299     sprintf(line, "static const uint16_t %s_trieIndex[%%ld]={\n", name);
300     sprintf(line2, "static const uint%d_t %s_trieData[%%ld]={\n", (int)width, name);
301     usrc_writeUCPTrieArrays(f, line, line2, pTrie, "\n};\n\n");
302     sprintf(line, "static const UCPTrie %s_trie={\n", name);
303     sprintf(line2, "%s_trieIndex", name);
304     sprintf(line3, "%s_trieData", name);
305     usrc_writeUCPTrieStruct(f, line, pTrie, line2, line3, "};\n\n");
306 }
307 
308 U_CAPI void U_EXPORT2
usrc_writeArrayOfMostlyInvChars(FILE * f,const char * prefix,const char * p,int32_t length,const char * postfix)309 usrc_writeArrayOfMostlyInvChars(FILE *f,
310                                 const char *prefix,
311                                 const char *p, int32_t length,
312                                 const char *postfix) {
313     int32_t i, col;
314     int prev2, prev, c;
315 
316     if(prefix!=NULL) {
317         fprintf(f, prefix, (long)length);
318     }
319     prev2=prev=-1;
320     for(i=col=0; i<length; ++i, ++col) {
321         c=(uint8_t)p[i];
322         if(i>0) {
323             /* Break long lines. Try to break at interesting places, to minimize revision diffs. */
324             if(
325                 /* Very long line. */
326                 col>=32 ||
327                 /* Long line, break after terminating NUL. */
328                 (col>=24 && prev2>=0x20 && prev==0) ||
329                 /* Medium-long line, break before non-NUL, non-character byte. */
330                 (col>=16 && (prev==0 || prev>=0x20) && 0<c && c<0x20)
331             ) {
332                 fputs(",\n", f);
333                 col=0;
334             } else {
335                 fputc(',', f);
336             }
337         }
338         fprintf(f, c<0x20 ? "%u" : "'%c'", c);
339         prev2=prev;
340         prev=c;
341     }
342     if(postfix!=NULL) {
343         fputs(postfix, f);
344     }
345 }
346