1 /*
2 *******************************************************************************
3 *
4 *   Copyright (C) 1999-2015 International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 *******************************************************************************
8 *   file name:  gencnval.c
9 *   encoding:   US-ASCII
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 1999nov05
14 *   created by: Markus W. Scherer
15 *
16 *   This program reads convrtrs.txt and writes a memory-mappable
17 *   converter name alias table to cnvalias.dat .
18 *
19 *   This program currently writes version 2.1 of the data format. See
20 *   ucnv_io.c for more details on the format. Note that version 2.1
21 *   is written in such a way that a 2.0 reader will be able to use it,
22 *   and a 2.1 reader will be able to read 2.0.
23 */
24 
25 #include "unicode/utypes.h"
26 #include "unicode/putil.h"
27 #include "unicode/ucnv.h" /* ucnv_compareNames() */
28 #include "ucnv_io.h"
29 #include "cmemory.h"
30 #include "cstring.h"
31 #include "uinvchar.h"
32 #include "filestrm.h"
33 #include "unicode/uclean.h"
34 #include "unewdata.h"
35 #include "uoptions.h"
36 
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <ctype.h>
40 
41 /* TODO: Need to check alias name length is less than UCNV_MAX_CONVERTER_NAME_LENGTH */
42 
/* STRING_STORE_SIZE + TAG_STORE_SIZE <= ((2^16 - 1) * 2)
 That is the maximum size for the string stores combined
 because the strings are indexed at 16-bit boundaries by a
 16-bit index, and there is only one section for the
 strings.
 (0x1FBFE + 0x400 == 0x1FFFE == 65535 * 2, i.e. the limit is used exactly.)
 */
#define STRING_STORE_SIZE 0x1FBFE   /* 130046 */
#define TAG_STORE_SIZE      0x400   /* 1024 */

/* The combined tag and converter count can affect the number of lists
 created.  The size of all lists must be less than (2^17 - 1)
 because the lists are indexed as a 16-bit array with a 16-bit index.
 */
#define MAX_TAG_COUNT 0x3F      /* 63 */
#define MAX_CONV_COUNT UCNV_CONVERTER_INDEX_MASK
#define MAX_ALIAS_COUNT 0xFFFF  /* 65535 */

/* The maximum number of aliases that a standard tag/converter combination can have.
 At this moment 6/18/2002, IANA has 12 names for ASCII. Don't go below 15 for
 this value. I don't recommend more than 31 for this value.
 */
#define MAX_TC_ALIAS_COUNT 0x1F    /* 31 */

/* Size of the buffers that hold one logical input line, and the capacity of
 the alias list section.
 */
#define MAX_LINE_SIZE 0x7FFF    /* 32767 */
#define MAX_LIST_SIZE 0xFFFF    /* 65535 */

/* Name and type passed to udata_create() for the output file. */
#define DATA_NAME "cnvalias"
#define DATA_TYPE "icu" /* ICU alias table */

/* The two tags that parseFile() registers before reading any input:
 tag 0 is the empty tag (untagged aliases), tag 1 is "ALL".
 */
#define ALL_TAG_STR "ALL"
#define ALL_TAG_NUM 1
#define EMPTY_TAG_NUM 0
75 
76 /* UDataInfo cf. udata.h */
77 static const UDataInfo dataInfo={
78     sizeof(UDataInfo),
79     0,
80 
81     U_IS_BIG_ENDIAN,
82     U_CHARSET_FAMILY,
83     sizeof(UChar),
84     0,
85 
86     {0x43, 0x76, 0x41, 0x6c},     /* dataFormat="CvAl" */
87     {3, 0, 1, 0},                 /* formatVersion */
88     {1, 4, 2, 0}                  /* dataVersion */
89 };
90 
91 typedef struct {
92     char *store;
93     uint32_t top;
94     uint32_t max;
95 } StringBlock;
96 
97 static char stringStore[STRING_STORE_SIZE];
98 static StringBlock stringBlock = { stringStore, 0, STRING_STORE_SIZE };
99 
100 typedef struct {
101     uint16_t    aliasCount;
102     uint16_t    *aliases;     /* Index into stringStore */
103 } AliasList;
104 
105 typedef struct {
106     uint16_t converter;     /* Index into stringStore */
107     uint16_t totalAliasCount;    /* Total aliases in this column */
108 } Converter;
109 
110 static Converter converters[MAX_CONV_COUNT];
111 static uint16_t converterCount=0;
112 
113 static char tagStore[TAG_STORE_SIZE];
114 static StringBlock tagBlock = { tagStore, 0, TAG_STORE_SIZE };
115 
116 typedef struct {
117     uint16_t    tag;        /* Index into tagStore */
118     uint16_t    totalAliasCount; /* Total aliases in this row */
119     AliasList   aliasList[MAX_CONV_COUNT];
120 } Tag;
121 
122 /* Think of this as a 3D array. It's tagCount by converterCount by aliasCount */
123 static Tag tags[MAX_TAG_COUNT];
124 static uint16_t tagCount = 0;
125 
126 /* Used for storing all aliases  */
127 static uint16_t knownAliases[MAX_ALIAS_COUNT];
128 static uint16_t knownAliasesCount = 0;
129 /*static uint16_t duplicateKnownAliasesCount = 0;*/
130 
131 /* Used for storing the lists section that point to aliases */
132 static uint16_t aliasLists[MAX_LIST_SIZE];
133 static uint16_t aliasListsSize = 0;
134 
135 /* Were the standard tags declared before the aliases. */
136 static UBool standardTagsUsed = FALSE;
137 static UBool verbose = FALSE;
138 static UBool quiet = FALSE;
139 static int lineNum = 1;
140 
141 static UConverterAliasOptions tableOptions = {
142     UCNV_IO_STD_NORMALIZED,
143     1 /* containsCnvOptionInfo */
144 };
145 
146 
147 /**
148  * path to convrtrs.txt
149  */
150 const char *path;
151 
152 /* prototypes --------------------------------------------------------------- */
153 
154 static void
155 parseLine(const char *line);
156 
157 static void
158 parseFile(FileStream *in);
159 
160 static int32_t
161 chomp(char *line);
162 
163 static void
164 addOfficialTaggedStandards(char *line, int32_t lineLen);
165 
166 static uint16_t
167 addAlias(const char *alias, uint16_t standard, uint16_t converter, UBool defaultName);
168 
169 static uint16_t
170 addConverter(const char *converter);
171 
172 static char *
173 allocString(StringBlock *block, const char *s, int32_t length);
174 
175 static uint16_t
176 addToKnownAliases(const char *alias);
177 
178 static int
179 compareAliases(const void *alias1, const void *alias2);
180 
181 static uint16_t
182 getTagNumber(const char *tag, uint16_t tagLen);
183 
184 /*static void
185 addTaggedAlias(uint16_t tag, const char *alias, uint16_t converter);*/
186 
187 static void
188 writeAliasTable(UNewDataMemory *out);
189 
190 /* -------------------------------------------------------------------------- */
191 
/* Presumes that you used allocString() */
/* Convert a 16-bit store index into a pointer: indexes count 2-byte units. */
#define GET_ALIAS_STR(index) (stringStore + ((size_t)(index) << 1))
#define GET_TAG_STR(index) (tagStore + ((size_t)(index) << 1))

/* Presumes that you used allocString() */
/* Convert a pointer into its store back into a 16-bit index.
   The argument is parenthesized so that any pointer expression may be passed. */
#define GET_ALIAS_NUM(str) ((uint16_t)(((str) - stringStore) >> 1))
#define GET_TAG_NUM(str) ((uint16_t)(((str) - tagStore) >> 1))
199 
200 enum
201 {
202     HELP1,
203     HELP2,
204     VERBOSE,
205     COPYRIGHT,
206     DESTDIR,
207     SOURCEDIR,
208     QUIET
209 };
210 
211 static UOption options[]={
212     UOPTION_HELP_H,
213     UOPTION_HELP_QUESTION_MARK,
214     UOPTION_VERBOSE,
215     UOPTION_COPYRIGHT,
216     UOPTION_DESTDIR,
217     UOPTION_SOURCEDIR,
218     UOPTION_QUIET
219 };
220 
221 extern int
main(int argc,char * argv[])222 main(int argc, char* argv[]) {
223     int i, n;
224     char pathBuf[512];
225     FileStream *in;
226     UNewDataMemory *out;
227     UErrorCode errorCode=U_ZERO_ERROR;
228 
229     U_MAIN_INIT_ARGS(argc, argv);
230 
231     /* preset then read command line options */
232     options[DESTDIR].value=options[SOURCEDIR].value=u_getDataDirectory();
233     argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
234 
235     /* error handling, printing usage message */
236     if(argc<0) {
237         fprintf(stderr,
238             "error in command line argument \"%s\"\n",
239             argv[-argc]);
240     }
241     if(argc<0 || options[HELP1].doesOccur || options[HELP2].doesOccur) {
242         fprintf(stderr,
243             "usage: %s [-options] [convrtrs.txt]\n"
244             "\tread convrtrs.txt and create " U_ICUDATA_NAME "_" DATA_NAME "." DATA_TYPE "\n"
245             "options:\n"
246             "\t-h or -? or --help  this usage text\n"
247             "\t-v or --verbose     prints out extra information about the alias table\n"
248             "\t-q or --quiet       do not display warnings and progress\n"
249             "\t-c or --copyright   include a copyright notice\n"
250             "\t-d or --destdir     destination directory, followed by the path\n"
251             "\t-s or --sourcedir   source directory, followed by the path\n",
252             argv[0]);
253         return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
254     }
255 
256     if(options[VERBOSE].doesOccur) {
257         verbose = TRUE;
258     }
259 
260     if(options[QUIET].doesOccur) {
261         quiet = TRUE;
262     }
263 
264     if(argc>=2) {
265         path=argv[1];
266     } else {
267         path=options[SOURCEDIR].value;
268         if(path!=NULL && *path!=0) {
269             char *end;
270 
271             uprv_strcpy(pathBuf, path);
272             end = uprv_strchr(pathBuf, 0);
273             if(*(end-1)!=U_FILE_SEP_CHAR) {
274                 *(end++)=U_FILE_SEP_CHAR;
275             }
276             uprv_strcpy(end, "convrtrs.txt");
277             path=pathBuf;
278         } else {
279             path = "convrtrs.txt";
280         }
281     }
282 
283     uprv_memset(stringStore, 0, sizeof(stringStore));
284     uprv_memset(tagStore, 0, sizeof(tagStore));
285     uprv_memset(converters, 0, sizeof(converters));
286     uprv_memset(tags, 0, sizeof(tags));
287     uprv_memset(aliasLists, 0, sizeof(aliasLists));
288     uprv_memset(knownAliases, 0, sizeof(aliasLists));
289 
290 
291     in=T_FileStream_open(path, "r");
292     if(in==NULL) {
293         fprintf(stderr, "gencnval: unable to open input file %s\n", path);
294         exit(U_FILE_ACCESS_ERROR);
295     }
296     parseFile(in);
297     T_FileStream_close(in);
298 
299     /* create the output file */
300     out=udata_create(options[DESTDIR].value, DATA_TYPE, DATA_NAME, &dataInfo,
301                      options[COPYRIGHT].doesOccur ? U_COPYRIGHT_STRING : NULL, &errorCode);
302     if(U_FAILURE(errorCode)) {
303         fprintf(stderr, "gencnval: unable to open output file - error %s\n", u_errorName(errorCode));
304         exit(errorCode);
305     }
306 
307     /* write the table of aliases based on a tag/converter name combination */
308     writeAliasTable(out);
309 
310     /* finish */
311     udata_finish(out, &errorCode);
312     if(U_FAILURE(errorCode)) {
313         fprintf(stderr, "gencnval: error finishing output file - %s\n", u_errorName(errorCode));
314         exit(errorCode);
315     }
316 
317     /* clean up tags */
318     for (i = 0; i < MAX_TAG_COUNT; i++) {
319         for (n = 0; n < MAX_CONV_COUNT; n++) {
320             if (tags[i].aliasList[n].aliases!=NULL) {
321                 uprv_free(tags[i].aliasList[n].aliases);
322             }
323         }
324     }
325 
326     return 0;
327 }
328 
329 static void
parseFile(FileStream * in)330 parseFile(FileStream *in) {
331     char line[MAX_LINE_SIZE];
332     char lastLine[MAX_LINE_SIZE];
333     int32_t lineSize = 0;
334     int32_t lastLineSize = 0;
335     UBool validParse = TRUE;
336 
337     lineNum = 0;
338 
339     /* Add the empty tag, which is for untagged aliases */
340     getTagNumber("", 0);
341     getTagNumber(ALL_TAG_STR, 3);
342     allocString(&stringBlock, "", 0);
343 
344     /* read the list of aliases */
345     while (validParse) {
346         validParse = FALSE;
347 
348         /* Read non-empty lines that don't start with a space character. */
349         while (T_FileStream_readLine(in, lastLine, MAX_LINE_SIZE) != NULL) {
350             lastLineSize = chomp(lastLine);
351             if (lineSize == 0 || (lastLineSize > 0 && isspace((int)*lastLine))) {
352                 uprv_strcpy(line + lineSize, lastLine);
353                 lineSize += lastLineSize;
354             } else if (lineSize > 0) {
355                 validParse = TRUE;
356                 break;
357             }
358             lineNum++;
359         }
360 
361         if (validParse || lineSize > 0) {
362             if (isspace((int)*line)) {
363                 fprintf(stderr, "%s:%d: error: cannot start an alias with a space\n", path, lineNum-1);
364                 exit(U_PARSE_ERROR);
365             } else if (line[0] == '{') {
366                 if (!standardTagsUsed && line[lineSize - 1] != '}') {
367                     fprintf(stderr, "%s:%d: error: alias needs to start with a converter name\n", path, lineNum);
368                     exit(U_PARSE_ERROR);
369                 }
370                 addOfficialTaggedStandards(line, lineSize);
371                 standardTagsUsed = TRUE;
372             } else {
373                 if (standardTagsUsed) {
374                     parseLine(line);
375                 }
376                 else {
377                     fprintf(stderr, "%s:%d: error: alias table needs to start a list of standard tags\n", path, lineNum);
378                     exit(U_PARSE_ERROR);
379                 }
380             }
381             /* Was the last line consumed */
382             if (lastLineSize > 0) {
383                 uprv_strcpy(line, lastLine);
384                 lineSize = lastLineSize;
385             }
386             else {
387                 lineSize = 0;
388             }
389         }
390         lineNum++;
391     }
392 }
393 
/* This works almost like the Perl chomp.
 It removes the newlines, comments and trailing whitespace (not preceding whitespace).

 The line is cut at the first '\r', '\n' or '#'. If the remaining text
 contains at least one non-space character, everything after the last such
 character is removed as well. Text that consists of white space only is
 left as it is, so that the caller still sees a line that starts with white
 space.

 @param line NUL-terminated line, modified in place
 @return the length of the resulting string
*/
static int32_t
chomp(char *line) {
    char *s = line;
    char *lastNonSpace = NULL;  /* NULL until a non-space character was seen */
    while(*s!=0) {
        /* truncate at a newline or a comment */
        if(*s == '\r' || *s == '\n' || *s == '#') {
            *s = 0;
            break;
        }
        /* isspace() needs a value in the unsigned char range */
        if (!isspace((unsigned char)*s)) {
            lastNonSpace = s;
        }
        ++s;
    }
    if (lastNonSpace != NULL) {
        /* also covers a single non-space character at index 0 */
        s = lastNonSpace + 1;
        *s = 0;
    }
    return (int32_t)(s - line);
}
418 
419 static void
parseLine(const char * line)420 parseLine(const char *line) {
421     uint16_t pos=0, start, limit, length, cnv;
422     char *converter, *alias;
423 
424     /* skip leading white space */
425     /* There is no whitespace at the beginning anymore */
426 /*    while(line[pos]!=0 && isspace(line[pos])) {
427         ++pos;
428     }
429 */
430 
431     /* is there nothing on this line? */
432     if(line[pos]==0) {
433         return;
434     }
435 
436     /* get the converter name */
437     start=pos;
438     while(line[pos]!=0 && !isspace((int)line[pos])) {
439         ++pos;
440     }
441     limit=pos;
442 
443     /* store the converter name */
444     length=(uint16_t)(limit-start);
445     converter=allocString(&stringBlock, line+start, length);
446 
447     /* add the converter to the converter table */
448     cnv=addConverter(converter);
449 
450     /* The name itself may be tagged, so let's added it to the aliases list properly */
451     pos = start;
452 
453     /* get all the real aliases */
454     for(;;) {
455 
456         /* skip white space */
457         while(line[pos]!=0 && isspace((int)line[pos])) {
458             ++pos;
459         }
460 
461         /* is there no more alias name on this line? */
462         if(line[pos]==0) {
463             break;
464         }
465 
466         /* get an alias name */
467         start=pos;
468         while(line[pos]!=0 && line[pos]!='{' && !isspace((int)line[pos])) {
469             ++pos;
470         }
471         limit=pos;
472 
473         /* store the alias name */
474         length=(uint16_t)(limit-start);
475         if (start == 0) {
476             /* add the converter as its own alias to the alias table */
477             alias = converter;
478             addAlias(alias, ALL_TAG_NUM, cnv, TRUE);
479         }
480         else {
481             alias=allocString(&stringBlock, line+start, length);
482             addAlias(alias, ALL_TAG_NUM, cnv, FALSE);
483         }
484         addToKnownAliases(alias);
485 
486         /* add the alias/converter pair to the alias table */
487         /* addAlias(alias, 0, cnv, FALSE);*/
488 
489         /* skip whitespace */
490         while (line[pos] && isspace((int)line[pos])) {
491             ++pos;
492         }
493 
494         /* handle tags if they are present */
495         if (line[pos] == '{') {
496             ++pos;
497             do {
498                 start = pos;
499                 while (line[pos] && line[pos] != '}' && !isspace((int)line[pos])) {
500                     ++pos;
501                 }
502                 limit = pos;
503 
504                 if (start != limit) {
505                     /* add the tag to the tag table */
506                     uint16_t tag = getTagNumber(line + start, (uint16_t)(limit - start));
507                     addAlias(alias, tag, cnv, (UBool)(line[limit-1] == '*'));
508                 }
509 
510                 while (line[pos] && isspace((int)line[pos])) {
511                     ++pos;
512                 }
513             } while (line[pos] && line[pos] != '}');
514 
515             if (line[pos] == '}') {
516                 ++pos;
517             } else {
518                 fprintf(stderr, "%s:%d: Unterminated tag list\n", path, lineNum);
519                 exit(U_UNMATCHED_BRACES);
520             }
521         } else {
522             addAlias(alias, EMPTY_TAG_NUM, cnv, (UBool)(tags[0].aliasList[cnv].aliasCount == 0));
523         }
524     }
525 }
526 
527 static uint16_t
getTagNumber(const char * tag,uint16_t tagLen)528 getTagNumber(const char *tag, uint16_t tagLen) {
529     char *atag;
530     uint16_t t;
531     UBool preferredName = ((tagLen > 0) ? (tag[tagLen - 1] == '*') : (FALSE));
532 
533     if (tagCount >= MAX_TAG_COUNT) {
534         fprintf(stderr, "%s:%d: too many tags\n", path, lineNum);
535         exit(U_BUFFER_OVERFLOW_ERROR);
536     }
537 
538     if (preferredName) {
539 /*        puts(tag);*/
540         tagLen--;
541     }
542 
543     for (t = 0; t < tagCount; ++t) {
544         const char *currTag = GET_TAG_STR(tags[t].tag);
545         if (uprv_strlen(currTag) == tagLen && !uprv_strnicmp(currTag, tag, tagLen)) {
546             return t;
547         }
548     }
549 
550     /* we need to add this tag */
551     if (tagCount >= MAX_TAG_COUNT) {
552         fprintf(stderr, "%s:%d: error: too many tags\n", path, lineNum);
553         exit(U_BUFFER_OVERFLOW_ERROR);
554     }
555 
556     /* allocate a new entry in the tag table */
557     atag = allocString(&tagBlock, tag, tagLen);
558 
559     if (standardTagsUsed) {
560         fprintf(stderr, "%s:%d: error: Tag \"%s\" is not declared at the beginning of the alias table.\n",
561             path, lineNum, atag);
562         exit(1);
563     }
564     else if (tagLen > 0 && strcmp(tag, ALL_TAG_STR) != 0) {
565         fprintf(stderr, "%s:%d: warning: Tag \"%s\" was added to the list of standards because it was not declared at beginning of the alias table.\n",
566             path, lineNum, atag);
567     }
568 
569     /* add the tag to the tag table */
570     tags[tagCount].tag = GET_TAG_NUM(atag);
571     /* The aliasList should be set to 0's already */
572 
573     return tagCount++;
574 }
575 
576 /*static void
577 addTaggedAlias(uint16_t tag, const char *alias, uint16_t converter) {
578     tags[tag].aliases[converter] = alias;
579 }
580 */
581 
582 static void
addOfficialTaggedStandards(char * line,int32_t lineLen)583 addOfficialTaggedStandards(char *line, int32_t lineLen) {
584     char *atag;
585     char *endTagExp;
586     char *tag;
587     static const char WHITESPACE[] = " \t";
588 
589     if (tagCount > UCNV_NUM_RESERVED_TAGS) {
590         fprintf(stderr, "%s:%d: error: official tags already added\n", path, lineNum);
591         exit(U_BUFFER_OVERFLOW_ERROR);
592     }
593     tag = strchr(line, '{');
594     if (tag == NULL) {
595         /* Why were we called? */
596         fprintf(stderr, "%s:%d: error: Missing start of tag group\n", path, lineNum);
597         exit(U_PARSE_ERROR);
598     }
599     tag++;
600     endTagExp = strchr(tag, '}');
601     if (endTagExp == NULL) {
602         fprintf(stderr, "%s:%d: error: Missing end of tag group\n", path, lineNum);
603         exit(U_PARSE_ERROR);
604     }
605     endTagExp[0] = 0;
606 
607     tag = strtok(tag, WHITESPACE);
608     while (tag != NULL) {
609 /*        printf("Adding original tag \"%s\"\n", tag);*/
610 
611         /* allocate a new entry in the tag table */
612         atag = allocString(&tagBlock, tag, -1);
613 
614         /* add the tag to the tag table */
615         tags[tagCount++].tag = (uint16_t)((atag - tagStore) >> 1);
616 
617         /* The aliasList should already be set to 0's */
618 
619         /* Get next tag */
620         tag = strtok(NULL, WHITESPACE);
621     }
622 }
623 
624 static uint16_t
addToKnownAliases(const char * alias)625 addToKnownAliases(const char *alias) {
626 /*    uint32_t idx; */
627     /* strict matching */
628 /*    for (idx = 0; idx < knownAliasesCount; idx++) {
629         uint16_t num = GET_ALIAS_NUM(alias);
630         if (knownAliases[idx] != num
631             && uprv_strcmp(alias, GET_ALIAS_STR(knownAliases[idx])) == 0)
632         {
633             fprintf(stderr, "%s:%d: warning: duplicate alias %s and %s found\n", path,
634                 lineNum, alias, GET_ALIAS_STR(knownAliases[idx]));
635             duplicateKnownAliasesCount++;
636             break;
637         }
638         else if (knownAliases[idx] != num
639             && ucnv_compareNames(alias, GET_ALIAS_STR(knownAliases[idx])) == 0)
640         {
641             if (verbose) {
642                 fprintf(stderr, "%s:%d: information: duplicate alias %s and %s found\n", path,
643                     lineNum, alias, GET_ALIAS_STR(knownAliases[idx]));
644             }
645             duplicateKnownAliasesCount++;
646             break;
647         }
648     }
649 */
650     if (knownAliasesCount >= MAX_ALIAS_COUNT) {
651         fprintf(stderr, "%s:%d: warning: Too many aliases defined for all converters\n",
652             path, lineNum);
653         exit(U_BUFFER_OVERFLOW_ERROR);
654     }
655     /* TODO: We could try to unlist exact duplicates. */
656     return knownAliases[knownAliasesCount++] = GET_ALIAS_NUM(alias);
657 }
658 
659 /*
660 @param standard When standard is 0, then it's the "empty" tag.
661 */
662 static uint16_t
addAlias(const char * alias,uint16_t standard,uint16_t converter,UBool defaultName)663 addAlias(const char *alias, uint16_t standard, uint16_t converter, UBool defaultName) {
664     uint32_t idx, idx2;
665     UBool startEmptyWithoutDefault = FALSE;
666     AliasList *aliasList;
667 
668     if(standard>=MAX_TAG_COUNT) {
669         fprintf(stderr, "%s:%d: error: too many standard tags\n", path, lineNum);
670         exit(U_BUFFER_OVERFLOW_ERROR);
671     }
672     if(converter>=MAX_CONV_COUNT) {
673         fprintf(stderr, "%s:%d: error: too many converter names\n", path, lineNum);
674         exit(U_BUFFER_OVERFLOW_ERROR);
675     }
676     aliasList = &tags[standard].aliasList[converter];
677 
678     if (strchr(alias, '}')) {
679         fprintf(stderr, "%s:%d: error: unmatched } found\n", path,
680             lineNum);
681     }
682 
683     if(aliasList->aliasCount + 1 >= MAX_TC_ALIAS_COUNT) {
684         fprintf(stderr, "%s:%d: error: too many aliases for alias %s and converter %s\n", path,
685             lineNum, alias, GET_ALIAS_STR(converters[converter].converter));
686         exit(U_BUFFER_OVERFLOW_ERROR);
687     }
688 
689     /* Show this warning only once. All aliases are added to the "ALL" tag. */
690     if (standard == ALL_TAG_NUM && GET_ALIAS_STR(converters[converter].converter) != alias) {
691         /* Normally these option values are parsed at runtime, and they can
692            be discarded when the alias is a default converter. Options should
693            only be on a converter and not an alias. */
694         if (uprv_strchr(alias, UCNV_OPTION_SEP_CHAR) != 0)
695         {
696             fprintf(stderr, "warning(line %d): alias %s contains a \""UCNV_OPTION_SEP_STRING"\". Options are parsed at run-time and do not need to be in the alias table.\n",
697                 lineNum, alias);
698         }
699         if (uprv_strchr(alias, UCNV_VALUE_SEP_CHAR) != 0)
700         {
701             fprintf(stderr, "warning(line %d): alias %s contains an \""UCNV_VALUE_SEP_STRING"\". Options are parsed at run-time and do not need to be in the alias table.\n",
702                 lineNum, alias);
703         }
704     }
705 
706     if (standard != ALL_TAG_NUM) {
707         /* Check for duplicate aliases for this tag on all converters */
708         for (idx = 0; idx < converterCount; idx++) {
709             for (idx2 = 0; idx2 < tags[standard].aliasList[idx].aliasCount; idx2++) {
710                 uint16_t aliasNum = tags[standard].aliasList[idx].aliases[idx2];
711                 if (aliasNum
712                     && ucnv_compareNames(alias, GET_ALIAS_STR(aliasNum)) == 0)
713                 {
714                     if (idx == converter) {
715                         /*
716                          * (alias, standard) duplicates are harmless if they map to the same converter.
717                          * Only print a warning in verbose mode, or if the alias is a precise duplicate,
718                          * not just a lenient-match duplicate.
719                          */
720                         if (verbose || 0 == uprv_strcmp(alias, GET_ALIAS_STR(aliasNum))) {
721                             fprintf(stderr, "%s:%d: warning: duplicate aliases %s and %s found for standard %s and converter %s\n", path,
722                                 lineNum, alias, GET_ALIAS_STR(aliasNum),
723                                 GET_TAG_STR(tags[standard].tag),
724                                 GET_ALIAS_STR(converters[converter].converter));
725                         }
726                     } else {
727                         fprintf(stderr, "%s:%d: warning: duplicate aliases %s and %s found for standard tag %s between converter %s and converter %s\n", path,
728                             lineNum, alias, GET_ALIAS_STR(aliasNum),
729                             GET_TAG_STR(tags[standard].tag),
730                             GET_ALIAS_STR(converters[converter].converter),
731                             GET_ALIAS_STR(converters[idx].converter));
732                     }
733                     break;
734                 }
735             }
736         }
737 
738         /* Check for duplicate default aliases for this converter on all tags */
739         /* It's okay to have multiple standards prefer the same name */
740 /*        if (verbose && !dupFound) {
741             for (idx = 0; idx < tagCount; idx++) {
742                 if (tags[idx].aliasList[converter].aliases) {
743                     uint16_t aliasNum = tags[idx].aliasList[converter].aliases[0];
744                     if (aliasNum
745                         && ucnv_compareNames(alias, GET_ALIAS_STR(aliasNum)) == 0)
746                     {
747                         fprintf(stderr, "%s:%d: warning: duplicate alias %s found for converter %s and standard tag %s\n", path,
748                             lineNum, alias, GET_ALIAS_STR(converters[converter].converter), GET_TAG_STR(tags[standard].tag));
749                         break;
750                     }
751                 }
752             }
753         }*/
754     }
755 
756     if (aliasList->aliasCount <= 0) {
757         aliasList->aliasCount++;
758         startEmptyWithoutDefault = TRUE;
759     }
760     aliasList->aliases = (uint16_t *)uprv_realloc(aliasList->aliases, (aliasList->aliasCount + 1) * sizeof(aliasList->aliases[0]));
761     if (startEmptyWithoutDefault) {
762         aliasList->aliases[0] = 0;
763     }
764     if (defaultName) {
765         if (aliasList->aliases[0] != 0) {
766             fprintf(stderr, "%s:%d: error: Alias %s and %s cannot both be the default alias for standard tag %s and converter %s\n", path,
767                 lineNum,
768                 alias,
769                 GET_ALIAS_STR(aliasList->aliases[0]),
770                 GET_TAG_STR(tags[standard].tag),
771                 GET_ALIAS_STR(converters[converter].converter));
772             exit(U_PARSE_ERROR);
773         }
774         aliasList->aliases[0] = GET_ALIAS_NUM(alias);
775     } else {
776         aliasList->aliases[aliasList->aliasCount++] = GET_ALIAS_NUM(alias);
777     }
778 /*    aliasList->converter = converter;*/
779 
780     converters[converter].totalAliasCount++; /* One more to the column */
781     tags[standard].totalAliasCount++; /* One more to the row */
782 
783     return aliasList->aliasCount;
784 }
785 
786 static uint16_t
addConverter(const char * converter)787 addConverter(const char *converter) {
788     uint32_t idx;
789     if(converterCount>=MAX_CONV_COUNT) {
790         fprintf(stderr, "%s:%d: error: too many converters\n", path, lineNum);
791         exit(U_BUFFER_OVERFLOW_ERROR);
792     }
793 
794     for (idx = 0; idx < converterCount; idx++) {
795         if (ucnv_compareNames(converter, GET_ALIAS_STR(converters[idx].converter)) == 0) {
796             fprintf(stderr, "%s:%d: error: duplicate converter %s found!\n", path, lineNum, converter);
797             exit(U_PARSE_ERROR);
798             break;
799         }
800     }
801 
802     converters[converterCount].converter = GET_ALIAS_NUM(converter);
803     converters[converterCount].totalAliasCount = 0;
804 
805     return converterCount++;
806 }
807 
808 /* resolve this alias based on the prioritization of the standard tags. */
809 static void
resolveAliasToConverter(uint16_t alias,uint16_t * tagNum,uint16_t * converterNum)810 resolveAliasToConverter(uint16_t alias, uint16_t *tagNum, uint16_t *converterNum) {
811     uint16_t idx, idx2, idx3;
812 
813     for (idx = UCNV_NUM_RESERVED_TAGS; idx < tagCount; idx++) {
814         for (idx2 = 0; idx2 < converterCount; idx2++) {
815             for (idx3 = 0; idx3 < tags[idx].aliasList[idx2].aliasCount; idx3++) {
816                 uint16_t aliasNum = tags[idx].aliasList[idx2].aliases[idx3];
817                 if (aliasNum == alias) {
818                     *tagNum = idx;
819                     *converterNum = idx2;
820                     return;
821                 }
822             }
823         }
824     }
825     /* Do the leftovers last, just in case */
826     /* There is no need to do the ALL tag */
827     idx = 0;
828     for (idx2 = 0; idx2 < converterCount; idx2++) {
829         for (idx3 = 0; idx3 < tags[idx].aliasList[idx2].aliasCount; idx3++) {
830             uint16_t aliasNum = tags[idx].aliasList[idx2].aliases[idx3];
831             if (aliasNum == alias) {
832                 *tagNum = idx;
833                 *converterNum = idx2;
834                 return;
835             }
836         }
837     }
838     *tagNum = UINT16_MAX;
839     *converterNum = UINT16_MAX;
840     fprintf(stderr, "%s: warning: alias %s not found\n",
841         path,
842         GET_ALIAS_STR(alias));
843     return;
844 }
845 
846 /* The knownAliases should be sorted before calling this function */
847 static uint32_t
resolveAliases(uint16_t * uniqueAliasArr,uint16_t * uniqueAliasToConverterArr,uint16_t aliasOffset)848 resolveAliases(uint16_t *uniqueAliasArr, uint16_t *uniqueAliasToConverterArr, uint16_t aliasOffset) {
849     uint32_t uniqueAliasIdx = 0;
850     uint32_t idx;
851     uint16_t currTagNum, oldTagNum;
852     uint16_t currConvNum, oldConvNum;
853     const char *lastName;
854 
855     resolveAliasToConverter(knownAliases[0], &oldTagNum, &currConvNum);
856     uniqueAliasToConverterArr[uniqueAliasIdx] = currConvNum;
857     oldConvNum = currConvNum;
858     uniqueAliasArr[uniqueAliasIdx] = knownAliases[0] + aliasOffset;
859     uniqueAliasIdx++;
860     lastName = GET_ALIAS_STR(knownAliases[0]);
861 
862     for (idx = 1; idx < knownAliasesCount; idx++) {
863         resolveAliasToConverter(knownAliases[idx], &currTagNum, &currConvNum);
864         if (ucnv_compareNames(lastName, GET_ALIAS_STR(knownAliases[idx])) == 0) {
865             /* duplicate found */
866             if ((currTagNum < oldTagNum && currTagNum >= UCNV_NUM_RESERVED_TAGS)
867                 || oldTagNum == 0) {
868                 oldTagNum = currTagNum;
869                 uniqueAliasToConverterArr[uniqueAliasIdx - 1] = currConvNum;
870                 uniqueAliasArr[uniqueAliasIdx - 1] = knownAliases[idx] + aliasOffset;
871                 if (verbose) {
872                     printf("using %s instead of %s -> %s",
873                         GET_ALIAS_STR(knownAliases[idx]),
874                         lastName,
875                         GET_ALIAS_STR(converters[currConvNum].converter));
876                     if (oldConvNum != currConvNum) {
877                         printf(" (alias conflict)");
878                     }
879                     puts("");
880                 }
881             }
882             else {
883                 /* else ignore it */
884                 if (verbose) {
885                     printf("folding %s into %s -> %s",
886                         GET_ALIAS_STR(knownAliases[idx]),
887                         lastName,
888                         GET_ALIAS_STR(converters[oldConvNum].converter));
889                     if (oldConvNum != currConvNum) {
890                         printf(" (alias conflict)");
891                     }
892                     puts("");
893                 }
894             }
895             if (oldConvNum != currConvNum) {
896                 uniqueAliasToConverterArr[uniqueAliasIdx - 1] |= UCNV_AMBIGUOUS_ALIAS_MAP_BIT;
897             }
898         }
899         else {
900             uniqueAliasToConverterArr[uniqueAliasIdx] = currConvNum;
901             oldConvNum = currConvNum;
902             uniqueAliasArr[uniqueAliasIdx] = knownAliases[idx] + aliasOffset;
903             uniqueAliasIdx++;
904             lastName = GET_ALIAS_STR(knownAliases[idx]);
905             oldTagNum = currTagNum;
906             /*printf("%s -> %s\n", GET_ALIAS_STR(knownAliases[idx]), GET_ALIAS_STR(converters[currConvNum].converter));*/
907         }
908         if (uprv_strchr(GET_ALIAS_STR(converters[currConvNum].converter), UCNV_OPTION_SEP_CHAR) != NULL) {
909             uniqueAliasToConverterArr[uniqueAliasIdx-1] |= UCNV_CONTAINS_OPTION_BIT;
910         }
911     }
912     return uniqueAliasIdx;
913 }
914 
915 static void
createOneAliasList(uint16_t * aliasArrLists,uint32_t tag,uint32_t converter,uint16_t offset)916 createOneAliasList(uint16_t *aliasArrLists, uint32_t tag, uint32_t converter, uint16_t offset) {
917     uint32_t aliasNum;
918     AliasList *aliasList = &tags[tag].aliasList[converter];
919 
920     if (aliasList->aliasCount == 0) {
921         aliasArrLists[tag*converterCount + converter] = 0;
922     }
923     else {
924         aliasLists[aliasListsSize++] = aliasList->aliasCount;
925 
926         /* write into the array area a 1's based index. */
927         aliasArrLists[tag*converterCount + converter] = aliasListsSize;
928 
929 /*        printf("tag %s converter %s\n",
930             GET_TAG_STR(tags[tag].tag),
931             GET_ALIAS_STR(converters[converter].converter));*/
932         for (aliasNum = 0; aliasNum < aliasList->aliasCount; aliasNum++) {
933             uint16_t value;
934 /*            printf("   %s\n",
935                 GET_ALIAS_STR(aliasList->aliases[aliasNum]));*/
936             if (aliasList->aliases[aliasNum]) {
937                 value = aliasList->aliases[aliasNum] + offset;
938             } else {
939                 value = 0;
940                 if (tag != 0 && !quiet) { /* Only show the warning when it's not the leftover tag. */
941                     fprintf(stderr, "%s: warning: tag %s does not have a default alias for %s\n",
942                             path,
943                             GET_TAG_STR(tags[tag].tag),
944                             GET_ALIAS_STR(converters[converter].converter));
945                 }
946             }
947             aliasLists[aliasListsSize++] = value;
948             if (aliasListsSize >= MAX_LIST_SIZE) {
949                 fprintf(stderr, "%s: error: Too many alias lists\n", path);
950                 exit(U_BUFFER_OVERFLOW_ERROR);
951             }
952 
953         }
954     }
955 }
956 
/*
 * Build the normalized counterpart of a block of NUL-terminated strings.
 *
 * normalizedStrings  destination, at least stringBlockLength bytes.
 * origStringBlock    source block of consecutive NUL-terminated strings.
 * stringBlockLength  number of bytes in the source block.
 *
 * The destination starts out as a byte-for-byte copy of the source. Each
 * non-empty source string is then run through ucnv_io_stripForCompare()
 * into the same position of the destination, and the rest of that string's
 * slot is zero-filled, so every string keeps its original offset.
 */
static void
createNormalizedAliasStrings(char *normalizedStrings, const char *origStringBlock, int32_t stringBlockLength) {
    const char *src = origStringBlock;
    char *dst = normalizedStrings;
    int32_t remaining = stringBlockLength;

    /* Begin with an exact copy; empty strings and padding are kept as they are. */
    uprv_memcpy(normalizedStrings, origStringBlock, stringBlockLength);

    for (;;) {
        int32_t srcLen = (int32_t)uprv_strlen(src);
        int32_t slotSize;

        /* Stop once the next string (with its NUL) no longer fits in what is left. */
        if (srcLen >= remaining) {
            break;
        }

        slotSize = srcLen + 1;
        if (srcLen > 0) {
            int32_t strippedLen;

            ucnv_io_stripForCompare(dst, src);
            strippedLen = (int32_t)uprv_strlen(dst);
            if (strippedLen > 0) {
                /* Clear whatever is left of the copied original behind the normalized text. */
                uprv_memset(dst + strippedLen, 0, slotSize - strippedLen);
            }
        }

        /* Advance all three cursors by one string slot. */
        remaining -= slotSize;
        dst += slotSize;
        src += slotSize;
    }
}
976 
977 static void
writeAliasTable(UNewDataMemory * out)978 writeAliasTable(UNewDataMemory *out) {
979     uint32_t i, j;
980     uint32_t uniqueAliasesSize;
981     uint16_t aliasOffset = (uint16_t)(tagBlock.top/sizeof(uint16_t));
982     uint16_t *aliasArrLists = (uint16_t *)uprv_malloc(tagCount * converterCount * sizeof(uint16_t));
983     uint16_t *uniqueAliases = (uint16_t *)uprv_malloc(knownAliasesCount * sizeof(uint16_t));
984     uint16_t *uniqueAliasesToConverter = (uint16_t *)uprv_malloc(knownAliasesCount * sizeof(uint16_t));
985 
986     qsort(knownAliases, knownAliasesCount, sizeof(knownAliases[0]), compareAliases);
987     uniqueAliasesSize = resolveAliases(uniqueAliases, uniqueAliasesToConverter, aliasOffset);
988 
989     /* Array index starts at 1. aliasLists[0] is the size of the lists section. */
990     aliasListsSize = 0;
991 
992     /* write the offsets of all the aliases lists in a 2D array, and create the lists. */
993     for (i = 0; i < tagCount; ++i) {
994         for (j = 0; j < converterCount; ++j) {
995             createOneAliasList(aliasArrLists, i, j, aliasOffset);
996         }
997     }
998 
999     /* Write the size of the TOC */
1000     if (tableOptions.stringNormalizationType == UCNV_IO_UNNORMALIZED) {
1001         udata_write32(out, 8);
1002     }
1003     else {
1004         udata_write32(out, 9);
1005     }
1006 
1007     /* Write the sizes of each section */
1008     /* All sizes are the number of uint16_t units, not bytes */
1009     udata_write32(out, converterCount);
1010     udata_write32(out, tagCount);
1011     udata_write32(out, uniqueAliasesSize);  /* list of aliases */
1012     udata_write32(out, uniqueAliasesSize);  /* The preresolved form of mapping an untagged the alias to a converter */
1013     udata_write32(out, tagCount * converterCount);
1014     udata_write32(out, aliasListsSize + 1);
1015     udata_write32(out, sizeof(tableOptions) / sizeof(uint16_t));
1016     udata_write32(out, (tagBlock.top + stringBlock.top) / sizeof(uint16_t));
1017     if (tableOptions.stringNormalizationType != UCNV_IO_UNNORMALIZED) {
1018         udata_write32(out, (tagBlock.top + stringBlock.top) / sizeof(uint16_t));
1019     }
1020 
1021     /* write the table of converters */
1022     /* Think of this as the column headers */
1023     for(i=0; i<converterCount; ++i) {
1024         udata_write16(out, (uint16_t)(converters[i].converter + aliasOffset));
1025     }
1026 
1027     /* write the table of tags */
1028     /* Think of this as the row headers */
1029     for(i=UCNV_NUM_RESERVED_TAGS; i<tagCount; ++i) {
1030         udata_write16(out, tags[i].tag);
1031     }
1032     /* The empty tag is considered the leftover list, and put that at the end of the priority list. */
1033     udata_write16(out, tags[EMPTY_TAG_NUM].tag);
1034     udata_write16(out, tags[ALL_TAG_NUM].tag);
1035 
1036     /* Write the unique list of aliases */
1037     udata_writeBlock(out, uniqueAliases, uniqueAliasesSize * sizeof(uint16_t));
1038 
1039     /* Write the unique list of aliases */
1040     udata_writeBlock(out, uniqueAliasesToConverter, uniqueAliasesSize * sizeof(uint16_t));
1041 
1042     /* Write the array to the lists */
1043     udata_writeBlock(out, (const void *)(aliasArrLists + (2*converterCount)), (((tagCount - 2) * converterCount) * sizeof(uint16_t)));
1044     /* Now write the leftover part of the array for the EMPTY and ALL lists */
1045     udata_writeBlock(out, (const void *)aliasArrLists, (2 * converterCount * sizeof(uint16_t)));
1046 
1047     /* Offset the next array to make the index start at 1. */
1048     udata_write16(out, 0xDEAD);
1049 
1050     /* Write the lists */
1051     udata_writeBlock(out, (const void *)aliasLists, aliasListsSize * sizeof(uint16_t));
1052 
1053     /* Write any options for the alias table. */
1054     udata_writeBlock(out, (const void *)&tableOptions, sizeof(tableOptions));
1055 
1056     /* write the tags strings */
1057     udata_writeString(out, tagBlock.store, tagBlock.top);
1058 
1059     /* write the aliases strings */
1060     udata_writeString(out, stringBlock.store, stringBlock.top);
1061 
1062     /* write the normalized aliases strings */
1063     if (tableOptions.stringNormalizationType != UCNV_IO_UNNORMALIZED) {
1064         char *normalizedStrings = (char *)uprv_malloc(tagBlock.top + stringBlock.top);
1065         createNormalizedAliasStrings(normalizedStrings, tagBlock.store, tagBlock.top);
1066         createNormalizedAliasStrings(normalizedStrings + tagBlock.top, stringBlock.store, stringBlock.top);
1067 
1068         /* Write out the complete normalized array. */
1069         udata_writeString(out, normalizedStrings, tagBlock.top + stringBlock.top);
1070         uprv_free(normalizedStrings);
1071     }
1072 
1073     uprv_free(uniqueAliasesToConverter);
1074     uprv_free(uniqueAliases);
1075     uprv_free(aliasArrLists);
1076 }
1077 
1078 static char *
allocString(StringBlock * block,const char * s,int32_t length)1079 allocString(StringBlock *block, const char *s, int32_t length) {
1080     uint32_t top;
1081     char *p;
1082 
1083     if(length<0) {
1084         length=(int32_t)uprv_strlen(s);
1085     }
1086 
1087     /*
1088      * add 1 for the terminating NUL
1089      * and round up (+1 &~1)
1090      * to keep the addresses on a 16-bit boundary
1091      */
1092     top=block->top + (uint32_t)((length + 1 + 1) & ~1);
1093 
1094     if(top >= block->max) {
1095         fprintf(stderr, "%s:%d: error: out of memory\n", path, lineNum);
1096         exit(U_MEMORY_ALLOCATION_ERROR);
1097     }
1098 
1099     /* get the pointer and copy the string */
1100     p = block->store + block->top;
1101     uprv_memcpy(p, s, length);
1102     p[length] = 0; /* NUL-terminate it */
1103     if((length & 1) == 0) {
1104         p[length + 1] = 0; /* set the padding byte */
1105     }
1106 
1107     /* check for invariant characters now that we have a NUL-terminated string for easy output */
1108     if(!uprv_isInvariantString(p, length)) {
1109         fprintf(stderr, "%s:%d: error: the name %s contains not just invariant characters\n", path, lineNum, p);
1110         exit(U_INVALID_TABLE_FORMAT);
1111     }
1112 
1113     block->top = top;
1114     return p;
1115 }
1116 
/*
 * qsort() comparator for arrays of uint16_t alias indexes.
 *
 * Orders primarily by ucnv_compareNames(), so that names such as IBM850 and
 * ibm-850 end up next to each other; names that compare equal are ordered
 * by string length, shortest first.
 */
static int
compareAliases(const void *alias1, const void *alias2) {
    const char *name1 = GET_ALIAS_STR(*(const uint16_t *)alias1);
    const char *name2 = GET_ALIAS_STR(*(const uint16_t *)alias2);
    int order = ucnv_compareNames(name1, name2);

    if (order != 0) {
        return order;
    }
    /* Equivalent names: the shorter spelling sorts first. */
    return (int)uprv_strlen(name1) - (int)uprv_strlen(name2);
}
1127 
1128 /*
1129  * Hey, Emacs, please set the following:
1130  *
1131  * Local Variables:
1132  * indent-tabs-mode: nil
1133  * End:
1134  *
1135  */
1136 
1137