1 /*
2 *******************************************************************************
3 *
4 *   Copyright (C) 2005-2014, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 *******************************************************************************
8 *   file name:  icupkg.cpp
9 *   encoding:   US-ASCII
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 2005jul29
14 *   created by: Markus W. Scherer
15 *
16 *   This tool operates on ICU data (.dat package) files.
17 *   It takes one as input, or creates an empty one, and can remove, add, and
18 *   extract data pieces according to command-line options.
19 *   At the same time, it swaps each piece to a consistent set of platform
20 *   properties as desired.
21 *   Useful as an install-time tool for shipping only one flavor of ICU data
22 *   and preparing data files for the target platform.
23 *   Also for customizing ICU data (pruning, augmenting, replacing) and for
24 *   taking it apart.
25 *   Subsumes functionality and implementation code from
26 *   gencmn, decmn, and icuswap tools.
27 *   Will not work with data DLLs (shared libraries).
28 */
29 
30 #include "unicode/utypes.h"
31 #include "unicode/putil.h"
32 #include "cstring.h"
33 #include "toolutil.h"
34 #include "uoptions.h"
35 #include "uparse.h"
36 #include "filestrm.h"
37 #include "package.h"
38 #include "pkg_icu.h"
39 
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <string.h>
43 
44 U_NAMESPACE_USE
45 
46 // TODO: add --matchmode=regex for using the ICU regex engine for item name pattern matching?
47 
48 // general definitions ----------------------------------------------------- ***
49 
50 // main() ------------------------------------------------------------------ ***
51 
52 static void
printUsage(const char * pname,UBool isHelp)53 printUsage(const char *pname, UBool isHelp) {
54     FILE *where=isHelp ? stdout : stderr;
55 
56     fprintf(where,
57             "%csage: %s [-h|-?|--help ] [-tl|-tb|-te] [-c] [-C comment]\n"
58             "\t[-a list] [-r list] [-x list] [-l [-o outputListFileName]]\n"
59             "\t[-s path] [-d path] [-w] [-m mode]\n"
60             "\t[--auto_toc_prefix] [--auto_toc_prefix_with_type] [--toc_prefix]\n"
61             "\tinfilename [outfilename]\n",
62             isHelp ? 'U' : 'u', pname);
63     if(isHelp) {
64         fprintf(where,
65             "\n"
66             "Read the input ICU .dat package file, modify it according to the options,\n"
67             "swap it to the desired platform properties (charset & endianness),\n"
68             "and optionally write the resulting ICU .dat package to the output file.\n"
69             "Items are removed, then added, then extracted and listed.\n"
70             "An ICU .dat package is written if items are removed or added,\n"
71             "or if the input and output filenames differ,\n"
72             "or if the --writepkg (-w) option is set.\n");
73         fprintf(where,
74             "\n"
75             "If the input filename is \"new\" then an empty package is created.\n"
76             "If the output filename is missing, then it is automatically generated\n"
77             "from the input filename: If the input filename ends with an l, b, or e\n"
78             "matching its platform properties, then the output filename will\n"
79             "contain the letter from the -t (--type) option.\n");
80         fprintf(where,
81             "\n"
82             "This tool can also be used to just swap a single ICU data file, replacing the\n"
83             "former icuswap tool. For this mode, provide the infilename (and optional\n"
84             "outfilename) for a non-package ICU data file.\n"
85             "Allowed options include -t, -w, -s and -d.\n"
86             "The filenames can be absolute, or relative to the source/dest dir paths.\n"
87             "Other options are not allowed in this mode.\n");
88         fprintf(where,
89             "\n"
90             "Options:\n"
91             "\t(Only the last occurrence of an option is used.)\n"
92             "\n"
93             "\t-h or -? or --help    print this message and exit\n");
94         fprintf(where,
95             "\n"
96             "\t-tl or --type l   output for little-endian/ASCII charset family\n"
97             "\t-tb or --type b   output for big-endian/ASCII charset family\n"
98             "\t-te or --type e   output for big-endian/EBCDIC charset family\n"
99             "\t                  The output type defaults to the input type.\n"
100             "\n"
101             "\t-c or --copyright include the ICU copyright notice\n"
102             "\t-C comment or --comment comment   include a comment string\n");
103         fprintf(where,
104             "\n"
105             "\t-a list or --add list      add items to the package\n"
106             "\t-r list or --remove list   remove items from the package\n"
107             "\t-x list or --extract list  extract items from the package\n"
108             "\tThe list can be a single item's filename,\n"
109             "\tor a .txt filename with a list of item filenames,\n"
110             "\tor an ICU .dat package filename.\n");
111         fprintf(where,
112             "\n"
113             "\t-w or --writepkg  write the output package even if no items are removed\n"
114             "\t                  or added (e.g., for only swapping the data)\n");
115         fprintf(where,
116             "\n"
117             "\t-m mode or --matchmode mode  set the matching mode for item names with\n"
118             "\t                             wildcards\n"
119             "\t        noslash: the '*' wildcard does not match the '/' tree separator\n");
120         fprintf(where,
121             "\n"
122             "\tIn the .dat package, the Table of Contents (ToC) contains an entry\n"
123             "\tfor each item of the form prefix/tree/itemname .\n"
124             "\tThe prefix normally matches the package basename, and icupkg checks that,\n"
125             "\tbut this is not necessary when ICU need not find and load the package by filename.\n"
126             "\tICU package names end with the platform type letter, and thus differ\n"
127             "\tbetween platform types. This is not required for user data packages.\n");
128         fprintf(where,
129             "\n"
130             "\t--auto_toc_prefix            automatic ToC entries prefix\n"
131             "\t                             Uses the prefix of the first entry of the\n"
132             "\t                             input package, rather than its basename.\n"
133             "\t                             Requires a non-empty input package.\n"
134             "\t--auto_toc_prefix_with_type  auto_toc_prefix + adjust platform type\n"
135             "\t                             Same as auto_toc_prefix but also checks that\n"
136             "\t                             the prefix ends with the input platform\n"
137             "\t                             type letter, and modifies it to the output\n"
138             "\t                             platform type letter.\n"
139             "\t                At most one of the auto_toc_prefix options\n"
140             "\t                can be used at a time.\n"
141             "\t--toc_prefix prefix          ToC prefix to be used in the output package\n"
142             "\t                             Overrides the package basename\n"
143             "\t                             and --auto_toc_prefix.\n"
144             "\t                             Cannot be combined with --auto_toc_prefix_with_type.\n");
145         /*
146          * Usage text columns, starting after the initial TAB.
147          *      1         2         3         4         5         6         7         8
148          *     901234567890123456789012345678901234567890123456789012345678901234567890
149          */
150         fprintf(where,
151             "\n"
152             "\tList file syntax: Items are listed on one or more lines and separated\n"
153             "\tby whitespace (space+tab).\n"
154             "\tComments begin with # and are ignored. Empty lines are ignored.\n"
155             "\tLines where the first non-whitespace character is one of %s\n"
156             "\tare also ignored, to reserve for future syntax.\n",
157             U_PKG_RESERVED_CHARS);
158         fprintf(where,
159             "\tItems for removal or extraction may contain a single '*' wildcard\n"
160             "\tcharacter. The '*' matches zero or more characters.\n"
161             "\tIf --matchmode noslash (-m noslash) is set, then the '*'\n"
162             "\tdoes not match '/'.\n");
163         fprintf(where,
164             "\n"
165             "\tItems must be listed relative to the package, and the --sourcedir or\n"
166             "\tthe --destdir path will be prepended.\n"
167             "\tThe paths are only prepended to item filenames while adding or\n"
168             "\textracting items, not to ICU .dat package or list filenames.\n"
169             "\t\n"
170             "\tPaths may contain '/' instead of the platform's\n"
171             "\tfile separator character, and are converted as appropriate.\n");
172         fprintf(where,
173             "\n"
174             "\t-s path or --sourcedir path  directory for the --add items\n"
175             "\t-d path or --destdir path    directory for the --extract items\n"
176             "\n"
177             "\t-l or --list                 list the package items\n"
178             "\t                             (after modifying the package)\n"
179             "\t                             to stdout or to output list file\n"
180             "\t-o path or --outlist path    path/filename for the --list output\n");
181     }
182 }
183 
184 static UOption options[]={
185     UOPTION_HELP_H,
186     UOPTION_HELP_QUESTION_MARK,
187     UOPTION_DEF("type", 't', UOPT_REQUIRES_ARG),
188 
189     UOPTION_COPYRIGHT,
190     UOPTION_DEF("comment", 'C', UOPT_REQUIRES_ARG),
191 
192     UOPTION_SOURCEDIR,
193     UOPTION_DESTDIR,
194 
195     UOPTION_DEF("writepkg", 'w', UOPT_NO_ARG),
196 
197     UOPTION_DEF("matchmode", 'm', UOPT_REQUIRES_ARG),
198 
199     UOPTION_DEF("add", 'a', UOPT_REQUIRES_ARG),
200     UOPTION_DEF("remove", 'r', UOPT_REQUIRES_ARG),
201     UOPTION_DEF("extract", 'x', UOPT_REQUIRES_ARG),
202 
203     UOPTION_DEF("list", 'l', UOPT_NO_ARG),
204     UOPTION_DEF("outlist", 'o', UOPT_REQUIRES_ARG),
205 
206     UOPTION_DEF("auto_toc_prefix", '\1', UOPT_NO_ARG),
207     UOPTION_DEF("auto_toc_prefix_with_type", '\1', UOPT_NO_ARG),
208     UOPTION_DEF("toc_prefix", '\1', UOPT_REQUIRES_ARG)
209 };
210 
/*
 * Indexes into the options[] table.
 * The enumerators are listed in the same order as the table entries;
 * OPT_COUNT is the number of options.
 */
enum {
    OPT_HELP_H,                     /* -h, --help */
    OPT_HELP_QUESTION_MARK,         /* -? */
    OPT_OUT_TYPE,                   /* -t, --type */

    OPT_COPYRIGHT,                  /* -c, --copyright */
    OPT_COMMENT,                    /* -C, --comment */

    OPT_SOURCEDIR,                  /* -s, --sourcedir */
    OPT_DESTDIR,                    /* -d, --destdir */

    OPT_WRITEPKG,                   /* -w, --writepkg */

    OPT_MATCHMODE,                  /* -m, --matchmode */

    OPT_ADD_LIST,                   /* -a, --add */
    OPT_REMOVE_LIST,                /* -r, --remove */
    OPT_EXTRACT_LIST,               /* -x, --extract */

    OPT_LIST_ITEMS,                 /* -l, --list */
    OPT_LIST_FILE,                  /* -o, --outlist */

    OPT_AUTO_TOC_PREFIX,            /* --auto_toc_prefix */
    OPT_AUTO_TOC_PREFIX_WITH_TYPE,  /* --auto_toc_prefix_with_type */
    OPT_TOC_PREFIX,                 /* --toc_prefix */

    OPT_COUNT
};
239 
240 static UBool
isPackageName(const char * filename)241 isPackageName(const char *filename) {
242     int32_t len;
243 
244     len=(int32_t)strlen(filename)-4; /* -4: subtract the length of ".dat" */
245     return (UBool)(len>0 && 0==strcmp(filename+len, ".dat"));
246 }
/*
 * MinGW needs this definition: it otherwise globs the command-line
 * arguments incorrectly, so that an escaped \* is expanded into a list of
 * files instead of being passed through as a literal "*".
 */
int _CRT_glob = 0;
252 
253 extern int
main(int argc,char * argv[])254 main(int argc, char *argv[]) {
255     const char *pname, *sourcePath, *destPath, *inFilename, *outFilename, *outComment;
256     char outType;
257     UBool isHelp, isModified, isPackage;
258     int result = 0;
259 
260     Package *pkg, *listPkg, *addListPkg;
261 
262     U_MAIN_INIT_ARGS(argc, argv);
263 
264     /* get the program basename */
265     pname=findBasename(argv[0]);
266 
267     argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);
268     isHelp=options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur;
269     if(isHelp) {
270         printUsage(pname, TRUE);
271         return U_ZERO_ERROR;
272     }
273 
274     pkg=new Package;
275     if(pkg==NULL) {
276         fprintf(stderr, "icupkg: not enough memory\n");
277         return U_MEMORY_ALLOCATION_ERROR;
278     }
279     isModified=FALSE;
280 
281     int autoPrefix=0;
282     if(options[OPT_AUTO_TOC_PREFIX].doesOccur) {
283         pkg->setAutoPrefix();
284         ++autoPrefix;
285     }
286     if(options[OPT_AUTO_TOC_PREFIX_WITH_TYPE].doesOccur) {
287         if(options[OPT_TOC_PREFIX].doesOccur) {
288             fprintf(stderr, "icupkg: --auto_toc_prefix_with_type and also --toc_prefix\n");
289             printUsage(pname, FALSE);
290             return U_ILLEGAL_ARGUMENT_ERROR;
291         }
292         pkg->setAutoPrefixWithType();
293         ++autoPrefix;
294     }
295     if(argc<2 || 3<argc || autoPrefix>1) {
296         printUsage(pname, FALSE);
297         return U_ILLEGAL_ARGUMENT_ERROR;
298     }
299 
300     if(options[OPT_SOURCEDIR].doesOccur) {
301         sourcePath=options[OPT_SOURCEDIR].value;
302     } else {
303         // work relative to the current working directory
304         sourcePath=NULL;
305     }
306     if(options[OPT_DESTDIR].doesOccur) {
307         destPath=options[OPT_DESTDIR].value;
308     } else {
309         // work relative to the current working directory
310         destPath=NULL;
311     }
312 
313     if(0==strcmp(argv[1], "new")) {
314         if(autoPrefix) {
315             fprintf(stderr, "icupkg: --auto_toc_prefix[_with_type] but no input package\n");
316             printUsage(pname, FALSE);
317             return U_ILLEGAL_ARGUMENT_ERROR;
318         }
319         inFilename=NULL;
320         isPackage=TRUE;
321     } else {
322         inFilename=argv[1];
323         if(isPackageName(inFilename)) {
324             pkg->readPackage(inFilename);
325             isPackage=TRUE;
326         } else {
327             /* swap a single file (icuswap replacement) rather than work on a package */
328             pkg->addFile(sourcePath, inFilename);
329             isPackage=FALSE;
330         }
331     }
332 
333     if(argc>=3) {
334         outFilename=argv[2];
335         if(0!=strcmp(argv[1], argv[2])) {
336             isModified=TRUE;
337         }
338     } else if(isPackage) {
339         outFilename=NULL;
340     } else /* !isPackage */ {
341         outFilename=inFilename;
342         isModified=(UBool)(sourcePath!=destPath);
343     }
344 
345     /* parse the output type option */
346     if(options[OPT_OUT_TYPE].doesOccur) {
347         const char *type=options[OPT_OUT_TYPE].value;
348         if(type[0]==0 || type[1]!=0) {
349             /* the type must be exactly one letter */
350             printUsage(pname, FALSE);
351             return U_ILLEGAL_ARGUMENT_ERROR;
352         }
353         outType=type[0];
354         switch(outType) {
355         case 'l':
356         case 'b':
357         case 'e':
358             break;
359         default:
360             printUsage(pname, FALSE);
361             return U_ILLEGAL_ARGUMENT_ERROR;
362         }
363 
364         /*
365          * Set the isModified flag if the output type differs from the
366          * input package type.
367          * If we swap a single file, just assume that we are modifying it.
368          * The Package class does not give us access to the item and its type.
369          */
370         isModified|=(UBool)(!isPackage || outType!=pkg->getInType());
371     } else if(isPackage) {
372         outType=pkg->getInType(); // default to input type
373     } else /* !isPackage: swap single file */ {
374         outType=0; /* tells extractItem() to not swap */
375     }
376 
377     if(options[OPT_WRITEPKG].doesOccur) {
378         isModified=TRUE;
379     }
380 
381     if(!isPackage) {
382         /*
383          * icuswap tool replacement: Only swap a single file.
384          * Check that irrelevant options are not set.
385          */
386         if( options[OPT_COMMENT].doesOccur ||
387             options[OPT_COPYRIGHT].doesOccur ||
388             options[OPT_MATCHMODE].doesOccur ||
389             options[OPT_REMOVE_LIST].doesOccur ||
390             options[OPT_ADD_LIST].doesOccur ||
391             options[OPT_EXTRACT_LIST].doesOccur ||
392             options[OPT_LIST_ITEMS].doesOccur
393         ) {
394             printUsage(pname, FALSE);
395             return U_ILLEGAL_ARGUMENT_ERROR;
396         }
397         if(isModified) {
398             pkg->extractItem(destPath, outFilename, 0, outType);
399         }
400 
401         delete pkg;
402         return result;
403     }
404 
405     /* Work with a package. */
406 
407     if(options[OPT_COMMENT].doesOccur) {
408         outComment=options[OPT_COMMENT].value;
409     } else if(options[OPT_COPYRIGHT].doesOccur) {
410         outComment=U_COPYRIGHT_STRING;
411     } else {
412         outComment=NULL;
413     }
414 
415     if(options[OPT_MATCHMODE].doesOccur) {
416         if(0==strcmp(options[OPT_MATCHMODE].value, "noslash")) {
417             pkg->setMatchMode(Package::MATCH_NOSLASH);
418         } else {
419             printUsage(pname, FALSE);
420             return U_ILLEGAL_ARGUMENT_ERROR;
421         }
422     }
423 
424     /* remove items */
425     if(options[OPT_REMOVE_LIST].doesOccur) {
426         listPkg=new Package();
427         if(listPkg==NULL) {
428             fprintf(stderr, "icupkg: not enough memory\n");
429             exit(U_MEMORY_ALLOCATION_ERROR);
430         }
431         if(readList(NULL, options[OPT_REMOVE_LIST].value, FALSE, listPkg)) {
432             pkg->removeItems(*listPkg);
433             delete listPkg;
434             isModified=TRUE;
435         } else {
436             printUsage(pname, FALSE);
437             return U_ILLEGAL_ARGUMENT_ERROR;
438         }
439     }
440 
441     /*
442      * add items
443      * use a separate Package so that its memory and items stay around
444      * as long as the main Package
445      */
446     addListPkg=NULL;
447     if(options[OPT_ADD_LIST].doesOccur) {
448         addListPkg=new Package();
449         if(addListPkg==NULL) {
450             fprintf(stderr, "icupkg: not enough memory\n");
451             exit(U_MEMORY_ALLOCATION_ERROR);
452         }
453         if(readList(sourcePath, options[OPT_ADD_LIST].value, TRUE, addListPkg)) {
454             pkg->addItems(*addListPkg);
455             // delete addListPkg; deferred until after writePackage()
456             isModified=TRUE;
457         } else {
458             printUsage(pname, FALSE);
459             return U_ILLEGAL_ARGUMENT_ERROR;
460         }
461     }
462 
463     /* extract items */
464     if(options[OPT_EXTRACT_LIST].doesOccur) {
465         listPkg=new Package();
466         if(listPkg==NULL) {
467             fprintf(stderr, "icupkg: not enough memory\n");
468             exit(U_MEMORY_ALLOCATION_ERROR);
469         }
470         if(readList(NULL, options[OPT_EXTRACT_LIST].value, FALSE, listPkg)) {
471             pkg->extractItems(destPath, *listPkg, outType);
472             delete listPkg;
473         } else {
474             printUsage(pname, FALSE);
475             return U_ILLEGAL_ARGUMENT_ERROR;
476         }
477     }
478 
479     /* list items */
480     if(options[OPT_LIST_ITEMS].doesOccur) {
481         int32_t i;
482         if (options[OPT_LIST_FILE].doesOccur) {
483             FileStream *out;
484             out = T_FileStream_open(options[OPT_LIST_FILE].value, "w");
485             if (out != NULL) {
486                 for(i=0; i<pkg->getItemCount(); ++i) {
487                     T_FileStream_writeLine(out, pkg->getItem(i)->name);
488                     T_FileStream_writeLine(out, "\n");
489                 }
490                 T_FileStream_close(out);
491             } else {
492                 return U_ILLEGAL_ARGUMENT_ERROR;
493             }
494         } else {
495             for(i=0; i<pkg->getItemCount(); ++i) {
496                 fprintf(stdout, "%s\n", pkg->getItem(i)->name);
497             }
498         }
499     }
500 
501     /* check dependencies between items */
502     if(!pkg->checkDependencies()) {
503         /* some dependencies are not fulfilled */
504         return U_MISSING_RESOURCE_ERROR;
505     }
506 
507     /* write the output .dat package if there are any modifications */
508     if(isModified) {
509         char outFilenameBuffer[1024]; // for auto-generated output filename, if necessary
510 
511         if(outFilename==NULL || outFilename[0]==0) {
512             if(inFilename==NULL || inFilename[0]==0) {
513                 fprintf(stderr, "icupkg: unable to auto-generate an output filename if there is no input filename\n");
514                 exit(U_ILLEGAL_ARGUMENT_ERROR);
515             }
516 
517             /*
518              * auto-generate a filename:
519              * copy the inFilename,
520              * and if the last basename character matches the input file's type,
521              * then replace it with the output file's type
522              */
523             char suffix[6]="?.dat";
524             char *s;
525 
526             suffix[0]=pkg->getInType();
527             strcpy(outFilenameBuffer, inFilename);
528             s=strchr(outFilenameBuffer, 0);
529             if((s-outFilenameBuffer)>5 && 0==memcmp(s-5, suffix, 5)) {
530                 *(s-5)=outType;
531             }
532             outFilename=outFilenameBuffer;
533         }
534         if(options[OPT_TOC_PREFIX].doesOccur) {
535             pkg->setPrefix(options[OPT_TOC_PREFIX].value);
536         }
537         result = writePackageDatFile(outFilename, outComment, NULL, NULL, pkg, outType);
538     }
539 
540     delete addListPkg;
541     delete pkg;
542     return result;
543 }
544 
545 /*
546  * Hey, Emacs, please set the following:
547  *
548  * Local Variables:
549  * indent-tabs-mode: nil
550  * End:
551  *
552  */
553