1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 1999-2015, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: derb.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2000sep6
14 * created by: Vladimir Weinstein as an ICU workshop example
15 * maintained by: Yves Arrouye <yves@realnames.com>
16 */
17
18 #include "unicode/stringpiece.h"
19 #include "unicode/ucnv.h"
20 #include "unicode/unistr.h"
21 #include "unicode/ustring.h"
22 #include "unicode/putil.h"
23 #include "unicode/ustdio.h"
24
25 #include "charstr.h"
26 #include "uresimp.h"
27 #include "cmemory.h"
28 #include "cstring.h"
29 #include "uoptions.h"
30 #include "toolutil.h"
31 #include "ustrfmt.h"
32
33 #if !UCONFIG_NO_FORMATTING
34
35 #define DERB_VERSION "1.1"
36
37 #define DERB_DEFAULT_TRUNC 80
38
39 static const int32_t indentsize = 4;
40 static int32_t truncsize = DERB_DEFAULT_TRUNC;
41 static UBool opt_truncate = FALSE;
42
43 static const char *getEncodingName(const char *encoding);
44 static void reportError(const char *pname, UErrorCode *status, const char *when);
45 static UChar *quotedString(const UChar *string);
46 static void printOutBundle(UFILE *out, UResourceBundle *resource, int32_t indent, const char *pname, UErrorCode *status);
47 static void printString(UFILE *out, const UChar *str, int32_t len);
48 static void printCString(UFILE *out, const char *str, int32_t len);
49 static void printIndent(UFILE *out, int32_t indent);
50 static void printHex(UFILE *out, uint8_t what);
51
52 static UOption options[]={
53 UOPTION_HELP_H,
54 UOPTION_HELP_QUESTION_MARK,
55 /* 2 */ UOPTION_ENCODING,
56 /* 3 */ { "to-stdout", NULL, NULL, NULL, 'c', UOPT_NO_ARG, 0 } ,
57 /* 4 */ { "truncate", NULL, NULL, NULL, 't', UOPT_OPTIONAL_ARG, 0 },
58 /* 5 */ UOPTION_VERBOSE,
59 /* 6 */ UOPTION_DESTDIR,
60 /* 7 */ UOPTION_SOURCEDIR,
61 /* 8 */ { "bom", NULL, NULL, NULL, 0, UOPT_NO_ARG, 0 },
62 /* 9 */ UOPTION_ICUDATADIR,
63 /* 10 */ UOPTION_VERSION,
64 /* 11 */ { "suppressAliases", NULL, NULL, NULL, 'A', UOPT_NO_ARG, 0 },
65 };
66
67 static UBool verbose = FALSE;
68 static UBool suppressAliases = FALSE;
69 static UFILE *ustderr = NULL;
70
71 extern int
main(int argc,char * argv[])72 main(int argc, char* argv[]) {
73 const char *encoding = NULL;
74 const char *outputDir = NULL; /* NULL = no output directory, use current */
75 const char *inputDir = ".";
76 int tostdout = 0;
77 int prbom = 0;
78
79 const char *pname;
80
81 UResourceBundle *bundle = NULL;
82 int32_t i = 0;
83
84 const char* arg;
85
86 /* Get the name of tool. */
87 pname = uprv_strrchr(*argv, U_FILE_SEP_CHAR);
88 #if U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR
89 if (!pname) {
90 pname = uprv_strrchr(*argv, U_FILE_ALT_SEP_CHAR);
91 }
92 #endif
93 if (!pname) {
94 pname = *argv;
95 } else {
96 ++pname;
97 }
98
99 /* error handling, printing usage message */
100 argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);
101
102 /* error handling, printing usage message */
103 if(argc<0) {
104 fprintf(stderr,
105 "%s: error in command line argument \"%s\"\n", pname,
106 argv[-argc]);
107 }
108 if(argc<0 || options[0].doesOccur || options[1].doesOccur) {
109 fprintf(argc < 0 ? stderr : stdout,
110 "%csage: %s [ -h, -?, --help ] [ -V, --version ]\n"
111 " [ -v, --verbose ] [ -e, --encoding encoding ] [ --bom ]\n"
112 " [ -t, --truncate [ size ] ]\n"
113 " [ -s, --sourcedir source ] [ -d, --destdir destination ]\n"
114 " [ -i, --icudatadir directory ] [ -c, --to-stdout ]\n"
115 " [ -A, --suppressAliases]\n"
116 " bundle ...\n", argc < 0 ? 'u' : 'U',
117 pname);
118 return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
119 }
120
121 if(options[10].doesOccur) {
122 fprintf(stderr,
123 "%s version %s (ICU version %s).\n"
124 "%s\n",
125 pname, DERB_VERSION, U_ICU_VERSION, U_COPYRIGHT_STRING);
126 return U_ZERO_ERROR;
127 }
128 if(options[2].doesOccur) {
129 encoding = options[2].value;
130 }
131
132 if (options[3].doesOccur) {
133 if(options[2].doesOccur) {
134 fprintf(stderr, "%s: Error: don't specify an encoding (-e) when writing to stdout (-c).\n", pname);
135 return 3;
136 }
137 tostdout = 1;
138 }
139
140 if(options[4].doesOccur) {
141 opt_truncate = TRUE;
142 if(options[4].value != NULL) {
143 truncsize = atoi(options[4].value); /* user defined printable size */
144 } else {
145 truncsize = DERB_DEFAULT_TRUNC; /* we'll use default omitting size */
146 }
147 } else {
148 opt_truncate = FALSE;
149 }
150
151 if(options[5].doesOccur) {
152 verbose = TRUE;
153 }
154
155 if (options[6].doesOccur) {
156 outputDir = options[6].value;
157 }
158
159 if(options[7].doesOccur) {
160 inputDir = options[7].value; /* we'll use users resources */
161 }
162
163 if (options[8].doesOccur) {
164 prbom = 1;
165 }
166
167 if (options[9].doesOccur) {
168 u_setDataDirectory(options[9].value);
169 }
170
171 if (options[11].doesOccur) {
172 suppressAliases = TRUE;
173 }
174
175 fflush(stderr); // use ustderr now.
176 ustderr = u_finit(stderr, NULL, NULL);
177
178 for (i = 1; i < argc; ++i) {
179 static const UChar sp[] = { 0x0020 }; /* " " */
180
181 arg = getLongPathname(argv[i]);
182
183 if (verbose) {
184 u_fprintf(ustderr, "processing bundle \"%s\"\n", argv[i]);
185 }
186
187 icu::CharString locale;
188 UErrorCode status = U_ZERO_ERROR;
189 {
190 const char *p = findBasename(arg);
191 const char *q = uprv_strrchr(p, '.');
192 if (q == NULL) {
193 locale.append(p, status);
194 } else {
195 locale.append(p, (int32_t)(q - p), status);
196 }
197 }
198 if (U_FAILURE(status)) {
199 return status;
200 }
201
202 icu::CharString infile;
203 const char *thename = NULL;
204 UBool fromICUData = !uprv_strcmp(inputDir, "-");
205 if (!fromICUData) {
206 UBool absfilename = *arg == U_FILE_SEP_CHAR;
207 #if U_PLATFORM_HAS_WIN32_API
208 if (!absfilename) {
209 absfilename = (uprv_strlen(arg) > 2 && isalpha(arg[0])
210 && arg[1] == ':' && arg[2] == U_FILE_SEP_CHAR);
211 }
212 #endif
213 if (absfilename) {
214 thename = arg;
215 } else {
216 const char *q = uprv_strrchr(arg, U_FILE_SEP_CHAR);
217 #if U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR
218 if (q == NULL) {
219 q = uprv_strrchr(arg, U_FILE_ALT_SEP_CHAR);
220 }
221 #endif
222 infile.append(inputDir, status);
223 if(q != NULL) {
224 infile.appendPathPart(icu::StringPiece(arg, (int32_t)(q - arg)), status);
225 }
226 if (U_FAILURE(status)) {
227 return status;
228 }
229 thename = infile.data();
230 }
231 }
232 if (thename) {
233 bundle = ures_openDirect(thename, locale.data(), &status);
234 } else {
235 bundle = ures_open(fromICUData ? 0 : inputDir, locale.data(), &status);
236 }
237 if (U_SUCCESS(status)) {
238 UFILE *out = NULL;
239
240 const char *filename = 0;
241 const char *ext = 0;
242
243 if (locale.isEmpty() || !tostdout) {
244 filename = findBasename(arg);
245 ext = uprv_strrchr(filename, '.');
246 if (!ext) {
247 ext = uprv_strchr(filename, 0);
248 }
249 }
250
251 if (tostdout) {
252 out = u_get_stdout();
253 } else {
254 icu::CharString thefile;
255 if (outputDir) {
256 thefile.append(outputDir, status);
257 }
258 thefile.appendPathPart(filename, status);
259 if (*ext) {
260 thefile.truncate(thefile.length() - (int32_t)uprv_strlen(ext));
261 }
262 thefile.append(".txt", status);
263 if (U_FAILURE(status)) {
264 return status;
265 }
266
267 out = u_fopen(thefile.data(), "w", NULL, encoding);
268 if (!out) {
269 u_fprintf(ustderr, "%s: couldn't create %s\n", pname, thefile.data());
270 u_fclose(ustderr);
271 return 4;
272 }
273 }
274
275 // now, set the callback.
276 ucnv_setFromUCallBack(u_fgetConverter(out), UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C, 0, 0, &status);
277 if (U_FAILURE(status)) {
278 u_fprintf(ustderr, "%s: couldn't configure converter for encoding\n", pname);
279 u_fclose(ustderr);
280 if(!tostdout) {
281 u_fclose(out);
282 }
283 return 3;
284 }
285
286 if (prbom) { /* XXX: Should be done only for UTFs */
287 u_fputc(0xFEFF, out);
288 }
289 u_fprintf(out, "// -*- Coding: %s; -*-\n//\n", encoding ? encoding : getEncodingName(ucnv_getDefaultName()));
290 u_fprintf(out, "// This file was dumped by derb(8) from ");
291 if (thename) {
292 u_fprintf(out, "%s", thename);
293 } else if (fromICUData) {
294 u_fprintf(out, "the ICU internal %s locale", locale.data());
295 }
296
297 u_fprintf(out, "\n// derb(8) by Vladimir Weinstein and Yves Arrouye\n\n");
298
299 if (!locale.isEmpty()) {
300 u_fprintf(out, "%s", locale.data());
301 } else {
302 u_fprintf(out, "%.*s%.*S", (int32_t)(ext - filename), filename, UPRV_LENGTHOF(sp), sp);
303 }
304 printOutBundle(out, bundle, 0, pname, &status);
305
306 if (!tostdout) {
307 u_fclose(out);
308 }
309 }
310 else {
311 reportError(pname, &status, "opening resource file");
312 }
313
314 ures_close(bundle);
315 }
316
317 return 0;
318 }
319
/*
 * Returns a newly allocated, NUL-terminated copy of string in which every
 * line feed (U+000A) is replaced by the two characters backslash + 'n' and
 * every double quote (U+0022) is preceded by a backslash. All other code
 * units are copied unchanged.
 *
 * A NULL string is treated as the empty string.
 * Returns NULL if the copy cannot be allocated.
 * The caller owns the result and releases it with uprv_free().
 */
static UChar *quotedString(const UChar *string) {
    static const UChar emptyString[] = { 0 };
    int32_t alen = 0;
    const UChar *sp;
    UChar *newstr, *np;

    if (string == NULL) {
        string = emptyString;
    }

    /* First pass: count the code units needed, escapes take two. */
    for (sp = string; *sp; ++sp) {
        switch (*sp) {
        case 0x000A: /* numeric value: '\n' is not U+000A on every charset family */
        case 0x0022:
            alen += 2;
            break;

        default:
            ++alen;
            break;
        }
    }

    newstr = (UChar *) uprv_malloc((1 + alen) * U_SIZEOF_UCHAR);
    if (newstr == NULL) {
        return NULL;
    }

    /* Second pass: copy, inserting the escapes. */
    for (sp = string, np = newstr; *sp; ++sp) {
        switch (*sp) {
        case 0x000A:
            *np++ = 0x005C;
            *np++ = 0x006E;
            break;

        case 0x0022:
            *np++ = 0x005C;
            /* fall through: the quote itself is copied below */

        default:
            *np++ = *sp;
            break;
        }
    }
    *np = 0;

    return newstr;
}
355
356
printString(UFILE * out,const UChar * str,int32_t len)357 static void printString(UFILE *out, const UChar *str, int32_t len) {
358 u_file_write(str, len, out);
359 }
360
printCString(UFILE * out,const char * str,int32_t len)361 static void printCString(UFILE *out, const char *str, int32_t len) {
362 if(len==-1) {
363 u_fprintf(out, "%s", str);
364 } else {
365 u_fprintf(out, "%.*s", len, str);
366 }
367 }
368
printIndent(UFILE * out,int32_t indent)369 static void printIndent(UFILE *out, int32_t indent) {
370 icu::UnicodeString inchar(indent, 0x20, indent);
371 printString(out, inchar.getBuffer(), indent);
372 }
373
printHex(UFILE * out,uint8_t what)374 static void printHex(UFILE *out, uint8_t what) {
375 static const char map[] = "0123456789ABCDEF";
376 UChar hex[2];
377
378 hex[0] = map[what >> 4];
379 hex[1] = map[what & 0xf];
380
381 printString(out, hex, 2);
382 }
383
printOutAlias(UFILE * out,UResourceBundle * parent,Resource r,const char * key,int32_t indent,const char * pname,UErrorCode * status)384 static void printOutAlias(UFILE *out, UResourceBundle *parent, Resource r, const char *key, int32_t indent, const char *pname, UErrorCode *status) {
385 static const UChar cr[] = { 0xA }; // LF
386 int32_t len = 0;
387 const UChar* thestr = res_getAlias(&(parent->fResData), r, &len);
388 UChar *string = quotedString(thestr);
389 if(opt_truncate && len > truncsize) {
390 char msg[128];
391 printIndent(out, indent);
392 sprintf(msg, "// WARNING: this resource, size %li is truncated to %li\n",
393 (long)len, (long)truncsize/2);
394 printCString(out, msg, -1);
395 len = truncsize;
396 }
397 if(U_SUCCESS(*status)) {
398 static const UChar openStr[] = { 0x003A, 0x0061, 0x006C, 0x0069, 0x0061, 0x0073, 0x0020, 0x007B, 0x0020, 0x0022 }; /* ":alias { \"" */
399 static const UChar closeStr[] = { 0x0022, 0x0020, 0x007D, 0x0020 }; /* "\" } " */
400 printIndent(out, indent);
401 if(key != NULL) {
402 printCString(out, key, -1);
403 }
404 printString(out, openStr, UPRV_LENGTHOF(openStr));
405 printString(out, string, len);
406 printString(out, closeStr, UPRV_LENGTHOF(closeStr));
407 if(verbose) {
408 printCString(out, " // ALIAS", -1);
409 }
410 printString(out, cr, UPRV_LENGTHOF(cr));
411 } else {
412 reportError(pname, status, "getting binary value");
413 }
414 uprv_free(string);
415 }
416
printOutBundle(UFILE * out,UResourceBundle * resource,int32_t indent,const char * pname,UErrorCode * status)417 static void printOutBundle(UFILE *out, UResourceBundle *resource, int32_t indent, const char *pname, UErrorCode *status)
418 {
419 static const UChar cr[] = { 0xA }; // LF
420
421 /* int32_t noOfElements = ures_getSize(resource);*/
422 int32_t i = 0;
423 const char *key = ures_getKey(resource);
424
425 switch(ures_getType(resource)) {
426 case URES_STRING :
427 {
428 int32_t len=0;
429 const UChar* thestr = ures_getString(resource, &len, status);
430 UChar *string = quotedString(thestr);
431
432 /* TODO: String truncation */
433 if(opt_truncate && len > truncsize) {
434 char msg[128];
435 printIndent(out, indent);
436 sprintf(msg, "// WARNING: this resource, size %li is truncated to %li\n",
437 (long)len, (long)(truncsize/2));
438 printCString(out, msg, -1);
439 len = truncsize/2;
440 }
441 printIndent(out, indent);
442 if(key != NULL) {
443 static const UChar openStr[] = { 0x0020, 0x007B, 0x0020, 0x0022 }; /* " { \"" */
444 static const UChar closeStr[] = { 0x0022, 0x0020, 0x007D }; /* "\" }" */
445 printCString(out, key, (int32_t)uprv_strlen(key));
446 printString(out, openStr, UPRV_LENGTHOF(openStr));
447 printString(out, string, len);
448 printString(out, closeStr, UPRV_LENGTHOF(closeStr));
449 } else {
450 static const UChar openStr[] = { 0x0022 }; /* "\"" */
451 static const UChar closeStr[] = { 0x0022, 0x002C }; /* "\"," */
452
453 printString(out, openStr, UPRV_LENGTHOF(openStr));
454 printString(out, string, (int32_t)(u_strlen(string)));
455 printString(out, closeStr, UPRV_LENGTHOF(closeStr));
456 }
457
458 if(verbose) {
459 printCString(out, "// STRING", -1);
460 }
461 printString(out, cr, UPRV_LENGTHOF(cr));
462
463 uprv_free(string);
464 }
465 break;
466
467 case URES_INT :
468 {
469 static const UChar openStr[] = { 0x003A, 0x0069, 0x006E, 0x0074, 0x0020, 0x007B, 0x0020 }; /* ":int { " */
470 static const UChar closeStr[] = { 0x0020, 0x007D }; /* " }" */
471 UChar num[20];
472
473 printIndent(out, indent);
474 if(key != NULL) {
475 printCString(out, key, -1);
476 }
477 printString(out, openStr, UPRV_LENGTHOF(openStr));
478 uprv_itou(num, 20, ures_getInt(resource, status), 10, 0);
479 printString(out, num, u_strlen(num));
480 printString(out, closeStr, UPRV_LENGTHOF(closeStr));
481
482 if(verbose) {
483 printCString(out, "// INT", -1);
484 }
485 printString(out, cr, UPRV_LENGTHOF(cr));
486 break;
487 }
488 case URES_BINARY :
489 {
490 int32_t len = 0;
491 const int8_t *data = (const int8_t *)ures_getBinary(resource, &len, status);
492 if(opt_truncate && len > truncsize) {
493 char msg[128];
494 printIndent(out, indent);
495 sprintf(msg, "// WARNING: this resource, size %li is truncated to %li\n",
496 (long)len, (long)(truncsize/2));
497 printCString(out, msg, -1);
498 len = truncsize;
499 }
500 if(U_SUCCESS(*status)) {
501 static const UChar openStr[] = { 0x003A, 0x0062, 0x0069, 0x006E, 0x0061, 0x0072, 0x0079, 0x0020, 0x007B, 0x0020 }; /* ":binary { " */
502 static const UChar closeStr[] = { 0x0020, 0x007D, 0x0020 }; /* " } " */
503 printIndent(out, indent);
504 if(key != NULL) {
505 printCString(out, key, -1);
506 }
507 printString(out, openStr, UPRV_LENGTHOF(openStr));
508 for(i = 0; i<len; i++) {
509 printHex(out, *data++);
510 }
511 printString(out, closeStr, UPRV_LENGTHOF(closeStr));
512 if(verbose) {
513 printCString(out, " // BINARY", -1);
514 }
515 printString(out, cr, UPRV_LENGTHOF(cr));
516 } else {
517 reportError(pname, status, "getting binary value");
518 }
519 }
520 break;
521 case URES_INT_VECTOR :
522 {
523 int32_t len = 0;
524 const int32_t *data = ures_getIntVector(resource, &len, status);
525 if(U_SUCCESS(*status)) {
526 static const UChar openStr[] = { 0x003A, 0x0069, 0x006E, 0x0074, 0x0076, 0x0065, 0x0063, 0x0074, 0x006F, 0x0072, 0x0020, 0x007B, 0x0020 }; /* ":intvector { " */
527 static const UChar closeStr[] = { 0x0020, 0x007D, 0x0020 }; /* " } " */
528 UChar num[20];
529
530 printIndent(out, indent);
531 if(key != NULL) {
532 printCString(out, key, -1);
533 }
534 printString(out, openStr, UPRV_LENGTHOF(openStr));
535 for(i = 0; i < len - 1; i++) {
536 int32_t numLen = uprv_itou(num, 20, data[i], 10, 0);
537 num[numLen++] = 0x002C; /* ',' */
538 num[numLen++] = 0x0020; /* ' ' */
539 num[numLen] = 0;
540 printString(out, num, u_strlen(num));
541 }
542 if(len > 0) {
543 uprv_itou(num, 20, data[len - 1], 10, 0);
544 printString(out, num, u_strlen(num));
545 }
546 printString(out, closeStr, UPRV_LENGTHOF(closeStr));
547 if(verbose) {
548 printCString(out, "// INTVECTOR", -1);
549 }
550 printString(out, cr, UPRV_LENGTHOF(cr));
551 } else {
552 reportError(pname, status, "getting int vector");
553 }
554 }
555 break;
556 case URES_TABLE :
557 case URES_ARRAY :
558 {
559 static const UChar openStr[] = { 0x007B }; /* "{" */
560 static const UChar closeStr[] = { 0x007D, '\n' }; /* "}\n" */
561
562 UResourceBundle *t = NULL;
563 ures_resetIterator(resource);
564 printIndent(out, indent);
565 if(key != NULL) {
566 printCString(out, key, -1);
567 }
568 printString(out, openStr, UPRV_LENGTHOF(openStr));
569 if(verbose) {
570 if(ures_getType(resource) == URES_TABLE) {
571 printCString(out, "// TABLE", -1);
572 } else {
573 printCString(out, "// ARRAY", -1);
574 }
575 }
576 printString(out, cr, UPRV_LENGTHOF(cr));
577
578 if(suppressAliases == FALSE) {
579 while(U_SUCCESS(*status) && ures_hasNext(resource)) {
580 t = ures_getNextResource(resource, t, status);
581 if(U_SUCCESS(*status)) {
582 printOutBundle(out, t, indent+indentsize, pname, status);
583 } else {
584 reportError(pname, status, "While processing table");
585 *status = U_ZERO_ERROR;
586 }
587 }
588 } else { /* we have to use low level access to do this */
589 Resource r;
590 int32_t resSize = ures_getSize(resource);
591 UBool isTable = (UBool)(ures_getType(resource) == URES_TABLE);
592 for(i = 0; i < resSize; i++) {
593 /* need to know if it's an alias */
594 if(isTable) {
595 r = res_getTableItemByIndex(&resource->fResData, resource->fRes, i, &key);
596 } else {
597 r = res_getArrayItem(&resource->fResData, resource->fRes, i);
598 }
599 if(U_SUCCESS(*status)) {
600 if(res_getPublicType(r) == URES_ALIAS) {
601 printOutAlias(out, resource, r, key, indent+indentsize, pname, status);
602 } else {
603 t = ures_getByIndex(resource, i, t, status);
604 printOutBundle(out, t, indent+indentsize, pname, status);
605 }
606 } else {
607 reportError(pname, status, "While processing table");
608 *status = U_ZERO_ERROR;
609 }
610 }
611 }
612
613 printIndent(out, indent);
614 printString(out, closeStr, UPRV_LENGTHOF(closeStr));
615 ures_close(t);
616 }
617 break;
618 default:
619 break;
620 }
621
622 }
623
getEncodingName(const char * encoding)624 static const char *getEncodingName(const char *encoding) {
625 UErrorCode err;
626 const char *enc;
627
628 err = U_ZERO_ERROR;
629 if (!(enc = ucnv_getStandardName(encoding, "MIME", &err))) {
630 err = U_ZERO_ERROR;
631 if (!(enc = ucnv_getStandardName(encoding, "IANA", &err))) {
632 ;
633 }
634 }
635
636 return enc;
637 }
638
reportError(const char * pname,UErrorCode * status,const char * when)639 static void reportError(const char *pname, UErrorCode *status, const char *when) {
640 u_fprintf(ustderr, "%s: error %d while %s: %s\n", pname, *status, when, u_errorName(*status));
641 }
642
643 #else
/*
 * Replacement entry point compiled when UCONFIG_NO_FORMATTING is set:
 * the tool cannot work in that configuration and always exits with 3.
 */
extern int
main(int argc, char* argv[]) {
    /* Changing stdio.h ustdio.h requires that formatting not be disabled. */
    (void)argc;
    (void)argv;
    return 3;
}
649 #endif /* !UCONFIG_NO_FORMATTING */
650
651 /*
652 * Local Variables:
653 * indent-tabs-mode: nil
654 * End:
655 */
656