1 /*****************************************************************************
2 *
3 *   Copyright (C) 1999-2014, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 *
6 ******************************************************************************/
7 
8 /*
9  * uconv(1): an iconv(1)-like converter using ICU.
10  *
11  * Original code by Jonas Utterstr&#x00F6;m <jonas.utterstrom@vittran.norrnod.se>
12  * contributed in 1999.
13  *
14  * Conversion to the C conversion API and many improvements by
15  * Yves Arrouye <yves@realnames.com>, current maintainer.
16  *
17  * Markus Scherer maintainer from 2003.
18  * See source code repository history for changes.
19  */
20 
21 #include <unicode/utypes.h>
22 #include <unicode/putil.h>
23 #include <unicode/ucnv.h>
24 #include <unicode/uenum.h>
25 #include <unicode/unistr.h>
26 #include <unicode/translit.h>
27 #include <unicode/uset.h>
28 #include <unicode/uclean.h>
29 #include <unicode/utf16.h>
30 
31 #include <stdio.h>
32 #include <errno.h>
33 #include <string.h>
34 #include <stdlib.h>
35 
36 #include "cmemory.h"
37 #include "cstring.h"
38 #include "ustrfmt.h"
39 
40 #include "unicode/uwmsg.h"
41 
42 U_NAMESPACE_USE
43 
44 #if U_PLATFORM_USES_ONLY_WIN32_API && !defined(__STRICT_ANSI__)
45 #include <io.h>
46 #include <fcntl.h>
47 #if U_PLATFORM_USES_ONLY_WIN32_API
48 #define USE_FILENO_BINARY_MODE 1
49 /* Windows likes to rename Unix-like functions */
50 #ifndef fileno
51 #define fileno _fileno
52 #endif
53 #ifndef setmode
54 #define setmode _setmode
55 #endif
56 #ifndef O_BINARY
57 #define O_BINARY _O_BINARY
58 #endif
59 #endif
60 #endif
61 
62 #ifdef UCONVMSG_LINK
63 /* below from the README */
64 #include "unicode/utypes.h"
65 #include "unicode/udata.h"
66 U_CFUNC char uconvmsg_dat[];
67 #endif
68 
69 #define DEFAULT_BUFSZ   4096
70 #define UCONVMSG "uconvmsg"
71 
72 static UResourceBundle *gBundle = 0;    /* Bundle containing messages. */
73 
74 /*
75  * Initialize the message bundle so that message strings can be fetched
76  * by u_wmsg().
77  *
78  */
79 
initMsg(const char * pname)80 static void initMsg(const char *pname) {
81     static int ps = 0;
82 
83     if (!ps) {
84         char dataPath[2048];        /* XXX Sloppy: should be PATH_MAX. */
85         UErrorCode err = U_ZERO_ERROR;
86 
87         ps = 1;
88 
89         /* Set up our static data - if any */
90 #if defined(UCONVMSG_LINK) && U_PLATFORM != U_PF_OS390 /* On z/OS, this is failing. */
91         udata_setAppData(UCONVMSG, (const void*) uconvmsg_dat, &err);
92         if (U_FAILURE(err)) {
93           fprintf(stderr, "%s: warning, problem installing our static resource bundle data uconvmsg: %s - trying anyways.\n",
94                   pname, u_errorName(err));
95           err = U_ZERO_ERROR; /* It may still fail */
96         }
97 #endif
98 
99         /* Get messages. */
100         gBundle = u_wmsg_setPath(UCONVMSG, &err);
101         if (U_FAILURE(err)) {
102             fprintf(stderr,
103                     "%s: warning: couldn't open bundle %s: %s\n",
104                     pname, UCONVMSG, u_errorName(err));
105 #ifdef UCONVMSG_LINK
106             fprintf(stderr,
107                     "%s: setAppData was called, internal data %s failed to load\n",
108                         pname, UCONVMSG);
109 #endif
110 
111             err = U_ZERO_ERROR;
112             /* that was try #1, try again with a path */
113             uprv_strcpy(dataPath, u_getDataDirectory());
114             uprv_strcat(dataPath, U_FILE_SEP_STRING);
115             uprv_strcat(dataPath, UCONVMSG);
116 
117             gBundle = u_wmsg_setPath(dataPath, &err);
118             if (U_FAILURE(err)) {
119                 fprintf(stderr,
120                     "%s: warning: still couldn't open bundle %s: %s\n",
121                     pname, dataPath, u_errorName(err));
122                 fprintf(stderr, "%s: warning: messages will not be displayed\n", pname);
123             }
124         }
125     }
126 }
127 
128 /* Mapping of callback names to the callbacks passed to the converter
129    API. */
130 
131 static struct callback_ent {
132     const char *name;
133     UConverterFromUCallback fromu;
134     const void *fromuctxt;
135     UConverterToUCallback tou;
136     const void *touctxt;
137 } transcode_callbacks[] = {
138     { "substitute",
139       UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0,
140       UCNV_TO_U_CALLBACK_SUBSTITUTE, 0 },
141     { "skip",
142       UCNV_FROM_U_CALLBACK_SKIP, 0,
143       UCNV_TO_U_CALLBACK_SKIP, 0 },
144     { "stop",
145       UCNV_FROM_U_CALLBACK_STOP, 0,
146       UCNV_TO_U_CALLBACK_STOP, 0 },
147     { "escape",
148       UCNV_FROM_U_CALLBACK_ESCAPE, 0,
149       UCNV_TO_U_CALLBACK_ESCAPE, 0},
150     { "escape-icu",
151       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU,
152       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU },
153     { "escape-java",
154       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA,
155       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA },
156     { "escape-c",
157       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C,
158       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C },
159     { "escape-xml",
160       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX,
161       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX },
162     { "escape-xml-hex",
163       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX,
164       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX },
165     { "escape-xml-dec",
166       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC,
167       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC },
168     { "escape-unicode", UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE,
169       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE }
170 };
171 
172 /* Return a pointer to a callback record given its name. */
173 
findCallback(const char * name)174 static const struct callback_ent *findCallback(const char *name) {
175     int i, count =
176         sizeof(transcode_callbacks) / sizeof(*transcode_callbacks);
177 
178     /* We'll do a linear search, there aren't many of them and bsearch()
179        may not be that portable. */
180 
181     for (i = 0; i < count; ++i) {
182         if (!uprv_stricmp(name, transcode_callbacks[i].name)) {
183             return &transcode_callbacks[i];
184         }
185     }
186 
187     return 0;
188 }
189 
/* Print converter information. If lookfor is set, only that converter will
   be printed, otherwise all converters will be printed. If canon is
   nonzero, tags and aliases for each converter are printed too, in the
   format expected for convrtrs.txt(5). */
194 
printConverters(const char * pname,const char * lookfor,UBool canon)195 static int printConverters(const char *pname, const char *lookfor,
196     UBool canon)
197 {
198     UErrorCode err = U_ZERO_ERROR;
199     int32_t num;
200     uint16_t num_stds;
201     const char **stds;
202 
203     /* If there is a specified name, just handle that now. */
204 
205     if (lookfor) {
206         if (!canon) {
207             printf("%s\n", lookfor);
208             return 0;
209         } else {
210         /*  Because we are printing a canonical name, we need the
211             true converter name. We've done that already except for
212             the default name (because we want to print the exact
213             name one would get when calling ucnv_getDefaultName()
214             in non-canon mode). But since we do not know at this
215             point if we have the default name or something else, we
216             need to normalize again to the canonical converter
217             name. */
218 
219             const char *truename = ucnv_getAlias(lookfor, 0, &err);
220             if (U_SUCCESS(err)) {
221                 lookfor = truename;
222             } else {
223                 err = U_ZERO_ERROR;
224             }
225         }
226     }
227 
228     /* Print converter names. We come here for one of two reasons: we
229        are printing all the names (lookfor was null), or we have a
230        single converter to print but in canon mode, hence we need to
231        get to it in order to print everything. */
232 
233     num = ucnv_countAvailable();
234     if (num <= 0) {
235         initMsg(pname);
236         u_wmsg(stderr, "cantGetNames");
237         return -1;
238     }
239     if (lookfor) {
240         num = 1;                /* We know where we want to be. */
241     }
242 
243     num_stds = ucnv_countStandards();
244     stds = (const char **) uprv_malloc(num_stds * sizeof(*stds));
245     if (!stds) {
246         u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(U_MEMORY_ALLOCATION_ERROR));
247         return -1;
248     } else {
249         uint16_t s;
250 
251         if (canon) {
252             printf("{ ");
253         }
254         for (s = 0; s < num_stds; ++s) {
255             stds[s] = ucnv_getStandard(s, &err);
256             if (canon) {
257                 printf("%s ", stds[s]);
258             }
259             if (U_FAILURE(err)) {
260                 u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(err));
261                 goto error_cleanup;
262             }
263         }
264         if (canon) {
265             puts("}");
266         }
267     }
268 
269     for (int32_t i = 0; i < num; i++) {
270         const char *name;
271         uint16_t num_aliases;
272 
273         /* Set the name either to what we are looking for, or
274         to the current converter name. */
275 
276         if (lookfor) {
277             name = lookfor;
278         } else {
279             name = ucnv_getAvailableName(i);
280         }
281 
282         /* Get all the aliases associated to the name. */
283 
284         err = U_ZERO_ERROR;
285         num_aliases = ucnv_countAliases(name, &err);
286         if (U_FAILURE(err)) {
287             printf("%s", name);
288 
289             UnicodeString str(name, "");
290             putchar('\t');
291             u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(),
292                 u_wmsg_errorName(err));
293             goto error_cleanup;
294         } else {
295             uint16_t a, s, t;
296 
297             /* Write all the aliases and their tags. */
298 
299             for (a = 0; a < num_aliases; ++a) {
300                 const char *alias = ucnv_getAlias(name, a, &err);
301 
302                 if (U_FAILURE(err)) {
303                     UnicodeString str(name, "");
304                     putchar('\t');
305                     u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(),
306                         u_wmsg_errorName(err));
307                     goto error_cleanup;
308                 }
309 
310                 /* Print the current alias so that it looks right. */
311                 printf("%s%s%s", (canon ? (a == 0? "" : "\t" ) : "") ,
312                                  alias,
313                                  (canon ? "" : " "));
314 
315                 /* Look (slowly, linear searching) for a tag. */
316 
317                 if (canon) {
318                     /* -1 to skip the last standard */
319                     for (s = t = 0; s < num_stds-1; ++s) {
320                         UEnumeration *nameEnum = ucnv_openStandardNames(name, stds[s], &err);
321                         if (U_SUCCESS(err)) {
322                             /* List the standard tags */
323                             const char *standardName;
324                             UBool isFirst = TRUE;
325                             UErrorCode enumError = U_ZERO_ERROR;
326                             while ((standardName = uenum_next(nameEnum, NULL, &enumError))) {
327                                 /* See if this alias is supported by this standard. */
328                                 if (!strcmp(standardName, alias)) {
329                                     if (!t) {
330                                         printf(" {");
331                                         t = 1;
332                                     }
333                                     /* Print a * after the default standard name */
334                                     printf(" %s%s", stds[s], (isFirst ? "*" : ""));
335                                 }
336                                 isFirst = FALSE;
337                             }
338                         }
339                     }
340                     if (t) {
341                         printf(" }");
342                     }
343                 }
344                 /* Terminate this entry. */
345                 if (canon) {
346                     puts("");
347                 }
348 
349                 /* Move on. */
350             }
351             /* Terminate this entry. */
352             if (!canon) {
353                 puts("");
354             }
355         }
356     }
357 
358     /* Free temporary data. */
359 
360     uprv_free(stds);
361 
362     /* Success. */
363 
364     return 0;
365 error_cleanup:
366     uprv_free(stds);
367     return -1;
368 }
369 
370 /* Print all available transliterators. If canon is non zero, print
371    one transliterator per line. */
372 
printTransliterators(UBool canon)373 static int printTransliterators(UBool canon)
374 {
375 #if UCONFIG_NO_TRANSLITERATION
376     printf("no transliterators available because of UCONFIG_NO_TRANSLITERATION, see uconfig.h\n");
377     return 1;
378 #else
379     UErrorCode status = U_ZERO_ERROR;
380     UEnumeration *ids = utrans_openIDs(&status);
381     int32_t i, numtrans = uenum_count(ids, &status);
382 
383     char sepchar = canon ? '\n' : ' ';
384 
385     for (i = 0; U_SUCCESS(status)&& (i < numtrans); ++i) {
386     	int32_t len;
387     	const char *nextTrans = uenum_next(ids, &len, &status);
388 
389         printf("%s", nextTrans);
390         if (i < numtrans - 1) {
391             putchar(sepchar);
392         }
393     }
394 
395     uenum_close(ids);
396 
397     /* Add a terminating newline if needed. */
398 
399     if (sepchar != '\n') {
400         putchar('\n');
401     }
402 
403     /* Success. */
404 
405     return 0;
406 #endif
407 }
408 
// Code points that the text-handling code below compares against.
enum {
    uSP  = 0x0020,      // U+0020 space
    uCR  = 0x000d,      // U+000D carriage return
    uLF  = 0x000a,      // U+000A line feed
    uNL  = 0x0085,      // U+0085 newline (next line)
    uLS  = 0x2028,      // U+2028 line separator
    uPS  = 0x2029,      // U+2029 paragraph separator
    uSig = 0xfeff       // U+FEFF signature/BOM character
};
418 
419 static inline int32_t
getChunkLimit(const UnicodeString & prev,const UnicodeString & s)420 getChunkLimit(const UnicodeString &prev, const UnicodeString &s) {
421     // find one of
422     // CR, LF, CRLF, NL, LS, PS
423     // for paragraph ends (see UAX #13/Unicode 4)
424     // and include it in the chunk
425     // all of these characters are on the BMP
426     // do not include FF or VT in case they are part of a paragraph
427     // (important for bidi contexts)
428     static const UChar paraEnds[] = {
429         0xd, 0xa, 0x85, 0x2028, 0x2029
430     };
431     enum {
432         iCR, iLF, iNL, iLS, iPS, iCount
433     };
434 
435     // first, see if there is a CRLF split between prev and s
436     if (prev.endsWith(paraEnds + iCR, 1)) {
437         if (s.startsWith(paraEnds + iLF, 1)) {
438             return 1; // split CRLF, include the LF
439         } else if (!s.isEmpty()) {
440             return 0; // complete the last chunk
441         } else {
442             return -1; // wait for actual further contents to arrive
443         }
444     }
445 
446     const UChar *u = s.getBuffer(), *limit = u + s.length();
447     UChar c;
448 
449     while (u < limit) {
450         c = *u++;
451         if (
452             ((c < uSP) && (c == uCR || c == uLF)) ||
453             (c == uNL) ||
454             ((c & uLS) == uLS)
455         ) {
456             if (c == uCR) {
457                 // check for CRLF
458                 if (u == limit) {
459                     return -1; // LF may be in the next chunk
460                 } else if (*u == uLF) {
461                     ++u; // include the LF in this chunk
462                 }
463             }
464             return (int32_t)(u - s.getBuffer());
465         }
466     }
467 
468     return -1; // continue collecting the chunk
469 }
470 
// How a converter relates to the U+FEFF Unicode signature character (BOM).
enum {
    CNV_NO_FEFF   = 0,  // U+FEFF cannot be converted
    CNV_WITH_FEFF = 1,  // U+FEFF can be converted
    CNV_ADDS_FEFF = 2   // U+FEFF is added/detected automatically
};
476 
static inline UChar
nibbleToHex(uint8_t n) {
    // Map the low four bits of n to a lowercase hexadecimal digit.
    // Entries are the code units for '0'..'9' followed by 'a'..'f'.
    static const UChar hexDigits[16] = {
        0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
        0x38, 0x39, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66
    };

    return hexDigits[n & 0xf];
}
485 
486 // check the converter's Unicode signature properties;
487 // the fromUnicode side of the converter must be in its initial state
488 // and will be reset again if it was used
489 static int32_t
cnvSigType(UConverter * cnv)490 cnvSigType(UConverter *cnv) {
491     UErrorCode err;
492     int32_t result;
493 
494     // test if the output charset can convert U+FEFF
495     USet *set = uset_open(1, 0);
496     err = U_ZERO_ERROR;
497     ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &err);
498     if (U_SUCCESS(err) && uset_contains(set, uSig)) {
499         result = CNV_WITH_FEFF;
500     } else {
501         result = CNV_NO_FEFF; // an error occurred or U+FEFF cannot be converted
502     }
503     uset_close(set);
504 
505     if (result == CNV_WITH_FEFF) {
506         // test if the output charset emits a signature anyway
507         const UChar a[1] = { 0x61 }; // "a"
508         const UChar *in;
509 
510         char buffer[20];
511         char *out;
512 
513         in = a;
514         out = buffer;
515         err = U_ZERO_ERROR;
516         ucnv_fromUnicode(cnv,
517             &out, buffer + sizeof(buffer),
518             &in, a + 1,
519             NULL, TRUE, &err);
520         ucnv_resetFromUnicode(cnv);
521 
522         if (NULL != ucnv_detectUnicodeSignature(buffer, (int32_t)(out - buffer), NULL, &err) &&
523             U_SUCCESS(err)
524         ) {
525             result = CNV_ADDS_FEFF;
526         }
527     }
528 
529     return result;
530 }
531 
532 class ConvertFile {
533 public:
ConvertFile()534     ConvertFile() :
535         buf(NULL), outbuf(NULL), fromoffsets(NULL),
536         bufsz(0), signature(0) {}
537 
538     void
setBufferSize(size_t bufferSize)539     setBufferSize(size_t bufferSize) {
540         bufsz = bufferSize;
541 
542         buf = new char[2 * bufsz];
543         outbuf = buf + bufsz;
544 
545         // +1 for an added U+FEFF in the intermediate Unicode buffer
546         fromoffsets = new int32_t[bufsz + 1];
547     }
548 
~ConvertFile()549     ~ConvertFile() {
550         delete [] buf;
551         delete [] fromoffsets;
552     }
553 
554     UBool convertFile(const char *pname,
555                       const char *fromcpage,
556                       UConverterToUCallback toucallback,
557                       const void *touctxt,
558                       const char *tocpage,
559                       UConverterFromUCallback fromucallback,
560                       const void *fromuctxt,
561                       UBool fallback,
562                       const char *translit,
563                       const char *infilestr,
564                       FILE * outfile, int verbose);
565 private:
566     friend int main(int argc, char **argv);
567 
568     char *buf, *outbuf;
569     int32_t *fromoffsets;
570 
571     size_t bufsz;
572     int8_t signature; // add (1) or remove (-1) a U+FEFF Unicode signature character
573 };
574 
575 // Convert a file from one encoding to another
576 UBool
convertFile(const char * pname,const char * fromcpage,UConverterToUCallback toucallback,const void * touctxt,const char * tocpage,UConverterFromUCallback fromucallback,const void * fromuctxt,UBool fallback,const char * translit,const char * infilestr,FILE * outfile,int verbose)577 ConvertFile::convertFile(const char *pname,
578                          const char *fromcpage,
579                          UConverterToUCallback toucallback,
580                          const void *touctxt,
581                          const char *tocpage,
582                          UConverterFromUCallback fromucallback,
583                          const void *fromuctxt,
584                          UBool fallback,
585                          const char *translit,
586                          const char *infilestr,
587                          FILE * outfile, int verbose)
588 {
589     FILE *infile;
590     UBool ret = TRUE;
591     UConverter *convfrom = 0;
592     UConverter *convto = 0;
593     UErrorCode err = U_ZERO_ERROR;
594     UBool flush;
595     UBool closeFile = FALSE;
596     const char *cbufp, *prevbufp;
597     char *bufp;
598 
599     uint32_t infoffset = 0, outfoffset = 0;   /* Where we are in the file, for error reporting. */
600 
601     const UChar *unibuf, *unibufbp;
602     UChar *unibufp;
603 
604     size_t rd, wr;
605 
606 #if !UCONFIG_NO_TRANSLITERATION
607     Transliterator *t = 0;      // Transliterator acting on Unicode data.
608     UnicodeString chunk;        // One chunk of the text being collected for transformation.
609 #endif
610     UnicodeString u;            // String to do the transliteration.
611     int32_t ulen;
612 
613     // use conversion offsets for error messages
614     // unless a transliterator is used -
615     // a text transformation will reorder characters in unpredictable ways
616     UBool useOffsets = TRUE;
617 
618     // Open the correct input file or connect to stdin for reading input
619 
620     if (infilestr != 0 && strcmp(infilestr, "-")) {
621         infile = fopen(infilestr, "rb");
622         if (infile == 0) {
623             UnicodeString str1(infilestr, "");
624             str1.append((UChar32) 0);
625             UnicodeString str2(strerror(errno), "");
626             str2.append((UChar32) 0);
627             initMsg(pname);
628             u_wmsg(stderr, "cantOpenInputF", str1.getBuffer(), str2.getBuffer());
629             return FALSE;
630         }
631         closeFile = TRUE;
632     } else {
633         infilestr = "-";
634         infile = stdin;
635 #ifdef USE_FILENO_BINARY_MODE
636         if (setmode(fileno(stdin), O_BINARY) == -1) {
637             initMsg(pname);
638             u_wmsg(stderr, "cantSetInBinMode");
639             return FALSE;
640         }
641 #endif
642     }
643 
644     if (verbose) {
645         fprintf(stderr, "%s:\n", infilestr);
646     }
647 
648 #if !UCONFIG_NO_TRANSLITERATION
649     // Create transliterator as needed.
650 
651     if (translit != NULL && *translit) {
652         UParseError parse;
653         UnicodeString str(translit), pestr;
654 
655         /* Create from rules or by ID as needed. */
656 
657         parse.line = -1;
658 
659         if (uprv_strchr(translit, ':') || uprv_strchr(translit, '>') || uprv_strchr(translit, '<') || uprv_strchr(translit, '>')) {
660             t = Transliterator::createFromRules(UNICODE_STRING_SIMPLE("Uconv"), str, UTRANS_FORWARD, parse, err);
661         } else {
662             t = Transliterator::createInstance(UnicodeString(translit, -1, US_INV), UTRANS_FORWARD, err);
663         }
664 
665         if (U_FAILURE(err)) {
666             str.append((UChar32) 0);
667             initMsg(pname);
668 
669             if (parse.line >= 0) {
670                 UChar linebuf[20], offsetbuf[20];
671                 uprv_itou(linebuf, 20, parse.line, 10, 0);
672                 uprv_itou(offsetbuf, 20, parse.offset, 10, 0);
673                 u_wmsg(stderr, "cantCreateTranslitParseErr", str.getTerminatedBuffer(),
674                     u_wmsg_errorName(err), linebuf, offsetbuf);
675             } else {
676                 u_wmsg(stderr, "cantCreateTranslit", str.getTerminatedBuffer(),
677                     u_wmsg_errorName(err));
678             }
679 
680             if (t) {
681                 delete t;
682                 t = 0;
683             }
684             goto error_exit;
685         }
686 
687         useOffsets = FALSE;
688     }
689 #endif
690 
691     // Create codepage converter. If the codepage or its aliases weren't
692     // available, it returns NULL and a failure code. We also set the
693     // callbacks, and return errors in the same way.
694 
695     convfrom = ucnv_open(fromcpage, &err);
696     if (U_FAILURE(err)) {
697         UnicodeString str(fromcpage, "");
698         initMsg(pname);
699         u_wmsg(stderr, "cantOpenFromCodeset", str.getTerminatedBuffer(),
700             u_wmsg_errorName(err));
701         goto error_exit;
702     }
703     ucnv_setToUCallBack(convfrom, toucallback, touctxt, 0, 0, &err);
704     if (U_FAILURE(err)) {
705         initMsg(pname);
706         u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
707         goto error_exit;
708     }
709 
710     convto = ucnv_open(tocpage, &err);
711     if (U_FAILURE(err)) {
712         UnicodeString str(tocpage, "");
713         initMsg(pname);
714         u_wmsg(stderr, "cantOpenToCodeset", str.getTerminatedBuffer(),
715             u_wmsg_errorName(err));
716         goto error_exit;
717     }
718     ucnv_setFromUCallBack(convto, fromucallback, fromuctxt, 0, 0, &err);
719     if (U_FAILURE(err)) {
720         initMsg(pname);
721         u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
722         goto error_exit;
723     }
724     ucnv_setFallback(convto, fallback);
725 
726     UBool willexit, fromSawEndOfBytes, toSawEndOfUnicode;
727     int8_t sig;
728 
729     // OK, we can convert now.
730     sig = signature;
731     rd = 0;
732 
733     do {
734         willexit = FALSE;
735 
736         // input file offset at the beginning of the next buffer
737         infoffset += rd;
738 
739         rd = fread(buf, 1, bufsz, infile);
740         if (ferror(infile) != 0) {
741             UnicodeString str(strerror(errno));
742             initMsg(pname);
743             u_wmsg(stderr, "cantRead", str.getTerminatedBuffer());
744             goto error_exit;
745         }
746 
747         // Convert the read buffer into the new encoding via Unicode.
748         // After the call 'unibufp' will be placed behind the last
749         // character that was converted in the 'unibuf'.
750         // Also the 'cbufp' is positioned behind the last converted
751         // character.
752         // At the last conversion in the file, flush should be set to
753         // true so that we get all characters converted.
754         //
755         // The converter must be flushed at the end of conversion so
756         // that characters on hold also will be written.
757 
758         cbufp = buf;
759         flush = (UBool)(rd != bufsz);
760 
761         // convert until the input is consumed
762         do {
763             // remember the start of the current byte-to-Unicode conversion
764             prevbufp = cbufp;
765 
766             unibuf = unibufp = u.getBuffer((int32_t)bufsz);
767 
768             // Use bufsz instead of u.getCapacity() for the targetLimit
769             // so that we don't overflow fromoffsets[].
770             ucnv_toUnicode(convfrom, &unibufp, unibuf + bufsz, &cbufp,
771                 buf + rd, useOffsets ? fromoffsets : NULL, flush, &err);
772 
773             ulen = (int32_t)(unibufp - unibuf);
774             u.releaseBuffer(U_SUCCESS(err) ? ulen : 0);
775 
776             // fromSawEndOfBytes indicates that ucnv_toUnicode() is done
777             // converting all of the input bytes.
778             // It works like this because ucnv_toUnicode() returns only under the
779             // following conditions:
780             // - an error occurred during conversion (an error code is set)
781             // - the target buffer is filled (the error code indicates an overflow)
782             // - the source is consumed
783             // That is, if the error code does not indicate a failure,
784             // not even an overflow, then the source must be consumed entirely.
785             fromSawEndOfBytes = (UBool)U_SUCCESS(err);
786 
787             if (err == U_BUFFER_OVERFLOW_ERROR) {
788                 err = U_ZERO_ERROR;
789             } else if (U_FAILURE(err)) {
790                 char pos[32], errorBytes[32];
791                 int8_t i, length, errorLength;
792 
793                 UErrorCode localError = U_ZERO_ERROR;
794                 errorLength = (int8_t)sizeof(errorBytes);
795                 ucnv_getInvalidChars(convfrom, errorBytes, &errorLength, &localError);
796                 if (U_FAILURE(localError) || errorLength == 0) {
797                     errorLength = 1;
798                 }
799 
800                 // print the input file offset of the start of the error bytes:
801                 // input file offset of the current byte buffer +
802                 // length of the just consumed bytes -
803                 // length of the error bytes
804                 length =
805                     (int8_t)sprintf(pos, "%d",
806                         (int)(infoffset + (cbufp - buf) - errorLength));
807 
808                 // output the bytes that caused the error
809                 UnicodeString str;
810                 for (i = 0; i < errorLength; ++i) {
811                     if (i > 0) {
812                         str.append((UChar)uSP);
813                     }
814                     str.append(nibbleToHex((uint8_t)errorBytes[i] >> 4));
815                     str.append(nibbleToHex((uint8_t)errorBytes[i]));
816                 }
817 
818                 initMsg(pname);
819                 u_wmsg(stderr, "problemCvtToU",
820                         UnicodeString(pos, length, "").getTerminatedBuffer(),
821                         str.getTerminatedBuffer(),
822                         u_wmsg_errorName(err));
823 
824                 willexit = TRUE;
825                 err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
826             }
827 
828             // Replaced a check for whether the input was consumed by
829             // looping until it is; message key "premEndInput" now obsolete.
830 
831             if (ulen == 0) {
832                 continue;
833             }
834 
835             // remove a U+FEFF Unicode signature character if requested
836             if (sig < 0) {
837                 if (u.charAt(0) == uSig) {
838                     u.remove(0, 1);
839 
840                     // account for the removed UChar and offset
841                     --ulen;
842 
843                     if (useOffsets) {
844                         // remove an offset from fromoffsets[] as well
845                         // to keep the array parallel with the UChars
846                         memmove(fromoffsets, fromoffsets + 1, ulen * 4);
847                     }
848 
849                 }
850                 sig = 0;
851             }
852 
853 #if !UCONFIG_NO_TRANSLITERATION
854             // Transliterate/transform if needed.
855 
856             // For transformation, we use chunking code -
857             // collect Unicode input until, for example, an end-of-line,
858             // then transform and output-convert that and continue collecting.
859             // This makes the transformation result independent of the buffer size
860             // while avoiding the slower keyboard mode.
861             // The end-of-chunk characters are completely included in the
862             // transformed string in case they are to be transformed themselves.
863             if (t != NULL) {
864                 UnicodeString out;
865                 int32_t chunkLimit;
866 
867                 do {
868                     chunkLimit = getChunkLimit(chunk, u);
869                     if (chunkLimit < 0 && flush && fromSawEndOfBytes) {
870                         // use all of the rest at the end of the text
871                         chunkLimit = u.length();
872                     }
873                     if (chunkLimit >= 0) {
874                         // complete the chunk and transform it
875                         chunk.append(u, 0, chunkLimit);
876                         u.remove(0, chunkLimit);
877                         t->transliterate(chunk);
878 
879                         // append the transformation result to the result and empty the chunk
880                         out.append(chunk);
881                         chunk.remove();
882                     } else {
883                         // continue collecting the chunk
884                         chunk.append(u);
885                         break;
886                     }
887                 } while (!u.isEmpty());
888 
889                 u = out;
890                 ulen = u.length();
891             }
892 #endif
893 
894             // add a U+FEFF Unicode signature character if requested
895             // and possible/necessary
896             if (sig > 0) {
897                 if (u.charAt(0) != uSig && cnvSigType(convto) == CNV_WITH_FEFF) {
898                     u.insert(0, (UChar)uSig);
899 
900                     if (useOffsets) {
901                         // insert a pseudo-offset into fromoffsets[] as well
902                         // to keep the array parallel with the UChars
903                         memmove(fromoffsets + 1, fromoffsets, ulen * 4);
904                         fromoffsets[0] = -1;
905                     }
906 
907                     // account for the additional UChar and offset
908                     ++ulen;
909                 }
910                 sig = 0;
911             }
912 
913             // Convert the Unicode buffer into the destination codepage
914             // Again 'bufp' will be placed behind the last converted character
915             // And 'unibufp' will be placed behind the last converted unicode character
916             // At the last conversion flush should be set to true to ensure that
917             // all characters left get converted
918 
919             unibuf = unibufbp = u.getBuffer();
920 
921             do {
922                 bufp = outbuf;
923 
924                 // Use fromSawEndOfBytes in addition to the flush flag -
925                 // it indicates whether the intermediate Unicode string
926                 // contains the very last UChars for the very last input bytes.
927                 ucnv_fromUnicode(convto, &bufp, outbuf + bufsz,
928                                  &unibufbp,
929                                  unibuf + ulen,
930                                  NULL, (UBool)(flush && fromSawEndOfBytes), &err);
931 
932                 // toSawEndOfUnicode indicates that ucnv_fromUnicode() is done
933                 // converting all of the intermediate UChars.
934                 // See comment for fromSawEndOfBytes.
935                 toSawEndOfUnicode = (UBool)U_SUCCESS(err);
936 
937                 if (err == U_BUFFER_OVERFLOW_ERROR) {
938                     err = U_ZERO_ERROR;
939                 } else if (U_FAILURE(err)) {
940                     UChar errorUChars[4];
941                     const char *errtag;
942                     char pos[32];
943                     UChar32 c;
944                     int8_t i, length, errorLength;
945 
946                     UErrorCode localError = U_ZERO_ERROR;
947                     errorLength = (int8_t)UPRV_LENGTHOF(errorUChars);
948                     ucnv_getInvalidUChars(convto, errorUChars, &errorLength, &localError);
949                     if (U_FAILURE(localError) || errorLength == 0) {
950                         // need at least 1 so that we don't access beyond the length of fromoffsets[]
951                         errorLength = 1;
952                     }
953 
954                     int32_t ferroffset;
955 
956                     if (useOffsets) {
957                         // Unicode buffer offset of the start of the error UChars
958                         ferroffset = (int32_t)((unibufbp - unibuf) - errorLength);
959                         if (ferroffset < 0) {
960                             // approximation - the character started in the previous Unicode buffer
961                             ferroffset = 0;
962                         }
963 
964                         // get the corresponding byte offset out of fromoffsets[]
965                         // go back if the offset is not known for some of the UChars
966                         int32_t fromoffset;
967                         do {
968                             fromoffset = fromoffsets[ferroffset];
969                         } while (fromoffset < 0 && --ferroffset >= 0);
970 
971                         // total input file offset =
972                         // input file offset of the current byte buffer +
973                         // byte buffer offset of where the current Unicode buffer is converted from +
974                         // fromoffsets[Unicode offset]
975                         ferroffset = infoffset + (prevbufp - buf) + fromoffset;
976                         errtag = "problemCvtFromU";
977                     } else {
978                         // Do not use fromoffsets if (t != NULL) because the Unicode text may
979                         // be different from what the offsets refer to.
980 
981                         // output file offset
982                         ferroffset = (int32_t)(outfoffset + (bufp - outbuf));
983                         errtag = "problemCvtFromUOut";
984                     }
985 
986                     length = (int8_t)sprintf(pos, "%u", (int)ferroffset);
987 
988                     // output the code points that caused the error
989                     UnicodeString str;
990                     for (i = 0; i < errorLength;) {
991                         if (i > 0) {
992                             str.append((UChar)uSP);
993                         }
994                         U16_NEXT(errorUChars, i, errorLength, c);
995                         if (c >= 0x100000) {
996                             str.append(nibbleToHex((uint8_t)(c >> 20)));
997                         }
998                         if (c >= 0x10000) {
999                             str.append(nibbleToHex((uint8_t)(c >> 16)));
1000                         }
1001                         str.append(nibbleToHex((uint8_t)(c >> 12)));
1002                         str.append(nibbleToHex((uint8_t)(c >> 8)));
1003                         str.append(nibbleToHex((uint8_t)(c >> 4)));
1004                         str.append(nibbleToHex((uint8_t)c));
1005                     }
1006 
1007                     initMsg(pname);
1008                     u_wmsg(stderr, errtag,
1009                             UnicodeString(pos, length, "").getTerminatedBuffer(),
1010                             str.getTerminatedBuffer(),
1011                            u_wmsg_errorName(err));
1012                     u_wmsg(stderr, "errorUnicode", str.getTerminatedBuffer());
1013 
1014                     willexit = TRUE;
1015                     err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
1016                 }
1017 
1018                 // Replaced a check for whether the intermediate Unicode characters were all consumed by
1019                 // looping until they are; message key "premEnd" now obsolete.
1020 
1021                 // Finally, write the converted buffer to the output file
1022                 size_t outlen = (size_t) (bufp - outbuf);
1023                 outfoffset += (int32_t)(wr = fwrite(outbuf, 1, outlen, outfile));
1024                 if (wr != outlen) {
1025                     UnicodeString str(strerror(errno));
1026                     initMsg(pname);
1027                     u_wmsg(stderr, "cantWrite", str.getTerminatedBuffer());
1028                     willexit = TRUE;
1029                 }
1030 
1031                 if (willexit) {
1032                     goto error_exit;
1033                 }
1034             } while (!toSawEndOfUnicode);
1035         } while (!fromSawEndOfBytes);
1036     } while (!flush);           // Stop when we have flushed the
1037                                 // converters (this means that it's
1038                                 // the end of output)
1039 
1040     goto normal_exit;
1041 
1042 error_exit:
1043     ret = FALSE;
1044 
1045 normal_exit:
1046     // Cleanup.
1047 
1048     ucnv_close(convfrom);
1049     ucnv_close(convto);
1050 
1051 #if !UCONFIG_NO_TRANSLITERATION
1052     delete t;
1053 #endif
1054 
1055     if (closeFile) {
1056         fclose(infile);
1057     }
1058 
1059     return ret;
1060 }
1061 
usage(const char * pname,int ecode)1062 static void usage(const char *pname, int ecode) {
1063     const UChar *msg;
1064     int32_t msgLen;
1065     UErrorCode err = U_ZERO_ERROR;
1066     FILE *fp = ecode ? stderr : stdout;
1067     int res;
1068 
1069     initMsg(pname);
1070     msg =
1071         ures_getStringByKey(gBundle, ecode ? "lcUsageWord" : "ucUsageWord",
1072                             &msgLen, &err);
1073     UnicodeString upname(pname, (int32_t)(uprv_strlen(pname) + 1));
1074     UnicodeString mname(msg, msgLen + 1);
1075 
1076     res = u_wmsg(fp, "usage", mname.getBuffer(), upname.getBuffer());
1077     if (!ecode) {
1078         if (!res) {
1079             fputc('\n', fp);
1080         }
1081         if (!u_wmsg(fp, "help")) {
1082             /* Now dump callbacks and finish. */
1083 
1084             int i, count =
1085                 sizeof(transcode_callbacks) / sizeof(*transcode_callbacks);
1086             for (i = 0; i < count; ++i) {
1087                 fprintf(fp, " %s", transcode_callbacks[i].name);
1088             }
1089             fputc('\n', fp);
1090         }
1091     }
1092 
1093     exit(ecode);
1094 }
1095 
1096 extern int
main(int argc,char ** argv)1097 main(int argc, char **argv)
1098 {
1099     FILE *outfile;
1100     int ret = 0;
1101 
1102     size_t bufsz = DEFAULT_BUFSZ;
1103 
1104     const char *fromcpage = 0;
1105     const char *tocpage = 0;
1106     const char *translit = 0;
1107     const char *outfilestr = 0;
1108     UBool fallback = FALSE;
1109 
1110     UConverterFromUCallback fromucallback = UCNV_FROM_U_CALLBACK_STOP;
1111     const void *fromuctxt = 0;
1112     UConverterToUCallback toucallback = UCNV_TO_U_CALLBACK_STOP;
1113     const void *touctxt = 0;
1114 
1115     char **iter, **remainArgv, **remainArgvLimit;
1116     char **end = argv + argc;
1117 
1118     const char *pname;
1119 
1120     UBool printConvs = FALSE, printCanon = FALSE, printTranslits = FALSE;
1121     const char *printName = 0;
1122 
1123     UBool verbose = FALSE;
1124     UErrorCode status = U_ZERO_ERROR;
1125 
1126     ConvertFile cf;
1127 
1128     /* Initialize ICU */
1129     u_init(&status);
1130     if (U_FAILURE(status)) {
1131         fprintf(stderr, "%s: can not initialize ICU.  status = %s\n",
1132             argv[0], u_errorName(status));
1133         exit(1);
1134     }
1135 
1136     // Get and prettify pname.
1137     pname = uprv_strrchr(*argv, U_FILE_SEP_CHAR);
1138 #if U_PLATFORM_USES_ONLY_WIN32_API
1139     if (!pname) {
1140         pname = uprv_strrchr(*argv, '/');
1141     }
1142 #endif
1143     if (!pname) {
1144         pname = *argv;
1145     } else {
1146         ++pname;
1147     }
1148 
1149     // First, get the arguments from command-line
1150     // to know the codepages to convert between
1151 
1152     remainArgv = remainArgvLimit = argv + 1;
1153     for (iter = argv + 1; iter != end; iter++) {
1154         // Check for from charset
1155         if (strcmp("-f", *iter) == 0 || !strcmp("--from-code", *iter)) {
1156             iter++;
1157             if (iter != end)
1158                 fromcpage = *iter;
1159             else
1160                 usage(pname, 1);
1161         } else if (strcmp("-t", *iter) == 0 || !strcmp("--to-code", *iter)) {
1162             iter++;
1163             if (iter != end)
1164                 tocpage = *iter;
1165             else
1166                 usage(pname, 1);
1167         } else if (strcmp("-x", *iter) == 0) {
1168             iter++;
1169             if (iter != end)
1170                 translit = *iter;
1171             else
1172                 usage(pname, 1);
1173         } else if (!strcmp("--fallback", *iter)) {
1174             fallback = TRUE;
1175         } else if (!strcmp("--no-fallback", *iter)) {
1176             fallback = FALSE;
1177         } else if (strcmp("-b", *iter) == 0 || !strcmp("--block-size", *iter)) {
1178             iter++;
1179             if (iter != end) {
1180                 bufsz = atoi(*iter);
1181                 if ((int) bufsz <= 0) {
1182                     initMsg(pname);
1183                     UnicodeString str(*iter);
1184                     initMsg(pname);
1185                     u_wmsg(stderr, "badBlockSize", str.getTerminatedBuffer());
1186                     return 3;
1187                 }
1188             } else {
1189                 usage(pname, 1);
1190             }
1191         } else if (strcmp("-l", *iter) == 0 || !strcmp("--list", *iter)) {
1192             if (printTranslits) {
1193                 usage(pname, 1);
1194             }
1195             printConvs = TRUE;
1196         } else if (strcmp("--default-code", *iter) == 0) {
1197             if (printTranslits) {
1198                 usage(pname, 1);
1199             }
1200             printName = ucnv_getDefaultName();
1201         } else if (strcmp("--list-code", *iter) == 0) {
1202             if (printTranslits) {
1203                 usage(pname, 1);
1204             }
1205 
1206             iter++;
1207             if (iter != end) {
1208                 UErrorCode e = U_ZERO_ERROR;
1209                 printName = ucnv_getAlias(*iter, 0, &e);
1210                 if (U_FAILURE(e) || !printName) {
1211                     UnicodeString str(*iter);
1212                     initMsg(pname);
1213                     u_wmsg(stderr, "noSuchCodeset", str.getTerminatedBuffer());
1214                     return 2;
1215                 }
1216             } else
1217                 usage(pname, 1);
1218         } else if (strcmp("--canon", *iter) == 0) {
1219             printCanon = TRUE;
1220         } else if (strcmp("-L", *iter) == 0
1221             || !strcmp("--list-transliterators", *iter)) {
1222             if (printConvs) {
1223                 usage(pname, 1);
1224             }
1225             printTranslits = TRUE;
1226         } else if (strcmp("-h", *iter) == 0 || !strcmp("-?", *iter)
1227             || !strcmp("--help", *iter)) {
1228             usage(pname, 0);
1229         } else if (!strcmp("-c", *iter)) {
1230             fromucallback = UCNV_FROM_U_CALLBACK_SKIP;
1231         } else if (!strcmp("--to-callback", *iter)) {
1232             iter++;
1233             if (iter != end) {
1234                 const struct callback_ent *cbe = findCallback(*iter);
1235                 if (cbe) {
1236                     fromucallback = cbe->fromu;
1237                     fromuctxt = cbe->fromuctxt;
1238                 } else {
1239                     UnicodeString str(*iter);
1240                     initMsg(pname);
1241                     u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
1242                     return 4;
1243                 }
1244             } else {
1245                 usage(pname, 1);
1246             }
1247         } else if (!strcmp("--from-callback", *iter)) {
1248             iter++;
1249             if (iter != end) {
1250                 const struct callback_ent *cbe = findCallback(*iter);
1251                 if (cbe) {
1252                     toucallback = cbe->tou;
1253                     touctxt = cbe->touctxt;
1254                 } else {
1255                     UnicodeString str(*iter);
1256                     initMsg(pname);
1257                     u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
1258                     return 4;
1259                 }
1260             } else {
1261                 usage(pname, 1);
1262             }
1263         } else if (!strcmp("-i", *iter)) {
1264             toucallback = UCNV_TO_U_CALLBACK_SKIP;
1265         } else if (!strcmp("--callback", *iter)) {
1266             iter++;
1267             if (iter != end) {
1268                 const struct callback_ent *cbe = findCallback(*iter);
1269                 if (cbe) {
1270                     fromucallback = cbe->fromu;
1271                     fromuctxt = cbe->fromuctxt;
1272                     toucallback = cbe->tou;
1273                     touctxt = cbe->touctxt;
1274                 } else {
1275                     UnicodeString str(*iter);
1276                     initMsg(pname);
1277                     u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
1278                     return 4;
1279                 }
1280             } else {
1281                 usage(pname, 1);
1282             }
1283         } else if (!strcmp("-s", *iter) || !strcmp("--silent", *iter)) {
1284             verbose = FALSE;
1285         } else if (!strcmp("-v", *iter) || !strcmp("--verbose", *iter)) {
1286             verbose = TRUE;
1287         } else if (!strcmp("-V", *iter) || !strcmp("--version", *iter)) {
1288             printf("%s v2.1  ICU " U_ICU_VERSION "\n", pname);
1289             return 0;
1290         } else if (!strcmp("-o", *iter) || !strcmp("--output", *iter)) {
1291             ++iter;
1292             if (iter != end && !outfilestr) {
1293                 outfilestr = *iter;
1294             } else {
1295                 usage(pname, 1);
1296             }
1297         } else if (0 == strcmp("--add-signature", *iter)) {
1298             cf.signature = 1;
1299         } else if (0 == strcmp("--remove-signature", *iter)) {
1300             cf.signature = -1;
1301         } else if (**iter == '-' && (*iter)[1]) {
1302             usage(pname, 1);
1303         } else {
1304             // move a non-option up in argv[]
1305             *remainArgvLimit++ = *iter;
1306         }
1307     }
1308 
1309     if (printConvs || printName) {
1310         return printConverters(pname, printName, printCanon) ? 2 : 0;
1311     } else if (printTranslits) {
1312         return printTransliterators(printCanon) ? 3 : 0;
1313     }
1314 
1315     if (!fromcpage || !uprv_strcmp(fromcpage, "-")) {
1316         fromcpage = ucnv_getDefaultName();
1317     }
1318     if (!tocpage || !uprv_strcmp(tocpage, "-")) {
1319         tocpage = ucnv_getDefaultName();
1320     }
1321 
1322     // Open the correct output file or connect to stdout for reading input
1323     if (outfilestr != 0 && strcmp(outfilestr, "-")) {
1324         outfile = fopen(outfilestr, "wb");
1325         if (outfile == 0) {
1326             UnicodeString str1(outfilestr, "");
1327             UnicodeString str2(strerror(errno), "");
1328             initMsg(pname);
1329             u_wmsg(stderr, "cantCreateOutputF",
1330                 str1.getBuffer(), str2.getBuffer());
1331             return 1;
1332         }
1333     } else {
1334         outfilestr = "-";
1335         outfile = stdout;
1336 #ifdef USE_FILENO_BINARY_MODE
1337         if (setmode(fileno(outfile), O_BINARY) == -1) {
1338             u_wmsg(stderr, "cantSetOutBinMode");
1339             exit(-1);
1340         }
1341 #endif
1342     }
1343 
1344     /* Loop again on the arguments to find all the input files, and
1345     convert them. */
1346 
1347     cf.setBufferSize(bufsz);
1348 
1349     if(remainArgv < remainArgvLimit) {
1350         for (iter = remainArgv; iter != remainArgvLimit; iter++) {
1351             if (!cf.convertFile(
1352                     pname, fromcpage, toucallback, touctxt, tocpage,
1353                     fromucallback, fromuctxt, fallback, translit, *iter,
1354                     outfile, verbose)
1355             ) {
1356                 goto error_exit;
1357             }
1358         }
1359     } else {
1360         if (!cf.convertFile(
1361                 pname, fromcpage, toucallback, touctxt, tocpage,
1362                 fromucallback, fromuctxt, fallback, translit, 0,
1363                 outfile, verbose)
1364         ) {
1365             goto error_exit;
1366         }
1367     }
1368 
1369     goto normal_exit;
1370 error_exit:
1371 #if !UCONFIG_NO_LEGACY_CONVERSION
1372     ret = 1;
1373 #else
1374     fprintf(stderr, "uconv error: UCONFIG_NO_LEGACY_CONVERSION is on. See uconfig.h\n");
1375 #endif
1376 normal_exit:
1377 
1378     if (outfile != stdout) {
1379         fclose(outfile);
1380     }
1381 
1382     u_cleanup();
1383 
1384     return ret;
1385 }
1386 
1387 
1388 /*
1389  * Hey, Emacs, please set the following:
1390  *
1391  * Local Variables:
1392  * indent-tabs-mode: nil
1393  * End:
1394  *
1395  */
1396