1 /*
2 *******************************************************************************
3 *
4 *   Copyright (C) 2002-2015, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 *******************************************************************************
8 *
9 * File wrtxml.cpp
10 *
11 * Modification History:
12 *
13 *   Date        Name        Description
14 *   10/01/02    Ram         Creation.
15 *   02/07/08    Spieth      Correct XLIFF generation on EBCDIC platform
16 *
17 *******************************************************************************
18 */
19 
20 // Safer use of UnicodeString.
21 #ifndef UNISTR_FROM_CHAR_EXPLICIT
22 #   define UNISTR_FROM_CHAR_EXPLICIT explicit
23 #endif
24 
25 // Less important, but still a good idea.
26 #ifndef UNISTR_FROM_STRING_EXPLICIT
27 #   define UNISTR_FROM_STRING_EXPLICIT explicit
28 #endif
29 
30 #include "reslist.h"
31 #include "unewdata.h"
32 #include "unicode/ures.h"
33 #include "errmsg.h"
34 #include "filestrm.h"
35 #include "cstring.h"
36 #include "unicode/ucnv.h"
37 #include "genrb.h"
38 #include "rle.h"
39 #include "uhash.h"
40 #include "uresimp.h"
41 #include "unicode/ustring.h"
42 #include "unicode/uchar.h"
43 #include "ustr.h"
44 #include "prscmnts.h"
45 #include "unicode/unistr.h"
46 #include "unicode/utf8.h"
47 #include "unicode/utf16.h"
48 #include <time.h>
49 
50 U_NAMESPACE_USE
51 
/* Current nesting depth of the XML being written; write_tabs() emits tabCount + 1 indent units. */
static int tabCount = 0;

/* Stream that the writer functions in this file emit their output to. */
static FileStream* out=NULL;
/* Bundle handed to SResource::getKeyString() when looking up a resource's key name. */
static struct SRBRoot* srBundle ;
/* Output directory, or NULL when none has been set. */
static const char* outDir = NULL;
static const char* enc ="";
static UConverter* conv = NULL;

const char* const* ISOLanguages;
const char* const* ISOCountries;
/* File name extensions, including the leading dot. */
const char* textExt = ".txt";
const char* xliffExt = ".xlf";
64 
write_utf8_file(FileStream * fileStream,UnicodeString outString)65 static int32_t write_utf8_file(FileStream* fileStream, UnicodeString outString)
66 {
67     UErrorCode status = U_ZERO_ERROR;
68     int32_t len = 0;
69 
70     // preflight to get the destination buffer size
71     u_strToUTF8(NULL,
72                 0,
73                 &len,
74                 outString.getBuffer(),
75                 outString.length(),
76                 &status);
77 
78     // allocate the buffer
79     char* dest = (char*)uprv_malloc(len);
80     status = U_ZERO_ERROR;
81 
82     // convert the data
83     u_strToUTF8(dest,
84                 len,
85                 &len,
86                 outString.getBuffer(),
87                 outString.length(),
88                 &status);
89 
90     // write data to out file
91     int32_t ret = T_FileStream_write(fileStream, dest, len);
92     uprv_free(dest);
93     return (ret);
94 }
95 
96 /*write indentation for formatting*/
write_tabs(FileStream * os)97 static void write_tabs(FileStream* os){
98     int i=0;
99     for(;i<=tabCount;i++){
100         write_utf8_file(os,UnicodeString("    "));
101     }
102 }
103 
104 /*get ID for each element. ID is globally unique.*/
getID(const char * id,const char * curKey,char * result)105 static char* getID(const char* id, const char* curKey, char* result) {
106     if(curKey == NULL) {
107         result = (char *)uprv_malloc(sizeof(char)*uprv_strlen(id) + 1);
108         uprv_memset(result, 0, sizeof(char)*uprv_strlen(id) + 1);
109         uprv_strcpy(result, id);
110     } else {
111         result = (char *)uprv_malloc(sizeof(char)*(uprv_strlen(id) + 1 + uprv_strlen(curKey)) + 1);
112         uprv_memset(result, 0, sizeof(char)*(uprv_strlen(id) + 1 + uprv_strlen(curKey)) + 1);
113         if(id[0]!='\0'){
114             uprv_strcpy(result, id);
115             uprv_strcat(result, "_");
116         }
117         uprv_strcat(result, curKey);
118     }
119     return result;
120 }
121 
/*compute CRC for binary code*/
/* The code is from  http://www.theorem.com/java/CRC32.java
 * Calculates the CRC32 - 32 bit Cyclical Redundancy Check
 * <P> This check is used in numerous systems to verify the integrity
 * of information.  It's also used as a hashing function.  Unlike a regular
 * checksum, it's sensitive to the order of the characters.
 * It produces a 32 bit value.
 *
 * @author Michael Lecuyer (mjl@theorem.com)
 * @version 1.1 August 11, 1998
 */

/* ICU is not endian portable, because ICU data generated on big endian machines can be
 * ported to big endian machines but not to little endian machines and vice versa. The
 * conversion is not portable across platforms with different endianess.
 */

/**
 * Feeds len bytes into a running CRC-32 (reflected polynomial 0xEDB88320).
 *
 * No initial or final inversion is applied here: the caller supplies the
 * starting register in lastcrc and gets the raw register back, so a long
 * input can be processed in pieces by chaining calls.
 *
 * @param ptr     bytes to process
 * @param len     number of bytes at ptr
 * @param lastcrc CRC register before these bytes
 * @return CRC register after these bytes (lastcrc itself when len is 0)
 */
uint32_t computeCRC(const char *ptr, uint32_t len, uint32_t lastcrc){
    /* all arithmetic is unsigned so that no step depends on how the
     * implementation converts out-of-range values to a signed type */
    uint32_t crc_ta[256];
    uint32_t crc = lastcrc;
    uint32_t i = 0;
    int j = 0;

#define CRC32_POLYNOMIAL 0xEDB88320

    /*build crc table*/
    for (i = 0; i < 256; i++) {
        uint32_t entry = i;
        for (j = 0; j < 8; j++) {
            if ((entry & 1) == 1) {
                entry = (entry >> 1) ^ CRC32_POLYNOMIAL;
            } else {
                entry >>= 1;
            }
        }
        crc_ta[i] = entry;
    }

    while(len--!=0) {
        /* go through unsigned char so bytes >= 0x80 are not sign-extended */
        crc = (crc >> 8) ^ crc_ta[(crc ^ (unsigned char)*ptr) & 0xFF];
        ptr++;
    }
    return crc;
}
173 
/*
 * Replace every occurrence of the character s with r within the first
 * srcLen characters of src, in place. Does nothing when srcLen <= 0.
 */
static void strnrepchr(char* src, int32_t srcLen, char s, char r){
    char* cursor = src;
    int32_t remaining = srcLen;

    while(remaining-- > 0){
        if(*cursor == s){
            *cursor = r;
        }
        ++cursor;
    }
}
182 /* Parse the filename, and get its language information.
183  * If it fails to get the language information from the filename,
184  * use "en" as the default value for language
185  */
parseFilename(const char * id,char *)186 static char* parseFilename(const char* id, char* /*lang*/) {
187     int idLen = (int) uprv_strlen(id);
188     char* localeID = (char*) uprv_malloc(idLen);
189     int pos = 0;
190     int canonCapacity = 0;
191     char* canon = NULL;
192     int canonLen = 0;
193     /*int i;*/
194     UErrorCode status = U_ZERO_ERROR;
195     const char *ext = uprv_strchr(id, '.');
196 
197     if(ext != NULL){
198         pos = (int) (ext - id);
199     } else {
200         pos = idLen;
201     }
202     uprv_memcpy(localeID, id, pos);
203     localeID[pos]=0; /* NUL terminate the string */
204 
205     canonCapacity =pos*3;
206     canon = (char*) uprv_malloc(canonCapacity);
207     canonLen = uloc_canonicalize(localeID, canon, canonCapacity, &status);
208 
209     if(U_FAILURE(status)){
210         fprintf(stderr, "Could not canonicalize the locale ID: %s. Error: %s\n", localeID, u_errorName(status));
211         exit(status);
212     }
213     strnrepchr(canon, canonLen, '_', '-');
214     return canon;
215 }
216 
/* XML declaration text; it announces utf-8 as the document encoding. */
static const char* xmlHeader = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n";
/* Opening <xliff> tag. The XLIFF 1.2 variant is compiled out; the 1.1 variant is the active one. */
#if 0
static const char* bundleStart = "<xliff version = \"1.2\" "
                                        "xmlns='urn:oasis:names:tc:xliff:document:1.2' "
                                        "xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' "
                                        "xsi:schemaLocation='urn:oasis:names:tc:xliff:document:1.2 xliff-core-1.2-transitional.xsd'>\n";
#else
static const char* bundleStart = "<xliff version = \"1.1\" "
                                        "xmlns='urn:oasis:names:tc:xliff:document:1.1' "
                                        "xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' "
                                        "xsi:schemaLocation='urn:oasis:names:tc:xliff:document:1.1 http://www.oasis-open.org/committees/xliff/documents/xliff-core-1.1.xsd'>\n";
#endif
/* Closing tag matching bundleStart. */
static const char* bundleEnd   = "</xliff>\n";

/* Forward declaration: the array and table writers call this for each child resource. */
void res_write_xml(struct SResource *res, const char* id, const char* language, UBool isTopLevel, UErrorCode *status);
232 
convertAndEscape(char ** pDest,int32_t destCap,int32_t * destLength,const UChar * src,int32_t srcLen,UErrorCode * status)233 static char* convertAndEscape(char** pDest, int32_t destCap, int32_t* destLength,
234                               const UChar* src, int32_t srcLen, UErrorCode* status){
235     int32_t srcIndex=0;
236     char* dest=NULL;
237     char* temp=NULL;
238     int32_t destLen=0;
239     UChar32 c = 0;
240 
241     if(status==NULL || U_FAILURE(*status) || pDest==NULL  || srcLen==0 || src == NULL){
242         return NULL;
243     }
244     dest =*pDest;
245     if(dest==NULL || destCap <=0){
246         destCap = srcLen * 8;
247         dest = (char*) uprv_malloc(sizeof(char) * destCap);
248         if(dest==NULL){
249             *status=U_MEMORY_ALLOCATION_ERROR;
250             return NULL;
251         }
252     }
253 
254     dest[0]=0;
255 
256     while(srcIndex<srcLen){
257         U16_NEXT(src, srcIndex, srcLen, c);
258 
259         if (U16_IS_LEAD(c) || U16_IS_TRAIL(c)) {
260             *status = U_ILLEGAL_CHAR_FOUND;
261             fprintf(stderr, "Illegal Surrogate! \n");
262             uprv_free(dest);
263             return NULL;
264         }
265 
266         if((destLen+U8_LENGTH(c)) < destCap){
267 
268             /* ASCII Range */
269             if(c <=0x007F){
270                 switch(c) {
271                 case '\x26':
272                     uprv_strcpy(dest+( destLen),"\x26\x61\x6d\x70\x3b"); /* &amp;*/
273                     destLen+=(int32_t)uprv_strlen("\x26\x61\x6d\x70\x3b");
274                     break;
275                 case '\x3c':
276                     uprv_strcpy(dest+(destLen),"\x26\x6c\x74\x3b"); /* &lt;*/
277                     destLen+=(int32_t)uprv_strlen("\x26\x6c\x74\x3b");
278                     break;
279                 case '\x3e':
280                     uprv_strcpy(dest+(destLen),"\x26\x67\x74\x3b"); /* &gt;*/
281                     destLen+=(int32_t)uprv_strlen("\x26\x67\x74\x3b");
282                     break;
283                 case '\x22':
284                     uprv_strcpy(dest+(destLen),"\x26\x71\x75\x6f\x74\x3b"); /* &quot;*/
285                     destLen+=(int32_t)uprv_strlen("\x26\x71\x75\x6f\x74\x3b");
286                     break;
287                 case '\x27':
288                     uprv_strcpy(dest+(destLen),"\x26\x61\x70\x6f\x73\x3b"); /* &apos; */
289                     destLen+=(int32_t)uprv_strlen("\x26\x61\x70\x6f\x73\x3b");
290                     break;
291 
292                  /* Disallow C0 controls except TAB, CR, LF*/
293                 case 0x00:
294                 case 0x01:
295                 case 0x02:
296                 case 0x03:
297                 case 0x04:
298                 case 0x05:
299                 case 0x06:
300                 case 0x07:
301                 case 0x08:
302                 /*case 0x09:*/
303                 /*case 0x0A: */
304                 case 0x0B:
305                 case 0x0C:
306                 /*case 0x0D:*/
307                 case 0x0E:
308                 case 0x0F:
309                 case 0x10:
310                 case 0x11:
311                 case 0x12:
312                 case 0x13:
313                 case 0x14:
314                 case 0x15:
315                 case 0x16:
316                 case 0x17:
317                 case 0x18:
318                 case 0x19:
319                 case 0x1A:
320                 case 0x1B:
321                 case 0x1C:
322                 case 0x1D:
323                 case 0x1E:
324                 case 0x1F:
325                     *status = U_ILLEGAL_CHAR_FOUND;
326                     fprintf(stderr, "Illegal Character \\u%04X!\n",(int)c);
327                     uprv_free(dest);
328                     return NULL;
329                 default:
330                     dest[destLen++]=(char)c;
331                 }
332             }else{
333                 UBool isError = FALSE;
334                 U8_APPEND((unsigned char*)dest,destLen,destCap,c,isError);
335                 if(isError){
336                     *status = U_ILLEGAL_CHAR_FOUND;
337                     fprintf(stderr, "Illegal Character \\U%08X!\n",(int)c);
338                     uprv_free(dest);
339                     return NULL;
340                 }
341             }
342         }else{
343             destCap += destLen;
344 
345             temp = (char*) uprv_malloc(sizeof(char)*destCap);
346             if(temp==NULL){
347                 *status=U_MEMORY_ALLOCATION_ERROR;
348                 uprv_free(dest);
349                 return NULL;
350             }
351             uprv_memmove(temp,dest,destLen);
352             destLen=0;
353             uprv_free(dest);
354             dest=temp;
355             temp=NULL;
356         }
357 
358     }
359     *destLength = destLen;
360     return dest;
361 }
362 
#define ASTERISK 0x002A
#define SPACE    0x0020
/* NOTE(review): 0x000A is line feed and 0x000D is carriage return, so the two
 * names below look swapped. trim() treats both the same way; the values are
 * left untouched here - confirm before relying on either name individually. */
#define CR       0x000A
#define LF       0x000D
#define AT_SIGN  0x0040

/*
 * Strip trailing asterisks, spaces and line break characters (0x0A, 0x0D)
 * from the buffer *src, which holds *len characters.
 *
 * Every stripped character is overwritten with NUL and *len is set to the
 * remaining length. Nothing happens when src or *src is NULL.
 */
static void
trim(char **src, int32_t *len){
    char *text = NULL;
    int32_t end = 0;

    if(src == NULL || *src == NULL){
        return;
    }

    text = *src;
    end = *len;
    while(end > 0){
        const char last = text[end - 1];
        if(last != ASTERISK && last != SPACE && last != CR && last != LF){
            break;
        }
        text[--end] = 0;
    }
    *len = end;
}
395 
396 static void
print(UChar * src,int32_t srcLen,const char * tagStart,const char * tagEnd,UErrorCode * status)397 print(UChar* src, int32_t srcLen,const char *tagStart,const char *tagEnd,  UErrorCode *status){
398     int32_t bufCapacity   = srcLen*4;
399     char *buf       = NULL;
400     int32_t bufLen = 0;
401 
402     if(U_FAILURE(*status)){
403         return;
404     }
405 
406     buf = (char*) (uprv_malloc(bufCapacity));
407     if(buf==0){
408         fprintf(stderr, "Could not allocate memory!!");
409         exit(U_MEMORY_ALLOCATION_ERROR);
410     }
411     buf = convertAndEscape(&buf, bufCapacity, &bufLen, src, srcLen,status);
412     if(U_SUCCESS(*status)){
413         trim(&buf,&bufLen);
414         write_utf8_file(out,UnicodeString(tagStart));
415         write_utf8_file(out,UnicodeString(buf, bufLen, "UTF-8"));
416         write_utf8_file(out,UnicodeString(tagEnd));
417         write_utf8_file(out,UnicodeString("\n"));
418 
419     }
420 }
421 static void
printNoteElements(const UString * src,UErrorCode * status)422 printNoteElements(const UString *src, UErrorCode *status){
423 
424 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when no RegularExpressions are available */
425 
426     int32_t capacity = 0;
427     UChar* note = NULL;
428     int32_t noteLen = 0;
429     int32_t count = 0,i;
430 
431     if(src == NULL){
432         return;
433     }
434 
435     capacity = src->fLength;
436     note  = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * capacity);
437 
438     count = getCount(src->fChars,src->fLength, UPC_NOTE, status);
439     if(U_FAILURE(*status)){
440         uprv_free(note);
441         return;
442     }
443     for(i=0; i < count; i++){
444         noteLen =  getAt(src->fChars,src->fLength, &note, capacity, i, UPC_NOTE, status);
445         if(U_FAILURE(*status)){
446             uprv_free(note);
447             return;
448         }
449         if(noteLen > 0){
450             write_tabs(out);
451             print(note, noteLen,"<note>", "</note>", status);
452         }
453     }
454     uprv_free(note);
455 #else
456 
457     fprintf(stderr, "Warning: Could not output comments to XLIFF file. ICU has been built without RegularExpression support.\n");
458 
459 #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */
460 
461 }
462 
printAttribute(const char * name,const char * value,int32_t)463 static void printAttribute(const char *name, const char *value, int32_t /*len*/)
464 {
465     write_utf8_file(out, UnicodeString(" "));
466     write_utf8_file(out, UnicodeString(name));
467     write_utf8_file(out, UnicodeString(" = \""));
468     write_utf8_file(out, UnicodeString(value));
469     write_utf8_file(out, UnicodeString("\""));
470 }
471 
printAttribute(const char * name,const UnicodeString value,int32_t)472 static void printAttribute(const char *name, const UnicodeString value, int32_t /*len*/)
473 {
474     write_utf8_file(out, UnicodeString(" "));
475     write_utf8_file(out, UnicodeString(name));
476     write_utf8_file(out, UnicodeString(" = \""));
477     write_utf8_file(out, value);
478     write_utf8_file(out, UnicodeString("\""));
479 }
480 
481 static void
printComments(struct UString * src,const char * resName,UBool printTranslate,UErrorCode * status)482 printComments(struct UString *src, const char *resName, UBool printTranslate, UErrorCode *status){
483 
484 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when no RegularExpressions are available */
485 
486     if(status==NULL || U_FAILURE(*status)){
487         return;
488     }
489 
490     int32_t capacity = src->fLength + 1;
491     char* buf = NULL;
492     int32_t bufLen = 0;
493     UChar* desc  = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * capacity);
494     UChar* trans = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * capacity);
495 
496     int32_t descLen = 0, transLen=0;
497     if(desc==NULL || trans==NULL){
498         *status = U_MEMORY_ALLOCATION_ERROR;
499         uprv_free(desc);
500         uprv_free(trans);
501         return;
502     }
503     // TODO: make src const, stop modifying it in-place, make printContainer() take const resource, etc.
504     src->fLength = removeCmtText(src->fChars, src->fLength, status);
505     descLen  = getDescription(src->fChars,src->fLength, &desc, capacity, status);
506     transLen = getTranslate(src->fChars,src->fLength, &trans, capacity, status);
507 
508     /* first print translate attribute */
509     if(transLen > 0){
510         if(printTranslate){
511             /* print translate attribute */
512             buf = convertAndEscape(&buf, 0, &bufLen, trans, transLen, status);
513             if(U_SUCCESS(*status)){
514                 printAttribute("translate", UnicodeString(buf, bufLen, "UTF-8"), bufLen);
515                 write_utf8_file(out,UnicodeString(">\n"));
516             }
517         }else if(getShowWarning()){
518             fprintf(stderr, "Warning: Tranlate attribute for resource %s cannot be set. XLIFF prohibits it.\n", resName);
519             /* no translate attribute .. just close the tag */
520             write_utf8_file(out,UnicodeString(">\n"));
521         }
522     }else{
523         /* no translate attribute .. just close the tag */
524         write_utf8_file(out,UnicodeString(">\n"));
525     }
526 
527     if(descLen > 0){
528         write_tabs(out);
529         print(desc, descLen, "<!--", "-->", status);
530     }
531 
532     uprv_free(desc);
533     uprv_free(trans);
534 #else
535 
536     fprintf(stderr, "Warning: Could not output comments to XLIFF file. ICU has been built without RegularExpression support.\n");
537 
538 #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */
539 
540 }
541 
542 /*
543  * Print out a containing element, like:
544  * <trans-unit id = "blah" resname = "blah" restype = "x-id-alias" translate = "no">
545  * <group id "calendar_gregorian" resname = "gregorian" restype = "x-icu-array">
546  */
printContainer(SResource * res,const char * container,const char * restype,const char * mimetype,const char * id,UErrorCode * status)547 static char *printContainer(SResource *res, const char *container, const char *restype, const char *mimetype, const char *id, UErrorCode *status)
548 {
549     const char *resname = NULL;
550     char *sid = NULL;
551 
552     write_tabs(out);
553 
554     resname = res->getKeyString(srBundle);
555     if (resname != NULL && *resname != 0) {
556         sid = getID(id, resname, sid);
557     } else {
558         sid = getID(id, NULL, sid);
559     }
560 
561     write_utf8_file(out, UnicodeString("<"));
562     write_utf8_file(out, UnicodeString(container));
563     printAttribute("id", sid, (int32_t) uprv_strlen(sid));
564 
565     if (resname != NULL) {
566         printAttribute("resname", resname, (int32_t) uprv_strlen(resname));
567     }
568 
569     if (mimetype != NULL) {
570         printAttribute("mime-type", mimetype, (int32_t) uprv_strlen(mimetype));
571     }
572 
573     if (restype != NULL) {
574         printAttribute("restype", restype, (int32_t) uprv_strlen(restype));
575     }
576 
577     tabCount += 1;
578     if (res->fComment.fLength > 0) {
579         /* printComments will print the closing ">\n" */
580         printComments(&res->fComment, resname, TRUE, status);
581     } else {
582         write_utf8_file(out, UnicodeString(">\n"));
583     }
584 
585     return sid;
586 }
587 
/* Writing Functions */

/* Element names (passed to printContainer()) and literal tag text used for text resources. */
static const char *trans_unit = "trans-unit";
static const char *close_trans_unit = "</trans-unit>\n";
static const char *source = "<source>";
static const char *close_source = "</source>\n";
static const char *group = "group";
static const char *close_group = "</group>\n";

/* Element names and literal tag text used for binary resources. */
static const char *bin_unit = "bin-unit";
static const char *close_bin_unit = "</bin-unit>\n";
static const char *bin_source = "<bin-source>\n";
static const char *close_bin_source = "</bin-source>\n";
static const char *external_file = "<external-file";
/*static const char *close_external_file = "</external-file>\n";*/
static const char *internal_file = "<internal-file";
static const char *close_internal_file = "</internal-file>\n";

/* Default mime-type for binary resources; also written as the "form" of inline binary data. */
static const char *application_mimetype = "application"; /* add "/octet-stream"? */

/* Values of the restype attribute, one per kind of resource. */
static const char *alias_restype     = "x-icu-alias";
static const char *array_restype     = "x-icu-array";
static const char *binary_restype    = "x-icu-binary";
static const char *integer_restype   = "x-icu-integer";
static const char *intvector_restype = "x-icu-intvector";
static const char *table_restype     = "x-icu-table";
614 
615 static void
string_write_xml(StringResource * res,const char * id,const char *,UErrorCode * status)616 string_write_xml(StringResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
617 
618     char *sid = NULL;
619     char* buf = NULL;
620     int32_t bufLen = 0;
621 
622     if(status==NULL || U_FAILURE(*status)){
623         return;
624     }
625 
626     sid = printContainer(res, trans_unit, NULL, NULL, id, status);
627 
628     write_tabs(out);
629 
630     write_utf8_file(out, UnicodeString(source));
631 
632     buf = convertAndEscape(&buf, 0, &bufLen, res->getBuffer(), res->length(), status);
633 
634     if (U_FAILURE(*status)) {
635         return;
636     }
637 
638     write_utf8_file(out, UnicodeString(buf, bufLen, "UTF-8"));
639     write_utf8_file(out, UnicodeString(close_source));
640 
641     printNoteElements(&res->fComment, status);
642 
643     tabCount -= 1;
644     write_tabs(out);
645 
646     write_utf8_file(out, UnicodeString(close_trans_unit));
647 
648     uprv_free(buf);
649     uprv_free(sid);
650 }
651 
652 static void
alias_write_xml(AliasResource * res,const char * id,const char *,UErrorCode * status)653 alias_write_xml(AliasResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
654     char *sid = NULL;
655     char* buf = NULL;
656     int32_t bufLen=0;
657 
658     sid = printContainer(res, trans_unit, alias_restype, NULL, id, status);
659 
660     write_tabs(out);
661 
662     write_utf8_file(out, UnicodeString(source));
663 
664     buf = convertAndEscape(&buf, 0, &bufLen, res->getBuffer(), res->length(), status);
665 
666     if(U_FAILURE(*status)){
667         return;
668     }
669     write_utf8_file(out, UnicodeString(buf, bufLen, "UTF-8"));
670     write_utf8_file(out, UnicodeString(close_source));
671 
672     printNoteElements(&res->fComment, status);
673 
674     tabCount -= 1;
675     write_tabs(out);
676 
677     write_utf8_file(out, UnicodeString(close_trans_unit));
678 
679     uprv_free(buf);
680     uprv_free(sid);
681 }
682 
683 static void
array_write_xml(ArrayResource * res,const char * id,const char * language,UErrorCode * status)684 array_write_xml(ArrayResource *res, const char* id, const char* language, UErrorCode *status) {
685     char* sid = NULL;
686     int index = 0;
687 
688     struct SResource *current = NULL;
689 
690     sid = printContainer(res, group, array_restype, NULL, id, status);
691 
692     current = res->fFirst;
693 
694     while (current != NULL) {
695         char c[256] = {0};
696         char* subId = NULL;
697 
698         itostr(c, index, 10, 0);
699         index += 1;
700         subId = getID(sid, c, subId);
701 
702         res_write_xml(current, subId, language, FALSE, status);
703         uprv_free(subId);
704         subId = NULL;
705 
706         if(U_FAILURE(*status)){
707             return;
708         }
709 
710         current = current->fNext;
711     }
712 
713     tabCount -= 1;
714     write_tabs(out);
715     write_utf8_file(out, UnicodeString(close_group));
716 
717     uprv_free(sid);
718 }
719 
720 static void
intvector_write_xml(IntVectorResource * res,const char * id,const char *,UErrorCode * status)721 intvector_write_xml(IntVectorResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
722     char* sid = NULL;
723     char* ivd = NULL;
724     uint32_t i=0;
725     uint32_t len=0;
726     char buf[256] = {'0'};
727 
728     sid = printContainer(res, group, intvector_restype, NULL, id, status);
729 
730     for(i = 0; i < res->fCount; i += 1) {
731         char c[256] = {0};
732 
733         itostr(c, i, 10, 0);
734         ivd = getID(sid, c, ivd);
735         len = itostr(buf, res->fArray[i], 10, 0);
736 
737         write_tabs(out);
738         write_utf8_file(out, UnicodeString("<"));
739         write_utf8_file(out, UnicodeString(trans_unit));
740 
741         printAttribute("id", ivd, (int32_t)uprv_strlen(ivd));
742         printAttribute("restype", integer_restype, (int32_t) strlen(integer_restype));
743 
744         write_utf8_file(out, UnicodeString(">\n"));
745 
746         tabCount += 1;
747         write_tabs(out);
748         write_utf8_file(out, UnicodeString(source));
749 
750         write_utf8_file(out, UnicodeString(buf, len));
751 
752         write_utf8_file(out, UnicodeString(close_source));
753         tabCount -= 1;
754         write_tabs(out);
755         write_utf8_file(out, UnicodeString(close_trans_unit));
756 
757         uprv_free(ivd);
758         ivd = NULL;
759     }
760 
761     tabCount -= 1;
762     write_tabs(out);
763 
764     write_utf8_file(out, UnicodeString(close_group));
765     uprv_free(sid);
766     sid = NULL;
767 }
768 
769 static void
int_write_xml(IntResource * res,const char * id,const char *,UErrorCode * status)770 int_write_xml(IntResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
771     char* sid = NULL;
772     char buf[256] = {0};
773     uint32_t len = 0;
774 
775     sid = printContainer(res, trans_unit, integer_restype, NULL, id, status);
776 
777     write_tabs(out);
778 
779     write_utf8_file(out, UnicodeString(source));
780 
781     len = itostr(buf, res->fValue, 10, 0);
782     write_utf8_file(out, UnicodeString(buf, len));
783 
784     write_utf8_file(out, UnicodeString(close_source));
785 
786     printNoteElements(&res->fComment, status);
787 
788     tabCount -= 1;
789     write_tabs(out);
790 
791     write_utf8_file(out, UnicodeString(close_trans_unit));
792 
793     uprv_free(sid);
794     sid = NULL;
795 }
796 
797 static void
bin_write_xml(BinaryResource * res,const char * id,const char *,UErrorCode * status)798 bin_write_xml(BinaryResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
799     const char* m_type = application_mimetype;
800     char* sid = NULL;
801     uint32_t crc = 0xFFFFFFFF;
802 
803     char fileName[1024] ={0};
804     int32_t tLen = ( outDir == NULL) ? 0 :(int32_t)uprv_strlen(outDir);
805     char* fn =  (char*) uprv_malloc(sizeof(char) * (tLen+1024 +
806                                                     (res->fFileName !=NULL ?
807                                                     uprv_strlen(res->fFileName) :0)));
808     const char* ext = NULL;
809 
810     char* f = NULL;
811 
812     fn[0]=0;
813 
814     if(res->fFileName != NULL){
815         uprv_strcpy(fileName, res->fFileName);
816         f = uprv_strrchr(fileName, '\\');
817 
818         if (f != NULL) {
819             f++;
820         } else {
821             f = fileName;
822         }
823 
824         ext = uprv_strrchr(fileName, '.');
825 
826         if (ext == NULL) {
827             fprintf(stderr, "Error: %s is an unknown binary filename type.\n", fileName);
828             exit(U_ILLEGAL_ARGUMENT_ERROR);
829         }
830 
831         if(uprv_strcmp(ext, ".jpg")==0 || uprv_strcmp(ext, ".jpeg")==0 || uprv_strcmp(ext, ".gif")==0 ){
832             m_type = "image";
833         } else if(uprv_strcmp(ext, ".wav")==0 || uprv_strcmp(ext, ".au")==0 ){
834             m_type = "audio";
835         } else if(uprv_strcmp(ext, ".avi")==0 || uprv_strcmp(ext, ".mpg")==0 || uprv_strcmp(ext, ".mpeg")==0){
836             m_type = "video";
837         } else if(uprv_strcmp(ext, ".txt")==0 || uprv_strcmp(ext, ".text")==0){
838             m_type = "text";
839         }
840 
841         sid = printContainer(res, bin_unit, binary_restype, m_type, id, status);
842 
843         write_tabs(out);
844 
845         write_utf8_file(out, UnicodeString(bin_source));
846 
847         tabCount+= 1;
848         write_tabs(out);
849 
850         write_utf8_file(out, UnicodeString(external_file));
851         printAttribute("href", f, (int32_t)uprv_strlen(f));
852         write_utf8_file(out, UnicodeString("/>\n"));
853         tabCount -= 1;
854         write_tabs(out);
855 
856         write_utf8_file(out, UnicodeString(close_bin_source));
857 
858         printNoteElements(&res->fComment, status);
859         tabCount -= 1;
860         write_tabs(out);
861         write_utf8_file(out, UnicodeString(close_bin_unit));
862     } else {
863         char temp[256] = {0};
864         uint32_t i = 0;
865         int32_t len=0;
866 
867         sid = printContainer(res, bin_unit, binary_restype, m_type, id, status);
868 
869         write_tabs(out);
870         write_utf8_file(out, UnicodeString(bin_source));
871 
872         tabCount += 1;
873         write_tabs(out);
874 
875         write_utf8_file(out, UnicodeString(internal_file));
876         printAttribute("form", application_mimetype, (int32_t) uprv_strlen(application_mimetype));
877 
878         while(i <res->fLength){
879             len = itostr(temp, res->fData[i], 16, 2);
880             crc = computeCRC(temp, len, crc);
881             i++;
882         }
883 
884         len = itostr(temp, crc, 10, 0);
885         printAttribute("crc", temp, len);
886 
887         write_utf8_file(out, UnicodeString(">"));
888 
889         i = 0;
890         while(i <res->fLength){
891             len = itostr(temp, res->fData[i], 16, 2);
892             write_utf8_file(out, UnicodeString(temp));
893             i += 1;
894         }
895 
896         write_utf8_file(out, UnicodeString(close_internal_file));
897 
898         tabCount -= 2;
899         write_tabs(out);
900 
901         write_utf8_file(out, UnicodeString(close_bin_source));
902         printNoteElements(&res->fComment, status);
903 
904         tabCount -= 1;
905         write_tabs(out);
906         write_utf8_file(out, UnicodeString(close_bin_unit));
907 
908         uprv_free(sid);
909         sid = NULL;
910     }
911 
912     uprv_free(fn);
913 }
914 
915 
916 
917 static void
table_write_xml(TableResource * res,const char * id,const char * language,UBool isTopLevel,UErrorCode * status)918 table_write_xml(TableResource *res, const char* id, const char* language, UBool isTopLevel, UErrorCode *status) {
919 
920     uint32_t  i         = 0;
921 
922     struct SResource *current = NULL;
923     char* sid = NULL;
924 
925     if (U_FAILURE(*status)) {
926         return ;
927     }
928 
929     sid = printContainer(res, group, table_restype, NULL, id, status);
930 
931     if(isTopLevel) {
932         sid[0] = '\0';
933     }
934 
935     current = res->fFirst;
936     i = 0;
937 
938     while (current != NULL) {
939         res_write_xml(current, sid, language, FALSE, status);
940 
941         if(U_FAILURE(*status)){
942             return;
943         }
944 
945         i += 1;
946         current = current->fNext;
947     }
948 
949     tabCount -= 1;
950     write_tabs(out);
951 
952     write_utf8_file(out, UnicodeString(close_group));
953 
954     uprv_free(sid);
955     sid = NULL;
956 }
957 
958 void
res_write_xml(struct SResource * res,const char * id,const char * language,UBool isTopLevel,UErrorCode * status)959 res_write_xml(struct SResource *res, const char* id,  const char* language, UBool isTopLevel, UErrorCode *status) {
960 
961     if (U_FAILURE(*status)) {
962         return ;
963     }
964 
965     if (res != NULL) {
966         switch (res->fType) {
967         case URES_STRING:
968              string_write_xml    (static_cast<StringResource *>(res), id, language, status);
969              return;
970 
971         case URES_ALIAS:
972              alias_write_xml     (static_cast<AliasResource *>(res), id, language, status);
973              return;
974 
975         case URES_INT_VECTOR:
976              intvector_write_xml (static_cast<IntVectorResource *>(res), id, language, status);
977              return;
978 
979         case URES_BINARY:
980              bin_write_xml       (static_cast<BinaryResource *>(res), id, language, status);
981              return;
982 
983         case URES_INT:
984              int_write_xml       (static_cast<IntResource *>(res), id, language, status);
985              return;
986 
987         case URES_ARRAY:
988              array_write_xml     (static_cast<ArrayResource *>(res), id, language, status);
989              return;
990 
991         case URES_TABLE:
992              table_write_xml     (static_cast<TableResource *>(res), id, language, isTopLevel, status);
993              return;
994 
995         default:
996             break;
997         }
998     }
999 
1000     *status = U_INTERNAL_PROGRAM_ERROR;
1001 }
1002 
1003 void
bundle_write_xml(struct SRBRoot * bundle,const char * outputDir,const char * outputEnc,const char * filename,char * writtenFilename,int writtenFilenameLen,const char * language,const char * outFileName,UErrorCode * status)1004 bundle_write_xml(struct SRBRoot *bundle, const char *outputDir,const char* outputEnc, const char* filename,
1005                   char *writtenFilename, int writtenFilenameLen,
1006                   const char* language, const char* outFileName, UErrorCode *status) {
1007 
1008     char* xmlfileName = NULL;
1009     char* outputFileName = NULL;
1010     char* originalFileName = NULL;
1011     const char* fileStart = "<file xml:space = \"preserve\" source-language = \"";
1012     const char* file1 = "\" datatype = \"x-icu-resource-bundle\" ";
1013     const char* file2 = "original = \"";
1014     const char* file4 = "\" date = \"";
1015     const char* fileEnd = "</file>\n";
1016     const char* headerStart = "<header>\n";
1017     const char* headerEnd = "</header>\n";
1018     const char* bodyStart = "<body>\n";
1019     const char* bodyEnd = "</body>\n";
1020 
1021     const char *tool_start = "<tool";
1022     const char *tool_id = "genrb-" GENRB_VERSION "-icu-" U_ICU_VERSION;
1023     const char *tool_name = "genrb";
1024 
1025     char* temp = NULL;
1026     char* lang = NULL;
1027     const char* pos = NULL;
1028     int32_t first, index;
1029     time_t currTime;
1030     char timeBuf[128];
1031 
1032     outDir = outputDir;
1033 
1034     srBundle = bundle;
1035 
1036     pos = uprv_strrchr(filename, '\\');
1037     if(pos != NULL) {
1038         first = (int32_t)(pos - filename + 1);
1039     } else {
1040         first = 0;
1041     }
1042     index = (int32_t)(uprv_strlen(filename) - uprv_strlen(textExt) - first);
1043     originalFileName = (char *)uprv_malloc(sizeof(char)*index+1);
1044     uprv_memset(originalFileName, 0, sizeof(char)*index+1);
1045     uprv_strncpy(originalFileName, filename + first, index);
1046 
1047     if(uprv_strcmp(originalFileName, srBundle->fLocale) != 0) {
1048         fprintf(stdout, "Warning: The file name is not same as the resource name!\n");
1049     }
1050 
1051     temp = originalFileName;
1052     originalFileName = (char *)uprv_malloc(sizeof(char)* (uprv_strlen(temp)+uprv_strlen(textExt)) + 1);
1053     uprv_memset(originalFileName, 0, sizeof(char)* (uprv_strlen(temp)+uprv_strlen(textExt)) + 1);
1054     uprv_strcat(originalFileName, temp);
1055     uprv_strcat(originalFileName, textExt);
1056     uprv_free(temp);
1057     temp = NULL;
1058 
1059 
1060     if (language == NULL) {
1061 /*        lang = parseFilename(filename, lang);
1062         if (lang == NULL) {*/
1063             /* now check if locale name is valid or not
1064              * this is to cater for situation where
1065              * pegasusServer.txt contains
1066              *
1067              * en{
1068              *      ..
1069              * }
1070              */
1071              lang = parseFilename(srBundle->fLocale, lang);
1072              /*
1073               * Neither  the file name nor the table name inside the
1074               * txt file contain a valid country and language codes
1075               * throw an error.
1076               * pegasusServer.txt contains
1077               *
1078               * testelements{
1079               *     ....
1080               * }
1081               */
1082              if(lang==NULL){
1083                  fprintf(stderr, "Error: The file name and table name do not contain a valid language code. Please use -l option to specify it.\n");
1084                  exit(U_ILLEGAL_ARGUMENT_ERROR);
1085              }
1086        /* }*/
1087     } else {
1088         lang = (char *)uprv_malloc(sizeof(char)*uprv_strlen(language) +1);
1089         uprv_memset(lang, 0, sizeof(char)*uprv_strlen(language) +1);
1090         uprv_strcpy(lang, language);
1091     }
1092 
1093     if(outFileName) {
1094         outputFileName = (char *)uprv_malloc(sizeof(char)*uprv_strlen(outFileName) + 1);
1095         uprv_memset(outputFileName, 0, sizeof(char)*uprv_strlen(outFileName) + 1);
1096         uprv_strcpy(outputFileName,outFileName);
1097     } else {
1098         outputFileName = (char *)uprv_malloc(sizeof(char)*uprv_strlen(srBundle->fLocale) + 1);
1099         uprv_memset(outputFileName, 0, sizeof(char)*uprv_strlen(srBundle->fLocale) + 1);
1100         uprv_strcpy(outputFileName,srBundle->fLocale);
1101     }
1102 
1103     if(outputDir) {
1104         xmlfileName = (char *)uprv_malloc(sizeof(char)*(uprv_strlen(outputDir) + uprv_strlen(outputFileName) + uprv_strlen(xliffExt) + 1) +1);
1105         uprv_memset(xmlfileName, 0, sizeof(char)*(uprv_strlen(outputDir)+ uprv_strlen(outputFileName) + uprv_strlen(xliffExt) + 1) +1);
1106     } else {
1107         xmlfileName = (char *)uprv_malloc(sizeof(char)*(uprv_strlen(outputFileName) + uprv_strlen(xliffExt)) +1);
1108         uprv_memset(xmlfileName, 0, sizeof(char)*(uprv_strlen(outputFileName) + uprv_strlen(xliffExt)) +1);
1109     }
1110 
1111     if(outputDir){
1112         uprv_strcpy(xmlfileName, outputDir);
1113         if(outputDir[uprv_strlen(outputDir)-1] !=U_FILE_SEP_CHAR){
1114             uprv_strcat(xmlfileName,U_FILE_SEP_STRING);
1115         }
1116     }
1117     uprv_strcat(xmlfileName,outputFileName);
1118     uprv_strcat(xmlfileName,xliffExt);
1119 
1120     if (writtenFilename) {
1121         uprv_strncpy(writtenFilename, xmlfileName, writtenFilenameLen);
1122     }
1123 
1124     if (U_FAILURE(*status)) {
1125         goto cleanup_bundle_write_xml;
1126     }
1127 
1128     out= T_FileStream_open(xmlfileName,"w");
1129 
1130     if(out==NULL){
1131         *status = U_FILE_ACCESS_ERROR;
1132         goto cleanup_bundle_write_xml;
1133     }
1134     write_utf8_file(out, UnicodeString(xmlHeader));
1135 
1136     if(outputEnc && *outputEnc!='\0'){
1137         /* store the output encoding */
1138         enc = outputEnc;
1139         conv=ucnv_open(enc,status);
1140         if(U_FAILURE(*status)){
1141             goto cleanup_bundle_write_xml;
1142         }
1143     }
1144     write_utf8_file(out, UnicodeString(bundleStart));
1145     write_tabs(out);
1146     write_utf8_file(out, UnicodeString(fileStart));
1147     /* check if lang and language are the same */
1148     if(language != NULL && uprv_strcmp(lang, srBundle->fLocale)!=0){
1149         fprintf(stderr,"Warning: The top level tag in the resource and language specified are not the same. Please check the input.\n");
1150     }
1151     write_utf8_file(out, UnicodeString(lang));
1152     write_utf8_file(out, UnicodeString(file1));
1153     write_utf8_file(out, UnicodeString(file2));
1154     write_utf8_file(out, UnicodeString(originalFileName));
1155     write_utf8_file(out, UnicodeString(file4));
1156 
1157     time(&currTime);
1158     strftime(timeBuf, sizeof(timeBuf), "%Y-%m-%dT%H:%M:%SZ", gmtime(&currTime));
1159     write_utf8_file(out, UnicodeString(timeBuf));
1160     write_utf8_file(out, UnicodeString("\">\n"));
1161 
1162     tabCount += 1;
1163     write_tabs(out);
1164     write_utf8_file(out, UnicodeString(headerStart));
1165 
1166     tabCount += 1;
1167     write_tabs(out);
1168 
1169     write_utf8_file(out, UnicodeString(tool_start));
1170     printAttribute("tool-id", tool_id, (int32_t) uprv_strlen(tool_id));
1171     printAttribute("tool-name", tool_name, (int32_t) uprv_strlen(tool_name));
1172     write_utf8_file(out, UnicodeString("/>\n"));
1173 
1174     tabCount -= 1;
1175     write_tabs(out);
1176 
1177     write_utf8_file(out, UnicodeString(headerEnd));
1178 
1179     write_tabs(out);
1180     tabCount += 1;
1181 
1182     write_utf8_file(out, UnicodeString(bodyStart));
1183 
1184 
1185     res_write_xml(bundle->fRoot, bundle->fLocale, lang, TRUE, status);
1186 
1187     tabCount -= 1;
1188     write_tabs(out);
1189 
1190     write_utf8_file(out, UnicodeString(bodyEnd));
1191     tabCount--;
1192     write_tabs(out);
1193     write_utf8_file(out, UnicodeString(fileEnd));
1194     tabCount--;
1195     write_tabs(out);
1196     write_utf8_file(out, UnicodeString(bundleEnd));
1197     T_FileStream_close(out);
1198 
1199     ucnv_close(conv);
1200 
1201 cleanup_bundle_write_xml:
1202     uprv_free(originalFileName);
1203     uprv_free(lang);
1204     if(xmlfileName != NULL) {
1205         uprv_free(xmlfileName);
1206     }
1207     if(outputFileName != NULL){
1208         uprv_free(outputFileName);
1209     }
1210 }
1211