1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 *   Copyright (C) 2002-2015, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 *
11 * File wrtxml.cpp
12 *
13 * Modification History:
14 *
15 *   Date        Name        Description
16 *   10/01/02    Ram         Creation.
17 *   02/07/08    Spieth      Correct XLIFF generation on EBCDIC platform
18 *
19 *******************************************************************************
20 */
21 
22 // Safer use of UnicodeString.
23 #ifndef UNISTR_FROM_CHAR_EXPLICIT
24 #   define UNISTR_FROM_CHAR_EXPLICIT explicit
25 #endif
26 
27 // Less important, but still a good idea.
28 #ifndef UNISTR_FROM_STRING_EXPLICIT
29 #   define UNISTR_FROM_STRING_EXPLICIT explicit
30 #endif
31 
32 #include "reslist.h"
33 #include "unewdata.h"
34 #include "unicode/ures.h"
35 #include "errmsg.h"
36 #include "filestrm.h"
37 #include "cstring.h"
38 #include "unicode/ucnv.h"
39 #include "genrb.h"
40 #include "rle.h"
41 #include "uhash.h"
42 #include "uresimp.h"
43 #include "unicode/ustring.h"
44 #include "unicode/uchar.h"
45 #include "ustr.h"
46 #include "prscmnts.h"
47 #include "unicode/unistr.h"
48 #include "unicode/utf8.h"
49 #include "unicode/utf16.h"
50 #include <time.h>
51 
52 U_NAMESPACE_USE
53 
54 static int tabCount = 0;
55 
56 static FileStream* out=NULL;
57 static struct SRBRoot* srBundle ;
58 static const char* outDir = NULL;
59 static const char* enc ="";
60 static UConverter* conv = NULL;
61 
62 const char* const* ISOLanguages;
63 const char* const* ISOCountries;
64 const char* textExt = ".txt";
65 const char* xliffExt = ".xlf";
66 
write_utf8_file(FileStream * fileStream,UnicodeString outString)67 static int32_t write_utf8_file(FileStream* fileStream, UnicodeString outString)
68 {
69     UErrorCode status = U_ZERO_ERROR;
70     int32_t len = 0;
71 
72     // preflight to get the destination buffer size
73     u_strToUTF8(NULL,
74                 0,
75                 &len,
76                 toUCharPtr(outString.getBuffer()),
77                 outString.length(),
78                 &status);
79 
80     // allocate the buffer
81     char* dest = (char*)uprv_malloc(len);
82     status = U_ZERO_ERROR;
83 
84     // convert the data
85     u_strToUTF8(dest,
86                 len,
87                 &len,
88                 toUCharPtr(outString.getBuffer()),
89                 outString.length(),
90                 &status);
91 
92     // write data to out file
93     int32_t ret = T_FileStream_write(fileStream, dest, len);
94     uprv_free(dest);
95     return (ret);
96 }
97 
98 /*write indentation for formatting*/
write_tabs(FileStream * os)99 static void write_tabs(FileStream* os){
100     int i=0;
101     for(;i<=tabCount;i++){
102         write_utf8_file(os,UnicodeString("    "));
103     }
104 }
105 
106 /*get ID for each element. ID is globally unique.*/
getID(const char * id,const char * curKey,char * result)107 static char* getID(const char* id, const char* curKey, char* result) {
108     if(curKey == NULL) {
109         result = (char *)uprv_malloc(sizeof(char)*uprv_strlen(id) + 1);
110         uprv_memset(result, 0, sizeof(char)*uprv_strlen(id) + 1);
111         uprv_strcpy(result, id);
112     } else {
113         result = (char *)uprv_malloc(sizeof(char)*(uprv_strlen(id) + 1 + uprv_strlen(curKey)) + 1);
114         uprv_memset(result, 0, sizeof(char)*(uprv_strlen(id) + 1 + uprv_strlen(curKey)) + 1);
115         if(id[0]!='\0'){
116             uprv_strcpy(result, id);
117             uprv_strcat(result, "_");
118         }
119         uprv_strcat(result, curKey);
120     }
121     return result;
122 }
123 
/* Compute the CRC for binary data. */
/* The algorithm is from  http://www.theorem.com/java/CRC32.java
 * Calculates the CRC32 - 32 bit Cyclical Redundancy Check
 * <P> This check is used in numerous systems to verify the integrity
 * of information.  It's also used as a hashing function.  Unlike a regular
 * checksum, it's sensitive to the order of the characters.
 * It produces a 32 bit value.
 *
 * @author Michael Lecuyer (mjl@theorem.com)
 * @version 1.1 August 11, 1998
 */

/* ICU data is not endian portable: data generated on a big endian machine can be
 * ported to other big endian machines but not to little endian machines, and vice
 * versa. The conversion is not portable across platforms with different endianness.
 */

/*
 * Update a running CRC-32 (reflected polynomial 0xEDB88320) with len bytes.
 *
 * ptr     - bytes to add to the checksum
 * len     - number of bytes at ptr
 * lastcrc - checksum so far; this function applies no initial value and no
 *           final XOR, so the caller supplies the seed and may chain calls
 *
 * Returns the updated checksum.
 *
 * The update is done bit by bit in unsigned arithmetic. This yields the same
 * values as the byte-wise lookup table, without rebuilding a 256-entry table
 * on every call and without depending on signed conversions.
 */
uint32_t computeCRC(const char *ptr, uint32_t len, uint32_t lastcrc){
#define CRC32_POLYNOMIAL 0xEDB88320

    uint32_t crc = lastcrc;

    while(len-- != 0) {
        int bit;

        /* go through uint8_t so bytes >= 0x80 are not sign-extended */
        crc ^= (uint32_t)(uint8_t)*ptr;
        for (bit = 0; bit < 8; bit++) {
            if ((crc & 1) != 0) {
                crc = (crc >> 1) ^ CRC32_POLYNOMIAL;
            } else {
                crc >>= 1;
            }
        }
        ptr++;
    }
    return(crc);
}
175 
/*
 * Replace every occurrence of the character s with r within the first
 * srcLen characters of src. The buffer is modified in place.
 */
static void strnrepchr(char* src, int32_t srcLen, char s, char r){
    char* cursor = src;
    int32_t remaining = srcLen;

    while(remaining > 0){
        if(*cursor == s){
            *cursor = r;
        }
        ++cursor;
        --remaining;
    }
}
184 /* Parse the filename, and get its language information.
185  * If it fails to get the language information from the filename,
186  * use "en" as the default value for language
187  */
parseFilename(const char * id,char *)188 static char* parseFilename(const char* id, char* /*lang*/) {
189     int idLen = (int) uprv_strlen(id);
190     char* localeID = (char*) uprv_malloc(idLen);
191     int pos = 0;
192     int canonCapacity = 0;
193     char* canon = NULL;
194     int canonLen = 0;
195     /*int i;*/
196     UErrorCode status = U_ZERO_ERROR;
197     const char *ext = uprv_strchr(id, '.');
198 
199     if(ext != NULL){
200         pos = (int) (ext - id);
201     } else {
202         pos = idLen;
203     }
204     uprv_memcpy(localeID, id, pos);
205     localeID[pos]=0; /* NUL terminate the string */
206 
207     canonCapacity =pos*3;
208     canon = (char*) uprv_malloc(canonCapacity);
209     canonLen = uloc_canonicalize(localeID, canon, canonCapacity, &status);
210 
211     if(U_FAILURE(status)){
212         fprintf(stderr, "Could not canonicalize the locale ID: %s. Error: %s\n", localeID, u_errorName(status));
213         exit(status);
214     }
215     strnrepchr(canon, canonLen, '_', '-');
216     return canon;
217 }
218 
219 static const char* xmlHeader = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n";
220 #if 0
221 static const char* bundleStart = "<xliff version = \"1.2\" "
222                                         "xmlns='urn:oasis:names:tc:xliff:document:1.2' "
223                                         "xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' "
224                                         "xsi:schemaLocation='urn:oasis:names:tc:xliff:document:1.2 xliff-core-1.2-transitional.xsd'>\n";
225 #else
226 static const char* bundleStart = "<xliff version = \"1.1\" "
227                                         "xmlns='urn:oasis:names:tc:xliff:document:1.1' "
228                                         "xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' "
229                                         "xsi:schemaLocation='urn:oasis:names:tc:xliff:document:1.1 http://www.oasis-open.org/committees/xliff/documents/xliff-core-1.1.xsd'>\n";
230 #endif
231 static const char* bundleEnd   = "</xliff>\n";
232 
233 void res_write_xml(struct SResource *res, const char* id, const char* language, UBool isTopLevel, UErrorCode *status);
234 
convertAndEscape(char ** pDest,int32_t destCap,int32_t * destLength,const UChar * src,int32_t srcLen,UErrorCode * status)235 static char* convertAndEscape(char** pDest, int32_t destCap, int32_t* destLength,
236                               const UChar* src, int32_t srcLen, UErrorCode* status){
237     int32_t srcIndex=0;
238     char* dest=NULL;
239     char* temp=NULL;
240     int32_t destLen=0;
241     UChar32 c = 0;
242 
243     if(status==NULL || U_FAILURE(*status) || pDest==NULL  || srcLen==0 || src == NULL){
244         return NULL;
245     }
246     dest =*pDest;
247     if(dest==NULL || destCap <=0){
248         destCap = srcLen * 8;
249         dest = (char*) uprv_malloc(sizeof(char) * destCap);
250         if(dest==NULL){
251             *status=U_MEMORY_ALLOCATION_ERROR;
252             return NULL;
253         }
254     }
255 
256     dest[0]=0;
257 
258     while(srcIndex<srcLen){
259         U16_NEXT(src, srcIndex, srcLen, c);
260 
261         if (U16_IS_LEAD(c) || U16_IS_TRAIL(c)) {
262             *status = U_ILLEGAL_CHAR_FOUND;
263             fprintf(stderr, "Illegal Surrogate! \n");
264             uprv_free(dest);
265             return NULL;
266         }
267 
268         if((destLen+U8_LENGTH(c)) < destCap){
269 
270             /* ASCII Range */
271             if(c <=0x007F){
272                 switch(c) {
273                 case '\x26':
274                     uprv_strcpy(dest+( destLen),"\x26\x61\x6d\x70\x3b"); /* &amp;*/
275                     destLen+=(int32_t)uprv_strlen("\x26\x61\x6d\x70\x3b");
276                     break;
277                 case '\x3c':
278                     uprv_strcpy(dest+(destLen),"\x26\x6c\x74\x3b"); /* &lt;*/
279                     destLen+=(int32_t)uprv_strlen("\x26\x6c\x74\x3b");
280                     break;
281                 case '\x3e':
282                     uprv_strcpy(dest+(destLen),"\x26\x67\x74\x3b"); /* &gt;*/
283                     destLen+=(int32_t)uprv_strlen("\x26\x67\x74\x3b");
284                     break;
285                 case '\x22':
286                     uprv_strcpy(dest+(destLen),"\x26\x71\x75\x6f\x74\x3b"); /* &quot;*/
287                     destLen+=(int32_t)uprv_strlen("\x26\x71\x75\x6f\x74\x3b");
288                     break;
289                 case '\x27':
290                     uprv_strcpy(dest+(destLen),"\x26\x61\x70\x6f\x73\x3b"); /* &apos; */
291                     destLen+=(int32_t)uprv_strlen("\x26\x61\x70\x6f\x73\x3b");
292                     break;
293 
294                  /* Disallow C0 controls except TAB, CR, LF*/
295                 case 0x00:
296                 case 0x01:
297                 case 0x02:
298                 case 0x03:
299                 case 0x04:
300                 case 0x05:
301                 case 0x06:
302                 case 0x07:
303                 case 0x08:
304                 /*case 0x09:*/
305                 /*case 0x0A: */
306                 case 0x0B:
307                 case 0x0C:
308                 /*case 0x0D:*/
309                 case 0x0E:
310                 case 0x0F:
311                 case 0x10:
312                 case 0x11:
313                 case 0x12:
314                 case 0x13:
315                 case 0x14:
316                 case 0x15:
317                 case 0x16:
318                 case 0x17:
319                 case 0x18:
320                 case 0x19:
321                 case 0x1A:
322                 case 0x1B:
323                 case 0x1C:
324                 case 0x1D:
325                 case 0x1E:
326                 case 0x1F:
327                     *status = U_ILLEGAL_CHAR_FOUND;
328                     fprintf(stderr, "Illegal Character \\u%04X!\n",(int)c);
329                     uprv_free(dest);
330                     return NULL;
331                 default:
332                     dest[destLen++]=(char)c;
333                 }
334             }else{
335                 UBool isError = FALSE;
336                 U8_APPEND((unsigned char*)dest,destLen,destCap,c,isError);
337                 if(isError){
338                     *status = U_ILLEGAL_CHAR_FOUND;
339                     fprintf(stderr, "Illegal Character \\U%08X!\n",(int)c);
340                     uprv_free(dest);
341                     return NULL;
342                 }
343             }
344         }else{
345             destCap += destLen;
346 
347             temp = (char*) uprv_malloc(sizeof(char)*destCap);
348             if(temp==NULL){
349                 *status=U_MEMORY_ALLOCATION_ERROR;
350                 uprv_free(dest);
351                 return NULL;
352             }
353             uprv_memmove(temp,dest,destLen);
354             destLen=0;
355             uprv_free(dest);
356             dest=temp;
357             temp=NULL;
358         }
359 
360     }
361     *destLength = destLen;
362     return dest;
363 }
364 
/* Code points of the characters that trim() strips, plus the at sign. */
#define ASTERISK 0x002A
#define SPACE    0x0020
#define CR       0x000D /* carriage return */
#define LF       0x000A /* line feed */
#define AT_SIGN  0x0040
370 
371 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0
372 static void
trim(char ** src,int32_t * len)373 trim(char **src, int32_t *len){
374 
375     char *s = NULL;
376     int32_t i = 0;
377     if(src == NULL || *src == NULL){
378         return;
379     }
380     s = *src;
381     /* trim from the end */
382     for( i=(*len-1); i>= 0; i--){
383         switch(s[i]){
384         case ASTERISK:
385         case SPACE:
386         case CR:
387         case LF:
388             s[i] = 0;
389             continue;
390         default:
391             break;
392         }
393         break;
394 
395     }
396     *len = i+1;
397 }
398 
399 static void
print(UChar * src,int32_t srcLen,const char * tagStart,const char * tagEnd,UErrorCode * status)400 print(UChar* src, int32_t srcLen,const char *tagStart,const char *tagEnd,  UErrorCode *status){
401     int32_t bufCapacity   = srcLen*4;
402     char *buf       = NULL;
403     int32_t bufLen = 0;
404 
405     if(U_FAILURE(*status)){
406         return;
407     }
408 
409     buf = (char*) (uprv_malloc(bufCapacity));
410     if(buf==0){
411         fprintf(stderr, "Could not allocate memory!!");
412         exit(U_MEMORY_ALLOCATION_ERROR);
413     }
414     buf = convertAndEscape(&buf, bufCapacity, &bufLen, src, srcLen,status);
415     if(U_SUCCESS(*status)){
416         trim(&buf,&bufLen);
417         write_utf8_file(out,UnicodeString(tagStart));
418         write_utf8_file(out,UnicodeString(buf, bufLen, "UTF-8"));
419         write_utf8_file(out,UnicodeString(tagEnd));
420         write_utf8_file(out,UnicodeString("\n"));
421 
422     }
423 }
424 #endif
425 
426 static void
printNoteElements(const UString * src,UErrorCode * status)427 printNoteElements(const UString *src, UErrorCode *status){
428 
429 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when no RegularExpressions are available */
430 
431     int32_t capacity = 0;
432     UChar* note = NULL;
433     int32_t noteLen = 0;
434     int32_t count = 0,i;
435 
436     if(src == NULL){
437         return;
438     }
439 
440     capacity = src->fLength;
441     note  = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * capacity);
442 
443     count = getCount(src->fChars,src->fLength, UPC_NOTE, status);
444     if(U_FAILURE(*status)){
445         uprv_free(note);
446         return;
447     }
448     for(i=0; i < count; i++){
449         noteLen =  getAt(src->fChars,src->fLength, &note, capacity, i, UPC_NOTE, status);
450         if(U_FAILURE(*status)){
451             uprv_free(note);
452             return;
453         }
454         if(noteLen > 0){
455             write_tabs(out);
456             print(note, noteLen,"<note>", "</note>", status);
457         }
458     }
459     uprv_free(note);
460 #else
461 
462     fprintf(stderr, "Warning: Could not output comments to XLIFF file. ICU has been built without RegularExpression support.\n");
463 
464 #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */
465 
466 }
467 
printAttribute(const char * name,const char * value,int32_t)468 static void printAttribute(const char *name, const char *value, int32_t /*len*/)
469 {
470     write_utf8_file(out, UnicodeString(" "));
471     write_utf8_file(out, UnicodeString(name));
472     write_utf8_file(out, UnicodeString(" = \""));
473     write_utf8_file(out, UnicodeString(value));
474     write_utf8_file(out, UnicodeString("\""));
475 }
476 
477 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when no RegularExpressions are available */
printAttribute(const char * name,const UnicodeString value,int32_t)478 static void printAttribute(const char *name, const UnicodeString value, int32_t /*len*/)
479 {
480     write_utf8_file(out, UnicodeString(" "));
481     write_utf8_file(out, UnicodeString(name));
482     write_utf8_file(out, UnicodeString(" = \""));
483     write_utf8_file(out, value);
484     write_utf8_file(out, UnicodeString("\""));
485 }
486 #endif
487 
488 static void
printComments(struct UString * src,const char * resName,UBool printTranslate,UErrorCode * status)489 printComments(struct UString *src, const char *resName, UBool printTranslate, UErrorCode *status){
490 
491 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when no RegularExpressions are available */
492 
493     if(status==NULL || U_FAILURE(*status)){
494         return;
495     }
496 
497     int32_t capacity = src->fLength + 1;
498     char* buf = NULL;
499     int32_t bufLen = 0;
500     UChar* desc  = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * capacity);
501     UChar* trans = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * capacity);
502 
503     int32_t descLen = 0, transLen=0;
504     if(desc==NULL || trans==NULL){
505         *status = U_MEMORY_ALLOCATION_ERROR;
506         uprv_free(desc);
507         uprv_free(trans);
508         return;
509     }
510     // TODO: make src const, stop modifying it in-place, make printContainer() take const resource, etc.
511     src->fLength = removeCmtText(src->fChars, src->fLength, status);
512     descLen  = getDescription(src->fChars,src->fLength, &desc, capacity, status);
513     transLen = getTranslate(src->fChars,src->fLength, &trans, capacity, status);
514 
515     /* first print translate attribute */
516     if(transLen > 0){
517         if(printTranslate){
518             /* print translate attribute */
519             buf = convertAndEscape(&buf, 0, &bufLen, trans, transLen, status);
520             if(U_SUCCESS(*status)){
521                 printAttribute("translate", UnicodeString(buf, bufLen, "UTF-8"), bufLen);
522                 write_utf8_file(out,UnicodeString(">\n"));
523             }
524         }else if(getShowWarning()){
525             fprintf(stderr, "Warning: Tranlate attribute for resource %s cannot be set. XLIFF prohibits it.\n", resName);
526             /* no translate attribute .. just close the tag */
527             write_utf8_file(out,UnicodeString(">\n"));
528         }
529     }else{
530         /* no translate attribute .. just close the tag */
531         write_utf8_file(out,UnicodeString(">\n"));
532     }
533 
534     if(descLen > 0){
535         write_tabs(out);
536         print(desc, descLen, "<!--", "-->", status);
537     }
538 
539     uprv_free(desc);
540     uprv_free(trans);
541 #else
542 
543     fprintf(stderr, "Warning: Could not output comments to XLIFF file. ICU has been built without RegularExpression support.\n");
544 
545 #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */
546 
547 }
548 
549 /*
550  * Print out a containing element, like:
551  * <trans-unit id = "blah" resname = "blah" restype = "x-id-alias" translate = "no">
552  * <group id "calendar_gregorian" resname = "gregorian" restype = "x-icu-array">
553  */
printContainer(SResource * res,const char * container,const char * restype,const char * mimetype,const char * id,UErrorCode * status)554 static char *printContainer(SResource *res, const char *container, const char *restype, const char *mimetype, const char *id, UErrorCode *status)
555 {
556     const char *resname = NULL;
557     char *sid = NULL;
558 
559     write_tabs(out);
560 
561     resname = res->getKeyString(srBundle);
562     if (resname != NULL && *resname != 0) {
563         sid = getID(id, resname, sid);
564     } else {
565         sid = getID(id, NULL, sid);
566     }
567 
568     write_utf8_file(out, UnicodeString("<"));
569     write_utf8_file(out, UnicodeString(container));
570     printAttribute("id", sid, (int32_t) uprv_strlen(sid));
571 
572     if (resname != NULL) {
573         printAttribute("resname", resname, (int32_t) uprv_strlen(resname));
574     }
575 
576     if (mimetype != NULL) {
577         printAttribute("mime-type", mimetype, (int32_t) uprv_strlen(mimetype));
578     }
579 
580     if (restype != NULL) {
581         printAttribute("restype", restype, (int32_t) uprv_strlen(restype));
582     }
583 
584     tabCount += 1;
585     if (res->fComment.fLength > 0) {
586         /* printComments will print the closing ">\n" */
587         printComments(&res->fComment, resname, TRUE, status);
588     } else {
589         write_utf8_file(out, UnicodeString(">\n"));
590     }
591 
592     return sid;
593 }
594 
/* Writing Functions */

/* Element names and tags for translatable units and groups. */
static const char *trans_unit       = "trans-unit";
static const char *close_trans_unit = "</trans-unit>\n";
static const char *source           = "<source>";
static const char *close_source     = "</source>\n";
static const char *group            = "group";
static const char *close_group      = "</group>\n";

/* Element names and tags for binary units. */
static const char *bin_unit            = "bin-unit";
static const char *close_bin_unit      = "</bin-unit>\n";
static const char *bin_source          = "<bin-source>\n";
static const char *close_bin_source    = "</bin-source>\n";
static const char *external_file       = "<external-file";
/*static const char *close_external_file = "</external-file>\n";*/
static const char *internal_file       = "<internal-file";
static const char *close_internal_file = "</internal-file>\n";

/* Default mime type of binary resources. */
static const char *application_mimetype = "application"; /* add "/octet-stream"? */

/* Values of the restype attribute, one per ICU resource type. */
static const char *alias_restype     = "x-icu-alias";
static const char *array_restype     = "x-icu-array";
static const char *binary_restype    = "x-icu-binary";
static const char *integer_restype   = "x-icu-integer";
static const char *intvector_restype = "x-icu-intvector";
static const char *table_restype     = "x-icu-table";
621 
622 static void
string_write_xml(StringResource * res,const char * id,const char *,UErrorCode * status)623 string_write_xml(StringResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
624 
625     char *sid = NULL;
626     char* buf = NULL;
627     int32_t bufLen = 0;
628 
629     if(status==NULL || U_FAILURE(*status)){
630         return;
631     }
632 
633     sid = printContainer(res, trans_unit, NULL, NULL, id, status);
634 
635     write_tabs(out);
636 
637     write_utf8_file(out, UnicodeString(source));
638 
639     buf = convertAndEscape(&buf, 0, &bufLen, res->getBuffer(), res->length(), status);
640 
641     if (U_FAILURE(*status)) {
642         return;
643     }
644 
645     write_utf8_file(out, UnicodeString(buf, bufLen, "UTF-8"));
646     write_utf8_file(out, UnicodeString(close_source));
647 
648     printNoteElements(&res->fComment, status);
649 
650     tabCount -= 1;
651     write_tabs(out);
652 
653     write_utf8_file(out, UnicodeString(close_trans_unit));
654 
655     uprv_free(buf);
656     uprv_free(sid);
657 }
658 
659 static void
alias_write_xml(AliasResource * res,const char * id,const char *,UErrorCode * status)660 alias_write_xml(AliasResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
661     char *sid = NULL;
662     char* buf = NULL;
663     int32_t bufLen=0;
664 
665     sid = printContainer(res, trans_unit, alias_restype, NULL, id, status);
666 
667     write_tabs(out);
668 
669     write_utf8_file(out, UnicodeString(source));
670 
671     buf = convertAndEscape(&buf, 0, &bufLen, res->getBuffer(), res->length(), status);
672 
673     if(U_FAILURE(*status)){
674         return;
675     }
676     write_utf8_file(out, UnicodeString(buf, bufLen, "UTF-8"));
677     write_utf8_file(out, UnicodeString(close_source));
678 
679     printNoteElements(&res->fComment, status);
680 
681     tabCount -= 1;
682     write_tabs(out);
683 
684     write_utf8_file(out, UnicodeString(close_trans_unit));
685 
686     uprv_free(buf);
687     uprv_free(sid);
688 }
689 
690 static void
array_write_xml(ArrayResource * res,const char * id,const char * language,UErrorCode * status)691 array_write_xml(ArrayResource *res, const char* id, const char* language, UErrorCode *status) {
692     char* sid = NULL;
693     int index = 0;
694 
695     struct SResource *current = NULL;
696 
697     sid = printContainer(res, group, array_restype, NULL, id, status);
698 
699     current = res->fFirst;
700 
701     while (current != NULL) {
702         char c[256] = {0};
703         char* subId = NULL;
704 
705         itostr(c, index, 10, 0);
706         index += 1;
707         subId = getID(sid, c, subId);
708 
709         res_write_xml(current, subId, language, FALSE, status);
710         uprv_free(subId);
711         subId = NULL;
712 
713         if(U_FAILURE(*status)){
714             return;
715         }
716 
717         current = current->fNext;
718     }
719 
720     tabCount -= 1;
721     write_tabs(out);
722     write_utf8_file(out, UnicodeString(close_group));
723 
724     uprv_free(sid);
725 }
726 
727 static void
intvector_write_xml(IntVectorResource * res,const char * id,const char *,UErrorCode * status)728 intvector_write_xml(IntVectorResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
729     char* sid = NULL;
730     char* ivd = NULL;
731     uint32_t i=0;
732     uint32_t len=0;
733     char buf[256] = {'0'};
734 
735     sid = printContainer(res, group, intvector_restype, NULL, id, status);
736 
737     for(i = 0; i < res->fCount; i += 1) {
738         char c[256] = {0};
739 
740         itostr(c, i, 10, 0);
741         ivd = getID(sid, c, ivd);
742         len = itostr(buf, res->fArray[i], 10, 0);
743 
744         write_tabs(out);
745         write_utf8_file(out, UnicodeString("<"));
746         write_utf8_file(out, UnicodeString(trans_unit));
747 
748         printAttribute("id", ivd, (int32_t)uprv_strlen(ivd));
749         printAttribute("restype", integer_restype, (int32_t) strlen(integer_restype));
750 
751         write_utf8_file(out, UnicodeString(">\n"));
752 
753         tabCount += 1;
754         write_tabs(out);
755         write_utf8_file(out, UnicodeString(source));
756 
757         write_utf8_file(out, UnicodeString(buf, len));
758 
759         write_utf8_file(out, UnicodeString(close_source));
760         tabCount -= 1;
761         write_tabs(out);
762         write_utf8_file(out, UnicodeString(close_trans_unit));
763 
764         uprv_free(ivd);
765         ivd = NULL;
766     }
767 
768     tabCount -= 1;
769     write_tabs(out);
770 
771     write_utf8_file(out, UnicodeString(close_group));
772     uprv_free(sid);
773     sid = NULL;
774 }
775 
776 static void
int_write_xml(IntResource * res,const char * id,const char *,UErrorCode * status)777 int_write_xml(IntResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
778     char* sid = NULL;
779     char buf[256] = {0};
780     uint32_t len = 0;
781 
782     sid = printContainer(res, trans_unit, integer_restype, NULL, id, status);
783 
784     write_tabs(out);
785 
786     write_utf8_file(out, UnicodeString(source));
787 
788     len = itostr(buf, res->fValue, 10, 0);
789     write_utf8_file(out, UnicodeString(buf, len));
790 
791     write_utf8_file(out, UnicodeString(close_source));
792 
793     printNoteElements(&res->fComment, status);
794 
795     tabCount -= 1;
796     write_tabs(out);
797 
798     write_utf8_file(out, UnicodeString(close_trans_unit));
799 
800     uprv_free(sid);
801     sid = NULL;
802 }
803 
804 static void
bin_write_xml(BinaryResource * res,const char * id,const char *,UErrorCode * status)805 bin_write_xml(BinaryResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
806     const char* m_type = application_mimetype;
807     char* sid = NULL;
808     uint32_t crc = 0xFFFFFFFF;
809 
810     char fileName[1024] ={0};
811     int32_t tLen = ( outDir == NULL) ? 0 :(int32_t)uprv_strlen(outDir);
812     char* fn =  (char*) uprv_malloc(sizeof(char) * (tLen+1024 +
813                                                     (res->fFileName !=NULL ?
814                                                     uprv_strlen(res->fFileName) :0)));
815     const char* ext = NULL;
816 
817     char* f = NULL;
818 
819     fn[0]=0;
820 
821     if(res->fFileName != NULL){
822         uprv_strcpy(fileName, res->fFileName);
823         f = uprv_strrchr(fileName, '\\');
824 
825         if (f != NULL) {
826             f++;
827         } else {
828             f = fileName;
829         }
830 
831         ext = uprv_strrchr(fileName, '.');
832 
833         if (ext == NULL) {
834             fprintf(stderr, "Error: %s is an unknown binary filename type.\n", fileName);
835             exit(U_ILLEGAL_ARGUMENT_ERROR);
836         }
837 
838         if(uprv_strcmp(ext, ".jpg")==0 || uprv_strcmp(ext, ".jpeg")==0 || uprv_strcmp(ext, ".gif")==0 ){
839             m_type = "image";
840         } else if(uprv_strcmp(ext, ".wav")==0 || uprv_strcmp(ext, ".au")==0 ){
841             m_type = "audio";
842         } else if(uprv_strcmp(ext, ".avi")==0 || uprv_strcmp(ext, ".mpg")==0 || uprv_strcmp(ext, ".mpeg")==0){
843             m_type = "video";
844         } else if(uprv_strcmp(ext, ".txt")==0 || uprv_strcmp(ext, ".text")==0){
845             m_type = "text";
846         }
847 
848         sid = printContainer(res, bin_unit, binary_restype, m_type, id, status);
849 
850         write_tabs(out);
851 
852         write_utf8_file(out, UnicodeString(bin_source));
853 
854         tabCount+= 1;
855         write_tabs(out);
856 
857         write_utf8_file(out, UnicodeString(external_file));
858         printAttribute("href", f, (int32_t)uprv_strlen(f));
859         write_utf8_file(out, UnicodeString("/>\n"));
860         tabCount -= 1;
861         write_tabs(out);
862 
863         write_utf8_file(out, UnicodeString(close_bin_source));
864 
865         printNoteElements(&res->fComment, status);
866         tabCount -= 1;
867         write_tabs(out);
868         write_utf8_file(out, UnicodeString(close_bin_unit));
869     } else {
870         char temp[256] = {0};
871         uint32_t i = 0;
872         int32_t len=0;
873 
874         sid = printContainer(res, bin_unit, binary_restype, m_type, id, status);
875 
876         write_tabs(out);
877         write_utf8_file(out, UnicodeString(bin_source));
878 
879         tabCount += 1;
880         write_tabs(out);
881 
882         write_utf8_file(out, UnicodeString(internal_file));
883         printAttribute("form", application_mimetype, (int32_t) uprv_strlen(application_mimetype));
884 
885         while(i <res->fLength){
886             len = itostr(temp, res->fData[i], 16, 2);
887             crc = computeCRC(temp, len, crc);
888             i++;
889         }
890 
891         len = itostr(temp, crc, 10, 0);
892         printAttribute("crc", temp, len);
893 
894         write_utf8_file(out, UnicodeString(">"));
895 
896         i = 0;
897         while(i <res->fLength){
898             len = itostr(temp, res->fData[i], 16, 2);
899             write_utf8_file(out, UnicodeString(temp));
900             i += 1;
901         }
902 
903         write_utf8_file(out, UnicodeString(close_internal_file));
904 
905         tabCount -= 2;
906         write_tabs(out);
907 
908         write_utf8_file(out, UnicodeString(close_bin_source));
909         printNoteElements(&res->fComment, status);
910 
911         tabCount -= 1;
912         write_tabs(out);
913         write_utf8_file(out, UnicodeString(close_bin_unit));
914 
915         uprv_free(sid);
916         sid = NULL;
917     }
918 
919     uprv_free(fn);
920 }
921 
922 
923 
924 static void
table_write_xml(TableResource * res,const char * id,const char * language,UBool isTopLevel,UErrorCode * status)925 table_write_xml(TableResource *res, const char* id, const char* language, UBool isTopLevel, UErrorCode *status) {
926 
927     uint32_t  i         = 0;
928 
929     struct SResource *current = NULL;
930     char* sid = NULL;
931 
932     if (U_FAILURE(*status)) {
933         return ;
934     }
935 
936     sid = printContainer(res, group, table_restype, NULL, id, status);
937 
938     if(isTopLevel) {
939         sid[0] = '\0';
940     }
941 
942     current = res->fFirst;
943     i = 0;
944 
945     while (current != NULL) {
946         res_write_xml(current, sid, language, FALSE, status);
947 
948         if(U_FAILURE(*status)){
949             return;
950         }
951 
952         i += 1;
953         current = current->fNext;
954     }
955 
956     tabCount -= 1;
957     write_tabs(out);
958 
959     write_utf8_file(out, UnicodeString(close_group));
960 
961     uprv_free(sid);
962     sid = NULL;
963 }
964 
965 void
res_write_xml(struct SResource * res,const char * id,const char * language,UBool isTopLevel,UErrorCode * status)966 res_write_xml(struct SResource *res, const char* id,  const char* language, UBool isTopLevel, UErrorCode *status) {
967 
968     if (U_FAILURE(*status)) {
969         return ;
970     }
971 
972     if (res != NULL) {
973         switch (res->fType) {
974         case URES_STRING:
975              string_write_xml    (static_cast<StringResource *>(res), id, language, status);
976              return;
977 
978         case URES_ALIAS:
979              alias_write_xml     (static_cast<AliasResource *>(res), id, language, status);
980              return;
981 
982         case URES_INT_VECTOR:
983              intvector_write_xml (static_cast<IntVectorResource *>(res), id, language, status);
984              return;
985 
986         case URES_BINARY:
987              bin_write_xml       (static_cast<BinaryResource *>(res), id, language, status);
988              return;
989 
990         case URES_INT:
991              int_write_xml       (static_cast<IntResource *>(res), id, language, status);
992              return;
993 
994         case URES_ARRAY:
995              array_write_xml     (static_cast<ArrayResource *>(res), id, language, status);
996              return;
997 
998         case URES_TABLE:
999              table_write_xml     (static_cast<TableResource *>(res), id, language, isTopLevel, status);
1000              return;
1001 
1002         default:
1003             break;
1004         }
1005     }
1006 
1007     *status = U_INTERNAL_PROGRAM_ERROR;
1008 }
1009 
1010 void
bundle_write_xml(struct SRBRoot * bundle,const char * outputDir,const char * outputEnc,const char * filename,char * writtenFilename,int writtenFilenameLen,const char * language,const char * outFileName,UErrorCode * status)1011 bundle_write_xml(struct SRBRoot *bundle, const char *outputDir,const char* outputEnc, const char* filename,
1012                   char *writtenFilename, int writtenFilenameLen,
1013                   const char* language, const char* outFileName, UErrorCode *status) {
1014 
1015     char* xmlfileName = NULL;
1016     char* outputFileName = NULL;
1017     char* originalFileName = NULL;
1018     const char* fileStart = "<file xml:space = \"preserve\" source-language = \"";
1019     const char* file1 = "\" datatype = \"x-icu-resource-bundle\" ";
1020     const char* file2 = "original = \"";
1021     const char* file4 = "\" date = \"";
1022     const char* fileEnd = "</file>\n";
1023     const char* headerStart = "<header>\n";
1024     const char* headerEnd = "</header>\n";
1025     const char* bodyStart = "<body>\n";
1026     const char* bodyEnd = "</body>\n";
1027 
1028     const char *tool_start = "<tool";
1029     const char *tool_id = "genrb-" GENRB_VERSION "-icu-" U_ICU_VERSION;
1030     const char *tool_name = "genrb";
1031 
1032     char* temp = NULL;
1033     char* lang = NULL;
1034     const char* pos = NULL;
1035     int32_t first, index;
1036     time_t currTime;
1037     char timeBuf[128];
1038 
1039     outDir = outputDir;
1040 
1041     srBundle = bundle;
1042 
1043     pos = uprv_strrchr(filename, '\\');
1044     if(pos != NULL) {
1045         first = (int32_t)(pos - filename + 1);
1046     } else {
1047         first = 0;
1048     }
1049     index = (int32_t)(uprv_strlen(filename) - uprv_strlen(textExt) - first);
1050     originalFileName = (char *)uprv_malloc(sizeof(char)*index+1);
1051     uprv_memset(originalFileName, 0, sizeof(char)*index+1);
1052     uprv_strncpy(originalFileName, filename + first, index);
1053 
1054     if(uprv_strcmp(originalFileName, srBundle->fLocale) != 0) {
1055         fprintf(stdout, "Warning: The file name is not same as the resource name!\n");
1056     }
1057 
1058     temp = originalFileName;
1059     originalFileName = (char *)uprv_malloc(sizeof(char)* (uprv_strlen(temp)+uprv_strlen(textExt)) + 1);
1060     uprv_memset(originalFileName, 0, sizeof(char)* (uprv_strlen(temp)+uprv_strlen(textExt)) + 1);
1061     uprv_strcat(originalFileName, temp);
1062     uprv_strcat(originalFileName, textExt);
1063     uprv_free(temp);
1064     temp = NULL;
1065 
1066 
1067     if (language == NULL) {
1068 /*        lang = parseFilename(filename, lang);
1069         if (lang == NULL) {*/
1070             /* now check if locale name is valid or not
1071              * this is to cater for situation where
1072              * pegasusServer.txt contains
1073              *
1074              * en{
1075              *      ..
1076              * }
1077              */
1078              lang = parseFilename(srBundle->fLocale, lang);
1079              /*
1080               * Neither  the file name nor the table name inside the
1081               * txt file contain a valid country and language codes
1082               * throw an error.
1083               * pegasusServer.txt contains
1084               *
1085               * testelements{
1086               *     ....
1087               * }
1088               */
1089              if(lang==NULL){
1090                  fprintf(stderr, "Error: The file name and table name do not contain a valid language code. Please use -l option to specify it.\n");
1091                  exit(U_ILLEGAL_ARGUMENT_ERROR);
1092              }
1093        /* }*/
1094     } else {
1095         lang = (char *)uprv_malloc(sizeof(char)*uprv_strlen(language) +1);
1096         uprv_memset(lang, 0, sizeof(char)*uprv_strlen(language) +1);
1097         uprv_strcpy(lang, language);
1098     }
1099 
1100     if(outFileName) {
1101         outputFileName = (char *)uprv_malloc(sizeof(char)*uprv_strlen(outFileName) + 1);
1102         uprv_memset(outputFileName, 0, sizeof(char)*uprv_strlen(outFileName) + 1);
1103         uprv_strcpy(outputFileName,outFileName);
1104     } else {
1105         outputFileName = (char *)uprv_malloc(sizeof(char)*uprv_strlen(srBundle->fLocale) + 1);
1106         uprv_memset(outputFileName, 0, sizeof(char)*uprv_strlen(srBundle->fLocale) + 1);
1107         uprv_strcpy(outputFileName,srBundle->fLocale);
1108     }
1109 
1110     if(outputDir) {
1111         xmlfileName = (char *)uprv_malloc(sizeof(char)*(uprv_strlen(outputDir) + uprv_strlen(outputFileName) + uprv_strlen(xliffExt) + 1) +1);
1112         uprv_memset(xmlfileName, 0, sizeof(char)*(uprv_strlen(outputDir)+ uprv_strlen(outputFileName) + uprv_strlen(xliffExt) + 1) +1);
1113     } else {
1114         xmlfileName = (char *)uprv_malloc(sizeof(char)*(uprv_strlen(outputFileName) + uprv_strlen(xliffExt)) +1);
1115         uprv_memset(xmlfileName, 0, sizeof(char)*(uprv_strlen(outputFileName) + uprv_strlen(xliffExt)) +1);
1116     }
1117 
1118     if(outputDir){
1119         uprv_strcpy(xmlfileName, outputDir);
1120         if(outputDir[uprv_strlen(outputDir)-1] !=U_FILE_SEP_CHAR){
1121             uprv_strcat(xmlfileName,U_FILE_SEP_STRING);
1122         }
1123     }
1124     uprv_strcat(xmlfileName,outputFileName);
1125     uprv_strcat(xmlfileName,xliffExt);
1126 
1127     if (writtenFilename) {
1128         uprv_strncpy(writtenFilename, xmlfileName, writtenFilenameLen);
1129     }
1130 
1131     if (U_FAILURE(*status)) {
1132         goto cleanup_bundle_write_xml;
1133     }
1134 
1135     out= T_FileStream_open(xmlfileName,"w");
1136 
1137     if(out==NULL){
1138         *status = U_FILE_ACCESS_ERROR;
1139         goto cleanup_bundle_write_xml;
1140     }
1141     write_utf8_file(out, UnicodeString(xmlHeader));
1142 
1143     if(outputEnc && *outputEnc!='\0'){
1144         /* store the output encoding */
1145         enc = outputEnc;
1146         conv=ucnv_open(enc,status);
1147         if(U_FAILURE(*status)){
1148             goto cleanup_bundle_write_xml;
1149         }
1150     }
1151     write_utf8_file(out, UnicodeString(bundleStart));
1152     write_tabs(out);
1153     write_utf8_file(out, UnicodeString(fileStart));
1154     /* check if lang and language are the same */
1155     if(language != NULL && uprv_strcmp(lang, srBundle->fLocale)!=0){
1156         fprintf(stderr,"Warning: The top level tag in the resource and language specified are not the same. Please check the input.\n");
1157     }
1158     write_utf8_file(out, UnicodeString(lang));
1159     write_utf8_file(out, UnicodeString(file1));
1160     write_utf8_file(out, UnicodeString(file2));
1161     write_utf8_file(out, UnicodeString(originalFileName));
1162     write_utf8_file(out, UnicodeString(file4));
1163 
1164     time(&currTime);
1165     strftime(timeBuf, sizeof(timeBuf), "%Y-%m-%dT%H:%M:%SZ", gmtime(&currTime));
1166     write_utf8_file(out, UnicodeString(timeBuf));
1167     write_utf8_file(out, UnicodeString("\">\n"));
1168 
1169     tabCount += 1;
1170     write_tabs(out);
1171     write_utf8_file(out, UnicodeString(headerStart));
1172 
1173     tabCount += 1;
1174     write_tabs(out);
1175 
1176     write_utf8_file(out, UnicodeString(tool_start));
1177     printAttribute("tool-id", tool_id, (int32_t) uprv_strlen(tool_id));
1178     printAttribute("tool-name", tool_name, (int32_t) uprv_strlen(tool_name));
1179     write_utf8_file(out, UnicodeString("/>\n"));
1180 
1181     tabCount -= 1;
1182     write_tabs(out);
1183 
1184     write_utf8_file(out, UnicodeString(headerEnd));
1185 
1186     write_tabs(out);
1187     tabCount += 1;
1188 
1189     write_utf8_file(out, UnicodeString(bodyStart));
1190 
1191 
1192     res_write_xml(bundle->fRoot, bundle->fLocale, lang, TRUE, status);
1193 
1194     tabCount -= 1;
1195     write_tabs(out);
1196 
1197     write_utf8_file(out, UnicodeString(bodyEnd));
1198     tabCount--;
1199     write_tabs(out);
1200     write_utf8_file(out, UnicodeString(fileEnd));
1201     tabCount--;
1202     write_tabs(out);
1203     write_utf8_file(out, UnicodeString(bundleEnd));
1204     T_FileStream_close(out);
1205 
1206     ucnv_close(conv);
1207 
1208 cleanup_bundle_write_xml:
1209     uprv_free(originalFileName);
1210     uprv_free(lang);
1211     if(xmlfileName != NULL) {
1212         uprv_free(xmlfileName);
1213     }
1214     if(outputFileName != NULL){
1215         uprv_free(outputFileName);
1216     }
1217 }
1218