1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /******************************************************************************
4  *   Copyright (C) 2009-2016, International Business Machines
5  *   Corporation and others.  All Rights Reserved.
6  *******************************************************************************
7  */
8 #include "unicode/utypes.h"
9 
10 #if U_PLATFORM_HAS_WIN32_API
11 #   define VC_EXTRALEAN
12 #   define WIN32_LEAN_AND_MEAN
13 #   define NOUSER
14 #   define NOSERVICE
15 #   define NOIME
16 #   define NOMCX
17 #include <windows.h>
18 #include <time.h>
19 #   ifdef __GNUC__
20 #       define WINDOWS_WITH_GNUC
21 #   endif
22 #endif
23 
24 #if U_PLATFORM_IS_LINUX_BASED && U_HAVE_ELF_H
25 #   define U_ELF
26 #endif
27 
28 #ifdef U_ELF
29 #   include <elf.h>
30 #   if defined(ELFCLASS64)
31 #       define U_ELF64
32 #   endif
33     /* Old elf.h headers may not have EM_X86_64, or have EM_X8664 instead. */
34 #   ifndef EM_X86_64
35 #       define EM_X86_64 62
36 #   endif
37 #   define ICU_ENTRY_OFFSET 0
38 #endif
39 
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include "unicode/putil.h"
43 #include "cmemory.h"
44 #include "cstring.h"
45 #include "filestrm.h"
46 #include "toolutil.h"
47 #include "unicode/uclean.h"
48 #include "uoptions.h"
49 #include "pkg_genc.h"
50 #include "filetools.h"
51 
/*
 * Sentinel column value: the write8()/write32()/write8str() helpers treat it
 * as "nothing has been written yet" and emit no separator before the first item.
 */
#define MAX_COLUMN ((uint32_t)(0xFFFFFFFFU))

/* Hexadecimal notations for assembler constants, see AssemblyType.hexType. */
#define HEX_0X 0 /*  0x1234 */
#define HEX_0H 1 /*  01234h */
56 
57 /* prototypes --------------------------------------------------------------- */
/* Builds the output file path and the entry point name from the input file name. */
static void
getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename);

/* Writes one byte as a decimal number; returns the updated column counter. */
static uint32_t
write8(FileStream *out, uint8_t byte, uint32_t column);

/* Writes one 32-bit value as an assembler constant; returns the updated column counter. */
static uint32_t
write32(FileStream *out, uint32_t byte, uint32_t column);

#if U_PLATFORM == U_PF_OS400
/* Writes one byte as an escape sequence of a C string literal; returns the updated column counter. */
static uint32_t
write8str(FileStream *out, uint8_t byte, uint32_t column);
#endif
71 /* -------------------------------------------------------------------------- */
72 
73 /*
74 Creating Template Files for New Platforms
75 
76 Let the cc compiler help you get started.
77 Compile this program
78     const unsigned int x[5] = {1, 2, 0xdeadbeef, 0xffffffff, 16};
79 with the -S option to produce assembly output.
80 
81 For example, this will generate array.s:
82 gcc -S array.c
83 
84 This will produce a .s file that may look like this:
85 
86     .file   "array.c"
87     .version        "01.01"
88 gcc2_compiled.:
89     .globl x
90     .section        .rodata
91     .align 4
92     .type    x,@object
93     .size    x,20
94 x:
95     .long   1
96     .long   2
97     .long   -559038737
98     .long   -1
99     .long   16
100     .ident  "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)"
101 
102 which gives a starting point that will compile, and can be transformed
103 to become the template, generally with some consulting of as docs and
104 some experimentation.
105 
106 If you want ICU to automatically use this assembly, you should
107 specify "GENCCODE_ASSEMBLY=-a name" in the specific config/mh-* file,
108 where the name is the compiler or platform that you used in this
109 assemblyHeader data structure.
110 */
/*
 * Assembly output templates, one entry per supported assembler/platform.
 *
 * writeAssemblyCode() formats header and footer with sprintf(), supplying the
 * entry point name for every %s they contain (so a literal '%' must be written
 * as "%%"). beginLine is written in front of the first data word and again at
 * the start of every following line of data words.
 */
static const struct AssemblyType {
    const char *name;      /* name compared by checkAssemblyHeaderName() */
    const char *header;    /* printf-style template written before the data */
    const char *beginLine; /* directive that starts each line of 32-bit values */
    const char *footer;    /* printf-style template written after the data */
    int8_t      hexType; /* HEX_0X or HEX_0H */
} assemblyHeader[] = {
    /* For gcc assemblers, the meaning of .align changes depending on the */
    /* hardware, so we use .balign 16 which always means 16 bytes. */
    /* https://sourceware.org/binutils/docs/as/Pseudo-Ops.html */
    {"gcc",
        ".globl %s\n"
        "\t.section .note.GNU-stack,\"\",%%progbits\n"
        "\t.section .rodata\n"
        "\t.balign 16\n"
        "#ifdef U_HIDE_DATA_SYMBOL\n"
        "\t.hidden %s\n"
        "#endif\n"
        "\t.type %s,%%object\n"
        "%s:\n\n",

        ".long ",".size %s, .-%s\n",HEX_0X
    },
    {"gcc-darwin",
        /*"\t.section __TEXT,__text,regular,pure_instructions\n"
        "\t.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32\n"*/
        ".globl _%s\n"
        "#ifdef U_HIDE_DATA_SYMBOL\n"
        "\t.private_extern _%s\n"
        "#endif\n"
        "\t.data\n"
        "\t.const\n"
        "\t.balign 16\n"
        "_%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-cygwin",
        ".globl _%s\n"
        "\t.section .rodata\n"
        "\t.balign 16\n"
        "_%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-mingw64",
        ".globl %s\n"
        "\t.section .rodata\n"
        "\t.balign 16\n"
        "%s:\n\n",

        ".long ","",HEX_0X
    },
/* 16 bytes alignment. */
/* http://docs.oracle.com/cd/E19641-01/802-1947/802-1947.pdf */
    {"sun",
        "\t.section \".rodata\"\n"
        "\t.align   16\n"
        ".globl     %s\n"
        "%s:\n",

        ".word ","",HEX_0X
    },
/* 16 bytes alignment for sun-x86. */
/* http://docs.oracle.com/cd/E19963-01/html/821-1608/eoiyg.html */
    {"sun-x86",
        "Drodata.rodata:\n"
        "\t.type   Drodata.rodata,@object\n"
        "\t.size   Drodata.rodata,0\n"
        "\t.globl  %s\n"
        "\t.align  16\n"
        "%s:\n",

        ".4byte ","",HEX_0X
    },
/* 1<<4 bit alignment for aix. */
/* http://pic.dhe.ibm.com/infocenter/aix/v6r1/index.jsp?topic=%2Fcom.ibm.aix.aixassem%2Fdoc%2Falangref%2Fidalangref_csect_pseudoop.htm */
    {"xlc",
        ".globl %s{RO}\n"
        "\t.toc\n"
        "%s:\n"
        "\t.csect %s{RO}, 4\n",

        ".long ","",HEX_0X
    },
    {"aCC-ia64",
        "\t.file   \"%s.s\"\n"
        "\t.type   %s,@object\n"
        "\t.global %s\n"
        "\t.secalias .abe$0.rodata, \".rodata\"\n"
        "\t.section .abe$0.rodata = \"a\", \"progbits\"\n"
        "\t.align  16\n"
        "%s::\t",

        "data4 ","",HEX_0X
    },
    {"aCC-parisc",
        "\t.SPACE  $TEXT$\n"
        "\t.SUBSPA $LIT$\n"
        "%s\n"
        "\t.EXPORT %s\n"
        "\t.ALIGN  16\n",

        ".WORD ","",HEX_0X
    },
/* align 16 bytes */
/*  http://msdn.microsoft.com/en-us/library/dwa9fwef.aspx */
    { "masm",
      "\tTITLE %s\n"
      "; generated by genccode\n"
      ".386\n"
      ".model flat\n"
      "\tPUBLIC _%s\n"
      "ICUDATA_%s\tSEGMENT READONLY PARA PUBLIC FLAT 'DATA'\n"
      "\tALIGN 16\n"
      "_%s\tLABEL DWORD\n",
      "\tDWORD ","\nICUDATA_%s\tENDS\n\tEND\n",HEX_0H
    }
};
230 
/* Index into assemblyHeader[] selected by checkAssemblyHeaderName(); -1 while nothing is selected. */
static int32_t assemblyHeaderIndex = -1;
/* Hex notation (HEX_0X or HEX_0H) used by write32(); set by checkAssemblyHeaderName(). */
static int32_t hexType = HEX_0X;
233 
234 U_CAPI UBool U_EXPORT2
checkAssemblyHeaderName(const char * optAssembly)235 checkAssemblyHeaderName(const char* optAssembly) {
236     int32_t idx;
237     assemblyHeaderIndex = -1;
238     for (idx = 0; idx < UPRV_LENGTHOF(assemblyHeader); idx++) {
239         if (uprv_strcmp(optAssembly, assemblyHeader[idx].name) == 0) {
240             assemblyHeaderIndex = idx;
241             hexType = assemblyHeader[idx].hexType; /* set the hex type */
242             return TRUE;
243         }
244     }
245 
246     return FALSE;
247 }
248 
249 
250 U_CAPI void U_EXPORT2
printAssemblyHeadersToStdErr(void)251 printAssemblyHeadersToStdErr(void) {
252     int32_t idx;
253     fprintf(stderr, "%s", assemblyHeader[0].name);
254     for (idx = 1; idx < UPRV_LENGTHOF(assemblyHeader); idx++) {
255         fprintf(stderr, ", %s", assemblyHeader[idx].name);
256     }
257     fprintf(stderr,
258         ")\n");
259 }
260 
261 U_CAPI void U_EXPORT2
writeAssemblyCode(const char * filename,const char * destdir,const char * optEntryPoint,const char * optFilename,char * outFilePath)262 writeAssemblyCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optFilename, char *outFilePath) {
263     uint32_t column = MAX_COLUMN;
264     char entry[64];
265     uint32_t buffer[1024];
266     char *bufferStr = (char *)buffer;
267     FileStream *in, *out;
268     size_t i, length;
269 
270     in=T_FileStream_open(filename, "rb");
271     if(in==NULL) {
272         fprintf(stderr, "genccode: unable to open input file %s\n", filename);
273         exit(U_FILE_ACCESS_ERROR);
274     }
275 
276     getOutFilename(filename, destdir, bufferStr, entry, ".S", optFilename);
277     out=T_FileStream_open(bufferStr, "w");
278     if(out==NULL) {
279         fprintf(stderr, "genccode: unable to open output file %s\n", bufferStr);
280         exit(U_FILE_ACCESS_ERROR);
281     }
282 
283     if (outFilePath != NULL) {
284         uprv_strcpy(outFilePath, bufferStr);
285     }
286 
287 #if defined (WINDOWS_WITH_GNUC) && U_PLATFORM != U_PF_CYGWIN
288     /* Need to fix the file separator character when using MinGW. */
289     swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/');
290 #endif
291 
292     if(optEntryPoint != NULL) {
293         uprv_strcpy(entry, optEntryPoint);
294         uprv_strcat(entry, "_dat");
295     }
296 
297     /* turn dashes or dots in the entry name into underscores */
298     length=uprv_strlen(entry);
299     for(i=0; i<length; ++i) {
300         if(entry[i]=='-' || entry[i]=='.') {
301             entry[i]='_';
302         }
303     }
304 
305     sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].header,
306         entry, entry, entry, entry,
307         entry, entry, entry, entry);
308     T_FileStream_writeLine(out, bufferStr);
309     T_FileStream_writeLine(out, assemblyHeader[assemblyHeaderIndex].beginLine);
310 
311     for(;;) {
312         length=T_FileStream_read(in, buffer, sizeof(buffer));
313         if(length==0) {
314             break;
315         }
316         if (length != sizeof(buffer)) {
317             /* pad with extra 0's when at the end of the file */
318             for(i=0; i < (length % sizeof(uint32_t)); ++i) {
319                 buffer[length+i] = 0;
320             }
321         }
322         for(i=0; i<(length/sizeof(buffer[0])); i++) {
323             column = write32(out, buffer[i], column);
324         }
325     }
326 
327     T_FileStream_writeLine(out, "\n");
328 
329     sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].footer,
330         entry, entry, entry, entry,
331         entry, entry, entry, entry);
332     T_FileStream_writeLine(out, bufferStr);
333 
334     if(T_FileStream_error(in)) {
335         fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
336         exit(U_FILE_ACCESS_ERROR);
337     }
338 
339     if(T_FileStream_error(out)) {
340         fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
341         exit(U_FILE_ACCESS_ERROR);
342     }
343 
344     T_FileStream_close(out);
345     T_FileStream_close(in);
346 }
347 
348 U_CAPI void U_EXPORT2
writeCCode(const char * filename,const char * destdir,const char * optName,const char * optFilename,char * outFilePath)349 writeCCode(const char *filename, const char *destdir, const char *optName, const char *optFilename, char *outFilePath) {
350     uint32_t column = MAX_COLUMN;
351     char buffer[4096], entry[64];
352     FileStream *in, *out;
353     size_t i, length;
354 
355     in=T_FileStream_open(filename, "rb");
356     if(in==NULL) {
357         fprintf(stderr, "genccode: unable to open input file %s\n", filename);
358         exit(U_FILE_ACCESS_ERROR);
359     }
360 
361     if(optName != NULL) { /* prepend  'icudt28_' */
362       strcpy(entry, optName);
363       strcat(entry, "_");
364     } else {
365       entry[0] = 0;
366     }
367 
368     getOutFilename(filename, destdir, buffer, entry+uprv_strlen(entry), ".c", optFilename);
369     if (outFilePath != NULL) {
370         uprv_strcpy(outFilePath, buffer);
371     }
372     out=T_FileStream_open(buffer, "w");
373     if(out==NULL) {
374         fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
375         exit(U_FILE_ACCESS_ERROR);
376     }
377 
378     /* turn dashes or dots in the entry name into underscores */
379     length=uprv_strlen(entry);
380     for(i=0; i<length; ++i) {
381         if(entry[i]=='-' || entry[i]=='.') {
382             entry[i]='_';
383         }
384     }
385 
386 #if U_PLATFORM == U_PF_OS400
387     /*
388     TODO: Fix this once the compiler implements this feature. Keep in sync with udatamem.c
389 
390     This is here because this platform can't currently put
391     const data into the read-only pages of an object or
392     shared library (service program). Only strings are allowed in read-only
393     pages, so we use char * strings to store the data.
394 
395     In order to prevent the beginning of the data from ever matching the
396     magic numbers we must still use the initial double.
397     [grhoten 4/24/2003]
398     */
399     sprintf(buffer,
400         "#ifndef IN_GENERATED_CCODE\n"
401         "#define IN_GENERATED_CCODE\n"
402         "#define U_DISABLE_RENAMING 1\n"
403         "#include \"unicode/umachine.h\"\n"
404         "#endif\n"
405         "U_CDECL_BEGIN\n"
406         "const struct {\n"
407         "    double bogus;\n"
408         "    const char *bytes; \n"
409         "} %s={ 0.0, \n",
410         entry);
411     T_FileStream_writeLine(out, buffer);
412 
413     for(;;) {
414         length=T_FileStream_read(in, buffer, sizeof(buffer));
415         if(length==0) {
416             break;
417         }
418         for(i=0; i<length; ++i) {
419             column = write8str(out, (uint8_t)buffer[i], column);
420         }
421     }
422 
423     T_FileStream_writeLine(out, "\"\n};\nU_CDECL_END\n");
424 #else
425     /* Function renaming shouldn't be done in data */
426     sprintf(buffer,
427         "#ifndef IN_GENERATED_CCODE\n"
428         "#define IN_GENERATED_CCODE\n"
429         "#define U_DISABLE_RENAMING 1\n"
430         "#include \"unicode/umachine.h\"\n"
431         "#endif\n"
432         "U_CDECL_BEGIN\n"
433         "const struct {\n"
434         "    double bogus;\n"
435         "    uint8_t bytes[%ld]; \n"
436         "} %s={ 0.0, {\n",
437         (long)T_FileStream_size(in), entry);
438     T_FileStream_writeLine(out, buffer);
439 
440     for(;;) {
441         length=T_FileStream_read(in, buffer, sizeof(buffer));
442         if(length==0) {
443             break;
444         }
445         for(i=0; i<length; ++i) {
446             column = write8(out, (uint8_t)buffer[i], column);
447         }
448     }
449 
450     T_FileStream_writeLine(out, "\n}\n};\nU_CDECL_END\n");
451 #endif
452 
453     if(T_FileStream_error(in)) {
454         fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
455         exit(U_FILE_ACCESS_ERROR);
456     }
457 
458     if(T_FileStream_error(out)) {
459         fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
460         exit(U_FILE_ACCESS_ERROR);
461     }
462 
463     T_FileStream_close(out);
464     T_FileStream_close(in);
465 }
466 
467 static uint32_t
write32(FileStream * out,uint32_t bitField,uint32_t column)468 write32(FileStream *out, uint32_t bitField, uint32_t column) {
469     int32_t i;
470     char bitFieldStr[64]; /* This is more bits than needed for a 32-bit number */
471     char *s = bitFieldStr;
472     uint8_t *ptrIdx = (uint8_t *)&bitField;
473     static const char hexToStr[16] = {
474         '0','1','2','3',
475         '4','5','6','7',
476         '8','9','A','B',
477         'C','D','E','F'
478     };
479 
480     /* write the value, possibly with comma and newline */
481     if(column==MAX_COLUMN) {
482         /* first byte */
483         column=1;
484     } else if(column<32) {
485         *(s++)=',';
486         ++column;
487     } else {
488         *(s++)='\n';
489         uprv_strcpy(s, assemblyHeader[assemblyHeaderIndex].beginLine);
490         s+=uprv_strlen(s);
491         column=1;
492     }
493 
494     if (bitField < 10) {
495         /* It's a small number. Don't waste the space for 0x */
496         *(s++)=hexToStr[bitField];
497     }
498     else {
499         int seenNonZero = 0; /* This is used to remove leading zeros */
500 
501         if(hexType==HEX_0X) {
502          *(s++)='0';
503          *(s++)='x';
504         } else if(hexType==HEX_0H) {
505          *(s++)='0';
506         }
507 
508         /* This creates a 32-bit field */
509 #if U_IS_BIG_ENDIAN
510         for (i = 0; i < sizeof(uint32_t); i++)
511 #else
512         for (i = sizeof(uint32_t)-1; i >= 0 ; i--)
513 #endif
514         {
515             uint8_t value = ptrIdx[i];
516             if (value || seenNonZero) {
517                 *(s++)=hexToStr[value>>4];
518                 *(s++)=hexToStr[value&0xF];
519                 seenNonZero = 1;
520             }
521         }
522         if(hexType==HEX_0H) {
523          *(s++)='h';
524         }
525     }
526 
527     *(s++)=0;
528     T_FileStream_writeLine(out, bitFieldStr);
529     return column;
530 }
531 
532 static uint32_t
write8(FileStream * out,uint8_t byte,uint32_t column)533 write8(FileStream *out, uint8_t byte, uint32_t column) {
534     char s[4];
535     int i=0;
536 
537     /* convert the byte value to a string */
538     if(byte>=100) {
539         s[i++]=(char)('0'+byte/100);
540         byte%=100;
541     }
542     if(i>0 || byte>=10) {
543         s[i++]=(char)('0'+byte/10);
544         byte%=10;
545     }
546     s[i++]=(char)('0'+byte);
547     s[i]=0;
548 
549     /* write the value, possibly with comma and newline */
550     if(column==MAX_COLUMN) {
551         /* first byte */
552         column=1;
553     } else if(column<16) {
554         T_FileStream_writeLine(out, ",");
555         ++column;
556     } else {
557         T_FileStream_writeLine(out, ",\n");
558         column=1;
559     }
560     T_FileStream_writeLine(out, s);
561     return column;
562 }
563 
564 #if U_PLATFORM == U_PF_OS400
565 static uint32_t
write8str(FileStream * out,uint8_t byte,uint32_t column)566 write8str(FileStream *out, uint8_t byte, uint32_t column) {
567     char s[8];
568 
569     if (byte > 7)
570         sprintf(s, "\\x%X", byte);
571     else
572         sprintf(s, "\\%X", byte);
573 
574     /* write the value, possibly with comma and newline */
575     if(column==MAX_COLUMN) {
576         /* first byte */
577         column=1;
578         T_FileStream_writeLine(out, "\"");
579     } else if(column<24) {
580         ++column;
581     } else {
582         T_FileStream_writeLine(out, "\"\n\"");
583         column=1;
584     }
585     T_FileStream_writeLine(out, s);
586     return column;
587 }
588 #endif
589 
590 static void
getOutFilename(const char * inFilename,const char * destdir,char * outFilename,char * entryName,const char * newSuffix,const char * optFilename)591 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename) {
592     const char *basename=findBasename(inFilename), *suffix=uprv_strrchr(basename, '.');
593 
594     /* copy path */
595     if(destdir!=NULL && *destdir!=0) {
596         do {
597             *outFilename++=*destdir++;
598         } while(*destdir!=0);
599         if(*(outFilename-1)!=U_FILE_SEP_CHAR) {
600             *outFilename++=U_FILE_SEP_CHAR;
601         }
602         inFilename=basename;
603     } else {
604         while(inFilename<basename) {
605             *outFilename++=*inFilename++;
606         }
607     }
608 
609     if(suffix==NULL) {
610         /* the filename does not have a suffix */
611         uprv_strcpy(entryName, inFilename);
612         if(optFilename != NULL) {
613           uprv_strcpy(outFilename, optFilename);
614         } else {
615           uprv_strcpy(outFilename, inFilename);
616         }
617         uprv_strcat(outFilename, newSuffix);
618     } else {
619         char *saveOutFilename = outFilename;
620         /* copy basename */
621         while(inFilename<suffix) {
622             if(*inFilename=='-') {
623                 /* iSeries cannot have '-' in the .o objects. */
624                 *outFilename++=*entryName++='_';
625                 inFilename++;
626             }
627             else {
628                 *outFilename++=*entryName++=*inFilename++;
629             }
630         }
631 
632         /* replace '.' by '_' */
633         *outFilename++=*entryName++='_';
634         ++inFilename;
635 
636         /* copy suffix */
637         while(*inFilename!=0) {
638             *outFilename++=*entryName++=*inFilename++;
639         }
640 
641         *entryName=0;
642 
643         if(optFilename != NULL) {
644             uprv_strcpy(saveOutFilename, optFilename);
645             uprv_strcat(saveOutFilename, newSuffix);
646         } else {
647             /* add ".c" */
648             uprv_strcpy(outFilename, newSuffix);
649         }
650     }
651 }
652 
653 #ifdef CAN_GENERATE_OBJECTS
654 static void
getArchitecture(uint16_t * pCPU,uint16_t * pBits,UBool * pIsBigEndian,const char * optMatchArch)655 getArchitecture(uint16_t *pCPU, uint16_t *pBits, UBool *pIsBigEndian, const char *optMatchArch) {
656     union {
657         char        bytes[2048];
658 #ifdef U_ELF
659         Elf32_Ehdr  header32;
660         /* Elf32_Ehdr and ELF64_Ehdr are identical for the necessary fields. */
661 #elif U_PLATFORM_HAS_WIN32_API
662         IMAGE_FILE_HEADER header;
663 #endif
664     } buffer;
665 
666     const char *filename;
667     FileStream *in;
668     int32_t length;
669 
670 #ifdef U_ELF
671 
672 #elif U_PLATFORM_HAS_WIN32_API
673     const IMAGE_FILE_HEADER *pHeader;
674 #else
675 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
676 #endif
677 
678     if(optMatchArch != NULL) {
679         filename=optMatchArch;
680     } else {
681         /* set defaults */
682 #ifdef U_ELF
683         /* set EM_386 because elf.h does not provide better defaults */
684         *pCPU=EM_386;
685         *pBits=32;
686         *pIsBigEndian=(UBool)(U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB);
687 #elif U_PLATFORM_HAS_WIN32_API
688 /* _M_IA64 should be defined in windows.h */
689 #   if defined(_M_IA64)
690         *pCPU=IMAGE_FILE_MACHINE_IA64;
691         *pBits = 64;
692 #   elif defined(_M_AMD64)
693 // link.exe does not really care about the .obj machine type and this will
694 // allow us to build a dll for both ARM & x64 with an amd64 built tool
695 // ARM is same as x64 except for first 2 bytes of object file
696         *pCPU = IMAGE_FILE_MACHINE_UNKNOWN;
697         // *pCPU = IMAGE_FILE_MACHINE_ARMNT;   // If we wanted to be explicit
698         // *pCPU = IMAGE_FILE_MACHINE_AMD64;   // We would use one of these names
699         *pBits = 64;                           // Doesn't seem to be used for anything interesting?
700 #   else
701         *pCPU=IMAGE_FILE_MACHINE_I386;    // We would use one of these names
702         *pBits = 32;
703 #   endif
704         *pIsBigEndian=FALSE;
705 #else
706 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
707 #endif
708         return;
709     }
710 
711     in=T_FileStream_open(filename, "rb");
712     if(in==NULL) {
713         fprintf(stderr, "genccode: unable to open match-arch file %s\n", filename);
714         exit(U_FILE_ACCESS_ERROR);
715     }
716     length=T_FileStream_read(in, buffer.bytes, sizeof(buffer.bytes));
717 
718 #ifdef U_ELF
719     if(length<(int32_t)sizeof(Elf32_Ehdr)) {
720         fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
721         exit(U_UNSUPPORTED_ERROR);
722     }
723     if(
724         buffer.header32.e_ident[0]!=ELFMAG0 ||
725         buffer.header32.e_ident[1]!=ELFMAG1 ||
726         buffer.header32.e_ident[2]!=ELFMAG2 ||
727         buffer.header32.e_ident[3]!=ELFMAG3 ||
728         buffer.header32.e_ident[EI_CLASS]<ELFCLASS32 || buffer.header32.e_ident[EI_CLASS]>ELFCLASS64
729     ) {
730         fprintf(stderr, "genccode: match-arch file %s is not an ELF object file, or not supported\n", filename);
731         exit(U_UNSUPPORTED_ERROR);
732     }
733 
734     *pBits= buffer.header32.e_ident[EI_CLASS]==ELFCLASS32 ? 32 : 64; /* only 32 or 64: see check above */
735 #ifdef U_ELF64
736     if(*pBits!=32 && *pBits!=64) {
737         fprintf(stderr, "genccode: currently only supports 32-bit and 64-bit ELF format\n");
738         exit(U_UNSUPPORTED_ERROR);
739     }
740 #else
741     if(*pBits!=32) {
742         fprintf(stderr, "genccode: built with elf.h missing 64-bit definitions\n");
743         exit(U_UNSUPPORTED_ERROR);
744     }
745 #endif
746 
747     *pIsBigEndian=(UBool)(buffer.header32.e_ident[EI_DATA]==ELFDATA2MSB);
748     if(*pIsBigEndian!=U_IS_BIG_ENDIAN) {
749         fprintf(stderr, "genccode: currently only same-endianness ELF formats are supported\n");
750         exit(U_UNSUPPORTED_ERROR);
751     }
752     /* TODO: Support byte swapping */
753 
754     *pCPU=buffer.header32.e_machine;
755 #elif U_PLATFORM_HAS_WIN32_API
756     if(length<sizeof(IMAGE_FILE_HEADER)) {
757         fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
758         exit(U_UNSUPPORTED_ERROR);
759     }
760     /* TODO: Use buffer.header.  Keep aliasing legal.  */
761     pHeader=(const IMAGE_FILE_HEADER *)buffer.bytes;
762     *pCPU=pHeader->Machine;
763     /*
764      * The number of bits is implicit with the Machine value.
765      * *pBits is ignored in the calling code, so this need not be precise.
766      */
767     *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
768     /* Windows always runs on little-endian CPUs. */
769     *pIsBigEndian=FALSE;
770 #else
771 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
772 #endif
773 
774     T_FileStream_close(in);
775 }
776 
777 U_CAPI void U_EXPORT2
writeObjectCode(const char * filename,const char * destdir,const char * optEntryPoint,const char * optMatchArch,const char * optFilename,char * outFilePath)778 writeObjectCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optMatchArch, const char *optFilename, char *outFilePath) {
779     /* common variables */
780     char buffer[4096], entry[96]={ 0 };
781     FileStream *in, *out;
782     const char *newSuffix;
783     int32_t i, entryLength, length, size, entryOffset=0, entryLengthOffset=0;
784 
785     uint16_t cpu, bits;
786     UBool makeBigEndian;
787 
788     /* platform-specific variables and initialization code */
789 #ifdef U_ELF
790     /* 32-bit Elf file header */
791     static Elf32_Ehdr header32={
792         {
793             /* e_ident[] */
794             ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
795             ELFCLASS32,
796             U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
797             EV_CURRENT /* EI_VERSION */
798         },
799         ET_REL,
800         EM_386,
801         EV_CURRENT, /* e_version */
802         0, /* e_entry */
803         0, /* e_phoff */
804         (Elf32_Off)sizeof(Elf32_Ehdr), /* e_shoff */
805         0, /* e_flags */
806         (Elf32_Half)sizeof(Elf32_Ehdr), /* eh_size */
807         0, /* e_phentsize */
808         0, /* e_phnum */
809         (Elf32_Half)sizeof(Elf32_Shdr), /* e_shentsize */
810         5, /* e_shnum */
811         2 /* e_shstrndx */
812     };
813 
814     /* 32-bit Elf section header table */
815     static Elf32_Shdr sectionHeaders32[5]={
816         { /* SHN_UNDEF */
817             0, 0, 0, 0, 0, 0, 0, 0, 0, 0
818         },
819         { /* .symtab */
820             1, /* sh_name */
821             SHT_SYMTAB,
822             0, /* sh_flags */
823             0, /* sh_addr */
824             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)), /* sh_offset */
825             (Elf32_Word)(2*sizeof(Elf32_Sym)), /* sh_size */
826             3, /* sh_link=sect hdr index of .strtab */
827             1, /* sh_info=One greater than the symbol table index of the last
828                 * local symbol (with STB_LOCAL). */
829             4, /* sh_addralign */
830             (Elf32_Word)(sizeof(Elf32_Sym)) /* sh_entsize */
831         },
832         { /* .shstrtab */
833             9, /* sh_name */
834             SHT_STRTAB,
835             0, /* sh_flags */
836             0, /* sh_addr */
837             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)), /* sh_offset */
838             40, /* sh_size */
839             0, /* sh_link */
840             0, /* sh_info */
841             1, /* sh_addralign */
842             0 /* sh_entsize */
843         },
844         { /* .strtab */
845             19, /* sh_name */
846             SHT_STRTAB,
847             0, /* sh_flags */
848             0, /* sh_addr */
849             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40), /* sh_offset */
850             (Elf32_Word)sizeof(entry), /* sh_size */
851             0, /* sh_link */
852             0, /* sh_info */
853             1, /* sh_addralign */
854             0 /* sh_entsize */
855         },
856         { /* .rodata */
857             27, /* sh_name */
858             SHT_PROGBITS,
859             SHF_ALLOC, /* sh_flags */
860             0, /* sh_addr */
861             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40+sizeof(entry)), /* sh_offset */
862             0, /* sh_size */
863             0, /* sh_link */
864             0, /* sh_info */
865             16, /* sh_addralign */
866             0 /* sh_entsize */
867         }
868     };
869 
870     /* symbol table */
871     static Elf32_Sym symbols32[2]={
872         { /* STN_UNDEF */
873             0, 0, 0, 0, 0, 0
874         },
875         { /* data entry point */
876             1, /* st_name */
877             0, /* st_value */
878             0, /* st_size */
879             ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
880             0, /* st_other */
881             4 /* st_shndx=index of related section table entry */
882         }
883     };
884 
885     /* section header string table, with decimal string offsets */
886     static const char sectionStrings[40]=
887         /*  0 */ "\0"
888         /*  1 */ ".symtab\0"
889         /*  9 */ ".shstrtab\0"
890         /* 19 */ ".strtab\0"
891         /* 27 */ ".rodata\0"
892         /* 35 */ "\0\0\0\0"; /* contains terminating NUL */
893         /* 40: padded to multiple of 8 bytes */
894 
895     /*
896      * Use entry[] for the string table which will contain only the
897      * entry point name.
898      * entry[0] must be 0 (NUL)
899      * The entry point name can be up to 38 characters long (sizeof(entry)-2).
900      */
901 
902     /* 16-align .rodata in the .o file, just in case */
903     static const char padding[16]={ 0 };
904     int32_t paddingSize;
905 
906 #ifdef U_ELF64
907     /* 64-bit Elf file header */
908     static Elf64_Ehdr header64={
909         {
910             /* e_ident[] */
911             ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
912             ELFCLASS64,
913             U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
914             EV_CURRENT /* EI_VERSION */
915         },
916         ET_REL,
917         EM_X86_64,
918         EV_CURRENT, /* e_version */
919         0, /* e_entry */
920         0, /* e_phoff */
921         (Elf64_Off)sizeof(Elf64_Ehdr), /* e_shoff */
922         0, /* e_flags */
923         (Elf64_Half)sizeof(Elf64_Ehdr), /* eh_size */
924         0, /* e_phentsize */
925         0, /* e_phnum */
926         (Elf64_Half)sizeof(Elf64_Shdr), /* e_shentsize */
927         5, /* e_shnum */
928         2 /* e_shstrndx */
929     };
930 
931     /* 64-bit Elf section header table */
932     static Elf64_Shdr sectionHeaders64[5]={
933         { /* SHN_UNDEF */
934             0, 0, 0, 0, 0, 0, 0, 0, 0, 0
935         },
936         { /* .symtab */
937             1, /* sh_name */
938             SHT_SYMTAB,
939             0, /* sh_flags */
940             0, /* sh_addr */
941             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)), /* sh_offset */
942             (Elf64_Xword)(2*sizeof(Elf64_Sym)), /* sh_size */
943             3, /* sh_link=sect hdr index of .strtab */
944             1, /* sh_info=One greater than the symbol table index of the last
945                 * local symbol (with STB_LOCAL). */
946             4, /* sh_addralign */
947             (Elf64_Xword)(sizeof(Elf64_Sym)) /* sh_entsize */
948         },
949         { /* .shstrtab */
950             9, /* sh_name */
951             SHT_STRTAB,
952             0, /* sh_flags */
953             0, /* sh_addr */
954             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)), /* sh_offset */
955             40, /* sh_size */
956             0, /* sh_link */
957             0, /* sh_info */
958             1, /* sh_addralign */
959             0 /* sh_entsize */
960         },
961         { /* .strtab */
962             19, /* sh_name */
963             SHT_STRTAB,
964             0, /* sh_flags */
965             0, /* sh_addr */
966             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40), /* sh_offset */
967             (Elf64_Xword)sizeof(entry), /* sh_size */
968             0, /* sh_link */
969             0, /* sh_info */
970             1, /* sh_addralign */
971             0 /* sh_entsize */
972         },
973         { /* .rodata */
974             27, /* sh_name */
975             SHT_PROGBITS,
976             SHF_ALLOC, /* sh_flags */
977             0, /* sh_addr */
978             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40+sizeof(entry)), /* sh_offset */
979             0, /* sh_size */
980             0, /* sh_link */
981             0, /* sh_info */
982             16, /* sh_addralign */
983             0 /* sh_entsize */
984         }
985     };
986 
    /*
     * 64-bit symbol table
     * Careful: the member order differs from that of Elf32_Sym!
     */
991     static Elf64_Sym symbols64[2]={
992         { /* STN_UNDEF */
993             0, 0, 0, 0, 0, 0
994         },
995         { /* data entry point */
996             1, /* st_name */
997             ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
998             0, /* st_other */
999             4, /* st_shndx=index of related section table entry */
1000             0, /* st_value */
1001             0 /* st_size */
1002         }
1003     };
1004 
1005 #endif /* U_ELF64 */
1006 
    /* entry[] has a leading NUL */
1008     entryOffset=1;
1009 
1010     /* in the common code, count entryLength from after the NUL */
1011     entryLengthOffset=1;
1012 
1013     newSuffix=".o";
1014 
1015 #elif U_PLATFORM_HAS_WIN32_API
1016     struct {
1017         IMAGE_FILE_HEADER fileHeader;
1018         IMAGE_SECTION_HEADER sections[2];
1019         char linkerOptions[100];
1020     } objHeader;
1021     IMAGE_SYMBOL symbols[1];
1022     struct {
1023         DWORD sizeofLongNames;
1024         char longNames[100];
1025     } symbolNames;
1026 
    /*
     * entry sometimes has a leading '_'.
     * The '_' is overwritten if entryOffset==0, which depends on the
     * target platform; see the check for cpu below.
     */
1032     entry[0]='_';
1033 
1034     newSuffix=".obj";
1035 #else
1036 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
1037 #endif
1038 
1039     /* deal with options, files and the entry point name */
1040     getArchitecture(&cpu, &bits, &makeBigEndian, optMatchArch);
1041     if (optMatchArch)
1042     {
1043         printf("genccode: --match-arch cpu=%hu bits=%hu big-endian=%d\n", cpu, bits, makeBigEndian);
1044     }
1045     else
1046     {
1047         printf("genccode: using architecture cpu=%hu bits=%hu big-endian=%d\n", cpu, bits, makeBigEndian);
1048     }
1049 #if U_PLATFORM_HAS_WIN32_API
1050     if(cpu==IMAGE_FILE_MACHINE_I386) {
1051         entryOffset=1;
1052     }
1053 #endif
1054 
1055     in=T_FileStream_open(filename, "rb");
1056     if(in==NULL) {
1057         fprintf(stderr, "genccode: unable to open input file %s\n", filename);
1058         exit(U_FILE_ACCESS_ERROR);
1059     }
1060     size=T_FileStream_size(in);
1061 
1062     getOutFilename(filename, destdir, buffer, entry+entryOffset, newSuffix, optFilename);
1063     if (outFilePath != NULL) {
1064         uprv_strcpy(outFilePath, buffer);
1065     }
1066 
1067     if(optEntryPoint != NULL) {
1068         uprv_strcpy(entry+entryOffset, optEntryPoint);
1069         uprv_strcat(entry+entryOffset, "_dat");
1070     }
1071     /* turn dashes in the entry name into underscores */
1072     entryLength=(int32_t)uprv_strlen(entry+entryLengthOffset);
1073     for(i=0; i<entryLength; ++i) {
1074         if(entry[entryLengthOffset+i]=='-') {
1075             entry[entryLengthOffset+i]='_';
1076         }
1077     }
1078 
1079     /* open the output file */
1080     out=T_FileStream_open(buffer, "wb");
1081     if(out==NULL) {
1082         fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
1083         exit(U_FILE_ACCESS_ERROR);
1084     }
1085 
1086 #ifdef U_ELF
1087     if(bits==32) {
1088         header32.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1089         header32.e_machine=cpu;
1090 
1091         /* 16-align .rodata in the .o file, just in case */
1092         paddingSize=sectionHeaders32[4].sh_offset & 0xf;
1093         if(paddingSize!=0) {
1094                 paddingSize=0x10-paddingSize;
1095                 sectionHeaders32[4].sh_offset+=paddingSize;
1096         }
1097 
1098         sectionHeaders32[4].sh_size=(Elf32_Word)size;
1099 
1100         symbols32[1].st_size=(Elf32_Word)size;
1101 
1102         /* write .o headers */
1103         T_FileStream_write(out, &header32, (int32_t)sizeof(header32));
1104         T_FileStream_write(out, sectionHeaders32, (int32_t)sizeof(sectionHeaders32));
1105         T_FileStream_write(out, symbols32, (int32_t)sizeof(symbols32));
1106     } else /* bits==64 */ {
1107 #ifdef U_ELF64
1108         header64.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1109         header64.e_machine=cpu;
1110 
1111         /* 16-align .rodata in the .o file, just in case */
1112         paddingSize=sectionHeaders64[4].sh_offset & 0xf;
1113         if(paddingSize!=0) {
1114                 paddingSize=0x10-paddingSize;
1115                 sectionHeaders64[4].sh_offset+=paddingSize;
1116         }
1117 
1118         sectionHeaders64[4].sh_size=(Elf64_Xword)size;
1119 
1120         symbols64[1].st_size=(Elf64_Xword)size;
1121 
1122         /* write .o headers */
1123         T_FileStream_write(out, &header64, (int32_t)sizeof(header64));
1124         T_FileStream_write(out, sectionHeaders64, (int32_t)sizeof(sectionHeaders64));
1125         T_FileStream_write(out, symbols64, (int32_t)sizeof(symbols64));
1126 #endif
1127     }
1128 
1129     T_FileStream_write(out, sectionStrings, (int32_t)sizeof(sectionStrings));
1130     T_FileStream_write(out, entry, (int32_t)sizeof(entry));
1131     if(paddingSize!=0) {
1132         T_FileStream_write(out, padding, paddingSize);
1133     }
1134 #elif U_PLATFORM_HAS_WIN32_API
1135     /* populate the .obj headers */
1136     uprv_memset(&objHeader, 0, sizeof(objHeader));
1137     uprv_memset(&symbols, 0, sizeof(symbols));
1138     uprv_memset(&symbolNames, 0, sizeof(symbolNames));
1139 
1140     /* write the linker export directive */
1141     uprv_strcpy(objHeader.linkerOptions, "-export:");
1142     length=8;
1143     uprv_strcpy(objHeader.linkerOptions+length, entry);
1144     length+=entryLength;
1145     uprv_strcpy(objHeader.linkerOptions+length, ",data ");
1146     length+=6;
1147 
1148     /* set the file header */
1149     objHeader.fileHeader.Machine=cpu;
1150     objHeader.fileHeader.NumberOfSections=2;
1151     objHeader.fileHeader.TimeDateStamp=(DWORD)time(NULL);
1152     objHeader.fileHeader.PointerToSymbolTable=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length+size; /* start of symbol table */
1153     objHeader.fileHeader.NumberOfSymbols=1;
1154 
1155     /* set the section for the linker options */
1156     uprv_strncpy((char *)objHeader.sections[0].Name, ".drectve", 8);
1157     objHeader.sections[0].SizeOfRawData=length;
1158     objHeader.sections[0].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER;
1159     objHeader.sections[0].Characteristics=IMAGE_SCN_LNK_INFO|IMAGE_SCN_LNK_REMOVE|IMAGE_SCN_ALIGN_1BYTES;
1160 
1161     /* set the data section */
1162     uprv_strncpy((char *)objHeader.sections[1].Name, ".rdata", 6);
1163     objHeader.sections[1].SizeOfRawData=size;
1164     objHeader.sections[1].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length;
1165     objHeader.sections[1].Characteristics=IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_ALIGN_16BYTES|IMAGE_SCN_MEM_READ;
1166 
1167     /* set the symbol table */
1168     if(entryLength<=8) {
1169         uprv_strncpy((char *)symbols[0].N.ShortName, entry, entryLength);
1170         symbolNames.sizeofLongNames=4;
1171     } else {
1172         symbols[0].N.Name.Short=0;
1173         symbols[0].N.Name.Long=4;
1174         symbolNames.sizeofLongNames=4+entryLength+1;
1175         uprv_strcpy(symbolNames.longNames, entry);
1176     }
1177     symbols[0].SectionNumber=2;
1178     symbols[0].StorageClass=IMAGE_SYM_CLASS_EXTERNAL;
1179 
1180     /* write the file header and the linker options section */
1181     T_FileStream_write(out, &objHeader, objHeader.sections[1].PointerToRawData);
1182 #else
1183 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
1184 #endif
1185 
1186     /* copy the data file into section 2 */
1187     for(;;) {
1188         length=T_FileStream_read(in, buffer, sizeof(buffer));
1189         if(length==0) {
1190             break;
1191         }
1192         T_FileStream_write(out, buffer, (int32_t)length);
1193     }
1194 
1195 #if U_PLATFORM_HAS_WIN32_API
1196     /* write the symbol table */
1197     T_FileStream_write(out, symbols, IMAGE_SIZEOF_SYMBOL);
1198     T_FileStream_write(out, &symbolNames, symbolNames.sizeofLongNames);
1199 #endif
1200 
1201     if(T_FileStream_error(in)) {
1202         fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
1203         exit(U_FILE_ACCESS_ERROR);
1204     }
1205 
1206     if(T_FileStream_error(out)) {
1207         fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
1208         exit(U_FILE_ACCESS_ERROR);
1209     }
1210 
1211     T_FileStream_close(out);
1212     T_FileStream_close(in);
1213 }
1214 #endif
1215