1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /******************************************************************************
4  *   Copyright (C) 2009-2016, International Business Machines
5  *   Corporation and others.  All Rights Reserved.
6  *******************************************************************************
7  */
8 #include "unicode/utypes.h"
9 
10 #if U_PLATFORM_HAS_WIN32_API
11 #   define VC_EXTRALEAN
12 #   define WIN32_LEAN_AND_MEAN
13 #   define NOUSER
14 #   define NOSERVICE
15 #   define NOIME
16 #   define NOMCX
17 #include <windows.h>
18 #include <time.h>
19 #   ifdef __GNUC__
20 #       define WINDOWS_WITH_GNUC
21 #   endif
22 #endif
23 
24 #if U_PLATFORM_IS_LINUX_BASED && U_HAVE_ELF_H
25 #   define U_ELF
26 #endif
27 
28 #ifdef U_ELF
29 #   include <elf.h>
30 #   if defined(ELFCLASS64)
31 #       define U_ELF64
32 #   endif
33     /* Old elf.h headers may not have EM_X86_64, or have EM_X8664 instead. */
34 #   ifndef EM_X86_64
35 #       define EM_X86_64 62
36 #   endif
37 #   define ICU_ENTRY_OFFSET 0
38 #endif
39 
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include "unicode/putil.h"
43 #include "cmemory.h"
44 #include "cstring.h"
45 #include "filestrm.h"
46 #include "toolutil.h"
47 #include "unicode/uclean.h"
48 #include "uoptions.h"
49 #include "pkg_genc.h"
50 
/*
 * Sentinel column value: the write helpers treat it as "nothing has been
 * written yet" and emit the first item without a leading separator.
 */
#define MAX_COLUMN ((uint32_t)(0xFFFFFFFFU))

/* Hexadecimal notations that write32() can produce (see AssemblyType.hexType). */
#define HEX_0X 0 /*  0x1234 */
#define HEX_0H 1 /*  01234h */
55 
56 /* prototypes --------------------------------------------------------------- */
/* Builds the output file path and the entry point name from the input file name. */
static void
getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename);

/* Writes one byte as a decimal number; returns the updated column counter. */
static uint32_t
write8(FileStream *out, uint8_t byte, uint32_t column);

/* Writes one 32-bit value for the selected assembler; returns the updated column counter. */
static uint32_t
write32(FileStream *out, uint32_t byte, uint32_t column);

#if U_PLATFORM == U_PF_OS400
/* Writes one byte as an escape sequence inside a C string literal; returns the updated column counter. */
static uint32_t
write8str(FileStream *out, uint8_t byte, uint32_t column);
#endif
70 /* -------------------------------------------------------------------------- */
71 
72 /*
73 Creating Template Files for New Platforms
74 
75 Let the cc compiler help you get started.
76 Compile this program
77     const unsigned int x[5] = {1, 2, 0xdeadbeef, 0xffffffff, 16};
78 with the -S option to produce assembly output.
79 
80 For example, this will generate array.s:
81 gcc -S array.c
82 
83 This will produce a .s file that may look like this:
84 
85     .file   "array.c"
86     .version        "01.01"
87 gcc2_compiled.:
88     .globl x
89     .section        .rodata
90     .align 4
91     .type    x,@object
92     .size    x,20
93 x:
94     .long   1
95     .long   2
96     .long   -559038737
97     .long   -1
98     .long   16
99     .ident  "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)"
100 
101 which gives a starting point that will compile, and can be transformed
102 to become the template, generally with some consulting of as docs and
103 some experimentation.
104 
105 If you want ICU to automatically use this assembly, you should
106 specify "GENCCODE_ASSEMBLY=-a name" in the specific config/mh-* file,
107 where the name is the compiler or platform that you used in this
108 assemblyHeader data structure.
109 */
/*
 * Table of assembly output templates, one entry per supported
 * assembler/platform.  An entry is selected by name through
 * checkAssemblyHeaderName() and then used by writeAssemblyCode() and write32().
 */
static const struct AssemblyType {
    /* name that is compared with the requested assembly type */
    const char *name;
    /* printf-style format written before the data; every %s receives the entry point name */
    const char *header;
    /* directive written at the start of each line of data words */
    const char *beginLine;
    /* printf-style format written after the data; every %s receives the entry point name */
    const char *footer;
    int8_t      hexType; /* HEX_0X or HEX_0H */
} assemblyHeader[] = {
    /* For gcc assemblers, the meaning of .align changes depending on the */
    /* hardware, so we use .balign 16 which always means 16 bytes. */
    /* https://sourceware.org/binutils/docs/as/Pseudo-Ops.html */
    {"gcc",
        ".globl %s\n"
        "\t.section .note.GNU-stack,\"\",%%progbits\n"
        "\t.section .rodata\n"
        "\t.balign 16\n"
        "#ifdef U_HIDE_DATA_SYMBOL\n"
        "\t.hidden %s\n"
        "#endif\n"
        "\t.type %s,%%object\n"
        "%s:\n\n",

        ".long ",".size %s, .-%s\n",HEX_0X
    },
    {"gcc-darwin",
        /*"\t.section __TEXT,__text,regular,pure_instructions\n"
        "\t.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32\n"*/
        ".globl _%s\n"
        "#ifdef U_HIDE_DATA_SYMBOL\n"
        "\t.private_extern _%s\n"
        "#endif\n"
        "\t.data\n"
        "\t.const\n"
        "\t.balign 16\n"
        "_%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-cygwin",
        ".globl _%s\n"
        "\t.section .rodata\n"
        "\t.balign 16\n"
        "_%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-mingw64",
        ".globl %s\n"
        "\t.section .rodata\n"
        "\t.balign 16\n"
        "%s:\n\n",

        ".long ","",HEX_0X
    },
/* 16 bytes alignment. */
/* http://docs.oracle.com/cd/E19641-01/802-1947/802-1947.pdf */
    {"sun",
        "\t.section \".rodata\"\n"
        "\t.align   16\n"
        ".globl     %s\n"
        "%s:\n",

        ".word ","",HEX_0X
    },
/* 16 bytes alignment for sun-x86. */
/* http://docs.oracle.com/cd/E19963-01/html/821-1608/eoiyg.html */
    {"sun-x86",
        "Drodata.rodata:\n"
        "\t.type   Drodata.rodata,@object\n"
        "\t.size   Drodata.rodata,0\n"
        "\t.globl  %s\n"
        "\t.align  16\n"
        "%s:\n",

        ".4byte ","",HEX_0X
    },
/* 1<<4 bit alignment for aix. */
/* http://pic.dhe.ibm.com/infocenter/aix/v6r1/index.jsp?topic=%2Fcom.ibm.aix.aixassem%2Fdoc%2Falangref%2Fidalangref_csect_pseudoop.htm */
    {"xlc",
        ".globl %s{RO}\n"
        "\t.toc\n"
        "%s:\n"
        "\t.csect %s{RO}, 4\n",

        ".long ","",HEX_0X
    },
    {"aCC-ia64",
        "\t.file   \"%s.s\"\n"
        "\t.type   %s,@object\n"
        "\t.global %s\n"
        "\t.secalias .abe$0.rodata, \".rodata\"\n"
        "\t.section .abe$0.rodata = \"a\", \"progbits\"\n"
        "\t.align  16\n"
        "%s::\t",

        "data4 ","",HEX_0X
    },
    {"aCC-parisc",
        "\t.SPACE  $TEXT$\n"
        "\t.SUBSPA $LIT$\n"
        "%s\n"
        "\t.EXPORT %s\n"
        "\t.ALIGN  16\n",

        ".WORD ","",HEX_0X
    },
/* align 16 bytes */
/*  http://msdn.microsoft.com/en-us/library/dwa9fwef.aspx */
    { "masm",
      "\tTITLE %s\n"
      "; generated by genccode\n"
      ".386\n"
      ".model flat\n"
      "\tPUBLIC _%s\n"
      "ICUDATA_%s\tSEGMENT READONLY PARA PUBLIC FLAT 'DATA'\n"
      "\tALIGN 16\n"
      "_%s\tLABEL DWORD\n",
      "\tDWORD ","\nICUDATA_%s\tENDS\n\tEND\n",HEX_0H
    }
};
229 
/* Index into assemblyHeader[] of the selected template; -1 while none is selected. */
static int32_t assemblyHeaderIndex = -1;
/* Hex notation used by write32(); updated by checkAssemblyHeaderName() on a match. */
static int32_t hexType = HEX_0X;
232 
233 U_CAPI UBool U_EXPORT2
checkAssemblyHeaderName(const char * optAssembly)234 checkAssemblyHeaderName(const char* optAssembly) {
235     int32_t idx;
236     assemblyHeaderIndex = -1;
237     for (idx = 0; idx < UPRV_LENGTHOF(assemblyHeader); idx++) {
238         if (uprv_strcmp(optAssembly, assemblyHeader[idx].name) == 0) {
239             assemblyHeaderIndex = idx;
240             hexType = assemblyHeader[idx].hexType; /* set the hex type */
241             return TRUE;
242         }
243     }
244 
245     return FALSE;
246 }
247 
248 
249 U_CAPI void U_EXPORT2
printAssemblyHeadersToStdErr(void)250 printAssemblyHeadersToStdErr(void) {
251     int32_t idx;
252     fprintf(stderr, "%s", assemblyHeader[0].name);
253     for (idx = 1; idx < UPRV_LENGTHOF(assemblyHeader); idx++) {
254         fprintf(stderr, ", %s", assemblyHeader[idx].name);
255     }
256     fprintf(stderr,
257         ")\n");
258 }
259 
260 U_CAPI void U_EXPORT2
writeAssemblyCode(const char * filename,const char * destdir,const char * optEntryPoint,const char * optFilename,char * outFilePath)261 writeAssemblyCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optFilename, char *outFilePath) {
262     uint32_t column = MAX_COLUMN;
263     char entry[64];
264     uint32_t buffer[1024];
265     char *bufferStr = (char *)buffer;
266     FileStream *in, *out;
267     size_t i, length;
268 
269     in=T_FileStream_open(filename, "rb");
270     if(in==NULL) {
271         fprintf(stderr, "genccode: unable to open input file %s\n", filename);
272         exit(U_FILE_ACCESS_ERROR);
273     }
274 
275     getOutFilename(filename, destdir, bufferStr, entry, ".S", optFilename);
276     out=T_FileStream_open(bufferStr, "w");
277     if(out==NULL) {
278         fprintf(stderr, "genccode: unable to open output file %s\n", bufferStr);
279         exit(U_FILE_ACCESS_ERROR);
280     }
281 
282     if (outFilePath != NULL) {
283         uprv_strcpy(outFilePath, bufferStr);
284     }
285 
286 #ifdef WINDOWS_WITH_GNUC
287     /* Need to fix the file seperator character when using MinGW. */
288     swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/');
289 #endif
290 
291     if(optEntryPoint != NULL) {
292         uprv_strcpy(entry, optEntryPoint);
293         uprv_strcat(entry, "_dat");
294     }
295 
296     /* turn dashes or dots in the entry name into underscores */
297     length=uprv_strlen(entry);
298     for(i=0; i<length; ++i) {
299         if(entry[i]=='-' || entry[i]=='.') {
300             entry[i]='_';
301         }
302     }
303 
304     sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].header,
305         entry, entry, entry, entry,
306         entry, entry, entry, entry);
307     T_FileStream_writeLine(out, bufferStr);
308     T_FileStream_writeLine(out, assemblyHeader[assemblyHeaderIndex].beginLine);
309 
310     for(;;) {
311         length=T_FileStream_read(in, buffer, sizeof(buffer));
312         if(length==0) {
313             break;
314         }
315         if (length != sizeof(buffer)) {
316             /* pad with extra 0's when at the end of the file */
317             for(i=0; i < (length % sizeof(uint32_t)); ++i) {
318                 buffer[length+i] = 0;
319             }
320         }
321         for(i=0; i<(length/sizeof(buffer[0])); i++) {
322             column = write32(out, buffer[i], column);
323         }
324     }
325 
326     T_FileStream_writeLine(out, "\n");
327 
328     sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].footer,
329         entry, entry, entry, entry,
330         entry, entry, entry, entry);
331     T_FileStream_writeLine(out, bufferStr);
332 
333     if(T_FileStream_error(in)) {
334         fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
335         exit(U_FILE_ACCESS_ERROR);
336     }
337 
338     if(T_FileStream_error(out)) {
339         fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
340         exit(U_FILE_ACCESS_ERROR);
341     }
342 
343     T_FileStream_close(out);
344     T_FileStream_close(in);
345 }
346 
347 U_CAPI void U_EXPORT2
writeCCode(const char * filename,const char * destdir,const char * optName,const char * optFilename,char * outFilePath)348 writeCCode(const char *filename, const char *destdir, const char *optName, const char *optFilename, char *outFilePath) {
349     uint32_t column = MAX_COLUMN;
350     char buffer[4096], entry[64];
351     FileStream *in, *out;
352     size_t i, length;
353 
354     in=T_FileStream_open(filename, "rb");
355     if(in==NULL) {
356         fprintf(stderr, "genccode: unable to open input file %s\n", filename);
357         exit(U_FILE_ACCESS_ERROR);
358     }
359 
360     if(optName != NULL) { /* prepend  'icudt28_' */
361       strcpy(entry, optName);
362       strcat(entry, "_");
363     } else {
364       entry[0] = 0;
365     }
366 
367     getOutFilename(filename, destdir, buffer, entry+uprv_strlen(entry), ".c", optFilename);
368     if (outFilePath != NULL) {
369         uprv_strcpy(outFilePath, buffer);
370     }
371     out=T_FileStream_open(buffer, "w");
372     if(out==NULL) {
373         fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
374         exit(U_FILE_ACCESS_ERROR);
375     }
376 
377     /* turn dashes or dots in the entry name into underscores */
378     length=uprv_strlen(entry);
379     for(i=0; i<length; ++i) {
380         if(entry[i]=='-' || entry[i]=='.') {
381             entry[i]='_';
382         }
383     }
384 
385 #if U_PLATFORM == U_PF_OS400
386     /*
387     TODO: Fix this once the compiler implements this feature. Keep in sync with udatamem.c
388 
389     This is here because this platform can't currently put
390     const data into the read-only pages of an object or
391     shared library (service program). Only strings are allowed in read-only
392     pages, so we use char * strings to store the data.
393 
394     In order to prevent the beginning of the data from ever matching the
395     magic numbers we must still use the initial double.
396     [grhoten 4/24/2003]
397     */
398     sprintf(buffer,
399         "#ifndef IN_GENERATED_CCODE\n"
400         "#define IN_GENERATED_CCODE\n"
401         "#define U_DISABLE_RENAMING 1\n"
402         "#include \"unicode/umachine.h\"\n"
403         "#endif\n"
404         "U_CDECL_BEGIN\n"
405         "const struct {\n"
406         "    double bogus;\n"
407         "    const char *bytes; \n"
408         "} %s={ 0.0, \n",
409         entry);
410     T_FileStream_writeLine(out, buffer);
411 
412     for(;;) {
413         length=T_FileStream_read(in, buffer, sizeof(buffer));
414         if(length==0) {
415             break;
416         }
417         for(i=0; i<length; ++i) {
418             column = write8str(out, (uint8_t)buffer[i], column);
419         }
420     }
421 
422     T_FileStream_writeLine(out, "\"\n};\nU_CDECL_END\n");
423 #else
424     /* Function renaming shouldn't be done in data */
425     sprintf(buffer,
426         "#ifndef IN_GENERATED_CCODE\n"
427         "#define IN_GENERATED_CCODE\n"
428         "#define U_DISABLE_RENAMING 1\n"
429         "#include \"unicode/umachine.h\"\n"
430         "#endif\n"
431         "U_CDECL_BEGIN\n"
432         "const struct {\n"
433         "    double bogus;\n"
434         "    uint8_t bytes[%ld]; \n"
435         "} %s={ 0.0, {\n",
436         (long)T_FileStream_size(in), entry);
437     T_FileStream_writeLine(out, buffer);
438 
439     for(;;) {
440         length=T_FileStream_read(in, buffer, sizeof(buffer));
441         if(length==0) {
442             break;
443         }
444         for(i=0; i<length; ++i) {
445             column = write8(out, (uint8_t)buffer[i], column);
446         }
447     }
448 
449     T_FileStream_writeLine(out, "\n}\n};\nU_CDECL_END\n");
450 #endif
451 
452     if(T_FileStream_error(in)) {
453         fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
454         exit(U_FILE_ACCESS_ERROR);
455     }
456 
457     if(T_FileStream_error(out)) {
458         fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
459         exit(U_FILE_ACCESS_ERROR);
460     }
461 
462     T_FileStream_close(out);
463     T_FileStream_close(in);
464 }
465 
466 static uint32_t
write32(FileStream * out,uint32_t bitField,uint32_t column)467 write32(FileStream *out, uint32_t bitField, uint32_t column) {
468     int32_t i;
469     char bitFieldStr[64]; /* This is more bits than needed for a 32-bit number */
470     char *s = bitFieldStr;
471     uint8_t *ptrIdx = (uint8_t *)&bitField;
472     static const char hexToStr[16] = {
473         '0','1','2','3',
474         '4','5','6','7',
475         '8','9','A','B',
476         'C','D','E','F'
477     };
478 
479     /* write the value, possibly with comma and newline */
480     if(column==MAX_COLUMN) {
481         /* first byte */
482         column=1;
483     } else if(column<32) {
484         *(s++)=',';
485         ++column;
486     } else {
487         *(s++)='\n';
488         uprv_strcpy(s, assemblyHeader[assemblyHeaderIndex].beginLine);
489         s+=uprv_strlen(s);
490         column=1;
491     }
492 
493     if (bitField < 10) {
494         /* It's a small number. Don't waste the space for 0x */
495         *(s++)=hexToStr[bitField];
496     }
497     else {
498         int seenNonZero = 0; /* This is used to remove leading zeros */
499 
500         if(hexType==HEX_0X) {
501          *(s++)='0';
502          *(s++)='x';
503         } else if(hexType==HEX_0H) {
504          *(s++)='0';
505         }
506 
507         /* This creates a 32-bit field */
508 #if U_IS_BIG_ENDIAN
509         for (i = 0; i < sizeof(uint32_t); i++)
510 #else
511         for (i = sizeof(uint32_t)-1; i >= 0 ; i--)
512 #endif
513         {
514             uint8_t value = ptrIdx[i];
515             if (value || seenNonZero) {
516                 *(s++)=hexToStr[value>>4];
517                 *(s++)=hexToStr[value&0xF];
518                 seenNonZero = 1;
519             }
520         }
521         if(hexType==HEX_0H) {
522          *(s++)='h';
523         }
524     }
525 
526     *(s++)=0;
527     T_FileStream_writeLine(out, bitFieldStr);
528     return column;
529 }
530 
531 static uint32_t
write8(FileStream * out,uint8_t byte,uint32_t column)532 write8(FileStream *out, uint8_t byte, uint32_t column) {
533     char s[4];
534     int i=0;
535 
536     /* convert the byte value to a string */
537     if(byte>=100) {
538         s[i++]=(char)('0'+byte/100);
539         byte%=100;
540     }
541     if(i>0 || byte>=10) {
542         s[i++]=(char)('0'+byte/10);
543         byte%=10;
544     }
545     s[i++]=(char)('0'+byte);
546     s[i]=0;
547 
548     /* write the value, possibly with comma and newline */
549     if(column==MAX_COLUMN) {
550         /* first byte */
551         column=1;
552     } else if(column<16) {
553         T_FileStream_writeLine(out, ",");
554         ++column;
555     } else {
556         T_FileStream_writeLine(out, ",\n");
557         column=1;
558     }
559     T_FileStream_writeLine(out, s);
560     return column;
561 }
562 
563 #if U_PLATFORM == U_PF_OS400
564 static uint32_t
write8str(FileStream * out,uint8_t byte,uint32_t column)565 write8str(FileStream *out, uint8_t byte, uint32_t column) {
566     char s[8];
567 
568     if (byte > 7)
569         sprintf(s, "\\x%X", byte);
570     else
571         sprintf(s, "\\%X", byte);
572 
573     /* write the value, possibly with comma and newline */
574     if(column==MAX_COLUMN) {
575         /* first byte */
576         column=1;
577         T_FileStream_writeLine(out, "\"");
578     } else if(column<24) {
579         ++column;
580     } else {
581         T_FileStream_writeLine(out, "\"\n\"");
582         column=1;
583     }
584     T_FileStream_writeLine(out, s);
585     return column;
586 }
587 #endif
588 
589 static void
getOutFilename(const char * inFilename,const char * destdir,char * outFilename,char * entryName,const char * newSuffix,const char * optFilename)590 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename) {
591     const char *basename=findBasename(inFilename), *suffix=uprv_strrchr(basename, '.');
592 
593     /* copy path */
594     if(destdir!=NULL && *destdir!=0) {
595         do {
596             *outFilename++=*destdir++;
597         } while(*destdir!=0);
598         if(*(outFilename-1)!=U_FILE_SEP_CHAR) {
599             *outFilename++=U_FILE_SEP_CHAR;
600         }
601         inFilename=basename;
602     } else {
603         while(inFilename<basename) {
604             *outFilename++=*inFilename++;
605         }
606     }
607 
608     if(suffix==NULL) {
609         /* the filename does not have a suffix */
610         uprv_strcpy(entryName, inFilename);
611         if(optFilename != NULL) {
612           uprv_strcpy(outFilename, optFilename);
613         } else {
614           uprv_strcpy(outFilename, inFilename);
615         }
616         uprv_strcat(outFilename, newSuffix);
617     } else {
618         char *saveOutFilename = outFilename;
619         /* copy basename */
620         while(inFilename<suffix) {
621             if(*inFilename=='-') {
622                 /* iSeries cannot have '-' in the .o objects. */
623                 *outFilename++=*entryName++='_';
624                 inFilename++;
625             }
626             else {
627                 *outFilename++=*entryName++=*inFilename++;
628             }
629         }
630 
631         /* replace '.' by '_' */
632         *outFilename++=*entryName++='_';
633         ++inFilename;
634 
635         /* copy suffix */
636         while(*inFilename!=0) {
637             *outFilename++=*entryName++=*inFilename++;
638         }
639 
640         *entryName=0;
641 
642         if(optFilename != NULL) {
643             uprv_strcpy(saveOutFilename, optFilename);
644             uprv_strcat(saveOutFilename, newSuffix);
645         } else {
646             /* add ".c" */
647             uprv_strcpy(outFilename, newSuffix);
648         }
649     }
650 }
651 
652 #ifdef CAN_GENERATE_OBJECTS
653 static void
getArchitecture(uint16_t * pCPU,uint16_t * pBits,UBool * pIsBigEndian,const char * optMatchArch)654 getArchitecture(uint16_t *pCPU, uint16_t *pBits, UBool *pIsBigEndian, const char *optMatchArch) {
655     union {
656         char        bytes[2048];
657 #ifdef U_ELF
658         Elf32_Ehdr  header32;
659         /* Elf32_Ehdr and ELF64_Ehdr are identical for the necessary fields. */
660 #elif U_PLATFORM_HAS_WIN32_API
661         IMAGE_FILE_HEADER header;
662 #endif
663     } buffer;
664 
665     const char *filename;
666     FileStream *in;
667     int32_t length;
668 
669 #ifdef U_ELF
670 
671 #elif U_PLATFORM_HAS_WIN32_API
672     const IMAGE_FILE_HEADER *pHeader;
673 #else
674 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
675 #endif
676 
677     if(optMatchArch != NULL) {
678         filename=optMatchArch;
679     } else {
680         /* set defaults */
681 #ifdef U_ELF
682         /* set EM_386 because elf.h does not provide better defaults */
683         *pCPU=EM_386;
684         *pBits=32;
685         *pIsBigEndian=(UBool)(U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB);
686 #elif U_PLATFORM_HAS_WIN32_API
687 /* _M_IA64 should be defined in windows.h */
688 #   if defined(_M_IA64)
689         *pCPU=IMAGE_FILE_MACHINE_IA64;
690 #   elif defined(_M_AMD64)
691         *pCPU=IMAGE_FILE_MACHINE_AMD64;
692 #   else
693         *pCPU=IMAGE_FILE_MACHINE_I386;
694 #   endif
695         *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
696         *pIsBigEndian=FALSE;
697 #else
698 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
699 #endif
700         return;
701     }
702 
703     in=T_FileStream_open(filename, "rb");
704     if(in==NULL) {
705         fprintf(stderr, "genccode: unable to open match-arch file %s\n", filename);
706         exit(U_FILE_ACCESS_ERROR);
707     }
708     length=T_FileStream_read(in, buffer.bytes, sizeof(buffer.bytes));
709 
710 #ifdef U_ELF
711     if(length<sizeof(Elf32_Ehdr)) {
712         fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
713         exit(U_UNSUPPORTED_ERROR);
714     }
715     if(
716         buffer.header32.e_ident[0]!=ELFMAG0 ||
717         buffer.header32.e_ident[1]!=ELFMAG1 ||
718         buffer.header32.e_ident[2]!=ELFMAG2 ||
719         buffer.header32.e_ident[3]!=ELFMAG3 ||
720         buffer.header32.e_ident[EI_CLASS]<ELFCLASS32 || buffer.header32.e_ident[EI_CLASS]>ELFCLASS64
721     ) {
722         fprintf(stderr, "genccode: match-arch file %s is not an ELF object file, or not supported\n", filename);
723         exit(U_UNSUPPORTED_ERROR);
724     }
725 
726     *pBits= buffer.header32.e_ident[EI_CLASS]==ELFCLASS32 ? 32 : 64; /* only 32 or 64: see check above */
727 #ifdef U_ELF64
728     if(*pBits!=32 && *pBits!=64) {
729         fprintf(stderr, "genccode: currently only supports 32-bit and 64-bit ELF format\n");
730         exit(U_UNSUPPORTED_ERROR);
731     }
732 #else
733     if(*pBits!=32) {
734         fprintf(stderr, "genccode: built with elf.h missing 64-bit definitions\n");
735         exit(U_UNSUPPORTED_ERROR);
736     }
737 #endif
738 
739     *pIsBigEndian=(UBool)(buffer.header32.e_ident[EI_DATA]==ELFDATA2MSB);
740     if(*pIsBigEndian!=U_IS_BIG_ENDIAN) {
741         fprintf(stderr, "genccode: currently only same-endianness ELF formats are supported\n");
742         exit(U_UNSUPPORTED_ERROR);
743     }
744     /* TODO: Support byte swapping */
745 
746     *pCPU=buffer.header32.e_machine;
747 #elif U_PLATFORM_HAS_WIN32_API
748     if(length<sizeof(IMAGE_FILE_HEADER)) {
749         fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
750         exit(U_UNSUPPORTED_ERROR);
751     }
752     /* TODO: Use buffer.header.  Keep aliasing legal.  */
753     pHeader=(const IMAGE_FILE_HEADER *)buffer.bytes;
754     *pCPU=pHeader->Machine;
755     /*
756      * The number of bits is implicit with the Machine value.
757      * *pBits is ignored in the calling code, so this need not be precise.
758      */
759     *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
760     /* Windows always runs on little-endian CPUs. */
761     *pIsBigEndian=FALSE;
762 #else
763 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
764 #endif
765 
766     T_FileStream_close(in);
767 }
768 
769 U_CAPI void U_EXPORT2
writeObjectCode(const char * filename,const char * destdir,const char * optEntryPoint,const char * optMatchArch,const char * optFilename,char * outFilePath)770 writeObjectCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optMatchArch, const char *optFilename, char *outFilePath) {
771     /* common variables */
772     char buffer[4096], entry[96]={ 0 };
773     FileStream *in, *out;
774     const char *newSuffix;
775     int32_t i, entryLength, length, size, entryOffset=0, entryLengthOffset=0;
776 
777     uint16_t cpu, bits;
778     UBool makeBigEndian;
779 
780     /* platform-specific variables and initialization code */
781 #ifdef U_ELF
782     /* 32-bit Elf file header */
783     static Elf32_Ehdr header32={
784         {
785             /* e_ident[] */
786             ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
787             ELFCLASS32,
788             U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
789             EV_CURRENT /* EI_VERSION */
790         },
791         ET_REL,
792         EM_386,
793         EV_CURRENT, /* e_version */
794         0, /* e_entry */
795         0, /* e_phoff */
796         (Elf32_Off)sizeof(Elf32_Ehdr), /* e_shoff */
797         0, /* e_flags */
798         (Elf32_Half)sizeof(Elf32_Ehdr), /* eh_size */
799         0, /* e_phentsize */
800         0, /* e_phnum */
801         (Elf32_Half)sizeof(Elf32_Shdr), /* e_shentsize */
802         5, /* e_shnum */
803         2 /* e_shstrndx */
804     };
805 
806     /* 32-bit Elf section header table */
807     static Elf32_Shdr sectionHeaders32[5]={
808         { /* SHN_UNDEF */
809             0
810         },
811         { /* .symtab */
812             1, /* sh_name */
813             SHT_SYMTAB,
814             0, /* sh_flags */
815             0, /* sh_addr */
816             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)), /* sh_offset */
817             (Elf32_Word)(2*sizeof(Elf32_Sym)), /* sh_size */
818             3, /* sh_link=sect hdr index of .strtab */
819             1, /* sh_info=One greater than the symbol table index of the last
820                 * local symbol (with STB_LOCAL). */
821             4, /* sh_addralign */
822             (Elf32_Word)(sizeof(Elf32_Sym)) /* sh_entsize */
823         },
824         { /* .shstrtab */
825             9, /* sh_name */
826             SHT_STRTAB,
827             0, /* sh_flags */
828             0, /* sh_addr */
829             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)), /* sh_offset */
830             40, /* sh_size */
831             0, /* sh_link */
832             0, /* sh_info */
833             1, /* sh_addralign */
834             0 /* sh_entsize */
835         },
836         { /* .strtab */
837             19, /* sh_name */
838             SHT_STRTAB,
839             0, /* sh_flags */
840             0, /* sh_addr */
841             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40), /* sh_offset */
842             (Elf32_Word)sizeof(entry), /* sh_size */
843             0, /* sh_link */
844             0, /* sh_info */
845             1, /* sh_addralign */
846             0 /* sh_entsize */
847         },
848         { /* .rodata */
849             27, /* sh_name */
850             SHT_PROGBITS,
851             SHF_ALLOC, /* sh_flags */
852             0, /* sh_addr */
853             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40+sizeof(entry)), /* sh_offset */
854             0, /* sh_size */
855             0, /* sh_link */
856             0, /* sh_info */
857             16, /* sh_addralign */
858             0 /* sh_entsize */
859         }
860     };
861 
862     /* symbol table */
863     static Elf32_Sym symbols32[2]={
864         { /* STN_UNDEF */
865             0
866         },
867         { /* data entry point */
868             1, /* st_name */
869             0, /* st_value */
870             0, /* st_size */
871             ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
872             0, /* st_other */
873             4 /* st_shndx=index of related section table entry */
874         }
875     };
876 
877     /* section header string table, with decimal string offsets */
878     static const char sectionStrings[40]=
879         /*  0 */ "\0"
880         /*  1 */ ".symtab\0"
881         /*  9 */ ".shstrtab\0"
882         /* 19 */ ".strtab\0"
883         /* 27 */ ".rodata\0"
884         /* 35 */ "\0\0\0\0"; /* contains terminating NUL */
885         /* 40: padded to multiple of 8 bytes */
886 
887     /*
888      * Use entry[] for the string table which will contain only the
889      * entry point name.
890      * entry[0] must be 0 (NUL)
891      * The entry point name can be up to 38 characters long (sizeof(entry)-2).
892      */
893 
894     /* 16-align .rodata in the .o file, just in case */
895     static const char padding[16]={ 0 };
896     int32_t paddingSize;
897 
898 #ifdef U_ELF64
899     /* 64-bit Elf file header */
900     static Elf64_Ehdr header64={
901         {
902             /* e_ident[] */
903             ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
904             ELFCLASS64,
905             U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
906             EV_CURRENT /* EI_VERSION */
907         },
908         ET_REL,
909         EM_X86_64,
910         EV_CURRENT, /* e_version */
911         0, /* e_entry */
912         0, /* e_phoff */
913         (Elf64_Off)sizeof(Elf64_Ehdr), /* e_shoff */
914         0, /* e_flags */
915         (Elf64_Half)sizeof(Elf64_Ehdr), /* eh_size */
916         0, /* e_phentsize */
917         0, /* e_phnum */
918         (Elf64_Half)sizeof(Elf64_Shdr), /* e_shentsize */
919         5, /* e_shnum */
920         2 /* e_shstrndx */
921     };
922 
923     /* 64-bit Elf section header table */
924     static Elf64_Shdr sectionHeaders64[5]={
925         { /* SHN_UNDEF */
926             0
927         },
928         { /* .symtab */
929             1, /* sh_name */
930             SHT_SYMTAB,
931             0, /* sh_flags */
932             0, /* sh_addr */
933             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)), /* sh_offset */
934             (Elf64_Xword)(2*sizeof(Elf64_Sym)), /* sh_size */
935             3, /* sh_link=sect hdr index of .strtab */
936             1, /* sh_info=One greater than the symbol table index of the last
937                 * local symbol (with STB_LOCAL). */
938             4, /* sh_addralign */
939             (Elf64_Xword)(sizeof(Elf64_Sym)) /* sh_entsize */
940         },
941         { /* .shstrtab */
942             9, /* sh_name */
943             SHT_STRTAB,
944             0, /* sh_flags */
945             0, /* sh_addr */
946             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)), /* sh_offset */
947             40, /* sh_size */
948             0, /* sh_link */
949             0, /* sh_info */
950             1, /* sh_addralign */
951             0 /* sh_entsize */
952         },
953         { /* .strtab */
954             19, /* sh_name */
955             SHT_STRTAB,
956             0, /* sh_flags */
957             0, /* sh_addr */
958             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40), /* sh_offset */
959             (Elf64_Xword)sizeof(entry), /* sh_size */
960             0, /* sh_link */
961             0, /* sh_info */
962             1, /* sh_addralign */
963             0 /* sh_entsize */
964         },
965         { /* .rodata */
966             27, /* sh_name */
967             SHT_PROGBITS,
968             SHF_ALLOC, /* sh_flags */
969             0, /* sh_addr */
970             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40+sizeof(entry)), /* sh_offset */
971             0, /* sh_size */
972             0, /* sh_link */
973             0, /* sh_info */
974             16, /* sh_addralign */
975             0 /* sh_entsize */
976         }
977     };
978 
979     /*
980      * 64-bit symbol table
981      * careful: different order of items compared with Elf32_Sym!
982      */
983     static Elf64_Sym symbols64[2]={
984         { /* STN_UNDEF */
985             0
986         },
987         { /* data entry point */
988             1, /* st_name */
989             ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
990             0, /* st_other */
991             4, /* st_shndx=index of related section table entry */
992             0, /* st_value */
993             0 /* st_size */
994         }
995     };
996 
997 #endif /* U_ELF64 */
998 
999     /* entry[] has a leading NUL */
1000     entryOffset=1;
1001 
1002     /* in the common code, count entryLength from after the NUL */
1003     entryLengthOffset=1;
1004 
1005     newSuffix=".o";
1006 
1007 #elif U_PLATFORM_HAS_WIN32_API
1008     struct {
1009         IMAGE_FILE_HEADER fileHeader;
1010         IMAGE_SECTION_HEADER sections[2];
1011         char linkerOptions[100];
1012     } objHeader;
1013     IMAGE_SYMBOL symbols[1];
1014     struct {
1015         DWORD sizeofLongNames;
1016         char longNames[100];
1017     } symbolNames;
1018 
1019     /*
1020      * entry sometimes has a leading '_';
1021      * it is overwritten if entryOffset==0, depending on the target platform
1022      * (see the check for cpu below)
1023      */
1024     entry[0]='_';
1025 
1026     newSuffix=".obj";
1027 #else
1028 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
1029 #endif
1030 
1031     /* deal with options, files and the entry point name */
1032     getArchitecture(&cpu, &bits, &makeBigEndian, optMatchArch);
1033     printf("genccode: --match-arch cpu=%hu bits=%hu big-endian=%d\n", cpu, bits, makeBigEndian);
1034 #if U_PLATFORM_HAS_WIN32_API
1035     if(cpu==IMAGE_FILE_MACHINE_I386) {
1036         entryOffset=1;
1037     }
1038 #endif
1039 
1040     in=T_FileStream_open(filename, "rb");
1041     if(in==NULL) {
1042         fprintf(stderr, "genccode: unable to open input file %s\n", filename);
1043         exit(U_FILE_ACCESS_ERROR);
1044     }
1045     size=T_FileStream_size(in);
1046 
1047     getOutFilename(filename, destdir, buffer, entry+entryOffset, newSuffix, optFilename);
1048     if (outFilePath != NULL) {
1049         uprv_strcpy(outFilePath, buffer);
1050     }
1051 
1052     if(optEntryPoint != NULL) {
1053         uprv_strcpy(entry+entryOffset, optEntryPoint);
1054         uprv_strcat(entry+entryOffset, "_dat");
1055     }
1056     /* turn dashes in the entry name into underscores */
1057     entryLength=(int32_t)uprv_strlen(entry+entryLengthOffset);
1058     for(i=0; i<entryLength; ++i) {
1059         if(entry[entryLengthOffset+i]=='-') {
1060             entry[entryLengthOffset+i]='_';
1061         }
1062     }
1063 
1064     /* open the output file */
1065     out=T_FileStream_open(buffer, "wb");
1066     if(out==NULL) {
1067         fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
1068         exit(U_FILE_ACCESS_ERROR);
1069     }
1070 
1071 #ifdef U_ELF
1072     if(bits==32) {
1073         header32.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1074         header32.e_machine=cpu;
1075 
1076         /* 16-align .rodata in the .o file, just in case */
1077         paddingSize=sectionHeaders32[4].sh_offset & 0xf;
1078         if(paddingSize!=0) {
1079                 paddingSize=0x10-paddingSize;
1080                 sectionHeaders32[4].sh_offset+=paddingSize;
1081         }
1082 
1083         sectionHeaders32[4].sh_size=(Elf32_Word)size;
1084 
1085         symbols32[1].st_size=(Elf32_Word)size;
1086 
1087         /* write .o headers */
1088         T_FileStream_write(out, &header32, (int32_t)sizeof(header32));
1089         T_FileStream_write(out, sectionHeaders32, (int32_t)sizeof(sectionHeaders32));
1090         T_FileStream_write(out, symbols32, (int32_t)sizeof(symbols32));
1091     } else /* bits==64 */ {
1092 #ifdef U_ELF64
1093         header64.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1094         header64.e_machine=cpu;
1095 
1096         /* 16-align .rodata in the .o file, just in case */
1097         paddingSize=sectionHeaders64[4].sh_offset & 0xf;
1098         if(paddingSize!=0) {
1099                 paddingSize=0x10-paddingSize;
1100                 sectionHeaders64[4].sh_offset+=paddingSize;
1101         }
1102 
1103         sectionHeaders64[4].sh_size=(Elf64_Xword)size;
1104 
1105         symbols64[1].st_size=(Elf64_Xword)size;
1106 
1107         /* write .o headers */
1108         T_FileStream_write(out, &header64, (int32_t)sizeof(header64));
1109         T_FileStream_write(out, sectionHeaders64, (int32_t)sizeof(sectionHeaders64));
1110         T_FileStream_write(out, symbols64, (int32_t)sizeof(symbols64));
1111 #endif
1112     }
1113 
1114     T_FileStream_write(out, sectionStrings, (int32_t)sizeof(sectionStrings));
1115     T_FileStream_write(out, entry, (int32_t)sizeof(entry));
1116     if(paddingSize!=0) {
1117         T_FileStream_write(out, padding, paddingSize);
1118     }
1119 #elif U_PLATFORM_HAS_WIN32_API
1120     /* populate the .obj headers */
1121     uprv_memset(&objHeader, 0, sizeof(objHeader));
1122     uprv_memset(&symbols, 0, sizeof(symbols));
1123     uprv_memset(&symbolNames, 0, sizeof(symbolNames));
1124 
1125     /* write the linker export directive */
1126     uprv_strcpy(objHeader.linkerOptions, "-export:");
1127     length=8;
1128     uprv_strcpy(objHeader.linkerOptions+length, entry);
1129     length+=entryLength;
1130     uprv_strcpy(objHeader.linkerOptions+length, ",data ");
1131     length+=6;
1132 
1133     /* set the file header */
1134     objHeader.fileHeader.Machine=cpu;
1135     objHeader.fileHeader.NumberOfSections=2;
1136     objHeader.fileHeader.TimeDateStamp=(DWORD)time(NULL);
1137     objHeader.fileHeader.PointerToSymbolTable=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length+size; /* start of symbol table */
1138     objHeader.fileHeader.NumberOfSymbols=1;
1139 
1140     /* set the section for the linker options */
1141     uprv_strncpy((char *)objHeader.sections[0].Name, ".drectve", 8);
1142     objHeader.sections[0].SizeOfRawData=length;
1143     objHeader.sections[0].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER;
1144     objHeader.sections[0].Characteristics=IMAGE_SCN_LNK_INFO|IMAGE_SCN_LNK_REMOVE|IMAGE_SCN_ALIGN_1BYTES;
1145 
1146     /* set the data section */
1147     uprv_strncpy((char *)objHeader.sections[1].Name, ".rdata", 6);
1148     objHeader.sections[1].SizeOfRawData=size;
1149     objHeader.sections[1].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length;
1150     objHeader.sections[1].Characteristics=IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_ALIGN_16BYTES|IMAGE_SCN_MEM_READ;
1151 
1152     /* set the symbol table */
1153     if(entryLength<=8) {
1154         uprv_strncpy((char *)symbols[0].N.ShortName, entry, entryLength);
1155         symbolNames.sizeofLongNames=4;
1156     } else {
1157         symbols[0].N.Name.Short=0;
1158         symbols[0].N.Name.Long=4;
1159         symbolNames.sizeofLongNames=4+entryLength+1;
1160         uprv_strcpy(symbolNames.longNames, entry);
1161     }
1162     symbols[0].SectionNumber=2;
1163     symbols[0].StorageClass=IMAGE_SYM_CLASS_EXTERNAL;
1164 
1165     /* write the file header and the linker options section */
1166     T_FileStream_write(out, &objHeader, objHeader.sections[1].PointerToRawData);
1167 #else
1168 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
1169 #endif
1170 
1171     /* copy the data file into section 2 */
1172     for(;;) {
1173         length=T_FileStream_read(in, buffer, sizeof(buffer));
1174         if(length==0) {
1175             break;
1176         }
1177         T_FileStream_write(out, buffer, (int32_t)length);
1178     }
1179 
1180 #if U_PLATFORM_HAS_WIN32_API
1181     /* write the symbol table */
1182     T_FileStream_write(out, symbols, IMAGE_SIZEOF_SYMBOL);
1183     T_FileStream_write(out, &symbolNames, symbolNames.sizeofLongNames);
1184 #endif
1185 
1186     if(T_FileStream_error(in)) {
1187         fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
1188         exit(U_FILE_ACCESS_ERROR);
1189     }
1190 
1191     if(T_FileStream_error(out)) {
1192         fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
1193         exit(U_FILE_ACCESS_ERROR);
1194     }
1195 
1196     T_FileStream_close(out);
1197     T_FileStream_close(in);
1198 }
1199 #endif
1200