1 /******************************************************************************
2  *   Copyright (C) 2009-2015, International Business Machines
3  *   Corporation and others.  All Rights Reserved.
4  *******************************************************************************
5  */
6 #include "unicode/utypes.h"
7 
8 #if U_PLATFORM_HAS_WIN32_API
9 #   define VC_EXTRALEAN
10 #   define WIN32_LEAN_AND_MEAN
11 #   define NOUSER
12 #   define NOSERVICE
13 #   define NOIME
14 #   define NOMCX
15 #include <windows.h>
16 #include <time.h>
17 #   ifdef __GNUC__
18 #       define WINDOWS_WITH_GNUC
19 #   endif
20 #endif
21 
22 #if U_PLATFORM_IS_LINUX_BASED && U_HAVE_ELF_H
23 #   define U_ELF
24 #endif
25 
26 #ifdef U_ELF
27 #   include <elf.h>
28 #   if defined(ELFCLASS64)
29 #       define U_ELF64
30 #   endif
31     /* Old elf.h headers may not have EM_X86_64, or have EM_X8664 instead. */
32 #   ifndef EM_X86_64
33 #       define EM_X86_64 62
34 #   endif
35 #   define ICU_ENTRY_OFFSET 0
36 #endif
37 
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include "unicode/putil.h"
41 #include "cmemory.h"
42 #include "cstring.h"
43 #include "filestrm.h"
44 #include "toolutil.h"
45 #include "unicode/uclean.h"
46 #include "uoptions.h"
47 #include "pkg_genc.h"
48 
49 #define MAX_COLUMN ((uint32_t)(0xFFFFFFFFU))
50 
51 #define HEX_0X 0 /*  0x1234 */
52 #define HEX_0H 1 /*  01234h */
53 
54 /* prototypes --------------------------------------------------------------- */
55 static void
56 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename);
57 
58 static uint32_t
59 write8(FileStream *out, uint8_t byte, uint32_t column);
60 
61 static uint32_t
62 write32(FileStream *out, uint32_t byte, uint32_t column);
63 
64 #if U_PLATFORM == U_PF_OS400
65 static uint32_t
66 write8str(FileStream *out, uint8_t byte, uint32_t column);
67 #endif
68 /* -------------------------------------------------------------------------- */
69 
70 /*
71 Creating Template Files for New Platforms
72 
73 Let the cc compiler help you get started.
74 Compile this program
75     const unsigned int x[5] = {1, 2, 0xdeadbeef, 0xffffffff, 16};
76 with the -S option to produce assembly output.
77 
78 For example, this will generate array.s:
79 gcc -S array.c
80 
81 This will produce a .s file that may look like this:
82 
83     .file   "array.c"
84     .version        "01.01"
85 gcc2_compiled.:
86     .globl x
87     .section        .rodata
88     .align 4
89     .type    x,@object
90     .size    x,20
91 x:
92     .long   1
93     .long   2
94     .long   -559038737
95     .long   -1
96     .long   16
97     .ident  "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)"
98 
99 which gives a starting point that will compile, and can be transformed
100 to become the template, generally with some consulting of as docs and
101 some experimentation.
102 
103 If you want ICU to automatically use this assembly, you should
104 specify "GENCCODE_ASSEMBLY=-a name" in the specific config/mh-* file,
105 where the name is the compiler or platform that you used in this
106 assemblyHeader data structure.
107 */
108 static const struct AssemblyType {
109     const char *name;
110     const char *header;
111     const char *beginLine;
112     const char *footer;
113     int8_t      hexType; /* HEX_0X or HEX_0h */
114 } assemblyHeader[] = {
115     /* For gcc assemblers, the meaning of .align changes depending on the */
116     /* hardware, so we use .balign 16 which always means 16 bytes. */
117     /* https://sourceware.org/binutils/docs/as/Pseudo-Ops.html */
118     {"gcc",
119         ".globl %s\n"
120         "\t.section .note.GNU-stack,\"\",%%progbits\n"
121         "\t.section .rodata\n"
122         "\t.balign 16\n"
123         "#ifdef U_HIDE_DATA_SYMBOL\n"
124         "\t.hidden %s\n"
125         "#endif\n"
126         "\t.type %s,%%object\n"
127         "%s:\n\n",
128 
129         ".long ","",HEX_0X
130     },
131     {"gcc-darwin",
132         /*"\t.section __TEXT,__text,regular,pure_instructions\n"
133         "\t.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32\n"*/
134         ".globl _%s\n"
135         "#ifdef U_HIDE_DATA_SYMBOL\n"
136         "\t.private_extern _%s\n"
137         "#endif\n"
138         "\t.data\n"
139         "\t.const\n"
140         "\t.balign 16\n"
141         "_%s:\n\n",
142 
143         ".long ","",HEX_0X
144     },
145     {"gcc-cygwin",
146         ".globl _%s\n"
147         "\t.section .rodata\n"
148         "\t.balign 16\n"
149         "_%s:\n\n",
150 
151         ".long ","",HEX_0X
152     },
153     {"gcc-mingw64",
154         ".globl %s\n"
155         "\t.section .rodata\n"
156         "\t.balign 16\n"
157         "%s:\n\n",
158 
159         ".long ","",HEX_0X
160     },
161 /* 16 bytes alignment. */
162 /* http://docs.oracle.com/cd/E19641-01/802-1947/802-1947.pdf */
163     {"sun",
164         "\t.section \".rodata\"\n"
165         "\t.align   16\n"
166         ".globl     %s\n"
167         "%s:\n",
168 
169         ".word ","",HEX_0X
170     },
171 /* 16 bytes alignment for sun-x86. */
172 /* http://docs.oracle.com/cd/E19963-01/html/821-1608/eoiyg.html */
173     {"sun-x86",
174         "Drodata.rodata:\n"
175         "\t.type   Drodata.rodata,@object\n"
176         "\t.size   Drodata.rodata,0\n"
177         "\t.globl  %s\n"
178         "\t.align  16\n"
179         "%s:\n",
180 
181         ".4byte ","",HEX_0X
182     },
183 /* 1<<4 bit alignment for aix. */
184 /* http://pic.dhe.ibm.com/infocenter/aix/v6r1/index.jsp?topic=%2Fcom.ibm.aix.aixassem%2Fdoc%2Falangref%2Fidalangref_csect_pseudoop.htm */
185     {"xlc",
186         ".globl %s{RO}\n"
187         "\t.toc\n"
188         "%s:\n"
189         "\t.csect %s{RO}, 4\n",
190 
191         ".long ","",HEX_0X
192     },
193     {"aCC-ia64",
194         "\t.file   \"%s.s\"\n"
195         "\t.type   %s,@object\n"
196         "\t.global %s\n"
197         "\t.secalias .abe$0.rodata, \".rodata\"\n"
198         "\t.section .abe$0.rodata = \"a\", \"progbits\"\n"
199         "\t.align  16\n"
200         "%s::\t",
201 
202         "data4 ","",HEX_0X
203     },
204     {"aCC-parisc",
205         "\t.SPACE  $TEXT$\n"
206         "\t.SUBSPA $LIT$\n"
207         "%s\n"
208         "\t.EXPORT %s\n"
209         "\t.ALIGN  16\n",
210 
211         ".WORD ","",HEX_0X
212     },
213 /* align 16 bytes */
214 /*  http://msdn.microsoft.com/en-us/library/dwa9fwef.aspx */
215     { "masm",
216       "\tTITLE %s\n"
217       "; generated by genccode\n"
218       ".386\n"
219       ".model flat\n"
220       "\tPUBLIC _%s\n"
221       "ICUDATA_%s\tSEGMENT READONLY PARA PUBLIC FLAT 'DATA'\n"
222       "\tALIGN 16\n"
223       "_%s\tLABEL DWORD\n",
224       "\tDWORD ","\nICUDATA_%s\tENDS\n\tEND\n",HEX_0H
225     }
226 };
227 
228 static int32_t assemblyHeaderIndex = -1;
229 static int32_t hexType = HEX_0X;
230 
231 U_CAPI UBool U_EXPORT2
checkAssemblyHeaderName(const char * optAssembly)232 checkAssemblyHeaderName(const char* optAssembly) {
233     int32_t idx;
234     assemblyHeaderIndex = -1;
235     for (idx = 0; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
236         if (uprv_strcmp(optAssembly, assemblyHeader[idx].name) == 0) {
237             assemblyHeaderIndex = idx;
238             hexType = assemblyHeader[idx].hexType; /* set the hex type */
239             return TRUE;
240         }
241     }
242 
243     return FALSE;
244 }
245 
246 
247 U_CAPI void U_EXPORT2
printAssemblyHeadersToStdErr(void)248 printAssemblyHeadersToStdErr(void) {
249     int32_t idx;
250     fprintf(stderr, "%s", assemblyHeader[0].name);
251     for (idx = 1; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
252         fprintf(stderr, ", %s", assemblyHeader[idx].name);
253     }
254     fprintf(stderr,
255         ")\n");
256 }
257 
258 U_CAPI void U_EXPORT2
writeAssemblyCode(const char * filename,const char * destdir,const char * optEntryPoint,const char * optFilename,char * outFilePath)259 writeAssemblyCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optFilename, char *outFilePath) {
260     uint32_t column = MAX_COLUMN;
261     char entry[64];
262     uint32_t buffer[1024];
263     char *bufferStr = (char *)buffer;
264     FileStream *in, *out;
265     size_t i, length;
266 
267     in=T_FileStream_open(filename, "rb");
268     if(in==NULL) {
269         fprintf(stderr, "genccode: unable to open input file %s\n", filename);
270         exit(U_FILE_ACCESS_ERROR);
271     }
272 
273     getOutFilename(filename, destdir, bufferStr, entry, ".S", optFilename);
274     out=T_FileStream_open(bufferStr, "w");
275     if(out==NULL) {
276         fprintf(stderr, "genccode: unable to open output file %s\n", bufferStr);
277         exit(U_FILE_ACCESS_ERROR);
278     }
279 
280     if (outFilePath != NULL) {
281         uprv_strcpy(outFilePath, bufferStr);
282     }
283 
284 #ifdef WINDOWS_WITH_GNUC
285     /* Need to fix the file seperator character when using MinGW. */
286     swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/');
287 #endif
288 
289     if(optEntryPoint != NULL) {
290         uprv_strcpy(entry, optEntryPoint);
291         uprv_strcat(entry, "_dat");
292     }
293 
294     /* turn dashes or dots in the entry name into underscores */
295     length=uprv_strlen(entry);
296     for(i=0; i<length; ++i) {
297         if(entry[i]=='-' || entry[i]=='.') {
298             entry[i]='_';
299         }
300     }
301 
302     sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].header,
303         entry, entry, entry, entry,
304         entry, entry, entry, entry);
305     T_FileStream_writeLine(out, bufferStr);
306     T_FileStream_writeLine(out, assemblyHeader[assemblyHeaderIndex].beginLine);
307 
308     for(;;) {
309         length=T_FileStream_read(in, buffer, sizeof(buffer));
310         if(length==0) {
311             break;
312         }
313         if (length != sizeof(buffer)) {
314             /* pad with extra 0's when at the end of the file */
315             for(i=0; i < (length % sizeof(uint32_t)); ++i) {
316                 buffer[length+i] = 0;
317             }
318         }
319         for(i=0; i<(length/sizeof(buffer[0])); i++) {
320             column = write32(out, buffer[i], column);
321         }
322     }
323 
324     T_FileStream_writeLine(out, "\n");
325 
326     sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].footer,
327         entry, entry, entry, entry,
328         entry, entry, entry, entry);
329     T_FileStream_writeLine(out, bufferStr);
330 
331     if(T_FileStream_error(in)) {
332         fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
333         exit(U_FILE_ACCESS_ERROR);
334     }
335 
336     if(T_FileStream_error(out)) {
337         fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
338         exit(U_FILE_ACCESS_ERROR);
339     }
340 
341     T_FileStream_close(out);
342     T_FileStream_close(in);
343 }
344 
345 U_CAPI void U_EXPORT2
writeCCode(const char * filename,const char * destdir,const char * optName,const char * optFilename,char * outFilePath)346 writeCCode(const char *filename, const char *destdir, const char *optName, const char *optFilename, char *outFilePath) {
347     uint32_t column = MAX_COLUMN;
348     char buffer[4096], entry[64];
349     FileStream *in, *out;
350     size_t i, length;
351 
352     in=T_FileStream_open(filename, "rb");
353     if(in==NULL) {
354         fprintf(stderr, "genccode: unable to open input file %s\n", filename);
355         exit(U_FILE_ACCESS_ERROR);
356     }
357 
358     if(optName != NULL) { /* prepend  'icudt28_' */
359       strcpy(entry, optName);
360       strcat(entry, "_");
361     } else {
362       entry[0] = 0;
363     }
364 
365     getOutFilename(filename, destdir, buffer, entry+uprv_strlen(entry), ".c", optFilename);
366     if (outFilePath != NULL) {
367         uprv_strcpy(outFilePath, buffer);
368     }
369     out=T_FileStream_open(buffer, "w");
370     if(out==NULL) {
371         fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
372         exit(U_FILE_ACCESS_ERROR);
373     }
374 
375     /* turn dashes or dots in the entry name into underscores */
376     length=uprv_strlen(entry);
377     for(i=0; i<length; ++i) {
378         if(entry[i]=='-' || entry[i]=='.') {
379             entry[i]='_';
380         }
381     }
382 
383 #if U_PLATFORM == U_PF_OS400
384     /*
385     TODO: Fix this once the compiler implements this feature. Keep in sync with udatamem.c
386 
387     This is here because this platform can't currently put
388     const data into the read-only pages of an object or
389     shared library (service program). Only strings are allowed in read-only
390     pages, so we use char * strings to store the data.
391 
392     In order to prevent the beginning of the data from ever matching the
393     magic numbers we must still use the initial double.
394     [grhoten 4/24/2003]
395     */
396     sprintf(buffer,
397         "#ifndef IN_GENERATED_CCODE\n"
398         "#define IN_GENERATED_CCODE\n"
399         "#define U_DISABLE_RENAMING 1\n"
400         "#include \"unicode/umachine.h\"\n"
401         "#endif\n"
402         "U_CDECL_BEGIN\n"
403         "const struct {\n"
404         "    double bogus;\n"
405         "    const char *bytes; \n"
406         "} %s={ 0.0, \n",
407         entry);
408     T_FileStream_writeLine(out, buffer);
409 
410     for(;;) {
411         length=T_FileStream_read(in, buffer, sizeof(buffer));
412         if(length==0) {
413             break;
414         }
415         for(i=0; i<length; ++i) {
416             column = write8str(out, (uint8_t)buffer[i], column);
417         }
418     }
419 
420     T_FileStream_writeLine(out, "\"\n};\nU_CDECL_END\n");
421 #else
422     /* Function renaming shouldn't be done in data */
423     sprintf(buffer,
424         "#ifndef IN_GENERATED_CCODE\n"
425         "#define IN_GENERATED_CCODE\n"
426         "#define U_DISABLE_RENAMING 1\n"
427         "#include \"unicode/umachine.h\"\n"
428         "#endif\n"
429         "U_CDECL_BEGIN\n"
430         "const struct {\n"
431         "    double bogus;\n"
432         "    uint8_t bytes[%ld]; \n"
433         "} %s={ 0.0, {\n",
434         (long)T_FileStream_size(in), entry);
435     T_FileStream_writeLine(out, buffer);
436 
437     for(;;) {
438         length=T_FileStream_read(in, buffer, sizeof(buffer));
439         if(length==0) {
440             break;
441         }
442         for(i=0; i<length; ++i) {
443             column = write8(out, (uint8_t)buffer[i], column);
444         }
445     }
446 
447     T_FileStream_writeLine(out, "\n}\n};\nU_CDECL_END\n");
448 #endif
449 
450     if(T_FileStream_error(in)) {
451         fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
452         exit(U_FILE_ACCESS_ERROR);
453     }
454 
455     if(T_FileStream_error(out)) {
456         fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
457         exit(U_FILE_ACCESS_ERROR);
458     }
459 
460     T_FileStream_close(out);
461     T_FileStream_close(in);
462 }
463 
464 static uint32_t
write32(FileStream * out,uint32_t bitField,uint32_t column)465 write32(FileStream *out, uint32_t bitField, uint32_t column) {
466     int32_t i;
467     char bitFieldStr[64]; /* This is more bits than needed for a 32-bit number */
468     char *s = bitFieldStr;
469     uint8_t *ptrIdx = (uint8_t *)&bitField;
470     static const char hexToStr[16] = {
471         '0','1','2','3',
472         '4','5','6','7',
473         '8','9','A','B',
474         'C','D','E','F'
475     };
476 
477     /* write the value, possibly with comma and newline */
478     if(column==MAX_COLUMN) {
479         /* first byte */
480         column=1;
481     } else if(column<32) {
482         *(s++)=',';
483         ++column;
484     } else {
485         *(s++)='\n';
486         uprv_strcpy(s, assemblyHeader[assemblyHeaderIndex].beginLine);
487         s+=uprv_strlen(s);
488         column=1;
489     }
490 
491     if (bitField < 10) {
492         /* It's a small number. Don't waste the space for 0x */
493         *(s++)=hexToStr[bitField];
494     }
495     else {
496         int seenNonZero = 0; /* This is used to remove leading zeros */
497 
498         if(hexType==HEX_0X) {
499          *(s++)='0';
500          *(s++)='x';
501         } else if(hexType==HEX_0H) {
502          *(s++)='0';
503         }
504 
505         /* This creates a 32-bit field */
506 #if U_IS_BIG_ENDIAN
507         for (i = 0; i < sizeof(uint32_t); i++)
508 #else
509         for (i = sizeof(uint32_t)-1; i >= 0 ; i--)
510 #endif
511         {
512             uint8_t value = ptrIdx[i];
513             if (value || seenNonZero) {
514                 *(s++)=hexToStr[value>>4];
515                 *(s++)=hexToStr[value&0xF];
516                 seenNonZero = 1;
517             }
518         }
519         if(hexType==HEX_0H) {
520          *(s++)='h';
521         }
522     }
523 
524     *(s++)=0;
525     T_FileStream_writeLine(out, bitFieldStr);
526     return column;
527 }
528 
529 static uint32_t
write8(FileStream * out,uint8_t byte,uint32_t column)530 write8(FileStream *out, uint8_t byte, uint32_t column) {
531     char s[4];
532     int i=0;
533 
534     /* convert the byte value to a string */
535     if(byte>=100) {
536         s[i++]=(char)('0'+byte/100);
537         byte%=100;
538     }
539     if(i>0 || byte>=10) {
540         s[i++]=(char)('0'+byte/10);
541         byte%=10;
542     }
543     s[i++]=(char)('0'+byte);
544     s[i]=0;
545 
546     /* write the value, possibly with comma and newline */
547     if(column==MAX_COLUMN) {
548         /* first byte */
549         column=1;
550     } else if(column<16) {
551         T_FileStream_writeLine(out, ",");
552         ++column;
553     } else {
554         T_FileStream_writeLine(out, ",\n");
555         column=1;
556     }
557     T_FileStream_writeLine(out, s);
558     return column;
559 }
560 
561 #if U_PLATFORM == U_PF_OS400
562 static uint32_t
write8str(FileStream * out,uint8_t byte,uint32_t column)563 write8str(FileStream *out, uint8_t byte, uint32_t column) {
564     char s[8];
565 
566     if (byte > 7)
567         sprintf(s, "\\x%X", byte);
568     else
569         sprintf(s, "\\%X", byte);
570 
571     /* write the value, possibly with comma and newline */
572     if(column==MAX_COLUMN) {
573         /* first byte */
574         column=1;
575         T_FileStream_writeLine(out, "\"");
576     } else if(column<24) {
577         ++column;
578     } else {
579         T_FileStream_writeLine(out, "\"\n\"");
580         column=1;
581     }
582     T_FileStream_writeLine(out, s);
583     return column;
584 }
585 #endif
586 
587 static void
getOutFilename(const char * inFilename,const char * destdir,char * outFilename,char * entryName,const char * newSuffix,const char * optFilename)588 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename) {
589     const char *basename=findBasename(inFilename), *suffix=uprv_strrchr(basename, '.');
590 
591     /* copy path */
592     if(destdir!=NULL && *destdir!=0) {
593         do {
594             *outFilename++=*destdir++;
595         } while(*destdir!=0);
596         if(*(outFilename-1)!=U_FILE_SEP_CHAR) {
597             *outFilename++=U_FILE_SEP_CHAR;
598         }
599         inFilename=basename;
600     } else {
601         while(inFilename<basename) {
602             *outFilename++=*inFilename++;
603         }
604     }
605 
606     if(suffix==NULL) {
607         /* the filename does not have a suffix */
608         uprv_strcpy(entryName, inFilename);
609         if(optFilename != NULL) {
610           uprv_strcpy(outFilename, optFilename);
611         } else {
612           uprv_strcpy(outFilename, inFilename);
613         }
614         uprv_strcat(outFilename, newSuffix);
615     } else {
616         char *saveOutFilename = outFilename;
617         /* copy basename */
618         while(inFilename<suffix) {
619             if(*inFilename=='-') {
620                 /* iSeries cannot have '-' in the .o objects. */
621                 *outFilename++=*entryName++='_';
622                 inFilename++;
623             }
624             else {
625                 *outFilename++=*entryName++=*inFilename++;
626             }
627         }
628 
629         /* replace '.' by '_' */
630         *outFilename++=*entryName++='_';
631         ++inFilename;
632 
633         /* copy suffix */
634         while(*inFilename!=0) {
635             *outFilename++=*entryName++=*inFilename++;
636         }
637 
638         *entryName=0;
639 
640         if(optFilename != NULL) {
641             uprv_strcpy(saveOutFilename, optFilename);
642             uprv_strcat(saveOutFilename, newSuffix);
643         } else {
644             /* add ".c" */
645             uprv_strcpy(outFilename, newSuffix);
646         }
647     }
648 }
649 
650 #ifdef CAN_GENERATE_OBJECTS
651 static void
getArchitecture(uint16_t * pCPU,uint16_t * pBits,UBool * pIsBigEndian,const char * optMatchArch)652 getArchitecture(uint16_t *pCPU, uint16_t *pBits, UBool *pIsBigEndian, const char *optMatchArch) {
653     union {
654         char        bytes[2048];
655 #ifdef U_ELF
656         Elf32_Ehdr  header32;
657         /* Elf32_Ehdr and ELF64_Ehdr are identical for the necessary fields. */
658 #elif U_PLATFORM_HAS_WIN32_API
659         IMAGE_FILE_HEADER header;
660 #endif
661     } buffer;
662 
663     const char *filename;
664     FileStream *in;
665     int32_t length;
666 
667 #ifdef U_ELF
668 
669 #elif U_PLATFORM_HAS_WIN32_API
670     const IMAGE_FILE_HEADER *pHeader;
671 #else
672 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
673 #endif
674 
675     if(optMatchArch != NULL) {
676         filename=optMatchArch;
677     } else {
678         /* set defaults */
679 #ifdef U_ELF
680         /* set EM_386 because elf.h does not provide better defaults */
681         *pCPU=EM_386;
682         *pBits=32;
683         *pIsBigEndian=(UBool)(U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB);
684 #elif U_PLATFORM_HAS_WIN32_API
685 /* _M_IA64 should be defined in windows.h */
686 #   if defined(_M_IA64)
687         *pCPU=IMAGE_FILE_MACHINE_IA64;
688 #   elif defined(_M_AMD64)
689         *pCPU=IMAGE_FILE_MACHINE_AMD64;
690 #   else
691         *pCPU=IMAGE_FILE_MACHINE_I386;
692 #   endif
693         *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
694         *pIsBigEndian=FALSE;
695 #else
696 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
697 #endif
698         return;
699     }
700 
701     in=T_FileStream_open(filename, "rb");
702     if(in==NULL) {
703         fprintf(stderr, "genccode: unable to open match-arch file %s\n", filename);
704         exit(U_FILE_ACCESS_ERROR);
705     }
706     length=T_FileStream_read(in, buffer.bytes, sizeof(buffer.bytes));
707 
708 #ifdef U_ELF
709     if(length<sizeof(Elf32_Ehdr)) {
710         fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
711         exit(U_UNSUPPORTED_ERROR);
712     }
713     if(
714         buffer.header32.e_ident[0]!=ELFMAG0 ||
715         buffer.header32.e_ident[1]!=ELFMAG1 ||
716         buffer.header32.e_ident[2]!=ELFMAG2 ||
717         buffer.header32.e_ident[3]!=ELFMAG3 ||
718         buffer.header32.e_ident[EI_CLASS]<ELFCLASS32 || buffer.header32.e_ident[EI_CLASS]>ELFCLASS64
719     ) {
720         fprintf(stderr, "genccode: match-arch file %s is not an ELF object file, or not supported\n", filename);
721         exit(U_UNSUPPORTED_ERROR);
722     }
723 
724     *pBits= buffer.header32.e_ident[EI_CLASS]==ELFCLASS32 ? 32 : 64; /* only 32 or 64: see check above */
725 #ifdef U_ELF64
726     if(*pBits!=32 && *pBits!=64) {
727         fprintf(stderr, "genccode: currently only supports 32-bit and 64-bit ELF format\n");
728         exit(U_UNSUPPORTED_ERROR);
729     }
730 #else
731     if(*pBits!=32) {
732         fprintf(stderr, "genccode: built with elf.h missing 64-bit definitions\n");
733         exit(U_UNSUPPORTED_ERROR);
734     }
735 #endif
736 
737     *pIsBigEndian=(UBool)(buffer.header32.e_ident[EI_DATA]==ELFDATA2MSB);
738     if(*pIsBigEndian!=U_IS_BIG_ENDIAN) {
739         fprintf(stderr, "genccode: currently only same-endianness ELF formats are supported\n");
740         exit(U_UNSUPPORTED_ERROR);
741     }
742     /* TODO: Support byte swapping */
743 
744     *pCPU=buffer.header32.e_machine;
745 #elif U_PLATFORM_HAS_WIN32_API
746     if(length<sizeof(IMAGE_FILE_HEADER)) {
747         fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
748         exit(U_UNSUPPORTED_ERROR);
749     }
750     /* TODO: Use buffer.header.  Keep aliasing legal.  */
751     pHeader=(const IMAGE_FILE_HEADER *)buffer.bytes;
752     *pCPU=pHeader->Machine;
753     /*
754      * The number of bits is implicit with the Machine value.
755      * *pBits is ignored in the calling code, so this need not be precise.
756      */
757     *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
758     /* Windows always runs on little-endian CPUs. */
759     *pIsBigEndian=FALSE;
760 #else
761 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
762 #endif
763 
764     T_FileStream_close(in);
765 }
766 
767 U_CAPI void U_EXPORT2
writeObjectCode(const char * filename,const char * destdir,const char * optEntryPoint,const char * optMatchArch,const char * optFilename,char * outFilePath)768 writeObjectCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optMatchArch, const char *optFilename, char *outFilePath) {
769     /* common variables */
770     char buffer[4096], entry[96]={ 0 };
771     FileStream *in, *out;
772     const char *newSuffix;
773     int32_t i, entryLength, length, size, entryOffset=0, entryLengthOffset=0;
774 
775     uint16_t cpu, bits;
776     UBool makeBigEndian;
777 
778     /* platform-specific variables and initialization code */
779 #ifdef U_ELF
780     /* 32-bit Elf file header */
781     static Elf32_Ehdr header32={
782         {
783             /* e_ident[] */
784             ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
785             ELFCLASS32,
786             U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
787             EV_CURRENT /* EI_VERSION */
788         },
789         ET_REL,
790         EM_386,
791         EV_CURRENT, /* e_version */
792         0, /* e_entry */
793         0, /* e_phoff */
794         (Elf32_Off)sizeof(Elf32_Ehdr), /* e_shoff */
795         0, /* e_flags */
796         (Elf32_Half)sizeof(Elf32_Ehdr), /* eh_size */
797         0, /* e_phentsize */
798         0, /* e_phnum */
799         (Elf32_Half)sizeof(Elf32_Shdr), /* e_shentsize */
800         5, /* e_shnum */
801         2 /* e_shstrndx */
802     };
803 
804     /* 32-bit Elf section header table */
805     static Elf32_Shdr sectionHeaders32[5]={
806         { /* SHN_UNDEF */
807             0
808         },
809         { /* .symtab */
810             1, /* sh_name */
811             SHT_SYMTAB,
812             0, /* sh_flags */
813             0, /* sh_addr */
814             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)), /* sh_offset */
815             (Elf32_Word)(2*sizeof(Elf32_Sym)), /* sh_size */
816             3, /* sh_link=sect hdr index of .strtab */
817             1, /* sh_info=One greater than the symbol table index of the last
818                 * local symbol (with STB_LOCAL). */
819             4, /* sh_addralign */
820             (Elf32_Word)(sizeof(Elf32_Sym)) /* sh_entsize */
821         },
822         { /* .shstrtab */
823             9, /* sh_name */
824             SHT_STRTAB,
825             0, /* sh_flags */
826             0, /* sh_addr */
827             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)), /* sh_offset */
828             40, /* sh_size */
829             0, /* sh_link */
830             0, /* sh_info */
831             1, /* sh_addralign */
832             0 /* sh_entsize */
833         },
834         { /* .strtab */
835             19, /* sh_name */
836             SHT_STRTAB,
837             0, /* sh_flags */
838             0, /* sh_addr */
839             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40), /* sh_offset */
840             (Elf32_Word)sizeof(entry), /* sh_size */
841             0, /* sh_link */
842             0, /* sh_info */
843             1, /* sh_addralign */
844             0 /* sh_entsize */
845         },
846         { /* .rodata */
847             27, /* sh_name */
848             SHT_PROGBITS,
849             SHF_ALLOC, /* sh_flags */
850             0, /* sh_addr */
851             (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40+sizeof(entry)), /* sh_offset */
852             0, /* sh_size */
853             0, /* sh_link */
854             0, /* sh_info */
855             16, /* sh_addralign */
856             0 /* sh_entsize */
857         }
858     };
859 
860     /* symbol table */
861     static Elf32_Sym symbols32[2]={
862         { /* STN_UNDEF */
863             0
864         },
865         { /* data entry point */
866             1, /* st_name */
867             0, /* st_value */
868             0, /* st_size */
869             ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
870             0, /* st_other */
871             4 /* st_shndx=index of related section table entry */
872         }
873     };
874 
875     /* section header string table, with decimal string offsets */
876     static const char sectionStrings[40]=
877         /*  0 */ "\0"
878         /*  1 */ ".symtab\0"
879         /*  9 */ ".shstrtab\0"
880         /* 19 */ ".strtab\0"
881         /* 27 */ ".rodata\0"
882         /* 35 */ "\0\0\0\0"; /* contains terminating NUL */
883         /* 40: padded to multiple of 8 bytes */
884 
885     /*
886      * Use entry[] for the string table which will contain only the
887      * entry point name.
888      * entry[0] must be 0 (NUL)
889      * The entry point name can be up to 38 characters long (sizeof(entry)-2).
890      */
891 
892     /* 16-align .rodata in the .o file, just in case */
893     static const char padding[16]={ 0 };
894     int32_t paddingSize;
895 
896 #ifdef U_ELF64
897     /* 64-bit Elf file header */
898     static Elf64_Ehdr header64={
899         {
900             /* e_ident[] */
901             ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
902             ELFCLASS64,
903             U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
904             EV_CURRENT /* EI_VERSION */
905         },
906         ET_REL,
907         EM_X86_64,
908         EV_CURRENT, /* e_version */
909         0, /* e_entry */
910         0, /* e_phoff */
911         (Elf64_Off)sizeof(Elf64_Ehdr), /* e_shoff */
912         0, /* e_flags */
913         (Elf64_Half)sizeof(Elf64_Ehdr), /* eh_size */
914         0, /* e_phentsize */
915         0, /* e_phnum */
916         (Elf64_Half)sizeof(Elf64_Shdr), /* e_shentsize */
917         5, /* e_shnum */
918         2 /* e_shstrndx */
919     };
920 
921     /* 64-bit Elf section header table */
922     static Elf64_Shdr sectionHeaders64[5]={
923         { /* SHN_UNDEF */
924             0
925         },
926         { /* .symtab */
927             1, /* sh_name */
928             SHT_SYMTAB,
929             0, /* sh_flags */
930             0, /* sh_addr */
931             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)), /* sh_offset */
932             (Elf64_Xword)(2*sizeof(Elf64_Sym)), /* sh_size */
933             3, /* sh_link=sect hdr index of .strtab */
934             1, /* sh_info=One greater than the symbol table index of the last
935                 * local symbol (with STB_LOCAL). */
936             4, /* sh_addralign */
937             (Elf64_Xword)(sizeof(Elf64_Sym)) /* sh_entsize */
938         },
939         { /* .shstrtab */
940             9, /* sh_name */
941             SHT_STRTAB,
942             0, /* sh_flags */
943             0, /* sh_addr */
944             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)), /* sh_offset */
945             40, /* sh_size */
946             0, /* sh_link */
947             0, /* sh_info */
948             1, /* sh_addralign */
949             0 /* sh_entsize */
950         },
951         { /* .strtab */
952             19, /* sh_name */
953             SHT_STRTAB,
954             0, /* sh_flags */
955             0, /* sh_addr */
956             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40), /* sh_offset */
957             (Elf64_Xword)sizeof(entry), /* sh_size */
958             0, /* sh_link */
959             0, /* sh_info */
960             1, /* sh_addralign */
961             0 /* sh_entsize */
962         },
963         { /* .rodata */
964             27, /* sh_name */
965             SHT_PROGBITS,
966             SHF_ALLOC, /* sh_flags */
967             0, /* sh_addr */
968             (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40+sizeof(entry)), /* sh_offset */
969             0, /* sh_size */
970             0, /* sh_link */
971             0, /* sh_info */
972             16, /* sh_addralign */
973             0 /* sh_entsize */
974         }
975     };
976 
977     /*
978      * 64-bit symbol table
979      * careful: different order of items compared with Elf32_sym!
980      */
981     static Elf64_Sym symbols64[2]={
982         { /* STN_UNDEF */
983             0
984         },
985         { /* data entry point */
986             1, /* st_name */
987             ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
988             0, /* st_other */
989             4, /* st_shndx=index of related section table entry */
990             0, /* st_value */
991             0 /* st_size */
992         }
993     };
994 
995 #endif /* U_ELF64 */
996 
    /* entry[] has a leading NUL */
998     entryOffset=1;
999 
1000     /* in the common code, count entryLength from after the NUL */
1001     entryLengthOffset=1;
1002 
1003     newSuffix=".o";
1004 
1005 #elif U_PLATFORM_HAS_WIN32_API
1006     struct {
1007         IMAGE_FILE_HEADER fileHeader;
1008         IMAGE_SECTION_HEADER sections[2];
1009         char linkerOptions[100];
1010     } objHeader;
1011     IMAGE_SYMBOL symbols[1];
1012     struct {
1013         DWORD sizeofLongNames;
1014         char longNames[100];
1015     } symbolNames;
1016 
1017     /*
1018      * entry sometimes have a leading '_'
1019      * overwritten if entryOffset==0 depending on the target platform
1020      * see check for cpu below
1021      */
1022     entry[0]='_';
1023 
1024     newSuffix=".obj";
1025 #else
1026 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
1027 #endif
1028 
1029     /* deal with options, files and the entry point name */
1030     getArchitecture(&cpu, &bits, &makeBigEndian, optMatchArch);
1031     printf("genccode: --match-arch cpu=%hu bits=%hu big-endian=%d\n", cpu, bits, makeBigEndian);
1032 #if U_PLATFORM_HAS_WIN32_API
1033     if(cpu==IMAGE_FILE_MACHINE_I386) {
1034         entryOffset=1;
1035     }
1036 #endif
1037 
1038     in=T_FileStream_open(filename, "rb");
1039     if(in==NULL) {
1040         fprintf(stderr, "genccode: unable to open input file %s\n", filename);
1041         exit(U_FILE_ACCESS_ERROR);
1042     }
1043     size=T_FileStream_size(in);
1044 
1045     getOutFilename(filename, destdir, buffer, entry+entryOffset, newSuffix, optFilename);
1046     if (outFilePath != NULL) {
1047         uprv_strcpy(outFilePath, buffer);
1048     }
1049 
1050     if(optEntryPoint != NULL) {
1051         uprv_strcpy(entry+entryOffset, optEntryPoint);
1052         uprv_strcat(entry+entryOffset, "_dat");
1053     }
1054     /* turn dashes in the entry name into underscores */
1055     entryLength=(int32_t)uprv_strlen(entry+entryLengthOffset);
1056     for(i=0; i<entryLength; ++i) {
1057         if(entry[entryLengthOffset+i]=='-') {
1058             entry[entryLengthOffset+i]='_';
1059         }
1060     }
1061 
1062     /* open the output file */
1063     out=T_FileStream_open(buffer, "wb");
1064     if(out==NULL) {
1065         fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
1066         exit(U_FILE_ACCESS_ERROR);
1067     }
1068 
1069 #ifdef U_ELF
1070     if(bits==32) {
1071         header32.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1072         header32.e_machine=cpu;
1073 
1074         /* 16-align .rodata in the .o file, just in case */
1075         paddingSize=sectionHeaders32[4].sh_offset & 0xf;
1076         if(paddingSize!=0) {
1077                 paddingSize=0x10-paddingSize;
1078                 sectionHeaders32[4].sh_offset+=paddingSize;
1079         }
1080 
1081         sectionHeaders32[4].sh_size=(Elf32_Word)size;
1082 
1083         symbols32[1].st_size=(Elf32_Word)size;
1084 
1085         /* write .o headers */
1086         T_FileStream_write(out, &header32, (int32_t)sizeof(header32));
1087         T_FileStream_write(out, sectionHeaders32, (int32_t)sizeof(sectionHeaders32));
1088         T_FileStream_write(out, symbols32, (int32_t)sizeof(symbols32));
1089     } else /* bits==64 */ {
1090 #ifdef U_ELF64
1091         header64.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1092         header64.e_machine=cpu;
1093 
1094         /* 16-align .rodata in the .o file, just in case */
1095         paddingSize=sectionHeaders64[4].sh_offset & 0xf;
1096         if(paddingSize!=0) {
1097                 paddingSize=0x10-paddingSize;
1098                 sectionHeaders64[4].sh_offset+=paddingSize;
1099         }
1100 
1101         sectionHeaders64[4].sh_size=(Elf64_Xword)size;
1102 
1103         symbols64[1].st_size=(Elf64_Xword)size;
1104 
1105         /* write .o headers */
1106         T_FileStream_write(out, &header64, (int32_t)sizeof(header64));
1107         T_FileStream_write(out, sectionHeaders64, (int32_t)sizeof(sectionHeaders64));
1108         T_FileStream_write(out, symbols64, (int32_t)sizeof(symbols64));
1109 #endif
1110     }
1111 
1112     T_FileStream_write(out, sectionStrings, (int32_t)sizeof(sectionStrings));
1113     T_FileStream_write(out, entry, (int32_t)sizeof(entry));
1114     if(paddingSize!=0) {
1115         T_FileStream_write(out, padding, paddingSize);
1116     }
1117 #elif U_PLATFORM_HAS_WIN32_API
1118     /* populate the .obj headers */
1119     uprv_memset(&objHeader, 0, sizeof(objHeader));
1120     uprv_memset(&symbols, 0, sizeof(symbols));
1121     uprv_memset(&symbolNames, 0, sizeof(symbolNames));
1122 
1123     /* write the linker export directive */
1124     uprv_strcpy(objHeader.linkerOptions, "-export:");
1125     length=8;
1126     uprv_strcpy(objHeader.linkerOptions+length, entry);
1127     length+=entryLength;
1128     uprv_strcpy(objHeader.linkerOptions+length, ",data ");
1129     length+=6;
1130 
1131     /* set the file header */
1132     objHeader.fileHeader.Machine=cpu;
1133     objHeader.fileHeader.NumberOfSections=2;
1134     objHeader.fileHeader.TimeDateStamp=(DWORD)time(NULL);
1135     objHeader.fileHeader.PointerToSymbolTable=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length+size; /* start of symbol table */
1136     objHeader.fileHeader.NumberOfSymbols=1;
1137 
1138     /* set the section for the linker options */
1139     uprv_strncpy((char *)objHeader.sections[0].Name, ".drectve", 8);
1140     objHeader.sections[0].SizeOfRawData=length;
1141     objHeader.sections[0].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER;
1142     objHeader.sections[0].Characteristics=IMAGE_SCN_LNK_INFO|IMAGE_SCN_LNK_REMOVE|IMAGE_SCN_ALIGN_1BYTES;
1143 
1144     /* set the data section */
1145     uprv_strncpy((char *)objHeader.sections[1].Name, ".rdata", 6);
1146     objHeader.sections[1].SizeOfRawData=size;
1147     objHeader.sections[1].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length;
1148     objHeader.sections[1].Characteristics=IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_ALIGN_16BYTES|IMAGE_SCN_MEM_READ;
1149 
1150     /* set the symbol table */
1151     if(entryLength<=8) {
1152         uprv_strncpy((char *)symbols[0].N.ShortName, entry, entryLength);
1153         symbolNames.sizeofLongNames=4;
1154     } else {
1155         symbols[0].N.Name.Short=0;
1156         symbols[0].N.Name.Long=4;
1157         symbolNames.sizeofLongNames=4+entryLength+1;
1158         uprv_strcpy(symbolNames.longNames, entry);
1159     }
1160     symbols[0].SectionNumber=2;
1161     symbols[0].StorageClass=IMAGE_SYM_CLASS_EXTERNAL;
1162 
1163     /* write the file header and the linker options section */
1164     T_FileStream_write(out, &objHeader, objHeader.sections[1].PointerToRawData);
1165 #else
1166 #   error "Unknown platform for CAN_GENERATE_OBJECTS."
1167 #endif
1168 
1169     /* copy the data file into section 2 */
1170     for(;;) {
1171         length=T_FileStream_read(in, buffer, sizeof(buffer));
1172         if(length==0) {
1173             break;
1174         }
1175         T_FileStream_write(out, buffer, (int32_t)length);
1176     }
1177 
1178 #if U_PLATFORM_HAS_WIN32_API
1179     /* write the symbol table */
1180     T_FileStream_write(out, symbols, IMAGE_SIZEOF_SYMBOL);
1181     T_FileStream_write(out, &symbolNames, symbolNames.sizeofLongNames);
1182 #endif
1183 
1184     if(T_FileStream_error(in)) {
1185         fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
1186         exit(U_FILE_ACCESS_ERROR);
1187     }
1188 
1189     if(T_FileStream_error(out)) {
1190         fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
1191         exit(U_FILE_ACCESS_ERROR);
1192     }
1193 
1194     T_FileStream_close(out);
1195     T_FileStream_close(in);
1196 }
1197 #endif
1198