1 /******************************************************************************
2 * Copyright (C) 2009-2015, International Business Machines
3 * Corporation and others. All Rights Reserved.
4 *******************************************************************************
5 */
6 #include "unicode/utypes.h"
7
8 #if U_PLATFORM_HAS_WIN32_API
9 # define VC_EXTRALEAN
10 # define WIN32_LEAN_AND_MEAN
11 # define NOUSER
12 # define NOSERVICE
13 # define NOIME
14 # define NOMCX
15 #include <windows.h>
16 #include <time.h>
17 # ifdef __GNUC__
18 # define WINDOWS_WITH_GNUC
19 # endif
20 #endif
21
22 #if U_PLATFORM_IS_LINUX_BASED && U_HAVE_ELF_H
23 # define U_ELF
24 #endif
25
26 #ifdef U_ELF
27 # include <elf.h>
28 # if defined(ELFCLASS64)
29 # define U_ELF64
30 # endif
31 /* Old elf.h headers may not have EM_X86_64, or have EM_X8664 instead. */
32 # ifndef EM_X86_64
33 # define EM_X86_64 62
34 # endif
35 # define ICU_ENTRY_OFFSET 0
36 #endif
37
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include "unicode/putil.h"
41 #include "cmemory.h"
42 #include "cstring.h"
43 #include "filestrm.h"
44 #include "toolutil.h"
45 #include "unicode/uclean.h"
46 #include "uoptions.h"
47 #include "pkg_genc.h"
48
49 #define MAX_COLUMN ((uint32_t)(0xFFFFFFFFU))
50
51 #define HEX_0X 0 /* 0x1234 */
52 #define HEX_0H 1 /* 01234h */
53
54 /* prototypes --------------------------------------------------------------- */
55 static void
56 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename);
57
58 static uint32_t
59 write8(FileStream *out, uint8_t byte, uint32_t column);
60
61 static uint32_t
62 write32(FileStream *out, uint32_t byte, uint32_t column);
63
64 #if U_PLATFORM == U_PF_OS400
65 static uint32_t
66 write8str(FileStream *out, uint8_t byte, uint32_t column);
67 #endif
68 /* -------------------------------------------------------------------------- */
69
70 /*
71 Creating Template Files for New Platforms
72
73 Let the cc compiler help you get started.
74 Compile this program
75 const unsigned int x[5] = {1, 2, 0xdeadbeef, 0xffffffff, 16};
76 with the -S option to produce assembly output.
77
78 For example, this will generate array.s:
79 gcc -S array.c
80
81 This will produce a .s file that may look like this:
82
83 .file "array.c"
84 .version "01.01"
85 gcc2_compiled.:
86 .globl x
87 .section .rodata
88 .align 4
89 .type x,@object
90 .size x,20
91 x:
92 .long 1
93 .long 2
94 .long -559038737
95 .long -1
96 .long 16
97 .ident "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)"
98
99 which gives a starting point that will compile, and can be transformed
100 to become the template, generally with some consulting of as docs and
101 some experimentation.
102
103 If you want ICU to automatically use this assembly, you should
104 specify "GENCCODE_ASSEMBLY=-a name" in the specific config/mh-* file,
105 where the name is the compiler or platform that you used in this
106 assemblyHeader data structure.
107 */
108 static const struct AssemblyType {
109 const char *name;
110 const char *header;
111 const char *beginLine;
112 const char *footer;
113 int8_t hexType; /* HEX_0X or HEX_0h */
114 } assemblyHeader[] = {
115 /* For gcc assemblers, the meaning of .align changes depending on the */
116 /* hardware, so we use .balign 16 which always means 16 bytes. */
117 /* https://sourceware.org/binutils/docs/as/Pseudo-Ops.html */
118 {"gcc",
119 ".globl %s\n"
120 "\t.section .note.GNU-stack,\"\",%%progbits\n"
121 "\t.section .rodata\n"
122 "\t.balign 16\n"
123 "#ifdef U_HIDE_DATA_SYMBOL\n"
124 "\t.hidden %s\n"
125 "#endif\n"
126 "\t.type %s,%%object\n"
127 "%s:\n\n",
128
129 ".long ","",HEX_0X
130 },
131 {"gcc-darwin",
132 /*"\t.section __TEXT,__text,regular,pure_instructions\n"
133 "\t.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32\n"*/
134 ".globl _%s\n"
135 "#ifdef U_HIDE_DATA_SYMBOL\n"
136 "\t.private_extern _%s\n"
137 "#endif\n"
138 "\t.data\n"
139 "\t.const\n"
140 "\t.balign 16\n"
141 "_%s:\n\n",
142
143 ".long ","",HEX_0X
144 },
145 {"gcc-cygwin",
146 ".globl _%s\n"
147 "\t.section .rodata\n"
148 "\t.balign 16\n"
149 "_%s:\n\n",
150
151 ".long ","",HEX_0X
152 },
153 {"gcc-mingw64",
154 ".globl %s\n"
155 "\t.section .rodata\n"
156 "\t.balign 16\n"
157 "%s:\n\n",
158
159 ".long ","",HEX_0X
160 },
161 /* 16 bytes alignment. */
162 /* http://docs.oracle.com/cd/E19641-01/802-1947/802-1947.pdf */
163 {"sun",
164 "\t.section \".rodata\"\n"
165 "\t.align 16\n"
166 ".globl %s\n"
167 "%s:\n",
168
169 ".word ","",HEX_0X
170 },
171 /* 16 bytes alignment for sun-x86. */
172 /* http://docs.oracle.com/cd/E19963-01/html/821-1608/eoiyg.html */
173 {"sun-x86",
174 "Drodata.rodata:\n"
175 "\t.type Drodata.rodata,@object\n"
176 "\t.size Drodata.rodata,0\n"
177 "\t.globl %s\n"
178 "\t.align 16\n"
179 "%s:\n",
180
181 ".4byte ","",HEX_0X
182 },
183 /* 1<<4 bit alignment for aix. */
184 /* http://pic.dhe.ibm.com/infocenter/aix/v6r1/index.jsp?topic=%2Fcom.ibm.aix.aixassem%2Fdoc%2Falangref%2Fidalangref_csect_pseudoop.htm */
185 {"xlc",
186 ".globl %s{RO}\n"
187 "\t.toc\n"
188 "%s:\n"
189 "\t.csect %s{RO}, 4\n",
190
191 ".long ","",HEX_0X
192 },
193 {"aCC-ia64",
194 "\t.file \"%s.s\"\n"
195 "\t.type %s,@object\n"
196 "\t.global %s\n"
197 "\t.secalias .abe$0.rodata, \".rodata\"\n"
198 "\t.section .abe$0.rodata = \"a\", \"progbits\"\n"
199 "\t.align 16\n"
200 "%s::\t",
201
202 "data4 ","",HEX_0X
203 },
204 {"aCC-parisc",
205 "\t.SPACE $TEXT$\n"
206 "\t.SUBSPA $LIT$\n"
207 "%s\n"
208 "\t.EXPORT %s\n"
209 "\t.ALIGN 16\n",
210
211 ".WORD ","",HEX_0X
212 },
213 /* align 16 bytes */
214 /* http://msdn.microsoft.com/en-us/library/dwa9fwef.aspx */
215 { "masm",
216 "\tTITLE %s\n"
217 "; generated by genccode\n"
218 ".386\n"
219 ".model flat\n"
220 "\tPUBLIC _%s\n"
221 "ICUDATA_%s\tSEGMENT READONLY PARA PUBLIC FLAT 'DATA'\n"
222 "\tALIGN 16\n"
223 "_%s\tLABEL DWORD\n",
224 "\tDWORD ","\nICUDATA_%s\tENDS\n\tEND\n",HEX_0H
225 }
226 };
227
228 static int32_t assemblyHeaderIndex = -1;
229 static int32_t hexType = HEX_0X;
230
231 U_CAPI UBool U_EXPORT2
checkAssemblyHeaderName(const char * optAssembly)232 checkAssemblyHeaderName(const char* optAssembly) {
233 int32_t idx;
234 assemblyHeaderIndex = -1;
235 for (idx = 0; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
236 if (uprv_strcmp(optAssembly, assemblyHeader[idx].name) == 0) {
237 assemblyHeaderIndex = idx;
238 hexType = assemblyHeader[idx].hexType; /* set the hex type */
239 return TRUE;
240 }
241 }
242
243 return FALSE;
244 }
245
246
247 U_CAPI void U_EXPORT2
printAssemblyHeadersToStdErr(void)248 printAssemblyHeadersToStdErr(void) {
249 int32_t idx;
250 fprintf(stderr, "%s", assemblyHeader[0].name);
251 for (idx = 1; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
252 fprintf(stderr, ", %s", assemblyHeader[idx].name);
253 }
254 fprintf(stderr,
255 ")\n");
256 }
257
258 U_CAPI void U_EXPORT2
writeAssemblyCode(const char * filename,const char * destdir,const char * optEntryPoint,const char * optFilename,char * outFilePath)259 writeAssemblyCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optFilename, char *outFilePath) {
260 uint32_t column = MAX_COLUMN;
261 char entry[64];
262 uint32_t buffer[1024];
263 char *bufferStr = (char *)buffer;
264 FileStream *in, *out;
265 size_t i, length;
266
267 in=T_FileStream_open(filename, "rb");
268 if(in==NULL) {
269 fprintf(stderr, "genccode: unable to open input file %s\n", filename);
270 exit(U_FILE_ACCESS_ERROR);
271 }
272
273 getOutFilename(filename, destdir, bufferStr, entry, ".S", optFilename);
274 out=T_FileStream_open(bufferStr, "w");
275 if(out==NULL) {
276 fprintf(stderr, "genccode: unable to open output file %s\n", bufferStr);
277 exit(U_FILE_ACCESS_ERROR);
278 }
279
280 if (outFilePath != NULL) {
281 uprv_strcpy(outFilePath, bufferStr);
282 }
283
284 #ifdef WINDOWS_WITH_GNUC
285 /* Need to fix the file seperator character when using MinGW. */
286 swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/');
287 #endif
288
289 if(optEntryPoint != NULL) {
290 uprv_strcpy(entry, optEntryPoint);
291 uprv_strcat(entry, "_dat");
292 }
293
294 /* turn dashes or dots in the entry name into underscores */
295 length=uprv_strlen(entry);
296 for(i=0; i<length; ++i) {
297 if(entry[i]=='-' || entry[i]=='.') {
298 entry[i]='_';
299 }
300 }
301
302 sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].header,
303 entry, entry, entry, entry,
304 entry, entry, entry, entry);
305 T_FileStream_writeLine(out, bufferStr);
306 T_FileStream_writeLine(out, assemblyHeader[assemblyHeaderIndex].beginLine);
307
308 for(;;) {
309 length=T_FileStream_read(in, buffer, sizeof(buffer));
310 if(length==0) {
311 break;
312 }
313 if (length != sizeof(buffer)) {
314 /* pad with extra 0's when at the end of the file */
315 for(i=0; i < (length % sizeof(uint32_t)); ++i) {
316 buffer[length+i] = 0;
317 }
318 }
319 for(i=0; i<(length/sizeof(buffer[0])); i++) {
320 column = write32(out, buffer[i], column);
321 }
322 }
323
324 T_FileStream_writeLine(out, "\n");
325
326 sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].footer,
327 entry, entry, entry, entry,
328 entry, entry, entry, entry);
329 T_FileStream_writeLine(out, bufferStr);
330
331 if(T_FileStream_error(in)) {
332 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
333 exit(U_FILE_ACCESS_ERROR);
334 }
335
336 if(T_FileStream_error(out)) {
337 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
338 exit(U_FILE_ACCESS_ERROR);
339 }
340
341 T_FileStream_close(out);
342 T_FileStream_close(in);
343 }
344
345 U_CAPI void U_EXPORT2
writeCCode(const char * filename,const char * destdir,const char * optName,const char * optFilename,char * outFilePath)346 writeCCode(const char *filename, const char *destdir, const char *optName, const char *optFilename, char *outFilePath) {
347 uint32_t column = MAX_COLUMN;
348 char buffer[4096], entry[64];
349 FileStream *in, *out;
350 size_t i, length;
351
352 in=T_FileStream_open(filename, "rb");
353 if(in==NULL) {
354 fprintf(stderr, "genccode: unable to open input file %s\n", filename);
355 exit(U_FILE_ACCESS_ERROR);
356 }
357
358 if(optName != NULL) { /* prepend 'icudt28_' */
359 strcpy(entry, optName);
360 strcat(entry, "_");
361 } else {
362 entry[0] = 0;
363 }
364
365 getOutFilename(filename, destdir, buffer, entry+uprv_strlen(entry), ".c", optFilename);
366 if (outFilePath != NULL) {
367 uprv_strcpy(outFilePath, buffer);
368 }
369 out=T_FileStream_open(buffer, "w");
370 if(out==NULL) {
371 fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
372 exit(U_FILE_ACCESS_ERROR);
373 }
374
375 /* turn dashes or dots in the entry name into underscores */
376 length=uprv_strlen(entry);
377 for(i=0; i<length; ++i) {
378 if(entry[i]=='-' || entry[i]=='.') {
379 entry[i]='_';
380 }
381 }
382
383 #if U_PLATFORM == U_PF_OS400
384 /*
385 TODO: Fix this once the compiler implements this feature. Keep in sync with udatamem.c
386
387 This is here because this platform can't currently put
388 const data into the read-only pages of an object or
389 shared library (service program). Only strings are allowed in read-only
390 pages, so we use char * strings to store the data.
391
392 In order to prevent the beginning of the data from ever matching the
393 magic numbers we must still use the initial double.
394 [grhoten 4/24/2003]
395 */
396 sprintf(buffer,
397 "#ifndef IN_GENERATED_CCODE\n"
398 "#define IN_GENERATED_CCODE\n"
399 "#define U_DISABLE_RENAMING 1\n"
400 "#include \"unicode/umachine.h\"\n"
401 "#endif\n"
402 "U_CDECL_BEGIN\n"
403 "const struct {\n"
404 " double bogus;\n"
405 " const char *bytes; \n"
406 "} %s={ 0.0, \n",
407 entry);
408 T_FileStream_writeLine(out, buffer);
409
410 for(;;) {
411 length=T_FileStream_read(in, buffer, sizeof(buffer));
412 if(length==0) {
413 break;
414 }
415 for(i=0; i<length; ++i) {
416 column = write8str(out, (uint8_t)buffer[i], column);
417 }
418 }
419
420 T_FileStream_writeLine(out, "\"\n};\nU_CDECL_END\n");
421 #else
422 /* Function renaming shouldn't be done in data */
423 sprintf(buffer,
424 "#ifndef IN_GENERATED_CCODE\n"
425 "#define IN_GENERATED_CCODE\n"
426 "#define U_DISABLE_RENAMING 1\n"
427 "#include \"unicode/umachine.h\"\n"
428 "#endif\n"
429 "U_CDECL_BEGIN\n"
430 "const struct {\n"
431 " double bogus;\n"
432 " uint8_t bytes[%ld]; \n"
433 "} %s={ 0.0, {\n",
434 (long)T_FileStream_size(in), entry);
435 T_FileStream_writeLine(out, buffer);
436
437 for(;;) {
438 length=T_FileStream_read(in, buffer, sizeof(buffer));
439 if(length==0) {
440 break;
441 }
442 for(i=0; i<length; ++i) {
443 column = write8(out, (uint8_t)buffer[i], column);
444 }
445 }
446
447 T_FileStream_writeLine(out, "\n}\n};\nU_CDECL_END\n");
448 #endif
449
450 if(T_FileStream_error(in)) {
451 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
452 exit(U_FILE_ACCESS_ERROR);
453 }
454
455 if(T_FileStream_error(out)) {
456 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
457 exit(U_FILE_ACCESS_ERROR);
458 }
459
460 T_FileStream_close(out);
461 T_FileStream_close(in);
462 }
463
464 static uint32_t
write32(FileStream * out,uint32_t bitField,uint32_t column)465 write32(FileStream *out, uint32_t bitField, uint32_t column) {
466 int32_t i;
467 char bitFieldStr[64]; /* This is more bits than needed for a 32-bit number */
468 char *s = bitFieldStr;
469 uint8_t *ptrIdx = (uint8_t *)&bitField;
470 static const char hexToStr[16] = {
471 '0','1','2','3',
472 '4','5','6','7',
473 '8','9','A','B',
474 'C','D','E','F'
475 };
476
477 /* write the value, possibly with comma and newline */
478 if(column==MAX_COLUMN) {
479 /* first byte */
480 column=1;
481 } else if(column<32) {
482 *(s++)=',';
483 ++column;
484 } else {
485 *(s++)='\n';
486 uprv_strcpy(s, assemblyHeader[assemblyHeaderIndex].beginLine);
487 s+=uprv_strlen(s);
488 column=1;
489 }
490
491 if (bitField < 10) {
492 /* It's a small number. Don't waste the space for 0x */
493 *(s++)=hexToStr[bitField];
494 }
495 else {
496 int seenNonZero = 0; /* This is used to remove leading zeros */
497
498 if(hexType==HEX_0X) {
499 *(s++)='0';
500 *(s++)='x';
501 } else if(hexType==HEX_0H) {
502 *(s++)='0';
503 }
504
505 /* This creates a 32-bit field */
506 #if U_IS_BIG_ENDIAN
507 for (i = 0; i < sizeof(uint32_t); i++)
508 #else
509 for (i = sizeof(uint32_t)-1; i >= 0 ; i--)
510 #endif
511 {
512 uint8_t value = ptrIdx[i];
513 if (value || seenNonZero) {
514 *(s++)=hexToStr[value>>4];
515 *(s++)=hexToStr[value&0xF];
516 seenNonZero = 1;
517 }
518 }
519 if(hexType==HEX_0H) {
520 *(s++)='h';
521 }
522 }
523
524 *(s++)=0;
525 T_FileStream_writeLine(out, bitFieldStr);
526 return column;
527 }
528
529 static uint32_t
write8(FileStream * out,uint8_t byte,uint32_t column)530 write8(FileStream *out, uint8_t byte, uint32_t column) {
531 char s[4];
532 int i=0;
533
534 /* convert the byte value to a string */
535 if(byte>=100) {
536 s[i++]=(char)('0'+byte/100);
537 byte%=100;
538 }
539 if(i>0 || byte>=10) {
540 s[i++]=(char)('0'+byte/10);
541 byte%=10;
542 }
543 s[i++]=(char)('0'+byte);
544 s[i]=0;
545
546 /* write the value, possibly with comma and newline */
547 if(column==MAX_COLUMN) {
548 /* first byte */
549 column=1;
550 } else if(column<16) {
551 T_FileStream_writeLine(out, ",");
552 ++column;
553 } else {
554 T_FileStream_writeLine(out, ",\n");
555 column=1;
556 }
557 T_FileStream_writeLine(out, s);
558 return column;
559 }
560
561 #if U_PLATFORM == U_PF_OS400
562 static uint32_t
write8str(FileStream * out,uint8_t byte,uint32_t column)563 write8str(FileStream *out, uint8_t byte, uint32_t column) {
564 char s[8];
565
566 if (byte > 7)
567 sprintf(s, "\\x%X", byte);
568 else
569 sprintf(s, "\\%X", byte);
570
571 /* write the value, possibly with comma and newline */
572 if(column==MAX_COLUMN) {
573 /* first byte */
574 column=1;
575 T_FileStream_writeLine(out, "\"");
576 } else if(column<24) {
577 ++column;
578 } else {
579 T_FileStream_writeLine(out, "\"\n\"");
580 column=1;
581 }
582 T_FileStream_writeLine(out, s);
583 return column;
584 }
585 #endif
586
587 static void
getOutFilename(const char * inFilename,const char * destdir,char * outFilename,char * entryName,const char * newSuffix,const char * optFilename)588 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename) {
589 const char *basename=findBasename(inFilename), *suffix=uprv_strrchr(basename, '.');
590
591 /* copy path */
592 if(destdir!=NULL && *destdir!=0) {
593 do {
594 *outFilename++=*destdir++;
595 } while(*destdir!=0);
596 if(*(outFilename-1)!=U_FILE_SEP_CHAR) {
597 *outFilename++=U_FILE_SEP_CHAR;
598 }
599 inFilename=basename;
600 } else {
601 while(inFilename<basename) {
602 *outFilename++=*inFilename++;
603 }
604 }
605
606 if(suffix==NULL) {
607 /* the filename does not have a suffix */
608 uprv_strcpy(entryName, inFilename);
609 if(optFilename != NULL) {
610 uprv_strcpy(outFilename, optFilename);
611 } else {
612 uprv_strcpy(outFilename, inFilename);
613 }
614 uprv_strcat(outFilename, newSuffix);
615 } else {
616 char *saveOutFilename = outFilename;
617 /* copy basename */
618 while(inFilename<suffix) {
619 if(*inFilename=='-') {
620 /* iSeries cannot have '-' in the .o objects. */
621 *outFilename++=*entryName++='_';
622 inFilename++;
623 }
624 else {
625 *outFilename++=*entryName++=*inFilename++;
626 }
627 }
628
629 /* replace '.' by '_' */
630 *outFilename++=*entryName++='_';
631 ++inFilename;
632
633 /* copy suffix */
634 while(*inFilename!=0) {
635 *outFilename++=*entryName++=*inFilename++;
636 }
637
638 *entryName=0;
639
640 if(optFilename != NULL) {
641 uprv_strcpy(saveOutFilename, optFilename);
642 uprv_strcat(saveOutFilename, newSuffix);
643 } else {
644 /* add ".c" */
645 uprv_strcpy(outFilename, newSuffix);
646 }
647 }
648 }
649
650 #ifdef CAN_GENERATE_OBJECTS
651 static void
getArchitecture(uint16_t * pCPU,uint16_t * pBits,UBool * pIsBigEndian,const char * optMatchArch)652 getArchitecture(uint16_t *pCPU, uint16_t *pBits, UBool *pIsBigEndian, const char *optMatchArch) {
653 union {
654 char bytes[2048];
655 #ifdef U_ELF
656 Elf32_Ehdr header32;
657 /* Elf32_Ehdr and ELF64_Ehdr are identical for the necessary fields. */
658 #elif U_PLATFORM_HAS_WIN32_API
659 IMAGE_FILE_HEADER header;
660 #endif
661 } buffer;
662
663 const char *filename;
664 FileStream *in;
665 int32_t length;
666
667 #ifdef U_ELF
668
669 #elif U_PLATFORM_HAS_WIN32_API
670 const IMAGE_FILE_HEADER *pHeader;
671 #else
672 # error "Unknown platform for CAN_GENERATE_OBJECTS."
673 #endif
674
675 if(optMatchArch != NULL) {
676 filename=optMatchArch;
677 } else {
678 /* set defaults */
679 #ifdef U_ELF
680 /* set EM_386 because elf.h does not provide better defaults */
681 *pCPU=EM_386;
682 *pBits=32;
683 *pIsBigEndian=(UBool)(U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB);
684 #elif U_PLATFORM_HAS_WIN32_API
685 /* _M_IA64 should be defined in windows.h */
686 # if defined(_M_IA64)
687 *pCPU=IMAGE_FILE_MACHINE_IA64;
688 # elif defined(_M_AMD64)
689 *pCPU=IMAGE_FILE_MACHINE_AMD64;
690 # else
691 *pCPU=IMAGE_FILE_MACHINE_I386;
692 # endif
693 *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
694 *pIsBigEndian=FALSE;
695 #else
696 # error "Unknown platform for CAN_GENERATE_OBJECTS."
697 #endif
698 return;
699 }
700
701 in=T_FileStream_open(filename, "rb");
702 if(in==NULL) {
703 fprintf(stderr, "genccode: unable to open match-arch file %s\n", filename);
704 exit(U_FILE_ACCESS_ERROR);
705 }
706 length=T_FileStream_read(in, buffer.bytes, sizeof(buffer.bytes));
707
708 #ifdef U_ELF
709 if(length<sizeof(Elf32_Ehdr)) {
710 fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
711 exit(U_UNSUPPORTED_ERROR);
712 }
713 if(
714 buffer.header32.e_ident[0]!=ELFMAG0 ||
715 buffer.header32.e_ident[1]!=ELFMAG1 ||
716 buffer.header32.e_ident[2]!=ELFMAG2 ||
717 buffer.header32.e_ident[3]!=ELFMAG3 ||
718 buffer.header32.e_ident[EI_CLASS]<ELFCLASS32 || buffer.header32.e_ident[EI_CLASS]>ELFCLASS64
719 ) {
720 fprintf(stderr, "genccode: match-arch file %s is not an ELF object file, or not supported\n", filename);
721 exit(U_UNSUPPORTED_ERROR);
722 }
723
724 *pBits= buffer.header32.e_ident[EI_CLASS]==ELFCLASS32 ? 32 : 64; /* only 32 or 64: see check above */
725 #ifdef U_ELF64
726 if(*pBits!=32 && *pBits!=64) {
727 fprintf(stderr, "genccode: currently only supports 32-bit and 64-bit ELF format\n");
728 exit(U_UNSUPPORTED_ERROR);
729 }
730 #else
731 if(*pBits!=32) {
732 fprintf(stderr, "genccode: built with elf.h missing 64-bit definitions\n");
733 exit(U_UNSUPPORTED_ERROR);
734 }
735 #endif
736
737 *pIsBigEndian=(UBool)(buffer.header32.e_ident[EI_DATA]==ELFDATA2MSB);
738 if(*pIsBigEndian!=U_IS_BIG_ENDIAN) {
739 fprintf(stderr, "genccode: currently only same-endianness ELF formats are supported\n");
740 exit(U_UNSUPPORTED_ERROR);
741 }
742 /* TODO: Support byte swapping */
743
744 *pCPU=buffer.header32.e_machine;
745 #elif U_PLATFORM_HAS_WIN32_API
746 if(length<sizeof(IMAGE_FILE_HEADER)) {
747 fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
748 exit(U_UNSUPPORTED_ERROR);
749 }
750 /* TODO: Use buffer.header. Keep aliasing legal. */
751 pHeader=(const IMAGE_FILE_HEADER *)buffer.bytes;
752 *pCPU=pHeader->Machine;
753 /*
754 * The number of bits is implicit with the Machine value.
755 * *pBits is ignored in the calling code, so this need not be precise.
756 */
757 *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
758 /* Windows always runs on little-endian CPUs. */
759 *pIsBigEndian=FALSE;
760 #else
761 # error "Unknown platform for CAN_GENERATE_OBJECTS."
762 #endif
763
764 T_FileStream_close(in);
765 }
766
767 U_CAPI void U_EXPORT2
writeObjectCode(const char * filename,const char * destdir,const char * optEntryPoint,const char * optMatchArch,const char * optFilename,char * outFilePath)768 writeObjectCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optMatchArch, const char *optFilename, char *outFilePath) {
769 /* common variables */
770 char buffer[4096], entry[96]={ 0 };
771 FileStream *in, *out;
772 const char *newSuffix;
773 int32_t i, entryLength, length, size, entryOffset=0, entryLengthOffset=0;
774
775 uint16_t cpu, bits;
776 UBool makeBigEndian;
777
778 /* platform-specific variables and initialization code */
779 #ifdef U_ELF
780 /* 32-bit Elf file header */
781 static Elf32_Ehdr header32={
782 {
783 /* e_ident[] */
784 ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
785 ELFCLASS32,
786 U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
787 EV_CURRENT /* EI_VERSION */
788 },
789 ET_REL,
790 EM_386,
791 EV_CURRENT, /* e_version */
792 0, /* e_entry */
793 0, /* e_phoff */
794 (Elf32_Off)sizeof(Elf32_Ehdr), /* e_shoff */
795 0, /* e_flags */
796 (Elf32_Half)sizeof(Elf32_Ehdr), /* eh_size */
797 0, /* e_phentsize */
798 0, /* e_phnum */
799 (Elf32_Half)sizeof(Elf32_Shdr), /* e_shentsize */
800 5, /* e_shnum */
801 2 /* e_shstrndx */
802 };
803
804 /* 32-bit Elf section header table */
805 static Elf32_Shdr sectionHeaders32[5]={
806 { /* SHN_UNDEF */
807 0
808 },
809 { /* .symtab */
810 1, /* sh_name */
811 SHT_SYMTAB,
812 0, /* sh_flags */
813 0, /* sh_addr */
814 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)), /* sh_offset */
815 (Elf32_Word)(2*sizeof(Elf32_Sym)), /* sh_size */
816 3, /* sh_link=sect hdr index of .strtab */
817 1, /* sh_info=One greater than the symbol table index of the last
818 * local symbol (with STB_LOCAL). */
819 4, /* sh_addralign */
820 (Elf32_Word)(sizeof(Elf32_Sym)) /* sh_entsize */
821 },
822 { /* .shstrtab */
823 9, /* sh_name */
824 SHT_STRTAB,
825 0, /* sh_flags */
826 0, /* sh_addr */
827 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)), /* sh_offset */
828 40, /* sh_size */
829 0, /* sh_link */
830 0, /* sh_info */
831 1, /* sh_addralign */
832 0 /* sh_entsize */
833 },
834 { /* .strtab */
835 19, /* sh_name */
836 SHT_STRTAB,
837 0, /* sh_flags */
838 0, /* sh_addr */
839 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40), /* sh_offset */
840 (Elf32_Word)sizeof(entry), /* sh_size */
841 0, /* sh_link */
842 0, /* sh_info */
843 1, /* sh_addralign */
844 0 /* sh_entsize */
845 },
846 { /* .rodata */
847 27, /* sh_name */
848 SHT_PROGBITS,
849 SHF_ALLOC, /* sh_flags */
850 0, /* sh_addr */
851 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40+sizeof(entry)), /* sh_offset */
852 0, /* sh_size */
853 0, /* sh_link */
854 0, /* sh_info */
855 16, /* sh_addralign */
856 0 /* sh_entsize */
857 }
858 };
859
860 /* symbol table */
861 static Elf32_Sym symbols32[2]={
862 { /* STN_UNDEF */
863 0
864 },
865 { /* data entry point */
866 1, /* st_name */
867 0, /* st_value */
868 0, /* st_size */
869 ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
870 0, /* st_other */
871 4 /* st_shndx=index of related section table entry */
872 }
873 };
874
875 /* section header string table, with decimal string offsets */
876 static const char sectionStrings[40]=
877 /* 0 */ "\0"
878 /* 1 */ ".symtab\0"
879 /* 9 */ ".shstrtab\0"
880 /* 19 */ ".strtab\0"
881 /* 27 */ ".rodata\0"
882 /* 35 */ "\0\0\0\0"; /* contains terminating NUL */
883 /* 40: padded to multiple of 8 bytes */
884
885 /*
886 * Use entry[] for the string table which will contain only the
887 * entry point name.
888 * entry[0] must be 0 (NUL)
889 * The entry point name can be up to 38 characters long (sizeof(entry)-2).
890 */
891
892 /* 16-align .rodata in the .o file, just in case */
893 static const char padding[16]={ 0 };
894 int32_t paddingSize;
895
896 #ifdef U_ELF64
897 /* 64-bit Elf file header */
898 static Elf64_Ehdr header64={
899 {
900 /* e_ident[] */
901 ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
902 ELFCLASS64,
903 U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
904 EV_CURRENT /* EI_VERSION */
905 },
906 ET_REL,
907 EM_X86_64,
908 EV_CURRENT, /* e_version */
909 0, /* e_entry */
910 0, /* e_phoff */
911 (Elf64_Off)sizeof(Elf64_Ehdr), /* e_shoff */
912 0, /* e_flags */
913 (Elf64_Half)sizeof(Elf64_Ehdr), /* eh_size */
914 0, /* e_phentsize */
915 0, /* e_phnum */
916 (Elf64_Half)sizeof(Elf64_Shdr), /* e_shentsize */
917 5, /* e_shnum */
918 2 /* e_shstrndx */
919 };
920
921 /* 64-bit Elf section header table */
922 static Elf64_Shdr sectionHeaders64[5]={
923 { /* SHN_UNDEF */
924 0
925 },
926 { /* .symtab */
927 1, /* sh_name */
928 SHT_SYMTAB,
929 0, /* sh_flags */
930 0, /* sh_addr */
931 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)), /* sh_offset */
932 (Elf64_Xword)(2*sizeof(Elf64_Sym)), /* sh_size */
933 3, /* sh_link=sect hdr index of .strtab */
934 1, /* sh_info=One greater than the symbol table index of the last
935 * local symbol (with STB_LOCAL). */
936 4, /* sh_addralign */
937 (Elf64_Xword)(sizeof(Elf64_Sym)) /* sh_entsize */
938 },
939 { /* .shstrtab */
940 9, /* sh_name */
941 SHT_STRTAB,
942 0, /* sh_flags */
943 0, /* sh_addr */
944 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)), /* sh_offset */
945 40, /* sh_size */
946 0, /* sh_link */
947 0, /* sh_info */
948 1, /* sh_addralign */
949 0 /* sh_entsize */
950 },
951 { /* .strtab */
952 19, /* sh_name */
953 SHT_STRTAB,
954 0, /* sh_flags */
955 0, /* sh_addr */
956 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40), /* sh_offset */
957 (Elf64_Xword)sizeof(entry), /* sh_size */
958 0, /* sh_link */
959 0, /* sh_info */
960 1, /* sh_addralign */
961 0 /* sh_entsize */
962 },
963 { /* .rodata */
964 27, /* sh_name */
965 SHT_PROGBITS,
966 SHF_ALLOC, /* sh_flags */
967 0, /* sh_addr */
968 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40+sizeof(entry)), /* sh_offset */
969 0, /* sh_size */
970 0, /* sh_link */
971 0, /* sh_info */
972 16, /* sh_addralign */
973 0 /* sh_entsize */
974 }
975 };
976
977 /*
978 * 64-bit symbol table
979 * careful: different order of items compared with Elf32_sym!
980 */
981 static Elf64_Sym symbols64[2]={
982 { /* STN_UNDEF */
983 0
984 },
985 { /* data entry point */
986 1, /* st_name */
987 ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
988 0, /* st_other */
989 4, /* st_shndx=index of related section table entry */
990 0, /* st_value */
991 0 /* st_size */
992 }
993 };
994
995 #endif /* U_ELF64 */
996
997 /* entry[] have a leading NUL */
998 entryOffset=1;
999
1000 /* in the common code, count entryLength from after the NUL */
1001 entryLengthOffset=1;
1002
1003 newSuffix=".o";
1004
1005 #elif U_PLATFORM_HAS_WIN32_API
1006 struct {
1007 IMAGE_FILE_HEADER fileHeader;
1008 IMAGE_SECTION_HEADER sections[2];
1009 char linkerOptions[100];
1010 } objHeader;
1011 IMAGE_SYMBOL symbols[1];
1012 struct {
1013 DWORD sizeofLongNames;
1014 char longNames[100];
1015 } symbolNames;
1016
1017 /*
1018 * entry sometimes have a leading '_'
1019 * overwritten if entryOffset==0 depending on the target platform
1020 * see check for cpu below
1021 */
1022 entry[0]='_';
1023
1024 newSuffix=".obj";
1025 #else
1026 # error "Unknown platform for CAN_GENERATE_OBJECTS."
1027 #endif
1028
1029 /* deal with options, files and the entry point name */
1030 getArchitecture(&cpu, &bits, &makeBigEndian, optMatchArch);
1031 printf("genccode: --match-arch cpu=%hu bits=%hu big-endian=%d\n", cpu, bits, makeBigEndian);
1032 #if U_PLATFORM_HAS_WIN32_API
1033 if(cpu==IMAGE_FILE_MACHINE_I386) {
1034 entryOffset=1;
1035 }
1036 #endif
1037
1038 in=T_FileStream_open(filename, "rb");
1039 if(in==NULL) {
1040 fprintf(stderr, "genccode: unable to open input file %s\n", filename);
1041 exit(U_FILE_ACCESS_ERROR);
1042 }
1043 size=T_FileStream_size(in);
1044
1045 getOutFilename(filename, destdir, buffer, entry+entryOffset, newSuffix, optFilename);
1046 if (outFilePath != NULL) {
1047 uprv_strcpy(outFilePath, buffer);
1048 }
1049
1050 if(optEntryPoint != NULL) {
1051 uprv_strcpy(entry+entryOffset, optEntryPoint);
1052 uprv_strcat(entry+entryOffset, "_dat");
1053 }
1054 /* turn dashes in the entry name into underscores */
1055 entryLength=(int32_t)uprv_strlen(entry+entryLengthOffset);
1056 for(i=0; i<entryLength; ++i) {
1057 if(entry[entryLengthOffset+i]=='-') {
1058 entry[entryLengthOffset+i]='_';
1059 }
1060 }
1061
1062 /* open the output file */
1063 out=T_FileStream_open(buffer, "wb");
1064 if(out==NULL) {
1065 fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
1066 exit(U_FILE_ACCESS_ERROR);
1067 }
1068
1069 #ifdef U_ELF
1070 if(bits==32) {
1071 header32.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1072 header32.e_machine=cpu;
1073
1074 /* 16-align .rodata in the .o file, just in case */
1075 paddingSize=sectionHeaders32[4].sh_offset & 0xf;
1076 if(paddingSize!=0) {
1077 paddingSize=0x10-paddingSize;
1078 sectionHeaders32[4].sh_offset+=paddingSize;
1079 }
1080
1081 sectionHeaders32[4].sh_size=(Elf32_Word)size;
1082
1083 symbols32[1].st_size=(Elf32_Word)size;
1084
1085 /* write .o headers */
1086 T_FileStream_write(out, &header32, (int32_t)sizeof(header32));
1087 T_FileStream_write(out, sectionHeaders32, (int32_t)sizeof(sectionHeaders32));
1088 T_FileStream_write(out, symbols32, (int32_t)sizeof(symbols32));
1089 } else /* bits==64 */ {
1090 #ifdef U_ELF64
1091 header64.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1092 header64.e_machine=cpu;
1093
1094 /* 16-align .rodata in the .o file, just in case */
1095 paddingSize=sectionHeaders64[4].sh_offset & 0xf;
1096 if(paddingSize!=0) {
1097 paddingSize=0x10-paddingSize;
1098 sectionHeaders64[4].sh_offset+=paddingSize;
1099 }
1100
1101 sectionHeaders64[4].sh_size=(Elf64_Xword)size;
1102
1103 symbols64[1].st_size=(Elf64_Xword)size;
1104
1105 /* write .o headers */
1106 T_FileStream_write(out, &header64, (int32_t)sizeof(header64));
1107 T_FileStream_write(out, sectionHeaders64, (int32_t)sizeof(sectionHeaders64));
1108 T_FileStream_write(out, symbols64, (int32_t)sizeof(symbols64));
1109 #endif
1110 }
1111
1112 T_FileStream_write(out, sectionStrings, (int32_t)sizeof(sectionStrings));
1113 T_FileStream_write(out, entry, (int32_t)sizeof(entry));
1114 if(paddingSize!=0) {
1115 T_FileStream_write(out, padding, paddingSize);
1116 }
1117 #elif U_PLATFORM_HAS_WIN32_API
1118 /* populate the .obj headers */
1119 uprv_memset(&objHeader, 0, sizeof(objHeader));
1120 uprv_memset(&symbols, 0, sizeof(symbols));
1121 uprv_memset(&symbolNames, 0, sizeof(symbolNames));
1122
1123 /* write the linker export directive */
1124 uprv_strcpy(objHeader.linkerOptions, "-export:");
1125 length=8;
1126 uprv_strcpy(objHeader.linkerOptions+length, entry);
1127 length+=entryLength;
1128 uprv_strcpy(objHeader.linkerOptions+length, ",data ");
1129 length+=6;
1130
1131 /* set the file header */
1132 objHeader.fileHeader.Machine=cpu;
1133 objHeader.fileHeader.NumberOfSections=2;
1134 objHeader.fileHeader.TimeDateStamp=(DWORD)time(NULL);
1135 objHeader.fileHeader.PointerToSymbolTable=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length+size; /* start of symbol table */
1136 objHeader.fileHeader.NumberOfSymbols=1;
1137
1138 /* set the section for the linker options */
1139 uprv_strncpy((char *)objHeader.sections[0].Name, ".drectve", 8);
1140 objHeader.sections[0].SizeOfRawData=length;
1141 objHeader.sections[0].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER;
1142 objHeader.sections[0].Characteristics=IMAGE_SCN_LNK_INFO|IMAGE_SCN_LNK_REMOVE|IMAGE_SCN_ALIGN_1BYTES;
1143
1144 /* set the data section */
1145 uprv_strncpy((char *)objHeader.sections[1].Name, ".rdata", 6);
1146 objHeader.sections[1].SizeOfRawData=size;
1147 objHeader.sections[1].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length;
1148 objHeader.sections[1].Characteristics=IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_ALIGN_16BYTES|IMAGE_SCN_MEM_READ;
1149
1150 /* set the symbol table */
1151 if(entryLength<=8) {
1152 uprv_strncpy((char *)symbols[0].N.ShortName, entry, entryLength);
1153 symbolNames.sizeofLongNames=4;
1154 } else {
1155 symbols[0].N.Name.Short=0;
1156 symbols[0].N.Name.Long=4;
1157 symbolNames.sizeofLongNames=4+entryLength+1;
1158 uprv_strcpy(symbolNames.longNames, entry);
1159 }
1160 symbols[0].SectionNumber=2;
1161 symbols[0].StorageClass=IMAGE_SYM_CLASS_EXTERNAL;
1162
1163 /* write the file header and the linker options section */
1164 T_FileStream_write(out, &objHeader, objHeader.sections[1].PointerToRawData);
1165 #else
1166 # error "Unknown platform for CAN_GENERATE_OBJECTS."
1167 #endif
1168
1169 /* copy the data file into section 2 */
1170 for(;;) {
1171 length=T_FileStream_read(in, buffer, sizeof(buffer));
1172 if(length==0) {
1173 break;
1174 }
1175 T_FileStream_write(out, buffer, (int32_t)length);
1176 }
1177
1178 #if U_PLATFORM_HAS_WIN32_API
1179 /* write the symbol table */
1180 T_FileStream_write(out, symbols, IMAGE_SIZEOF_SYMBOL);
1181 T_FileStream_write(out, &symbolNames, symbolNames.sizeofLongNames);
1182 #endif
1183
1184 if(T_FileStream_error(in)) {
1185 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
1186 exit(U_FILE_ACCESS_ERROR);
1187 }
1188
1189 if(T_FileStream_error(out)) {
1190 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
1191 exit(U_FILE_ACCESS_ERROR);
1192 }
1193
1194 T_FileStream_close(out);
1195 T_FileStream_close(in);
1196 }
1197 #endif
1198