1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 
12 #include <stdarg.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <string.h>
16 
17 #include "vpx_config.h"
18 #include "vpx/vpx_integer.h"
19 
/* Output syntax selected on the command line. */
typedef enum {
  OUTPUT_FMT_PLAIN = 0,    /* "name = value" (default when no format is given) */
  OUTPUT_FMT_RVDS = 1,     /* "name EQU value" */
  OUTPUT_FMT_GAS = 2,      /* ".equ name, value" / ".set name, value" */
  OUTPUT_FMT_C_HEADER = 3  /* "#define name value" */
} output_fmt_t;
26 
/* printf-style diagnostic output on stderr.
 *
 * Returns whatever vfprintf() returns: the number of characters written,
 * or a negative value on an output error.
 */
int log_msg(const char *fmt, ...) {
  va_list args;
  int written;

  va_start(args, fmt);
  written = vfprintf(stderr, fmt, args);
  va_end(args);

  return written;
}
35 
36 #if defined(__GNUC__) && __GNUC__
37 
38 #if defined(FORCE_PARSE_ELF)
39 
40 #if defined(__MACH__)
41 #undef __MACH__
42 #endif
43 
44 #if !defined(__ELF__)
45 #define __ELF__
46 #endif
47 #endif
48 
49 #if defined(__MACH__)
50 
51 #include <mach-o/loader.h>
52 #include <mach-o/nlist.h>
53 
/* Prints one symbol/value pair on stdout in the requested assembler or
 * C syntax.
 *
 *   mode  output syntax; OUTPUT_FMT_RVDS, OUTPUT_FMT_GAS and
 *         OUTPUT_FMT_C_HEADER are supported here
 *   name  NUL-terminated symbol name
 *   val   value to associate with the name
 *
 * Returns 0 on success, 1 (after logging) for any other mode.
 */
int print_macho_equ(output_fmt_t mode, uint8_t *name, int val) {
  /* %s expects a char pointer; the name is stored as raw bytes. */
  const char *sym = (const char *)name;

  switch (mode) {
    case OUTPUT_FMT_RVDS:
      printf("%-40s EQU %5d\n", sym, val);
      return 0;
    case OUTPUT_FMT_GAS:
      printf(".set %-40s, %5d\n", sym, val);
      return 0;
    case OUTPUT_FMT_C_HEADER:
      printf("#define %-40s %5d\n", sym, val);
      return 0;
    default:
      /* Terminate the line so repeated messages do not run together. */
      log_msg("Unsupported mode: %d\n", (int)mode);
      return 1;
  }
}
70 
/* Nonzero when the byte range [ofst, ofst + len) lies inside a buffer of
 * sz bytes. Written so that the arithmetic cannot wrap.
 */
static int macho_range_ok(size_t sz, uint64_t ofst, uint64_t len) {
  return ofst <= sz && len <= sz - ofst;
}

/* Reads the int stored n_value bytes past data_ofst and prints it under the
 * NUL-terminated name found at name_ofst. Both locations are validated
 * against the buffer first. Returns 0 on success, 1 on failure.
 */
static int emit_macho_symbol(uint8_t *base_buf, size_t sz, output_fmt_t mode,
                             uint64_t name_ofst, uint64_t data_ofst,
                             uint64_t n_value) {
  int val;

  if (name_ofst >= sz ||
      memchr(base_buf + (size_t)name_ofst, 0,
             sz - (size_t)name_ofst) == NULL ||
      data_ofst > sz ||
      !macho_range_ok(sz - (size_t)data_ofst, n_value, sizeof(val))) {
    log_msg("Symbol name or value lies outside the object file.\n");
    return 1;
  }

  memcpy(&val, base_buf + (size_t)(data_ofst + n_value), sizeof(val));
  return print_macho_equ(mode, base_buf + (size_t)name_ofst, val);
}

/* Walks the load commands of a Mach-O object file held in memory and prints
 * every symbol of its symbol table together with the int found at the
 * symbol's address.
 *
 *   base_buf  start of the file image
 *   sz        size of the image in bytes; every read is checked against it
 *   mode      output syntax handed to print_macho_equ()
 *
 * Returns 0 on success, 1 if the file is not a supported object file, is
 * truncated or inconsistent, or a symbol could not be printed.
 */
int parse_macho(uint8_t *base_buf, size_t sz, output_fmt_t mode) {
  uint32_t i, j;
  struct mach_header header;
  uint64_t ofst = 0; /* file offset of the current load command */
  uint32_t base_data_section = 0;
  int bits = 0;

  /* We can read in mach_header for 32 and 64 bit architectures
   * because it's identical to mach_header_64 except for the last
   * element (uint32_t reserved), which we don't use. Then, when
   * we know which architecture we're looking at, skip the
   * appropriate header size.
   */
  if (!macho_range_ok(sz, 0, sizeof(struct mach_header))) {
    log_msg("File is too small to hold a Mach-O header.\n");
    goto bail;
  }

  memcpy(&header, base_buf, sizeof(struct mach_header));

  if (header.magic == MH_MAGIC) {
    if (header.cputype == CPU_TYPE_ARM
        || header.cputype == CPU_TYPE_X86) {
      bits = 32;
      ofst = sizeof(struct mach_header);
    } else {
      log_msg("Bad cputype for object file. Currently only tested for CPU_TYPE_[ARM|X86].\n");
      goto bail;
    }
  } else if (header.magic == MH_MAGIC_64) {
    if (header.cputype == CPU_TYPE_X86_64) {
      bits = 64;
      ofst = sizeof(struct mach_header_64);
    } else {
      log_msg("Bad cputype for object file. Currently only tested for CPU_TYPE_X86_64.\n");
      goto bail;
    }
  } else {
    log_msg("Bad magic number for object file. 0x%x or 0x%x expected, 0x%x found.\n",
            MH_MAGIC, MH_MAGIC_64, header.magic);
    goto bail;
  }

  if (header.filetype != MH_OBJECT) {
    log_msg("Bad filetype for object file. Currently only tested for MH_OBJECT.\n");
    goto bail;
  }

  for (i = 0; i < header.ncmds; i++) {
    struct load_command lc;
    uint8_t *cmd;

    if (!macho_range_ok(sz, ofst, sizeof(lc)))
      goto corrupt;

    cmd = base_buf + (size_t)ofst;
    memcpy(&lc, cmd, sizeof(lc));

    /* A command smaller than its own header would never advance. */
    if (lc.cmdsize < sizeof(lc))
      goto corrupt;

    if (lc.cmd == LC_SEGMENT) {
      struct section s;
      struct segment_command seg_c;

      if (!macho_range_ok(sz, ofst, sizeof(seg_c)))
        goto corrupt;

      memcpy(&seg_c, cmd, sizeof(seg_c));

      /* Although each section is given its own offset, nlist.n_value
       * references the offset of the first section. This isn't
       * apparent without debug information because the offset of the
       * data section is the same as the first section. However, with
       * debug sections mixed in, the offset of the debug section
       * increases but n_value still references the first section.
       */
      if (seg_c.nsects < 1) {
        log_msg("Not enough sections\n");
        goto bail;
      }

      if (!macho_range_ok(sz, ofst + sizeof(seg_c), sizeof(s)))
        goto corrupt;

      memcpy(&s, cmd + sizeof(seg_c), sizeof(s));
      base_data_section = s.offset;
    } else if (lc.cmd == LC_SEGMENT_64) {
      struct section_64 s;
      struct segment_command_64 seg_c;

      if (!macho_range_ok(sz, ofst, sizeof(seg_c)))
        goto corrupt;

      memcpy(&seg_c, cmd, sizeof(seg_c));

      /* Explanation in LC_SEGMENT */
      if (seg_c.nsects < 1) {
        log_msg("Not enough sections\n");
        goto bail;
      }

      if (!macho_range_ok(sz, ofst + sizeof(seg_c), sizeof(s)))
        goto corrupt;

      memcpy(&s, cmd + sizeof(seg_c), sizeof(s));
      base_data_section = s.offset;
    } else if (lc.cmd == LC_SYMTAB && base_data_section != 0) {
      struct symtab_command sc;
      uint64_t sym_ofst;

      if (!macho_range_ok(sz, ofst, sizeof(sc)))
        goto corrupt;

      memcpy(&sc, cmd, sizeof(sc));

      if (sc.cmdsize != sizeof(struct symtab_command)) {
        log_msg("Can't find symbol table!\n");
        goto bail;
      }

      sym_ofst = sc.symoff;

      for (j = 0; j < sc.nsyms; j++) {
        /* Location of string is calculated each time from the
         * start of the string buffer.  On darwin the symbols
         * are prefixed by "_", so we bump the offset by 1.
         * The target value is defined as an int in *_asm_*_offsets.c,
         * which is 4 bytes on all targets we currently use.
         */
        uint64_t n_strx;
        uint64_t n_value;

        if (bits == 32) {
          struct nlist nl;

          if (!macho_range_ok(sz, sym_ofst, sizeof(nl)))
            goto corrupt;

          memcpy(&nl, base_buf + (size_t)sym_ofst, sizeof(nl));
          sym_ofst += sizeof(nl);
          n_strx = (uint64_t)nl.n_un.n_strx;
          n_value = nl.n_value;
        } else { /* if (bits == 64) */
          struct nlist_64 nl;

          if (!macho_range_ok(sz, sym_ofst, sizeof(nl)))
            goto corrupt;

          memcpy(&nl, base_buf + (size_t)sym_ofst, sizeof(nl));
          sym_ofst += sizeof(nl);
          n_strx = (uint64_t)nl.n_un.n_strx;
          n_value = nl.n_value;
        }

        /* An n_strx too large to be a real offset (including a negative
         * value converted to unsigned) is rejected by this check. */
        if (n_strx > sz)
          goto corrupt;

        if (emit_macho_symbol(base_buf, sz, mode,
                              (uint64_t)sc.stroff + n_strx + 1,
                              base_data_section, n_value))
          goto bail;
      }
    }

    ofst += lc.cmdsize;
  }

  return 0;
corrupt:
  log_msg("Truncated or corrupt Mach-O object file.\n");
bail:
  return 1;
}
213 
214 #elif defined(__ELF__)
215 #include "elf.h"
216 
/* Copies sizeof(*dst) bytes from buf + ofst into dst, jumping to the
 * enclosing function's `bail` label when that range does not fit inside a
 * buffer of sz bytes. The check subtracts from sz instead of adding to ofst
 * so a huge offset cannot wrap around and slip past it.
 * Note: ofst and sz are evaluated more than once.
 */
#define COPY_STRUCT(dst, buf, ofst, sz) do {\
    if ((sz) < sizeof(*(dst)) || (ofst) > (sz) - sizeof(*(dst))) goto bail;\
    memcpy((dst), (buf) + (ofst), sizeof(*(dst)));\
  } while (0)
221 
/* Stores memb into val. Only little-endian data is handled: when the
 * `elf` object in the enclosing scope is not flagged little-endian the
 * macro logs a message and jumps to the enclosing `bail` label instead.
 */
#define ENDIAN_ASSIGN(val, memb) \
  do { \
    if (!elf->le_data) { \
      log_msg("Big Endian data not supported yet!\n"); \
      goto bail; \
    } \
    (val) = (memb); \
  } while (0)

/* Same check as ENDIAN_ASSIGN, applied to a field in place. */
#define ENDIAN_ASSIGN_IN_PLACE(memb) ENDIAN_ASSIGN(memb, memb)
230 
231 typedef struct {
232   uint8_t      *buf; /* Buffer containing ELF data */
233   size_t        sz;  /* Buffer size */
234   int           le_data; /* Data is little-endian */
235   unsigned char e_ident[EI_NIDENT]; /* Magic number and other info */
236   int           bits; /* 32 or 64 */
237   Elf32_Ehdr    hdr32;
238   Elf64_Ehdr    hdr64;
239 } elf_obj_t;
240 
/* Validates the ELF identification bytes at the start of elf->buf and
 * decodes the file header into elf->hdr32 or elf->hdr64.
 *
 * On success elf->bits is 32 or 64 and elf->le_data is set. Only
 * little-endian ELFCLASS32 / ELFCLASS64 images are accepted.
 *
 * Returns 0 on success, 1 (after logging) when the image is too short or
 * fails any of the identification checks.
 */
int parse_elf_header(elf_obj_t *elf) {
  int res;
  /* Verify ELF Magic numbers */
  COPY_STRUCT(&elf->e_ident, elf->buf, 0, elf->sz);
  res = elf->e_ident[EI_MAG0] == ELFMAG0;
  res &= elf->e_ident[EI_MAG1] == ELFMAG1;
  res &= elf->e_ident[EI_MAG2] == ELFMAG2;
  res &= elf->e_ident[EI_MAG3] == ELFMAG3;
  res &= (elf->e_ident[EI_CLASS] == ELFCLASS32
          || elf->e_ident[EI_CLASS] == ELFCLASS64);
  res &= elf->e_ident[EI_DATA] == ELFDATA2LSB;

  if (!res) goto bail;

  elf->le_data = elf->e_ident[EI_DATA] == ELFDATA2LSB;

  /* Read in relevant values. The ENDIAN_ASSIGN_IN_PLACE calls are the
   * hook where byte swapping would go; today they only re-check le_data.
   */
  if (elf->e_ident[EI_CLASS] == ELFCLASS32) {
    elf->bits = 32;
    COPY_STRUCT(&elf->hdr32, elf->buf, 0, elf->sz);

    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_type);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_machine);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_version);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_entry);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_phoff);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_shoff);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_flags);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_ehsize);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_phentsize);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_phnum);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_shentsize);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_shnum);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_shstrndx);
  } else { /* if (elf->e_ident[EI_CLASS] == ELFCLASS64) */
    elf->bits = 64;
    COPY_STRUCT(&elf->hdr64, elf->buf, 0, elf->sz);

    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_type);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_machine);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_version);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_entry);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_phoff);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_shoff);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_flags);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_ehsize);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_phentsize);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_phnum);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_shentsize);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_shnum);
    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_shstrndx);
  }

  return 0;
bail:
  /* Newline-terminated so the caller's own message starts on a new line. */
  log_msg("Failed to parse ELF file header\n");
  return 1;
}
299 
/* Reads section header number idx into *hdr32 (when hdr32 is non-NULL) or
 * otherwise into *hdr64.
 *
 * Returns 0 on success. Returns 1 when idx is negative or not below the
 * header's e_shnum, when the section header does not fit inside the file
 * image, or when both output pointers are NULL.
 */
int parse_elf_section(elf_obj_t *elf, int idx, Elf32_Shdr *hdr32, Elf64_Shdr *hdr64) {
  /* A negative index would pass the "idx >= e_shnum" test below. */
  if (idx < 0)
    goto bail;

  if (hdr32) {
    if (idx >= elf->hdr32.e_shnum)
      goto bail;

    /* Multiply in size_t: idx * e_shentsize can exceed INT_MAX. */
    COPY_STRUCT(hdr32, elf->buf,
                elf->hdr32.e_shoff + (size_t)idx * elf->hdr32.e_shentsize,
                elf->sz);
    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_name);
    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_type);
    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_flags);
    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_addr);
    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_offset);
    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_size);
    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_link);
    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_info);
    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_addralign);
    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_entsize);
  } else if (hdr64) {
    if (idx >= elf->hdr64.e_shnum)
      goto bail;

    COPY_STRUCT(hdr64, elf->buf,
                elf->hdr64.e_shoff + (size_t)idx * elf->hdr64.e_shentsize,
                elf->sz);
    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_name);
    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_type);
    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_flags);
    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_addr);
    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_offset);
    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_size);
    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_link);
    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_info);
    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_addralign);
    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_entsize);
  } else {
    /* Nowhere to store the result. */
    goto bail;
  }

  return 0;
bail:
  return 1;
}
339 
/* Returns the NUL-terminated string stored idx bytes into the string table
 * held by section s_idx.
 *
 * The returned pointer refers into elf->buf. When the section header cannot
 * be read, or the string does not start and terminate inside the file
 * image, a message is logged and an empty string is returned instead.
 */
const char *parse_elf_string_table(elf_obj_t *elf, int s_idx, int idx) {
  uint64_t table_ofst;
  uint64_t room;

  if (elf->bits == 32) {
    Elf32_Shdr shdr;

    if (parse_elf_section(elf, s_idx, &shdr, NULL))
      goto fail;

    table_ofst = shdr.sh_offset;
  } else { /* if (elf->bits == 64) */
    Elf64_Shdr shdr;

    if (parse_elf_section(elf, s_idx, NULL, &shdr))
      goto fail;

    table_ofst = shdr.sh_offset;
  }

  if (idx < 0 || table_ofst > elf->sz)
    goto fail;

  room = elf->sz - table_ofst; /* bytes from the table start to end of file */

  /* The string must begin inside the image and be terminated inside it,
   * otherwise printing it would read past the end of the buffer.
   */
  if ((uint64_t)idx >= room ||
      memchr(elf->buf + (size_t)table_ofst + idx, 0,
             (size_t)(room - (uint64_t)idx)) == NULL)
    goto fail;

  return (const char *)(elf->buf + (size_t)table_ofst + idx);

fail:
  log_msg("Failed to parse ELF string table: section %d, index %d\n",
          s_idx, idx);
  return "";
}
363 
/* Reads the symbol table entry located ofst bytes into the file image.
 * The entry is stored in *sym32 when sym32 is non-NULL, otherwise in
 * *sym64.
 *
 * Returns 0 on success, 1 when the entry does not fit inside the image.
 */
int parse_elf_symbol(elf_obj_t *elf, unsigned int ofst, Elf32_Sym *sym32, Elf64_Sym *sym64) {
  if (sym32 != NULL) {
    /* 32-bit entry */
    COPY_STRUCT(sym32, elf->buf, ofst, elf->sz);

    ENDIAN_ASSIGN_IN_PLACE(sym32->st_name);
    ENDIAN_ASSIGN_IN_PLACE(sym32->st_value);
    ENDIAN_ASSIGN_IN_PLACE(sym32->st_size);
    ENDIAN_ASSIGN_IN_PLACE(sym32->st_info);
    ENDIAN_ASSIGN_IN_PLACE(sym32->st_other);
    ENDIAN_ASSIGN_IN_PLACE(sym32->st_shndx);

    return 0;
  }

  /* 64-bit entry */
  COPY_STRUCT(sym64, elf->buf, ofst, elf->sz);

  ENDIAN_ASSIGN_IN_PLACE(sym64->st_name);
  ENDIAN_ASSIGN_IN_PLACE(sym64->st_value);
  ENDIAN_ASSIGN_IN_PLACE(sym64->st_size);
  ENDIAN_ASSIGN_IN_PLACE(sym64->st_info);
  ENDIAN_ASSIGN_IN_PLACE(sym64->st_other);
  ENDIAN_ASSIGN_IN_PLACE(sym64->st_shndx);

  return 0;

bail:
  return 1;
}
386 
parse_elf(uint8_t * buf,size_t sz,output_fmt_t mode)387 int parse_elf(uint8_t *buf, size_t sz, output_fmt_t mode) {
388   elf_obj_t    elf;
389   unsigned int ofst;
390   int          i;
391   Elf32_Off    strtab_off32;
392   Elf64_Off    strtab_off64; /* save String Table offset for later use */
393 
394   memset(&elf, 0, sizeof(elf));
395   elf.buf = buf;
396   elf.sz = sz;
397 
398   /* Parse Header */
399   if (parse_elf_header(&elf))
400     goto bail;
401 
402   if (elf.bits == 32) {
403     Elf32_Shdr shdr;
404     for (i = 0; i < elf.hdr32.e_shnum; i++) {
405       parse_elf_section(&elf, i, &shdr, NULL);
406 
407       if (shdr.sh_type == SHT_STRTAB) {
408         char strtsb_name[128];
409 
410         strcpy(strtsb_name, (char *)(elf.buf + shdr.sh_offset + shdr.sh_name));
411 
412         if (!(strcmp(strtsb_name, ".shstrtab"))) {
413           /* log_msg("found section: %s\n", strtsb_name); */
414           strtab_off32 = shdr.sh_offset;
415           break;
416         }
417       }
418     }
419   } else { /* if (elf.bits == 64) */
420     Elf64_Shdr shdr;
421     for (i = 0; i < elf.hdr64.e_shnum; i++) {
422       parse_elf_section(&elf, i, NULL, &shdr);
423 
424       if (shdr.sh_type == SHT_STRTAB) {
425         char strtsb_name[128];
426 
427         strcpy(strtsb_name, (char *)(elf.buf + shdr.sh_offset + shdr.sh_name));
428 
429         if (!(strcmp(strtsb_name, ".shstrtab"))) {
430           /* log_msg("found section: %s\n", strtsb_name); */
431           strtab_off64 = shdr.sh_offset;
432           break;
433         }
434       }
435     }
436   }
437 
438   /* Parse all Symbol Tables */
439   if (elf.bits == 32) {
440     Elf32_Shdr shdr;
441     for (i = 0; i < elf.hdr32.e_shnum; i++) {
442       parse_elf_section(&elf, i, &shdr, NULL);
443 
444       if (shdr.sh_type == SHT_SYMTAB) {
445         for (ofst = shdr.sh_offset;
446              ofst < shdr.sh_offset + shdr.sh_size;
447              ofst += shdr.sh_entsize) {
448           Elf32_Sym sym;
449 
450           parse_elf_symbol(&elf, ofst, &sym, NULL);
451 
452           /* For all OBJECTS (data objects), extract the value from the
453            * proper data segment.
454            */
455           /* if (ELF32_ST_TYPE(sym.st_info) == STT_OBJECT && sym.st_name)
456               log_msg("found data object %s\n",
457                       parse_elf_string_table(&elf,
458                                              shdr.sh_link,
459                                              sym.st_name));
460            */
461 
462           if (ELF32_ST_TYPE(sym.st_info) == STT_OBJECT
463               && sym.st_size == 4) {
464             Elf32_Shdr dhdr;
465             int val = 0;
466             char section_name[128];
467 
468             parse_elf_section(&elf, sym.st_shndx, &dhdr, NULL);
469 
470             /* For explanition - refer to _MSC_VER version of code */
471             strcpy(section_name, (char *)(elf.buf + strtab_off32 + dhdr.sh_name));
472             /* log_msg("Section_name: %s, Section_type: %d\n", section_name, dhdr.sh_type); */
473 
474             if (strcmp(section_name, ".bss")) {
475               if (sizeof(val) != sym.st_size) {
476                 /* The target value is declared as an int in
477                  * *_asm_*_offsets.c, which is 4 bytes on all
478                  * targets we currently use. Complain loudly if
479                  * this is not true.
480                  */
481                 log_msg("Symbol size is wrong\n");
482                 goto bail;
483               }
484 
485               memcpy(&val,
486                      elf.buf + dhdr.sh_offset + sym.st_value,
487                      sym.st_size);
488             }
489 
490             if (!elf.le_data) {
491               log_msg("Big Endian data not supported yet!\n");
492               goto bail;
493             }
494 
495             switch (mode) {
496               case OUTPUT_FMT_RVDS:
497                 printf("%-40s EQU %5d\n",
498                        parse_elf_string_table(&elf,
499                                               shdr.sh_link,
500                                               sym.st_name),
501                        val);
502                 break;
503               case OUTPUT_FMT_GAS:
504                 printf(".equ %-40s, %5d\n",
505                        parse_elf_string_table(&elf,
506                                               shdr.sh_link,
507                                               sym.st_name),
508                        val);
509                 break;
510               case OUTPUT_FMT_C_HEADER:
511                 printf("#define %-40s %5d\n",
512                        parse_elf_string_table(&elf,
513                                               shdr.sh_link,
514                                               sym.st_name),
515                        val);
516                 break;
517               default:
518                 printf("%s = %d\n",
519                        parse_elf_string_table(&elf,
520                                               shdr.sh_link,
521                                               sym.st_name),
522                        val);
523             }
524           }
525         }
526       }
527     }
528   } else { /* if (elf.bits == 64) */
529     Elf64_Shdr shdr;
530     for (i = 0; i < elf.hdr64.e_shnum; i++) {
531       parse_elf_section(&elf, i, NULL, &shdr);
532 
533       if (shdr.sh_type == SHT_SYMTAB) {
534         for (ofst = shdr.sh_offset;
535              ofst < shdr.sh_offset + shdr.sh_size;
536              ofst += shdr.sh_entsize) {
537           Elf64_Sym sym;
538 
539           parse_elf_symbol(&elf, ofst, NULL, &sym);
540 
541           /* For all OBJECTS (data objects), extract the value from the
542            * proper data segment.
543            */
544           /* if (ELF64_ST_TYPE(sym.st_info) == STT_OBJECT && sym.st_name)
545               log_msg("found data object %s\n",
546                       parse_elf_string_table(&elf,
547                                              shdr.sh_link,
548                                              sym.st_name));
549            */
550 
551           if (ELF64_ST_TYPE(sym.st_info) == STT_OBJECT
552               && sym.st_size == 4) {
553             Elf64_Shdr dhdr;
554             int val = 0;
555             char section_name[128];
556 
557             parse_elf_section(&elf, sym.st_shndx, NULL, &dhdr);
558 
559             /* For explanition - refer to _MSC_VER version of code */
560             strcpy(section_name, (char *)(elf.buf + strtab_off64 + dhdr.sh_name));
561             /* log_msg("Section_name: %s, Section_type: %d\n", section_name, dhdr.sh_type); */
562 
563             if ((strcmp(section_name, ".bss"))) {
564               if (sizeof(val) != sym.st_size) {
565                 /* The target value is declared as an int in
566                  * *_asm_*_offsets.c, which is 4 bytes on all
567                  * targets we currently use. Complain loudly if
568                  * this is not true.
569                  */
570                 log_msg("Symbol size is wrong\n");
571                 goto bail;
572               }
573 
574               memcpy(&val,
575                      elf.buf + dhdr.sh_offset + sym.st_value,
576                      sym.st_size);
577             }
578 
579             if (!elf.le_data) {
580               log_msg("Big Endian data not supported yet!\n");
581               goto bail;
582             }
583 
584             switch (mode) {
585               case OUTPUT_FMT_RVDS:
586                 printf("%-40s EQU %5d\n",
587                        parse_elf_string_table(&elf,
588                                               shdr.sh_link,
589                                               sym.st_name),
590                        val);
591                 break;
592               case OUTPUT_FMT_GAS:
593                 printf(".equ %-40s, %5d\n",
594                        parse_elf_string_table(&elf,
595                                               shdr.sh_link,
596                                               sym.st_name),
597                        val);
598                 break;
599               default:
600                 printf("%s = %d\n",
601                        parse_elf_string_table(&elf,
602                                               shdr.sh_link,
603                                               sym.st_name),
604                        val);
605             }
606           }
607         }
608       }
609     }
610   }
611 
612   if (mode == OUTPUT_FMT_RVDS)
613     printf("    END\n");
614 
615   return 0;
616 bail:
617   log_msg("Parse error: File does not appear to be valid ELF32 or ELF64\n");
618   return 1;
619 }
620 
621 #endif
622 #endif /* defined(__GNUC__) && __GNUC__ */
623 
624 
625 #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__CYGWIN__)
626 /*  See "Microsoft Portable Executable and Common Object File Format Specification"
627     for reference.
628 */
/* Little-endian loads from a byte pointer. get_le32 yields a uint32_t so a
 * top byte >= 0x80 is not shifted into the sign bit of an int; get_le16
 * yields an int in the range 0..65535. The argument is evaluated more than
 * once.
 */
#define get_le32(x) ((uint32_t)(x)[0] | ((uint32_t)(x)[1] << 8) | \
                     ((uint32_t)(x)[2] << 16) | ((uint32_t)(x)[3] << 24))
#define get_le16(x) ((x)[0] | ((x)[1] << 8))
631 
/* Parses a COFF object file held in memory and prints, in "name EQU value"
 * form followed by an "END" line, every external symbol (storage class 2)
 * that is defined in a section.
 *
 *   buf  start of the file image
 *   sz   size of the image in bytes; every read is checked against it
 *
 * Symbols located in .bss are printed with value 0; all others are read as
 * a 32-bit little-endian value from the non-COMDAT .rdata section's raw
 * data. A leading '_' is stripped from symbol names.
 *
 * Returns 0 on success, 1 (after logging) on allocation failure or when the
 * file is truncated or inconsistent.
 */
int parse_coff(uint8_t *buf, size_t sz) {
  enum {
    FILE_HDR_SZ = 20,
    SECTION_HDR_SZ = 40,
    SYMBOL_SZ = 18,
    SHORT_NAME_SZ = 8,
    MAX_SECTIONS = 96
  };
  unsigned int nsections, symtab_ptr, symtab_sz;
  unsigned int sectionrawdata_ptr = 0;
  int have_rdata = 0;
  unsigned int i;
  uint64_t strtab_ptr;
  uint8_t *ptr;
  int res = 1;

  char **sectionlist;  // this array holds all section names in their correct order.
  // it is used to check if the symbol is in .bss or .rdata section.

  if (sz < FILE_HDR_SZ) {
    log_msg("File is too small to hold a COFF header\n");
    return 1;
  }

  nsections = get_le16(buf + 2);
  symtab_ptr = get_le32(buf + 8);
  symtab_sz = get_le32(buf + 12);
  // the string table follows the symbol table immediately
  strtab_ptr = (uint64_t)symtab_ptr + (uint64_t)symtab_sz * SYMBOL_SZ;

  if (nsections > MAX_SECTIONS) {
    log_msg("Too many sections\n");
    return 1;
  }

  if (FILE_HDR_SZ + (uint64_t)nsections * SECTION_HDR_SZ > sz ||
      strtab_ptr > sz) {
    log_msg("Truncated COFF file\n");
    return 1;
  }

  // zero-filled so the cleanup below can free every slot unconditionally
  sectionlist = calloc(nsections ? nsections : 1, sizeof(*sectionlist));

  if (sectionlist == NULL) {
    log_msg("Allocating first level of section list failed\n");
    return 1;
  }

  /*
  The size of optional header is always zero for an obj file. So, the section header
  follows the file header immediately.
  */

  ptr = buf + FILE_HDR_SZ;     // section header

  for (i = 0; i < nsections; i++) {
    char sectionname[SHORT_NAME_SZ + 1] = {0};

    // the 8-byte name field is not NUL-terminated when fully used
    memcpy(sectionname, ptr, SHORT_NAME_SZ);

    sectionlist[i] = malloc(strlen(sectionname) + 1);

    if (sectionlist[i] == NULL) {
      log_msg("Allocating storage for %s failed\n", sectionname);
      goto bail;
    }
    strcpy(sectionlist[i], sectionname);

    // check if it's .rdata and is not a COMDAT section.
    if (!strcmp(sectionname, ".rdata") &&
        ((uint32_t)get_le32(ptr + 36) & 0x1000) == 0) {
      sectionrawdata_ptr = get_le32(ptr + 20);
      have_rdata = 1;
    }

    ptr += SECTION_HDR_SZ;
  }

  /*  The compiler puts the data with non-zero offset in .rdata section, but puts the data with
      zero offset in .bss section. So, if the data is in .bss section, set offset=0.
      Note from Wiki: In an object module compiled from C, the bss section contains
      the local variables (but not functions) that were declared with the static keyword,
      except for those with non-zero initial values. (In C, static variables are initialized
      to zero by default.) It also contains the non-local (both extern and static) variables
      that are also initialized to zero (either explicitly or by default).
      */
  // move to symbol table
  /* COFF symbol table:
      offset      field
      0           Name(*)
      8           Value
      12          SectionNumber
      14          Type
      16          StorageClass
      17          NumberOfAuxSymbols
      */
  ptr = buf + symtab_ptr;

  for (i = 0; i < symtab_sz; i++) {
    const int16_t section = (int16_t)get_le16(ptr + 12); // section number
    const unsigned int naux = ptr[17];

    if (section > 0 && ptr[16] == 2) {
      char shortname[SHORT_NAME_SZ + 1] = {0};
      const char *name;
      uint32_t symoffset = 0;

      if ((unsigned int)section > nsections) {
        log_msg("Symbol refers to unknown section %d\n", (int)section);
        goto bail;
      }

      if (get_le32(ptr)) {
        // name is stored inline in the first 8 bytes of the record
        memcpy(shortname, ptr, SHORT_NAME_SZ);
        name = shortname;
      } else {
        // name is stored in the string table
        const uint64_t name_ofst = strtab_ptr + (uint32_t)get_le32(ptr + 4);

        if (name_ofst >= sz ||
            memchr(buf + (size_t)name_ofst, 0,
                   sz - (size_t)name_ofst) == NULL) {
          log_msg("Symbol name lies outside the file\n");
          goto bail;
        }

        name = (const char *)(buf + (size_t)name_ofst);
      }

      /* The 64bit Windows compiler doesn't prefix with an _.
       * Check what's there, and bump if necessary
       */
      if (name[0] == '_')
        name++;

      if (strcmp(sectionlist[section - 1], ".bss")) {
        const uint64_t val_ofst = (uint64_t)sectionrawdata_ptr +
                                  (uint32_t)get_le32(ptr + 8);

        if (!have_rdata || val_ofst + 4 > sz) {
          log_msg("Symbol value lies outside the .rdata section data\n");
          goto bail;
        }

        symoffset = get_le32(buf + (size_t)val_ofst);
      }

      printf("%-40s EQU %5d\n", name, (int)symoffset);
    }

    // auxiliary records belong to this symbol and are not symbols themselves
    if (naux >= symtab_sz - i)
      break;

    i += naux;
    ptr += (naux + 1) * SYMBOL_SZ;
  }

  printf("    END\n");
  res = 0;

bail:

  for (i = 0; i < nsections; i++) {
    free(sectionlist[i]);
  }

  free(sectionlist);

  return res;
}
776 #endif /* defined(_MSC_VER) || defined(__MINGW32__) || defined(__CYGWIN__) */
777 
main(int argc,char ** argv)778 int main(int argc, char **argv) {
779   output_fmt_t mode = OUTPUT_FMT_PLAIN;
780   const char *f;
781   uint8_t *file_buf;
782   int res;
783   FILE *fp;
784   long int file_size;
785 
786   if (argc < 2 || argc > 3) {
787     fprintf(stderr, "Usage: %s [output format] <obj file>\n\n", argv[0]);
788     fprintf(stderr, "  <obj file>\tobject file to parse\n");
789     fprintf(stderr, "Output Formats:\n");
790     fprintf(stderr, "  gas  - compatible with GNU assembler\n");
791     fprintf(stderr, "  rvds - compatible with armasm\n");
792     fprintf(stderr, "  cheader - c/c++ header file\n");
793     goto bail;
794   }
795 
796   f = argv[2];
797 
798   if (!strcmp(argv[1], "rvds"))
799     mode = OUTPUT_FMT_RVDS;
800   else if (!strcmp(argv[1], "gas"))
801     mode = OUTPUT_FMT_GAS;
802   else if (!strcmp(argv[1], "cheader"))
803     mode = OUTPUT_FMT_C_HEADER;
804   else
805     f = argv[1];
806 
807   fp = fopen(f, "rb");
808 
809   if (!fp) {
810     perror("Unable to open file");
811     goto bail;
812   }
813 
814   if (fseek(fp, 0, SEEK_END)) {
815     perror("stat");
816     goto bail;
817   }
818 
819   file_size = ftell(fp);
820   file_buf = malloc(file_size);
821 
822   if (!file_buf) {
823     perror("malloc");
824     goto bail;
825   }
826 
827   rewind(fp);
828 
829   if (fread(file_buf, sizeof(char), file_size, fp) != file_size) {
830     perror("read");
831     goto bail;
832   }
833 
834   if (fclose(fp)) {
835     perror("close");
836     goto bail;
837   }
838 
839 #if defined(__GNUC__) && __GNUC__
840 #if defined(__MACH__)
841   res = parse_macho(file_buf, file_size, mode);
842 #elif defined(__ELF__)
843   res = parse_elf(file_buf, file_size, mode);
844 #endif
845 #endif
846 #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__CYGWIN__)
847   res = parse_coff(file_buf, file_size);
848 #endif
849 
850   free(file_buf);
851 
852   if (!res)
853     return EXIT_SUCCESS;
854 
855 bail:
856   return EXIT_FAILURE;
857 }
858