1 
2 /*--------------------------------------------------------------------*/
3 /*--- User-mode execve() for ELF executables           m_ume_elf.c ---*/
4 /*--------------------------------------------------------------------*/
5 
6 /*
7    This file is part of Valgrind, a dynamic binary instrumentation
8    framework.
9 
10    Copyright (C) 2000-2015 Julian Seward
11       jseward@acm.org
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26    02111-1307, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 */
30 
31 #if defined(VGO_linux) || defined(VGO_solaris)
32 
33 #include "pub_core_basics.h"
34 #include "pub_core_vki.h"
35 
36 #include "pub_core_aspacemgr.h"     // various mapping fns
37 #include "pub_core_debuglog.h"
38 #include "pub_core_libcassert.h"    // VG_(exit), vg_assert
39 #include "pub_core_libcbase.h"      // VG_(memcmp), etc
40 #include "pub_core_libcprint.h"
41 #include "pub_core_libcfile.h"      // VG_(open) et al
42 #include "pub_core_machine.h"       // VG_ELF_CLASS (XXX: which should be moved)
43 #include "pub_core_mallocfree.h"    // VG_(malloc), VG_(free)
44 #include "pub_core_syscall.h"       // VG_(strerror)
45 #include "pub_core_ume.h"           // self
46 
47 #include "priv_ume.h"
48 
49 /* --- !!! --- EXTERNAL HEADERS start --- !!! --- */
50 #if defined(VGO_linux)
51 #  define _GNU_SOURCE
52 #  define _FILE_OFFSET_BITS 64
53 #endif
54 /* This is for ELF types etc, and also the AT_ constants. */
55 #include <elf.h>
56 #if defined(VGO_solaris)
57 #  include <sys/fasttrap.h> // PT_SUNWDTRACE_SIZE
58 #endif
59 /* --- !!! --- EXTERNAL HEADERS end --- !!! --- */
60 
61 
/* ESZ(x) names the ELF type 'x' for the word size Valgrind was built
   for: ESZ(Ehdr) is Elf64_Ehdr when VG_WORDSIZE is 8 and Elf32_Ehdr
   when it is 4.  Any other word size is rejected at compile time. */
#if     VG_WORDSIZE == 8
#define ESZ(x)  Elf64_##x
#elif   VG_WORDSIZE == 4
#define ESZ(x)  Elf32_##x
#else
#error VG_WORDSIZE needs to ==4 or ==8
#endif
69 
/* What readelf() extracts from an ELF file: the file header, the
   program header table, and the descriptor the file was read from. */
struct elfinfo
{
   ESZ(Ehdr)    e;    /* the ELF file header */
   ESZ(Phdr)    *p;   /* heap-allocated array of e.e_phnum program headers */
   Int          fd;   /* descriptor of the file; mapelf() maps from it */
};
76 
/* Give up if a client mapping attempt failed.

   RES is the result of the mapping call.  BASE and LEN are the
   address and length that were asked for; they are used only in the
   diagnostic.  If RES is not an error this returns immediately.
   Otherwise the failure is printed (with an extra hint when the error
   is VKI_EINVAL) and VG_(exit)(1) is called. */
static void check_mmap(SysRes res, Addr base, SizeT len)
{
   /* Nothing to report for a successful mapping. */
   if (!sr_isError(res))
      return;

   VG_(printf)("valgrind: mmap(0x%llx, %lld) failed in UME "
               "with error %lu (%s).\n",
               (ULong)base, (Long)len,
               sr_Err(res), VG_(strerror)(sr_Err(res)) );

   if (sr_Err(res) == VKI_EINVAL) {
      VG_(printf)("valgrind: this can be caused by executables with "
                  "very large text, data or bss segments.\n");
   }

   VG_(exit)(1);
}
91 
92 /*------------------------------------------------------------*/
93 /*--- Loading ELF files                                    ---*/
94 /*------------------------------------------------------------*/
95 
96 static
readelf(Int fd,const HChar * filename)97 struct elfinfo *readelf(Int fd, const HChar *filename)
98 {
99    SysRes sres;
100    struct elfinfo *e = VG_(malloc)("ume.re.1", sizeof(*e));
101    Int phsz;
102 
103    e->fd = fd;
104 
105    sres = VG_(pread)(fd, &e->e, sizeof(e->e), 0);
106    if (sr_isError(sres) || sr_Res(sres) != sizeof(e->e)) {
107       VG_(printf)("valgrind: %s: can't read ELF header: %s\n",
108                   filename, VG_(strerror)(sr_Err(sres)));
109       goto bad;
110    }
111 
112    if (VG_(memcmp)(&e->e.e_ident[0], ELFMAG, SELFMAG) != 0) {
113       VG_(printf)("valgrind: %s: bad ELF magic number\n", filename);
114       goto bad;
115    }
116    if (e->e.e_ident[EI_CLASS] != VG_ELF_CLASS) {
117       VG_(printf)("valgrind: wrong ELF executable class "
118                   "(eg. 32-bit instead of 64-bit)\n");
119       goto bad;
120    }
121    if (e->e.e_ident[EI_DATA] != VG_ELF_DATA2XXX) {
122       VG_(printf)("valgrind: executable has wrong endian-ness\n");
123       goto bad;
124    }
125    if (!(e->e.e_type == ET_EXEC || e->e.e_type == ET_DYN)) {
126       VG_(printf)("valgrind: this is not an executable\n");
127       goto bad;
128    }
129 
130    if (e->e.e_machine != VG_ELF_MACHINE) {
131       VG_(printf)("valgrind: executable is not for "
132                   "this architecture\n");
133       goto bad;
134    }
135 
136    if (e->e.e_phentsize != sizeof(ESZ(Phdr))) {
137       VG_(printf)("valgrind: sizeof ELF Phdr wrong\n");
138       goto bad;
139    }
140 
141    phsz = sizeof(ESZ(Phdr)) * e->e.e_phnum;
142    e->p = VG_(malloc)("ume.re.2", phsz);
143 
144    sres = VG_(pread)(fd, e->p, phsz, e->e.e_phoff);
145    if (sr_isError(sres) || sr_Res(sres) != phsz) {
146       VG_(printf)("valgrind: can't read phdr: %s\n",
147                   VG_(strerror)(sr_Err(sres)));
148       VG_(free)(e->p);
149       goto bad;
150    }
151 
152    return e;
153 
154   bad:
155    VG_(free)(e);
156    return NULL;
157 }
158 
159 /* Map an ELF file.  Returns the brk address. */
160 static
mapelf(struct elfinfo * e,ESZ (Addr)base)161 ESZ(Addr) mapelf(struct elfinfo *e, ESZ(Addr) base)
162 {
163    Int    i;
164    SysRes res;
165    ESZ(Addr) elfbrk = 0;
166 
167    for (i = 0; i < e->e.e_phnum; i++) {
168       ESZ(Phdr) *ph = &e->p[i];
169       ESZ(Addr) addr, brkaddr;
170       ESZ(Word) memsz;
171 
172       if (ph->p_type != PT_LOAD)
173          continue;
174 
175       addr    = ph->p_vaddr+base;
176       memsz   = ph->p_memsz;
177       brkaddr = addr+memsz;
178 
179       if (brkaddr > elfbrk)
180          elfbrk = brkaddr;
181    }
182 
183    for (i = 0; i < e->e.e_phnum; i++) {
184       ESZ(Phdr) *ph = &e->p[i];
185       ESZ(Addr) addr, bss, brkaddr;
186       ESZ(Off) off;
187       ESZ(Word) filesz;
188       ESZ(Word) memsz;
189       unsigned prot = 0;
190 
191       if (ph->p_type != PT_LOAD)
192          continue;
193 
194       if (ph->p_flags & PF_X) prot |= VKI_PROT_EXEC;
195       if (ph->p_flags & PF_W) prot |= VKI_PROT_WRITE;
196       if (ph->p_flags & PF_R) prot |= VKI_PROT_READ;
197 
198       addr    = ph->p_vaddr+base;
199       off     = ph->p_offset;
200       filesz  = ph->p_filesz;
201       bss     = addr+filesz;
202       memsz   = ph->p_memsz;
203       brkaddr = addr+memsz;
204 
205       // Tom says: In the following, do what the Linux kernel does and only
206       // map the pages that are required instead of rounding everything to
207       // the specified alignment (ph->p_align).  (AMD64 doesn't work if you
208       // use ph->p_align -- part of stage2's memory gets trashed somehow.)
209       //
210       // The condition handles the case of a zero-length segment.
211       if (VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr) > 0) {
212          if (0) VG_(debugLog)(0,"ume","mmap_file_fixed_client #1\n");
213          res = VG_(am_mmap_file_fixed_client)(
214                   VG_PGROUNDDN(addr),
215                   VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr),
216                   prot, /*VKI_MAP_FIXED|VKI_MAP_PRIVATE, */
217                   e->fd, VG_PGROUNDDN(off)
218                );
219          if (0) VG_(am_show_nsegments)(0,"after #1");
220          check_mmap(res, VG_PGROUNDDN(addr),
221                          VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr));
222       }
223 
224       // if memsz > filesz, fill the remainder with zeroed pages
225       if (memsz > filesz) {
226          UInt bytes;
227 
228          bytes = VG_PGROUNDUP(brkaddr)-VG_PGROUNDUP(bss);
229          if (bytes > 0) {
230             if (0) VG_(debugLog)(0,"ume","mmap_anon_fixed_client #2\n");
231             res = VG_(am_mmap_anon_fixed_client)(
232                      VG_PGROUNDUP(bss), bytes,
233                      prot
234                   );
235             if (0) VG_(am_show_nsegments)(0,"after #2");
236             check_mmap(res, VG_PGROUNDUP(bss), bytes);
237          }
238 
239          bytes = bss & (VKI_PAGE_SIZE - 1);
240 
241          // The 'prot' condition allows for a read-only bss
242          if ((prot & VKI_PROT_WRITE) && (bytes > 0)) {
243             bytes = VKI_PAGE_SIZE - bytes;
244             VG_(memset)((void *)bss, 0, bytes);
245          }
246       }
247    }
248 
249    return elfbrk;
250 }
251 
VG_(match_ELF)252 Bool VG_(match_ELF)(const void *hdr, SizeT len)
253 {
254    const ESZ(Ehdr) *e = hdr;
255    return (len > sizeof(*e)) && VG_(memcmp)(&e->e_ident[0], ELFMAG, SELFMAG) == 0;
256 }
257 
258 
259 /* load_ELF pulls an ELF executable into the address space, prepares
260    it for execution, and writes info about it into INFO.  In
261    particular it fills in .init_eip, which is the starting point.
262 
263    Returns zero on success, non-zero (a VKI_E.. value) on failure.
264 
265    The sequence of activities is roughly as follows:
266 
267    - use readelf() to extract program header info from the exe file.
268 
269    - scan the program header, collecting info (not sure what all those
270      info-> fields are, or whether they are used, but still) and in
271      particular looking out fo the PT_INTERP header, which describes
272      the interpreter.  If such a field is found, the space needed to
273      hold the interpreter is computed into interp_size.
274 
275    - map the executable in, by calling mapelf().  This maps in all
276      loadable sections, and I _think_ also creates any .bss areas
277      required.  mapelf() returns the address just beyond the end of
278      the furthest-along mapping it creates.  The executable is mapped
279      starting at EBASE, which is usually read from it (eg, 0x8048000
280      etc) except if it's a PIE, in which case I'm not sure what
281      happens.
282 
283      The returned address is recorded in info->brkbase as the start
284      point of the brk (data) segment, as it is traditional to place
285      the data segment just after the executable.  Neither load_ELF nor
286      mapelf creates the brk segment, though: that is for the caller of
287      load_ELF to attend to.
288 
289    - If the initial phdr scan didn't find any mention of an
290      interpreter (interp == NULL), this must be a statically linked
291      executable, and we're pretty much done.
292 
293    - Otherwise, we need to use mapelf() a second time to load the
294      interpreter.  The interpreter can go anywhere, but mapelf() wants
295      to be told a specific address to put it at.  So an advisory query
296      is passed to aspacem, asking where it would put an anonymous
297      client mapping of size INTERP_SIZE.  That address is then used
298      as the mapping address for the interpreter.
299 
300    - The entry point in INFO is set to the interpreter's entry point,
301      and we're done.  */
VG_(load_ELF)302 Int VG_(load_ELF)(Int fd, const HChar* name, /*MOD*/ExeInfo* info)
303 {
304    SysRes sres;
305    struct elfinfo *e;
306    struct elfinfo *interp = NULL;
307    ESZ(Addr) minaddr = ~0;      /* lowest mapped address */
308    ESZ(Addr) maxaddr = 0;       /* highest mapped address */
309    ESZ(Addr) interp_addr = 0;   /* interpreter (ld.so) address */
310    ESZ(Word) interp_size = 0;   /* interpreter size */
311    /* ESZ(Word) interp_align = VKI_PAGE_SIZE; */ /* UNUSED */
312    Int i;
313    void *entry;
314    ESZ(Addr) ebase = 0;
315 #  if defined(VGO_solaris)
316    ESZ(Addr) thrptr_addr = 0;
317 #  endif
318 
319 #  if defined(HAVE_PIE)
320    ebase = info->exe_base;
321 #  endif
322 
323    e = readelf(fd, name);
324 
325    if (e == NULL)
326       return VKI_ENOEXEC;
327 
328    /* The kernel maps position-independent executables at TASK_SIZE*2/3;
329       duplicate this behavior as close as we can. */
330    if (e->e.e_type == ET_DYN && ebase == 0) {
331       ebase = VG_PGROUNDDN(info->exe_base
332                            + (info->exe_end - info->exe_base) * 2 / 3);
333       /* We really don't want to load PIEs at zero or too close.  It
334          works, but it's unrobust (NULL pointer reads and writes
335          become legit, which is really bad) and causes problems for
336          exp-ptrcheck, which assumes all numbers below 1MB are
337          nonpointers.  So, hackily, move it above 1MB. */
338       /* Later .. it appears ppc32-linux tries to put [vdso] at 1MB,
339          which totally screws things up, because nothing else can go
340          there.  The size of [vdso] is around 2 or 3 pages, so bump
341          the hacky load addess along by 8 * VKI_PAGE_SIZE to be safe. */
342       /* Later .. on mips64 we can't use 0x108000, because mapelf will
343          fail. */
344 #     if defined(VGP_mips64_linux)
345       if (ebase < 0x100000)
346          ebase = 0x100000;
347 #     else
348       vg_assert(VKI_PAGE_SIZE >= 4096); /* stay sane */
349       ESZ(Addr) hacky_load_address = 0x100000 + 8 * VKI_PAGE_SIZE;
350       if (ebase < hacky_load_address)
351          ebase = hacky_load_address;
352 #     endif
353 
354 #     if defined(VGO_solaris)
355       /* Record for later use in AT_BASE. */
356       info->interp_offset = ebase;
357 #     endif
358    }
359 
360    info->phnum = e->e.e_phnum;
361    info->entry = e->e.e_entry + ebase;
362    info->phdr = 0;
363    info->stack_prot = VKI_PROT_READ|VKI_PROT_WRITE|VKI_PROT_EXEC;
364 
365    for (i = 0; i < e->e.e_phnum; i++) {
366       ESZ(Phdr) *ph = &e->p[i];
367 
368       switch(ph->p_type) {
369       case PT_PHDR:
370          info->phdr = ph->p_vaddr + ebase;
371 #        if defined(VGO_solaris)
372          info->real_phdr_present = True;
373 #        endif
374          break;
375 
376       case PT_LOAD:
377          if (ph->p_vaddr < minaddr)
378             minaddr = ph->p_vaddr;
379          if (ph->p_vaddr+ph->p_memsz > maxaddr)
380             maxaddr = ph->p_vaddr+ph->p_memsz;
381          break;
382 
383 #     if defined(VGO_solaris)
384       case PT_SUNWDTRACE:
385          if (ph->p_memsz < PT_SUNWDTRACE_SIZE ||
386              (ph->p_flags & (PF_R | PF_W | PF_X)) != (PF_R | PF_W | PF_X)) {
387             VG_(printf)("valgrind: m_ume.c: too small SUNWDTRACE size\n");
388             return VKI_ENOEXEC;
389          }
390 
391          info->init_thrptr = ph->p_vaddr + ebase;
392          break;
393 #     endif
394 
395       case PT_INTERP: {
396          HChar *buf = VG_(malloc)("ume.LE.1", ph->p_filesz+1);
397          Int j;
398          Int intfd;
399          Int baseaddr_set;
400 
401          VG_(pread)(fd, buf, ph->p_filesz, ph->p_offset);
402          buf[ph->p_filesz] = '\0';
403 
404          sres = VG_(open)(buf, VKI_O_RDONLY, 0);
405          if (sr_isError(sres)) {
406             VG_(printf)("valgrind: m_ume.c: can't open interpreter\n");
407             VG_(exit)(1);
408          }
409          intfd = sr_Res(sres);
410 
411          interp = readelf(intfd, buf);
412          if (interp == NULL) {
413             VG_(printf)("valgrind: m_ume.c: can't read interpreter\n");
414             return 1;
415          }
416          VG_(free)(buf);
417 
418          baseaddr_set = 0;
419          for (j = 0; j < interp->e.e_phnum; j++) {
420             ESZ(Phdr) *iph = &interp->p[j];
421             ESZ(Addr) end;
422 
423 #           if defined(VGO_solaris)
424             if (iph->p_type == PT_SUNWDTRACE) {
425                if (iph->p_memsz < PT_SUNWDTRACE_SIZE ||
426                    (iph->p_flags & (PF_R | PF_W | PF_X))
427                       != (PF_R | PF_W | PF_X)) {
428                   VG_(printf)("valgrind: m_ume.c: too small SUNWDTRACE size\n");
429                   return VKI_ENOEXEC;
430                }
431 
432                /* Store the thrptr value into a temporary because we do not
433                   know yet where the interpreter is mapped. */
434                thrptr_addr = iph->p_vaddr;
435             }
436 #           endif
437 
438             if (iph->p_type != PT_LOAD || iph->p_memsz == 0)
439                continue;
440 
441             if (!baseaddr_set) {
442                interp_addr  = iph->p_vaddr;
443                /* interp_align = iph->p_align; */ /* UNUSED */
444                baseaddr_set = 1;
445             }
446 
447             /* assumes that all segments in the interp are close */
448             end = (iph->p_vaddr - interp_addr) + iph->p_memsz;
449 
450             if (end > interp_size)
451                interp_size = end;
452          }
453          break;
454          }
455 
456 #     if defined(PT_GNU_STACK) || defined(PT_SUNWSTACK)
457 #     if defined(PT_GNU_STACK)
458       /* Android's elf.h doesn't appear to have PT_GNU_STACK. */
459       case PT_GNU_STACK:
460 #     endif
461 #     if defined(PT_SUNWSTACK)
462       /* Solaris-specific program header. */
463       case PT_SUNWSTACK:
464 #     endif
465          if ((ph->p_flags & PF_X) == 0) info->stack_prot &= ~VKI_PROT_EXEC;
466          if ((ph->p_flags & PF_W) == 0) info->stack_prot &= ~VKI_PROT_WRITE;
467          if ((ph->p_flags & PF_R) == 0) info->stack_prot &= ~VKI_PROT_READ;
468          break;
469 #     endif
470 
471 #     if defined(PT_SUNW_SYSSTAT)
472       /* Solaris-specific program header which requires link-time support. */
473       case PT_SUNW_SYSSTAT:
474          VG_(unimplemented)("Support for program header PT_SUNW_SYSSTAT.");
475          break;
476 #     endif
477 #     if defined(PT_SUNW_SYSSTAT_ZONE)
478       /* Solaris-specific program header which requires link-time support. */
479       case PT_SUNW_SYSSTAT_ZONE:
480          VG_(unimplemented)("Support for program header PT_SUNW_SYSSTAT_ZONE.");
481          break;
482 #     endif
483 
484       default:
485          // do nothing
486          break;
487       }
488    }
489 
490    if (info->phdr == 0)
491       info->phdr = minaddr + ebase + e->e.e_phoff;
492 
493    if (info->exe_base != info->exe_end) {
494       if (minaddr >= maxaddr ||
495           (minaddr + ebase < info->exe_base ||
496            maxaddr + ebase > info->exe_end)) {
497          VG_(printf)("Executable range %p-%p is outside the\n"
498                      "acceptable range %p-%p\n",
499                      (char *)minaddr + ebase, (char *)maxaddr + ebase,
500                      (char *)info->exe_base,  (char *)info->exe_end);
501          return VKI_ENOMEM;
502       }
503    }
504 
505    info->brkbase = mapelf(e, ebase);    /* map the executable */
506 
507    if (info->brkbase == 0)
508       return VKI_ENOMEM;
509 
510    if (interp != NULL) {
511       /* reserve a chunk of address space for interpreter */
512       MapRequest mreq;
513       Addr       advised;
514       Bool       ok;
515 
516       /* Don't actually reserve the space.  Just get an advisory
517          indicating where it would be allocated, and pass that to
518          mapelf(), which in turn asks aspacem to do some fixed maps at
519          the specified address.  This is a bit of hack, but it should
520          work because there should be no intervening transactions with
521          aspacem which could cause those fixed maps to fail.
522 
523          Placement policy is:
524 
525          if the interpreter asks to be loaded at zero
526             ignore that and put it wherever we like (mappings at zero
527             are bad news)
528          else
529             try and put it where it asks for, but if that doesn't work,
530             just put it anywhere.
531       */
532       if (interp_addr == 0) {
533          mreq.rkind = MAny;
534          mreq.start = 0;
535          mreq.len   = interp_size;
536       } else {
537          mreq.rkind = MHint;
538          mreq.start = interp_addr;
539          mreq.len   = interp_size;
540       }
541 
542       advised = VG_(am_get_advisory)( &mreq, True/*client*/, &ok );
543 
544       if (!ok) {
545          /* bomb out */
546          SysRes res = VG_(mk_SysRes_Error)(VKI_EINVAL);
547          if (0) VG_(printf)("reserve for interp: failed\n");
548          check_mmap(res, (Addr)interp_addr, interp_size);
549          /*NOTREACHED*/
550       }
551 
552       (void)mapelf(interp, (ESZ(Addr))advised - interp_addr);
553 
554       VG_(close)(interp->fd);
555 
556       entry = (void *)(advised - interp_addr + interp->e.e_entry);
557 
558       info->interp_offset = advised - interp_addr;
559 #     if defined(VGO_solaris)
560       if (thrptr_addr)
561          info->init_thrptr = thrptr_addr + info->interp_offset;
562 #     endif
563 
564       VG_(free)(interp->p);
565       VG_(free)(interp);
566    } else {
567       entry = (void *)(ebase + e->e.e_entry);
568 
569 #     if defined(VGO_solaris)
570       if (e->e.e_type == ET_DYN)
571          info->ldsoexec = True;
572 #     endif
573    }
574 
575    info->exe_base = minaddr + ebase;
576    info->exe_end  = maxaddr + ebase;
577 
578 #if defined(VGP_ppc64be_linux)
579    /* On PPC64BE, ELF ver 1, a func ptr is represented by a TOC entry ptr.
580       This TOC entry contains three words; the first word is the function
581       address, the second word is the TOC ptr (r2), and the third word
582       is the static chain value. */
583    info->init_ip  = ((ULong*)entry)[0];
584    info->init_toc = ((ULong*)entry)[1];
585    info->init_ip  += info->interp_offset;
586    info->init_toc += info->interp_offset;
587 #elif defined(VGP_ppc64le_linux)
588    /* On PPC64LE, ELF ver 2. API doesn't use a func ptr */
589    info->init_ip  = (Addr)entry;
590    info->init_toc = 0; /* meaningless on this platform */
591 #else
592    info->init_ip  = (Addr)entry;
593    info->init_toc = 0; /* meaningless on this platform */
594 #endif
595    VG_(free)(e->p);
596    VG_(free)(e);
597 
598    return 0;
599 }
600 
601 #endif // defined(VGO_linux) || defined(VGO_solaris)
602 
603 /*--------------------------------------------------------------------*/
604 /*--- end                                                          ---*/
605 /*--------------------------------------------------------------------*/
606