1
2 /*--------------------------------------------------------------------*/
3 /*--- User-mode execve() for ELF executables m_ume_elf.c ---*/
4 /*--------------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2000-2015 Julian Seward
11 jseward@acm.org
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 02111-1307, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29 */
30
31 #if defined(VGO_linux) || defined(VGO_solaris)
32
33 #include "pub_core_basics.h"
34 #include "pub_core_vki.h"
35
36 #include "pub_core_aspacemgr.h" // various mapping fns
37 #include "pub_core_debuglog.h"
38 #include "pub_core_libcassert.h" // VG_(exit), vg_assert
39 #include "pub_core_libcbase.h" // VG_(memcmp), etc
40 #include "pub_core_libcprint.h"
41 #include "pub_core_libcfile.h" // VG_(open) et al
42 #include "pub_core_machine.h" // VG_ELF_CLASS (XXX: which should be moved)
43 #include "pub_core_mallocfree.h" // VG_(malloc), VG_(free)
44 #include "pub_core_syscall.h" // VG_(strerror)
45 #include "pub_core_ume.h" // self
46
47 #include "priv_ume.h"
48
49 /* --- !!! --- EXTERNAL HEADERS start --- !!! --- */
50 #if defined(VGO_linux)
51 # define _GNU_SOURCE
52 # define _FILE_OFFSET_BITS 64
53 #endif
54 /* This is for ELF types etc, and also the AT_ constants. */
55 #include <elf.h>
56 #if defined(VGO_solaris)
57 # include <sys/fasttrap.h> // PT_SUNWDTRACE_SIZE
58 #endif
59 /* --- !!! --- EXTERNAL HEADERS end --- !!! --- */
60
61
62 #if VG_WORDSIZE == 8
63 #define ESZ(x) Elf64_##x
64 #elif VG_WORDSIZE == 4
65 #define ESZ(x) Elf32_##x
66 #else
67 #error VG_WORDSIZE needs to ==4 or ==8
68 #endif
69
70 struct elfinfo
71 {
72 ESZ(Ehdr) e;
73 ESZ(Phdr) *p;
74 Int fd;
75 };
76
/* Give up if a client mapping request failed.

   RES is the result of the mapping attempt; BASE and LEN are the address
   and length that were requested and are used only in the diagnostic.
   Returns normally when RES is not an error.  Otherwise the failure is
   printed (with an extra hint when the error is EINVAL) and the process
   is terminated via VG_(exit)(1). */
static void check_mmap(SysRes res, Addr base, SizeT len)
{
   if (!sr_isError(res))
      return;

   VG_(printf)("valgrind: mmap(0x%llx, %lld) failed in UME "
               "with error %lu (%s).\n",
               (ULong)base, (Long)len,
               sr_Err(res), VG_(strerror)(sr_Err(res)) );

   if (sr_Err(res) == VKI_EINVAL)
      VG_(printf)("valgrind: this can be caused by executables with "
                  "very large text, data or bss segments.\n");

   VG_(exit)(1);
}
91
92 /*------------------------------------------------------------*/
93 /*--- Loading ELF files ---*/
94 /*------------------------------------------------------------*/
95
96 static
readelf(Int fd,const HChar * filename)97 struct elfinfo *readelf(Int fd, const HChar *filename)
98 {
99 SysRes sres;
100 struct elfinfo *e = VG_(malloc)("ume.re.1", sizeof(*e));
101 Int phsz;
102
103 e->fd = fd;
104
105 sres = VG_(pread)(fd, &e->e, sizeof(e->e), 0);
106 if (sr_isError(sres) || sr_Res(sres) != sizeof(e->e)) {
107 VG_(printf)("valgrind: %s: can't read ELF header: %s\n",
108 filename, VG_(strerror)(sr_Err(sres)));
109 goto bad;
110 }
111
112 if (VG_(memcmp)(&e->e.e_ident[0], ELFMAG, SELFMAG) != 0) {
113 VG_(printf)("valgrind: %s: bad ELF magic number\n", filename);
114 goto bad;
115 }
116 if (e->e.e_ident[EI_CLASS] != VG_ELF_CLASS) {
117 VG_(printf)("valgrind: wrong ELF executable class "
118 "(eg. 32-bit instead of 64-bit)\n");
119 goto bad;
120 }
121 if (e->e.e_ident[EI_DATA] != VG_ELF_DATA2XXX) {
122 VG_(printf)("valgrind: executable has wrong endian-ness\n");
123 goto bad;
124 }
125 if (!(e->e.e_type == ET_EXEC || e->e.e_type == ET_DYN)) {
126 VG_(printf)("valgrind: this is not an executable\n");
127 goto bad;
128 }
129
130 if (e->e.e_machine != VG_ELF_MACHINE) {
131 VG_(printf)("valgrind: executable is not for "
132 "this architecture\n");
133 goto bad;
134 }
135
136 if (e->e.e_phentsize != sizeof(ESZ(Phdr))) {
137 VG_(printf)("valgrind: sizeof ELF Phdr wrong\n");
138 goto bad;
139 }
140
141 phsz = sizeof(ESZ(Phdr)) * e->e.e_phnum;
142 e->p = VG_(malloc)("ume.re.2", phsz);
143
144 sres = VG_(pread)(fd, e->p, phsz, e->e.e_phoff);
145 if (sr_isError(sres) || sr_Res(sres) != phsz) {
146 VG_(printf)("valgrind: can't read phdr: %s\n",
147 VG_(strerror)(sr_Err(sres)));
148 VG_(free)(e->p);
149 goto bad;
150 }
151
152 return e;
153
154 bad:
155 VG_(free)(e);
156 return NULL;
157 }
158
159 /* Map an ELF file. Returns the brk address. */
160 static
mapelf(struct elfinfo * e,ESZ (Addr)base)161 ESZ(Addr) mapelf(struct elfinfo *e, ESZ(Addr) base)
162 {
163 Int i;
164 SysRes res;
165 ESZ(Addr) elfbrk = 0;
166
167 for (i = 0; i < e->e.e_phnum; i++) {
168 ESZ(Phdr) *ph = &e->p[i];
169 ESZ(Addr) addr, brkaddr;
170 ESZ(Word) memsz;
171
172 if (ph->p_type != PT_LOAD)
173 continue;
174
175 addr = ph->p_vaddr+base;
176 memsz = ph->p_memsz;
177 brkaddr = addr+memsz;
178
179 if (brkaddr > elfbrk)
180 elfbrk = brkaddr;
181 }
182
183 for (i = 0; i < e->e.e_phnum; i++) {
184 ESZ(Phdr) *ph = &e->p[i];
185 ESZ(Addr) addr, bss, brkaddr;
186 ESZ(Off) off;
187 ESZ(Word) filesz;
188 ESZ(Word) memsz;
189 unsigned prot = 0;
190
191 if (ph->p_type != PT_LOAD)
192 continue;
193
194 if (ph->p_flags & PF_X) prot |= VKI_PROT_EXEC;
195 if (ph->p_flags & PF_W) prot |= VKI_PROT_WRITE;
196 if (ph->p_flags & PF_R) prot |= VKI_PROT_READ;
197
198 addr = ph->p_vaddr+base;
199 off = ph->p_offset;
200 filesz = ph->p_filesz;
201 bss = addr+filesz;
202 memsz = ph->p_memsz;
203 brkaddr = addr+memsz;
204
205 // Tom says: In the following, do what the Linux kernel does and only
206 // map the pages that are required instead of rounding everything to
207 // the specified alignment (ph->p_align). (AMD64 doesn't work if you
208 // use ph->p_align -- part of stage2's memory gets trashed somehow.)
209 //
210 // The condition handles the case of a zero-length segment.
211 if (VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr) > 0) {
212 if (0) VG_(debugLog)(0,"ume","mmap_file_fixed_client #1\n");
213 res = VG_(am_mmap_file_fixed_client)(
214 VG_PGROUNDDN(addr),
215 VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr),
216 prot, /*VKI_MAP_FIXED|VKI_MAP_PRIVATE, */
217 e->fd, VG_PGROUNDDN(off)
218 );
219 if (0) VG_(am_show_nsegments)(0,"after #1");
220 check_mmap(res, VG_PGROUNDDN(addr),
221 VG_PGROUNDUP(bss)-VG_PGROUNDDN(addr));
222 }
223
224 // if memsz > filesz, fill the remainder with zeroed pages
225 if (memsz > filesz) {
226 UInt bytes;
227
228 bytes = VG_PGROUNDUP(brkaddr)-VG_PGROUNDUP(bss);
229 if (bytes > 0) {
230 if (0) VG_(debugLog)(0,"ume","mmap_anon_fixed_client #2\n");
231 res = VG_(am_mmap_anon_fixed_client)(
232 VG_PGROUNDUP(bss), bytes,
233 prot
234 );
235 if (0) VG_(am_show_nsegments)(0,"after #2");
236 check_mmap(res, VG_PGROUNDUP(bss), bytes);
237 }
238
239 bytes = bss & (VKI_PAGE_SIZE - 1);
240
241 // The 'prot' condition allows for a read-only bss
242 if ((prot & VKI_PROT_WRITE) && (bytes > 0)) {
243 bytes = VKI_PAGE_SIZE - bytes;
244 VG_(memset)((void *)bss, 0, bytes);
245 }
246 }
247 }
248
249 return elfbrk;
250 }
251
VG_(match_ELF)252 Bool VG_(match_ELF)(const void *hdr, SizeT len)
253 {
254 const ESZ(Ehdr) *e = hdr;
255 return (len > sizeof(*e)) && VG_(memcmp)(&e->e_ident[0], ELFMAG, SELFMAG) == 0;
256 }
257
258
259 /* load_ELF pulls an ELF executable into the address space, prepares
260 it for execution, and writes info about it into INFO. In
261 particular it fills in .init_eip, which is the starting point.
262
263 Returns zero on success, non-zero (a VKI_E.. value) on failure.
264
265 The sequence of activities is roughly as follows:
266
267 - use readelf() to extract program header info from the exe file.
268
269 - scan the program header, collecting info (not sure what all those
270 info-> fields are, or whether they are used, but still) and in
271 particular looking out fo the PT_INTERP header, which describes
272 the interpreter. If such a field is found, the space needed to
273 hold the interpreter is computed into interp_size.
274
275 - map the executable in, by calling mapelf(). This maps in all
276 loadable sections, and I _think_ also creates any .bss areas
277 required. mapelf() returns the address just beyond the end of
278 the furthest-along mapping it creates. The executable is mapped
279 starting at EBASE, which is usually read from it (eg, 0x8048000
280 etc) except if it's a PIE, in which case I'm not sure what
281 happens.
282
283 The returned address is recorded in info->brkbase as the start
284 point of the brk (data) segment, as it is traditional to place
285 the data segment just after the executable. Neither load_ELF nor
286 mapelf creates the brk segment, though: that is for the caller of
287 load_ELF to attend to.
288
289 - If the initial phdr scan didn't find any mention of an
290 interpreter (interp == NULL), this must be a statically linked
291 executable, and we're pretty much done.
292
293 - Otherwise, we need to use mapelf() a second time to load the
294 interpreter. The interpreter can go anywhere, but mapelf() wants
295 to be told a specific address to put it at. So an advisory query
296 is passed to aspacem, asking where it would put an anonymous
297 client mapping of size INTERP_SIZE. That address is then used
298 as the mapping address for the interpreter.
299
300 - The entry point in INFO is set to the interpreter's entry point,
301 and we're done. */
VG_(load_ELF)302 Int VG_(load_ELF)(Int fd, const HChar* name, /*MOD*/ExeInfo* info)
303 {
304 SysRes sres;
305 struct elfinfo *e;
306 struct elfinfo *interp = NULL;
307 ESZ(Addr) minaddr = ~0; /* lowest mapped address */
308 ESZ(Addr) maxaddr = 0; /* highest mapped address */
309 ESZ(Addr) interp_addr = 0; /* interpreter (ld.so) address */
310 ESZ(Word) interp_size = 0; /* interpreter size */
311 /* ESZ(Word) interp_align = VKI_PAGE_SIZE; */ /* UNUSED */
312 Int i;
313 void *entry;
314 ESZ(Addr) ebase = 0;
315 # if defined(VGO_solaris)
316 ESZ(Addr) thrptr_addr = 0;
317 # endif
318
319 # if defined(HAVE_PIE)
320 ebase = info->exe_base;
321 # endif
322
323 e = readelf(fd, name);
324
325 if (e == NULL)
326 return VKI_ENOEXEC;
327
328 /* The kernel maps position-independent executables at TASK_SIZE*2/3;
329 duplicate this behavior as close as we can. */
330 if (e->e.e_type == ET_DYN && ebase == 0) {
331 ebase = VG_PGROUNDDN(info->exe_base
332 + (info->exe_end - info->exe_base) * 2 / 3);
333 /* We really don't want to load PIEs at zero or too close. It
334 works, but it's unrobust (NULL pointer reads and writes
335 become legit, which is really bad) and causes problems for
336 exp-ptrcheck, which assumes all numbers below 1MB are
337 nonpointers. So, hackily, move it above 1MB. */
338 /* Later .. it appears ppc32-linux tries to put [vdso] at 1MB,
339 which totally screws things up, because nothing else can go
340 there. The size of [vdso] is around 2 or 3 pages, so bump
341 the hacky load addess along by 8 * VKI_PAGE_SIZE to be safe. */
342 /* Later .. on mips64 we can't use 0x108000, because mapelf will
343 fail. */
344 # if defined(VGP_mips64_linux)
345 if (ebase < 0x100000)
346 ebase = 0x100000;
347 # else
348 vg_assert(VKI_PAGE_SIZE >= 4096); /* stay sane */
349 ESZ(Addr) hacky_load_address = 0x100000 + 8 * VKI_PAGE_SIZE;
350 if (ebase < hacky_load_address)
351 ebase = hacky_load_address;
352 # endif
353
354 # if defined(VGO_solaris)
355 /* Record for later use in AT_BASE. */
356 info->interp_offset = ebase;
357 # endif
358 }
359
360 info->phnum = e->e.e_phnum;
361 info->entry = e->e.e_entry + ebase;
362 info->phdr = 0;
363 info->stack_prot = VKI_PROT_READ|VKI_PROT_WRITE|VKI_PROT_EXEC;
364
365 for (i = 0; i < e->e.e_phnum; i++) {
366 ESZ(Phdr) *ph = &e->p[i];
367
368 switch(ph->p_type) {
369 case PT_PHDR:
370 info->phdr = ph->p_vaddr + ebase;
371 # if defined(VGO_solaris)
372 info->real_phdr_present = True;
373 # endif
374 break;
375
376 case PT_LOAD:
377 if (ph->p_vaddr < minaddr)
378 minaddr = ph->p_vaddr;
379 if (ph->p_vaddr+ph->p_memsz > maxaddr)
380 maxaddr = ph->p_vaddr+ph->p_memsz;
381 break;
382
383 # if defined(VGO_solaris)
384 case PT_SUNWDTRACE:
385 if (ph->p_memsz < PT_SUNWDTRACE_SIZE ||
386 (ph->p_flags & (PF_R | PF_W | PF_X)) != (PF_R | PF_W | PF_X)) {
387 VG_(printf)("valgrind: m_ume.c: too small SUNWDTRACE size\n");
388 return VKI_ENOEXEC;
389 }
390
391 info->init_thrptr = ph->p_vaddr + ebase;
392 break;
393 # endif
394
395 case PT_INTERP: {
396 HChar *buf = VG_(malloc)("ume.LE.1", ph->p_filesz+1);
397 Int j;
398 Int intfd;
399 Int baseaddr_set;
400
401 VG_(pread)(fd, buf, ph->p_filesz, ph->p_offset);
402 buf[ph->p_filesz] = '\0';
403
404 sres = VG_(open)(buf, VKI_O_RDONLY, 0);
405 if (sr_isError(sres)) {
406 VG_(printf)("valgrind: m_ume.c: can't open interpreter\n");
407 VG_(exit)(1);
408 }
409 intfd = sr_Res(sres);
410
411 interp = readelf(intfd, buf);
412 if (interp == NULL) {
413 VG_(printf)("valgrind: m_ume.c: can't read interpreter\n");
414 return 1;
415 }
416 VG_(free)(buf);
417
418 baseaddr_set = 0;
419 for (j = 0; j < interp->e.e_phnum; j++) {
420 ESZ(Phdr) *iph = &interp->p[j];
421 ESZ(Addr) end;
422
423 # if defined(VGO_solaris)
424 if (iph->p_type == PT_SUNWDTRACE) {
425 if (iph->p_memsz < PT_SUNWDTRACE_SIZE ||
426 (iph->p_flags & (PF_R | PF_W | PF_X))
427 != (PF_R | PF_W | PF_X)) {
428 VG_(printf)("valgrind: m_ume.c: too small SUNWDTRACE size\n");
429 return VKI_ENOEXEC;
430 }
431
432 /* Store the thrptr value into a temporary because we do not
433 know yet where the interpreter is mapped. */
434 thrptr_addr = iph->p_vaddr;
435 }
436 # endif
437
438 if (iph->p_type != PT_LOAD || iph->p_memsz == 0)
439 continue;
440
441 if (!baseaddr_set) {
442 interp_addr = iph->p_vaddr;
443 /* interp_align = iph->p_align; */ /* UNUSED */
444 baseaddr_set = 1;
445 }
446
447 /* assumes that all segments in the interp are close */
448 end = (iph->p_vaddr - interp_addr) + iph->p_memsz;
449
450 if (end > interp_size)
451 interp_size = end;
452 }
453 break;
454 }
455
456 # if defined(PT_GNU_STACK) || defined(PT_SUNWSTACK)
457 # if defined(PT_GNU_STACK)
458 /* Android's elf.h doesn't appear to have PT_GNU_STACK. */
459 case PT_GNU_STACK:
460 # endif
461 # if defined(PT_SUNWSTACK)
462 /* Solaris-specific program header. */
463 case PT_SUNWSTACK:
464 # endif
465 if ((ph->p_flags & PF_X) == 0) info->stack_prot &= ~VKI_PROT_EXEC;
466 if ((ph->p_flags & PF_W) == 0) info->stack_prot &= ~VKI_PROT_WRITE;
467 if ((ph->p_flags & PF_R) == 0) info->stack_prot &= ~VKI_PROT_READ;
468 break;
469 # endif
470
471 # if defined(PT_SUNW_SYSSTAT)
472 /* Solaris-specific program header which requires link-time support. */
473 case PT_SUNW_SYSSTAT:
474 VG_(unimplemented)("Support for program header PT_SUNW_SYSSTAT.");
475 break;
476 # endif
477 # if defined(PT_SUNW_SYSSTAT_ZONE)
478 /* Solaris-specific program header which requires link-time support. */
479 case PT_SUNW_SYSSTAT_ZONE:
480 VG_(unimplemented)("Support for program header PT_SUNW_SYSSTAT_ZONE.");
481 break;
482 # endif
483
484 default:
485 // do nothing
486 break;
487 }
488 }
489
490 if (info->phdr == 0)
491 info->phdr = minaddr + ebase + e->e.e_phoff;
492
493 if (info->exe_base != info->exe_end) {
494 if (minaddr >= maxaddr ||
495 (minaddr + ebase < info->exe_base ||
496 maxaddr + ebase > info->exe_end)) {
497 VG_(printf)("Executable range %p-%p is outside the\n"
498 "acceptable range %p-%p\n",
499 (char *)minaddr + ebase, (char *)maxaddr + ebase,
500 (char *)info->exe_base, (char *)info->exe_end);
501 return VKI_ENOMEM;
502 }
503 }
504
505 info->brkbase = mapelf(e, ebase); /* map the executable */
506
507 if (info->brkbase == 0)
508 return VKI_ENOMEM;
509
510 if (interp != NULL) {
511 /* reserve a chunk of address space for interpreter */
512 MapRequest mreq;
513 Addr advised;
514 Bool ok;
515
516 /* Don't actually reserve the space. Just get an advisory
517 indicating where it would be allocated, and pass that to
518 mapelf(), which in turn asks aspacem to do some fixed maps at
519 the specified address. This is a bit of hack, but it should
520 work because there should be no intervening transactions with
521 aspacem which could cause those fixed maps to fail.
522
523 Placement policy is:
524
525 if the interpreter asks to be loaded at zero
526 ignore that and put it wherever we like (mappings at zero
527 are bad news)
528 else
529 try and put it where it asks for, but if that doesn't work,
530 just put it anywhere.
531 */
532 if (interp_addr == 0) {
533 mreq.rkind = MAny;
534 mreq.start = 0;
535 mreq.len = interp_size;
536 } else {
537 mreq.rkind = MHint;
538 mreq.start = interp_addr;
539 mreq.len = interp_size;
540 }
541
542 advised = VG_(am_get_advisory)( &mreq, True/*client*/, &ok );
543
544 if (!ok) {
545 /* bomb out */
546 SysRes res = VG_(mk_SysRes_Error)(VKI_EINVAL);
547 if (0) VG_(printf)("reserve for interp: failed\n");
548 check_mmap(res, (Addr)interp_addr, interp_size);
549 /*NOTREACHED*/
550 }
551
552 (void)mapelf(interp, (ESZ(Addr))advised - interp_addr);
553
554 VG_(close)(interp->fd);
555
556 entry = (void *)(advised - interp_addr + interp->e.e_entry);
557
558 info->interp_offset = advised - interp_addr;
559 # if defined(VGO_solaris)
560 if (thrptr_addr)
561 info->init_thrptr = thrptr_addr + info->interp_offset;
562 # endif
563
564 VG_(free)(interp->p);
565 VG_(free)(interp);
566 } else {
567 entry = (void *)(ebase + e->e.e_entry);
568
569 # if defined(VGO_solaris)
570 if (e->e.e_type == ET_DYN)
571 info->ldsoexec = True;
572 # endif
573 }
574
575 info->exe_base = minaddr + ebase;
576 info->exe_end = maxaddr + ebase;
577
578 #if defined(VGP_ppc64be_linux)
579 /* On PPC64BE, ELF ver 1, a func ptr is represented by a TOC entry ptr.
580 This TOC entry contains three words; the first word is the function
581 address, the second word is the TOC ptr (r2), and the third word
582 is the static chain value. */
583 info->init_ip = ((ULong*)entry)[0];
584 info->init_toc = ((ULong*)entry)[1];
585 info->init_ip += info->interp_offset;
586 info->init_toc += info->interp_offset;
587 #elif defined(VGP_ppc64le_linux)
588 /* On PPC64LE, ELF ver 2. API doesn't use a func ptr */
589 info->init_ip = (Addr)entry;
590 info->init_toc = 0; /* meaningless on this platform */
591 #else
592 info->init_ip = (Addr)entry;
593 info->init_toc = 0; /* meaningless on this platform */
594 #endif
595 VG_(free)(e->p);
596 VG_(free)(e);
597
598 return 0;
599 }
600
601 #endif // defined(VGO_linux) || defined(VGO_solaris)
602
603 /*--------------------------------------------------------------------*/
604 /*--- end ---*/
605 /*--------------------------------------------------------------------*/
606